diff --git a/.clang-tidy b/.clang-tidy index 9cece0de812b8..06bb0f18e9d2e 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -1,4 +1,16 @@ -Checks: '-*,clang-diagnostic-*,llvm-*,misc-*,-misc-const-correctness,-misc-unused-parameters,-misc-non-private-member-variables-in-classes,-misc-no-recursion,-misc-use-anonymous-namespace,readability-identifier-naming,-misc-include-cleaner' +Checks: > + -*, + clang-diagnostic-*, + llvm-*, + misc-*, + -misc-const-correctness, + -misc-include-cleaner, + -misc-no-recursion, + -misc-non-private-member-variables-in-classes, + -misc-unused-parameters, + -misc-use-anonymous-namespace, + readability-identifier-naming + CheckOptions: - key: readability-identifier-naming.ClassCase value: CamelCase diff --git a/.github/workflows/containers/github-action-ci/Dockerfile b/.github/workflows/containers/github-action-ci/Dockerfile index 197bf083a20c0..efe08ebc221c5 100644 --- a/.github/workflows/containers/github-action-ci/Dockerfile +++ b/.github/workflows/containers/github-action-ci/Dockerfile @@ -2,7 +2,7 @@ FROM docker.io/library/ubuntu:24.04 as base ENV LLVM_SYSROOT=/opt/llvm FROM base as stage1-toolchain -ENV LLVM_VERSION=20.1.4 +ENV LLVM_VERSION=20.1.8 RUN apt-get update && \ apt-get install -y \ diff --git a/.github/workflows/libcxx-build-and-test.yaml b/.github/workflows/libcxx-build-and-test.yaml index f0bdf6c0b5899..ec937de02ca1a 100644 --- a/.github/workflows/libcxx-build-and-test.yaml +++ b/.github/workflows/libcxx-build-and-test.yaml @@ -36,8 +36,7 @@ concurrency: jobs: stage1: if: github.repository_owner == 'llvm' - runs-on: libcxx-self-hosted-linux - container: ghcr.io/llvm/libcxx-linux-builder:b060022103f551d8ca1dad84122ef73927c86512 + runs-on: llvm-premerge-libcxx-runners continue-on-error: false strategy: fail-fast: false @@ -74,8 +73,7 @@ jobs: **/crash_diagnostics/* stage2: if: github.repository_owner == 'llvm' - runs-on: libcxx-self-hosted-linux - container: 
ghcr.io/llvm/libcxx-linux-builder:2b57ebb50b6d418e70382e655feaa619b558e254 + runs-on: llvm-premerge-libcxx-runners needs: [ stage1 ] continue-on-error: false strategy: @@ -149,21 +147,20 @@ jobs: 'generic-static', 'bootstrapping-build' ] - machine: [ 'libcxx-self-hosted-linux' ] + machine: [ 'llvm-premerge-libcxx-runners' ] include: - config: 'generic-cxx26' - machine: libcxx-self-hosted-linux + machine: llvm-premerge-libcxx-runners - config: 'generic-asan' - machine: libcxx-self-hosted-linux + machine: llvm-premerge-libcxx-runners - config: 'generic-tsan' - machine: libcxx-self-hosted-linux + machine: llvm-premerge-libcxx-runners - config: 'generic-ubsan' - machine: libcxx-self-hosted-linux + machine: llvm-premerge-libcxx-runners # Use a larger machine for MSAN to avoid timeout and memory allocation issues. - config: 'generic-msan' - machine: libcxx-self-hosted-linux + machine: llvm-premerge-libcxx-runners runs-on: ${{ matrix.machine }} - container: ghcr.io/llvm/libcxx-linux-builder:2b57ebb50b6d418e70382e655feaa619b558e254 steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: ${{ matrix.config }} diff --git a/.github/workflows/libcxx-restart-preempted-jobs.yaml b/.github/workflows/libcxx-restart-preempted-jobs.yaml deleted file mode 100644 index accb84efb5c90..0000000000000 --- a/.github/workflows/libcxx-restart-preempted-jobs.yaml +++ /dev/null @@ -1,158 +0,0 @@ -name: Restart Preempted Libc++ Workflow - -# The libc++ builders run on preemptable VMs, which can be shutdown at any time. -# This workflow identifies when a workflow run was canceled due to the VM being preempted, -# and restarts the workflow run. - -# We identify a canceled workflow run by checking the annotations of the check runs in the check suite, -# which should contain the message "The runner has received a shutdown signal." - -# Note: If a job is both preempted and also contains a non-preemption failure, we do not restart the workflow. 
- -on: - workflow_run: - workflows: [Build and Test libc\+\+] - types: - - completed - -permissions: - contents: read - -jobs: - restart: - if: github.repository_owner == 'llvm' && (github.event.workflow_run.conclusion == 'failure') - name: "Restart Job" - permissions: - statuses: read - checks: write - actions: write - runs-on: ubuntu-24.04 - steps: - - name: "Restart Job" - uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea #v7.0.1 - with: - script: | - // The "The run was canceled by" message comes from a user manually canceling a workflow - // the "higher priority" message comes from github canceling a workflow because the user updated the change. - // And the "exit code 1" message indicates a genuine failure. - const failure_regex = /(Process completed with exit code 1.)/ - const preemption_regex = /(The runner has received a shutdown signal)|(The operation was canceled)/ - - const wf_run = context.payload.workflow_run - core.notice(`Running on "${wf_run.display_title}" by @${wf_run.actor.login} (event: ${wf_run.event})\nWorkflow run URL: ${wf_run.html_url}`) - - - async function create_check_run(conclusion, message) { - // Create a check run on the given workflow run to indicate if - // we are restarting the workflow or not. 
- if (conclusion != 'success' && conclusion != 'skipped' && conclusion != 'neutral') { - core.setFailed('Invalid conclusion: ' + conclusion) - } - await github.rest.checks.create({ - owner: context.repo.owner, - repo: context.repo.repo, - name: 'Restart Preempted Job', - head_sha: wf_run.head_sha, - status: 'completed', - conclusion: conclusion, - output: { - title: 'Restarted Preempted Job', - summary: message - } - }) - } - - console.log('Listing check runs for suite') - const check_suites = await github.rest.checks.listForSuite({ - owner: context.repo.owner, - repo: context.repo.repo, - check_suite_id: context.payload.workflow_run.check_suite_id, - per_page: 100 // FIXME: We don't have 100 check runs yet, but we should handle this better. - }) - - check_run_ids = []; - for (check_run of check_suites.data.check_runs) { - console.log('Checking check run: ' + check_run.id); - if (check_run.status != 'completed') { - console.log('Check run was not completed. Skipping.'); - continue; - } - if (check_run.conclusion != 'failure') { - console.log('Check run had conclusion: ' + check_run.conclusion + '. Skipping.'); - continue; - } - check_run_ids.push(check_run.id); - } - - has_preempted_job = false; - - for (check_run_id of check_run_ids) { - console.log('Listing annotations for check run: ' + check_run_id); - - annotations = await github.rest.checks.listAnnotations({ - owner: context.repo.owner, - repo: context.repo.repo, - check_run_id: check_run_id - }) - - // For temporary debugging purposes to see the structure of the annotations. 
- console.log(annotations); - - has_failed_job = false; - saved_failure_message = null; - - for (annotation of annotations.data) { - if (annotation.annotation_level != 'failure') { - continue; - } - - const preemption_match = annotation.message.match(preemption_regex); - - if (preemption_match != null) { - console.log('Found preemption message: ' + annotation.message); - has_preempted_job = true; - } - - const failure_match = annotation.message.match(failure_regex); - if (failure_match != null) { - has_failed_job = true; - saved_failure_message = annotation.message; - } - } - if (has_failed_job && (! has_preempted_job)) { - // We only want to restart the workflow if all of the failures were due to preemption. - // We don't want to restart the workflow if there were other failures. - // - // However, libcxx runners running inside docker containers produce both a preemption message and failure message. - // - // The desired approach is to ignore failure messages which appear on the same job as a preemption message - // (An job is a single run with a specific configuration, ex generic-gcc, gcc-14). - // - // However, it's unclear that this code achieves the desired approach, and it may ignore all failures - // if a preemption message is found at all on any run. - // - // For now, it's more important to restart preempted workflows than to avoid restarting workflows with - // non-preemption failures. - // - // TODO Figure this out. - core.notice('Choosing not to rerun workflow because we found a non-preemption failure' + - 'Failure message: "' + saved_failure_message + '"'); - await create_check_run('skipped', 'Choosing not to rerun workflow because we found a non-preemption failure\n' - + 'Failure message: ' + saved_failure_message) - return; - } - } - - if (!has_preempted_job) { - core.notice('No preempted jobs found. Not restarting workflow.'); - await create_check_run('neutral', 'No preempted jobs found. 
Not restarting workflow.') - return; - } - - core.notice("Restarted workflow: " + context.payload.workflow_run.id); - await github.rest.actions.reRunWorkflowFailedJobs({ - owner: context.repo.owner, - repo: context.repo.repo, - run_id: context.payload.workflow_run.id - }) - await create_check_run('success', 'Restarted workflow run due to preempted job') diff --git a/.github/workflows/pr-code-format.yml b/.github/workflows/pr-code-format.yml index 05d69861e1841..70bcaafbd0cf3 100644 --- a/.github/workflows/pr-code-format.yml +++ b/.github/workflows/pr-code-format.yml @@ -55,7 +55,7 @@ jobs: - name: Install clang-format uses: aminya/setup-cpp@17c11551771948abc5752bbf3183482567c7caf0 # v1.1.1 with: - clangformat: 20.1.5 + clangformat: 20.1.8 - name: Setup Python env uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0 diff --git a/.github/workflows/premerge.yaml b/.github/workflows/premerge.yaml index 3b7751629564d..f7a48304b82b0 100644 --- a/.github/workflows/premerge.yaml +++ b/.github/workflows/premerge.yaml @@ -63,6 +63,7 @@ jobs: ./.ci/monolithic-linux.sh "${projects_to_build}" "${project_check_targets}" "${runtimes_to_build}" "${runtimes_check_targets}" "${runtimes_check_targets_needs_reconfig}" - name: Upload Artifacts + if: '!cancelled()' uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0 with: name: Premerge Artifacts (Linux) @@ -113,6 +114,7 @@ jobs: call C:\\BuildTools\\Common7\\Tools\\VsDevCmd.bat -arch=amd64 -host_arch=amd64 bash .ci/monolithic-windows.sh "${{ steps.vars.outputs.windows-projects }}" "${{ steps.vars.outputs.windows-check-targets }}" - name: Upload Artifacts + if: '!cancelled()' uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0 with: name: Premerge Artifacts (Windows) diff --git a/clang-tools-extra/clang-doc/BitcodeReader.cpp b/clang-tools-extra/clang-doc/BitcodeReader.cpp index f756ae6d897c8..dce34a8434ff8 100644 --- 
a/clang-tools-extra/clang-doc/BitcodeReader.cpp +++ b/clang-tools-extra/clang-doc/BitcodeReader.cpp @@ -180,6 +180,8 @@ static llvm::Error parseRecord(const Record &R, unsigned ID, return decodeRecord(R, I->TagType, Blob); case RECORD_IS_TYPE_DEF: return decodeRecord(R, I->IsTypeDef, Blob); + case RECORD_MANGLED_NAME: + return decodeRecord(R, I->MangledName, Blob); default: return llvm::createStringError(llvm::inconvertibleErrorCode(), "invalid field for RecordInfo"); diff --git a/clang-tools-extra/clang-doc/BitcodeWriter.cpp b/clang-tools-extra/clang-doc/BitcodeWriter.cpp index 3cc0d4ad332f0..eed23726e17bf 100644 --- a/clang-tools-extra/clang-doc/BitcodeWriter.cpp +++ b/clang-tools-extra/clang-doc/BitcodeWriter.cpp @@ -189,6 +189,7 @@ static const llvm::IndexedMap {RECORD_LOCATION, {"Location", &genLocationAbbrev}}, {RECORD_TAG_TYPE, {"TagType", &genIntAbbrev}}, {RECORD_IS_TYPE_DEF, {"IsTypeDef", &genBoolAbbrev}}, + {RECORD_MANGLED_NAME, {"MangledName", &genStringAbbrev}}, {BASE_RECORD_USR, {"USR", &genSymbolIdAbbrev}}, {BASE_RECORD_NAME, {"Name", &genStringAbbrev}}, {BASE_RECORD_PATH, {"Path", &genStringAbbrev}}, @@ -271,7 +272,8 @@ static const std::vector>> // Record Block {BI_RECORD_BLOCK_ID, {RECORD_USR, RECORD_NAME, RECORD_PATH, RECORD_DEFLOCATION, - RECORD_LOCATION, RECORD_TAG_TYPE, RECORD_IS_TYPE_DEF}}, + RECORD_LOCATION, RECORD_TAG_TYPE, RECORD_IS_TYPE_DEF, + RECORD_MANGLED_NAME}}, // BaseRecord Block {BI_BASE_RECORD_BLOCK_ID, {BASE_RECORD_USR, BASE_RECORD_NAME, BASE_RECORD_PATH, @@ -616,6 +618,7 @@ void ClangDocBitcodeWriter::emitBlock(const RecordInfo &I) { emitRecord(I.USR, RECORD_USR); emitRecord(I.Name, RECORD_NAME); emitRecord(I.Path, RECORD_PATH); + emitRecord(I.MangledName, RECORD_MANGLED_NAME); for (const auto &N : I.Namespace) emitBlock(N, FieldId::F_namespace); for (const auto &CI : I.Description) diff --git a/clang-tools-extra/clang-doc/BitcodeWriter.h b/clang-tools-extra/clang-doc/BitcodeWriter.h index d09ec4ca34006..501af12582a8e 100644 --- 
a/clang-tools-extra/clang-doc/BitcodeWriter.h +++ b/clang-tools-extra/clang-doc/BitcodeWriter.h @@ -126,6 +126,7 @@ enum RecordId { RECORD_LOCATION, RECORD_TAG_TYPE, RECORD_IS_TYPE_DEF, + RECORD_MANGLED_NAME, BASE_RECORD_USR, BASE_RECORD_NAME, BASE_RECORD_PATH, diff --git a/clang-tools-extra/clang-doc/JSONGenerator.cpp b/clang-tools-extra/clang-doc/JSONGenerator.cpp index 0e1a0cc347e45..6fdc7196e9095 100644 --- a/clang-tools-extra/clang-doc/JSONGenerator.cpp +++ b/clang-tools-extra/clang-doc/JSONGenerator.cpp @@ -386,6 +386,7 @@ static void serializeInfo(const RecordInfo &I, json::Object &Obj, Obj["FullName"] = I.FullName; Obj["TagType"] = getTagType(I.TagType); Obj["IsTypedef"] = I.IsTypeDef; + Obj["MangledName"] = I.MangledName; if (!I.Children.Functions.empty()) { json::Value PubFunctionsArray = Array(); @@ -491,6 +492,23 @@ static void serializeInfo(const NamespaceInfo &I, json::Object &Obj, serializeCommonChildren(I.Children, Obj, RepositoryUrl); } +static SmallString<16> determineFileName(Info *I, SmallString<128> &Path) { + SmallString<16> FileName; + if (I->IT == InfoType::IT_record) { + auto *RecordSymbolInfo = static_cast(I); + if (RecordSymbolInfo->MangledName.size() < 255) + FileName = RecordSymbolInfo->MangledName; + else + FileName = toStringRef(toHex(RecordSymbolInfo->USR)); + } else if (I->IT == InfoType::IT_namespace && I->Name != "") + // Serialize the global namespace as index.json + FileName = I->Name; + else + FileName = I->getFileBaseName(); + sys::path::append(Path, FileName + ".json"); + return FileName; +} + Error JSONGenerator::generateDocs( StringRef RootDir, llvm::StringMap> Infos, const ClangDocContext &CDCtx) { @@ -501,7 +519,6 @@ Error JSONGenerator::generateDocs( SmallString<128> Path; sys::path::native(RootDir, Path); - sys::path::append(Path, Info->getRelativeFilePath("")); if (!CreatedDirs.contains(Path)) { if (std::error_code Err = sys::fs::create_directories(Path); Err != std::error_code()) @@ -509,7 +526,7 @@ Error 
JSONGenerator::generateDocs( CreatedDirs.insert(Path); } - sys::path::append(Path, Info->getFileBaseName() + ".json"); + SmallString<16> FileName = determineFileName(Info, Path); FileToInfos[Path].push_back(Info); } diff --git a/clang-tools-extra/clang-doc/Representation.cpp b/clang-tools-extra/clang-doc/Representation.cpp index 422a76d99e5b3..beaf314a04ae1 100644 --- a/clang-tools-extra/clang-doc/Representation.cpp +++ b/clang-tools-extra/clang-doc/Representation.cpp @@ -290,6 +290,8 @@ void SymbolInfo::merge(SymbolInfo &&Other) { auto *Last = llvm::unique(Loc); Loc.erase(Last, Loc.end()); mergeBase(std::move(Other)); + if (MangledName.empty()) + MangledName = std::move(Other.MangledName); } NamespaceInfo::NamespaceInfo(SymbolID USR, StringRef Name, StringRef Path) diff --git a/clang-tools-extra/clang-doc/Representation.h b/clang-tools-extra/clang-doc/Representation.h index fe5cc48069d58..23f0e90daa27f 100644 --- a/clang-tools-extra/clang-doc/Representation.h +++ b/clang-tools-extra/clang-doc/Representation.h @@ -377,6 +377,7 @@ struct SymbolInfo : public Info { std::optional DefLoc; // Location where this decl is defined. llvm::SmallVector Loc; // Locations where this decl is declared. 
+ SmallString<16> MangledName; bool IsStatic = false; }; diff --git a/clang-tools-extra/clang-doc/Serialize.cpp b/clang-tools-extra/clang-doc/Serialize.cpp index 6cc372ce98a6d..7a0e00c6d9c2d 100644 --- a/clang-tools-extra/clang-doc/Serialize.cpp +++ b/clang-tools-extra/clang-doc/Serialize.cpp @@ -12,6 +12,7 @@ #include "clang/AST/Attr.h" #include "clang/AST/Comment.h" #include "clang/AST/DeclFriend.h" +#include "clang/AST/Mangle.h" #include "clang/Index/USRGeneration.h" #include "clang/Lex/Lexer.h" #include "llvm/ADT/StringExtras.h" @@ -767,6 +768,17 @@ static void populateSymbolInfo(SymbolInfo &I, const T *D, const FullComment *C, I.DefLoc = Loc; else I.Loc.emplace_back(Loc); + + auto *Mangler = ItaniumMangleContext::create( + D->getASTContext(), D->getASTContext().getDiagnostics()); + std::string MangledName; + llvm::raw_string_ostream MangledStream(MangledName); + if (auto *CXXD = dyn_cast(D)) + Mangler->mangleCXXVTable(CXXD, MangledStream); + else + MangledStream << D->getNameAsString(); + I.MangledName = MangledName; + delete Mangler; } static void diff --git a/clang-tools-extra/clang-tidy/.clang-tidy b/clang-tools-extra/clang-tidy/.clang-tidy new file mode 100644 index 0000000000000..2443c979621da --- /dev/null +++ b/clang-tools-extra/clang-tidy/.clang-tidy @@ -0,0 +1,41 @@ +InheritParentConfig: true +Checks: > + bugprone-*, + -bugprone-assignment-in-if-condition, + -bugprone-branch-clone, + -bugprone-easily-swappable-parameters, + -bugprone-narrowing-conversions, + -bugprone-suspicious-stringview-data-usage, + -bugprone-unchecked-optional-access, + -bugprone-unused-return-value, + modernize-*, + -modernize-avoid-c-arrays, + -modernize-pass-by-value, + -modernize-use-auto, + -modernize-use-nodiscard, + -modernize-use-trailing-return-type, + performance-*, + -performance-enum-size, + -performance-move-const-arg, + -performance-no-int-to-ptr, + -performance-type-promotion-in-math-fn, + -performance-unnecessary-value-param, + readability-*, + 
-readability-avoid-nested-conditional-operator, + -readability-avoid-return-with-void-value, + -readability-braces-around-statements, + -readability-container-contains, + -readability-convert-member-functions-to-static, + -readability-else-after-return, + -readability-function-cognitive-complexity, + -readability-identifier-length, + -readability-implicit-bool-conversion, + -readability-isolate-declaration, + -readability-magic-numbers, + -readability-named-parameter, + -readability-qualified-auto, + -readability-redundant-declaration, + -readability-simplify-boolean-expr, + -readability-static-definition-in-anonymous-namespace, + -readability-suspicious-call-argument, + -readability-use-anyofallof diff --git a/clang-tools-extra/clang-tidy/boost/UseRangesCheck.h b/clang-tools-extra/clang-tidy/boost/UseRangesCheck.h index b081c4c479b92..d91e6393a0e85 100644 --- a/clang-tools-extra/clang-tidy/boost/UseRangesCheck.h +++ b/clang-tools-extra/clang-tidy/boost/UseRangesCheck.h @@ -22,7 +22,7 @@ class UseRangesCheck : public utils::UseRangesCheck { public: UseRangesCheck(StringRef Name, ClangTidyContext *Context); - void storeOptions(ClangTidyOptions::OptionMap &Options) override; + void storeOptions(ClangTidyOptions::OptionMap &Opts) override; ReplacerMap getReplacerMap() const override; diff --git a/clang-tools-extra/clang-tidy/bugprone/ArgumentCommentCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/ArgumentCommentCheck.cpp index 8cdd5d0a56467..b843e317c471d 100644 --- a/clang-tools-extra/clang-tidy/bugprone/ArgumentCommentCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/ArgumentCommentCheck.cpp @@ -145,7 +145,7 @@ static bool isLikelyTypo(llvm::ArrayRef Params, std::string ArgNameLowerStr = ArgName.lower(); StringRef ArgNameLower = ArgNameLowerStr; // The threshold is arbitrary. 
- unsigned UpperBound = (ArgName.size() + 2) / 3 + 1; + unsigned UpperBound = ((ArgName.size() + 2) / 3) + 1; unsigned ThisED = ArgNameLower.edit_distance( Params[ArgIndex]->getIdentifier()->getName().lower(), /*AllowReplacements=*/true, UpperBound); diff --git a/clang-tools-extra/clang-tidy/bugprone/CrtpConstructorAccessibilityCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/CrtpConstructorAccessibilityCheck.cpp index 28e8fe002d575..6565fa3f7c85b 100644 --- a/clang-tools-extra/clang-tidy/bugprone/CrtpConstructorAccessibilityCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/CrtpConstructorAccessibilityCheck.cpp @@ -129,13 +129,10 @@ void CrtpConstructorAccessibilityCheck::check( << HintFriend; } - auto WithFriendHintIfNeeded = - [&](const DiagnosticBuilder &Diag, - bool NeedsFriend) -> const DiagnosticBuilder & { + auto WithFriendHintIfNeeded = [&](const DiagnosticBuilder &Diag, + bool NeedsFriend) { if (NeedsFriend) Diag << HintFriend; - - return Diag; }; if (!CRTPDeclaration->hasUserDeclaredConstructor()) { diff --git a/clang-tools-extra/clang-tidy/bugprone/InfiniteLoopCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/InfiniteLoopCheck.cpp index 07116a7ff15f5..3c3024d538785 100644 --- a/clang-tools-extra/clang-tidy/bugprone/InfiniteLoopCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/InfiniteLoopCheck.cpp @@ -18,8 +18,7 @@ using namespace clang::ast_matchers; using clang::ast_matchers::internal::Matcher; using clang::tidy::utils::hasPtrOrReferenceInFunc; -namespace clang { -namespace tidy::bugprone { +namespace clang::tidy::bugprone { namespace { /// matches a Decl if it has a "no return" attribute of any kind @@ -327,5 +326,4 @@ void InfiniteLoopCheck::check(const MatchFinder::MatchResult &Result) { } } -} // namespace tidy::bugprone -} // namespace clang +} // namespace clang::tidy::bugprone diff --git a/clang-tools-extra/clang-tidy/bugprone/MacroRepeatedSideEffectsCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/MacroRepeatedSideEffectsCheck.cpp 
index 01276af6c7d8f..879040177079a 100644 --- a/clang-tools-extra/clang-tidy/bugprone/MacroRepeatedSideEffectsCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/MacroRepeatedSideEffectsCheck.cpp @@ -153,8 +153,7 @@ unsigned MacroRepeatedPPCallbacks::countArgumentExpansions( // Count argument. if (TII == Arg) { Current++; - if (Current > Max) - Max = Current; + Max = std::max(Max, Current); } } return Max; diff --git a/clang-tools-extra/clang-tidy/bugprone/SizeofExpressionCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/SizeofExpressionCheck.cpp index 88d2f2c388d07..88e048e65d4e8 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SizeofExpressionCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/SizeofExpressionCheck.cpp @@ -370,16 +370,16 @@ void SizeofExpressionCheck::check(const MatchFinder::MatchResult &Result) { << E->getSourceRange(); } else if (Result.Nodes.getNodeAs("loop-expr")) { auto *SizeofArgTy = Result.Nodes.getNodeAs("sizeof-arg-type"); - if (const auto member = dyn_cast(SizeofArgTy)) - SizeofArgTy = member->getPointeeType().getTypePtr(); + if (const auto *Member = dyn_cast(SizeofArgTy)) + SizeofArgTy = Member->getPointeeType().getTypePtr(); const auto *SzOfExpr = Result.Nodes.getNodeAs("sizeof-expr"); - if (const auto type = dyn_cast(SizeofArgTy)) { + if (const auto *Type = dyn_cast(SizeofArgTy)) { // check if the array element size is larger than one. 
If true, // the size of the array is higher than the number of elements - CharUnits sSize = Ctx.getTypeSizeInChars(type->getElementType()); - if (!sSize.isOne()) { + CharUnits SSize = Ctx.getTypeSizeInChars(Type->getElementType()); + if (!SSize.isOne()) { diag(SzOfExpr->getBeginLoc(), "suspicious usage of 'sizeof' in the loop") << SzOfExpr->getSourceRange(); diff --git a/clang-tools-extra/clang-tidy/bugprone/UnsafeFunctionsCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/UnsafeFunctionsCheck.cpp index a45949314a4ca..0f2c18ae02663 100644 --- a/clang-tools-extra/clang-tidy/bugprone/UnsafeFunctionsCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/UnsafeFunctionsCheck.cpp @@ -248,7 +248,7 @@ void UnsafeFunctionsCheck::registerMatchers(MatchFinder *Finder) { FunctionNames.reserve(CustomFunctions.size()); for (const auto &Entry : CustomFunctions) - FunctionNames.push_back(Entry.Name); + FunctionNames.emplace_back(Entry.Name); auto CustomFunctionsMatcher = matchers::matchesAnyListedName(FunctionNames); diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/MissingStdForwardCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/MissingStdForwardCheck.cpp index 268b51f76a2c3..82fd3316b942a 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/MissingStdForwardCheck.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/MissingStdForwardCheck.cpp @@ -81,7 +81,7 @@ AST_MATCHER_P(LambdaExpr, hasCaptureDefaultKind, LambdaCaptureDefault, Kind) { AST_MATCHER(VarDecl, hasIdentifier) { const IdentifierInfo *ID = Node.getIdentifier(); - return ID != NULL && !ID->isPlaceholder(); + return ID != nullptr && !ID->isPlaceholder(); } } // namespace diff --git a/clang-tools-extra/clang-tidy/misc/RedundantExpressionCheck.cpp b/clang-tools-extra/clang-tidy/misc/RedundantExpressionCheck.cpp index cf81da816964f..9b2af2a8ca7d8 100644 --- a/clang-tools-extra/clang-tidy/misc/RedundantExpressionCheck.cpp +++ b/clang-tools-extra/clang-tidy/misc/RedundantExpressionCheck.cpp @@ 
-910,9 +910,9 @@ static bool areExprsSameMacroOrLiteral(const BinaryOperator *BinOp, if (Rsr.getBegin().isMacroID()) { // Both sides are macros so they are same macro or literal const llvm::StringRef L = Lexer::getSourceText( - CharSourceRange::getTokenRange(Lsr), SM, Context->getLangOpts(), 0); + CharSourceRange::getTokenRange(Lsr), SM, Context->getLangOpts()); const llvm::StringRef R = Lexer::getSourceText( - CharSourceRange::getTokenRange(Rsr), SM, Context->getLangOpts(), 0); + CharSourceRange::getTokenRange(Rsr), SM, Context->getLangOpts()); return areStringsSameIgnoreSpaces(L, R); } // Left is macro but right is not so they are not same macro or literal diff --git a/clang-tools-extra/clang-tidy/modernize/UseConstraintsCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseConstraintsCheck.cpp index 9e4d184c4b6e1..e9b96c4016af6 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseConstraintsCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseConstraintsCheck.cpp @@ -161,7 +161,7 @@ matchTrailingTemplateParam(const FunctionTemplateDecl *FunctionTemplate) { const TemplateParameterList *TemplateParams = FunctionTemplate->getTemplateParameters(); - if (TemplateParams->size() == 0) + if (TemplateParams->empty()) return {}; const NamedDecl *LastParam = @@ -419,7 +419,7 @@ handleTrailingTemplateType(const FunctionTemplateDecl *FunctionTemplate, SourceRange RemovalRange; const TemplateParameterList *TemplateParams = FunctionTemplate->getTemplateParameters(); - if (!TemplateParams || TemplateParams->size() == 0) + if (!TemplateParams || TemplateParams->empty()) return {}; if (TemplateParams->size() == 1) { diff --git a/clang-tools-extra/clang-tidy/modernize/UseRangesCheck.h b/clang-tools-extra/clang-tidy/modernize/UseRangesCheck.h index 2f4cace653cf1..51327dab53e3d 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseRangesCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/UseRangesCheck.h @@ -22,7 +22,7 @@ class UseRangesCheck : public utils::UseRangesCheck 
{ public: UseRangesCheck(StringRef CheckName, ClangTidyContext *Context); - void storeOptions(ClangTidyOptions::OptionMap &Options) override; + void storeOptions(ClangTidyOptions::OptionMap &Opts) override; ReplacerMap getReplacerMap() const override; diff --git a/clang-tools-extra/clang-tidy/modernize/UseScopedLockCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseScopedLockCheck.cpp index 9c2fc9e06fb45..52e9a9f8d49e0 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseScopedLockCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseScopedLockCheck.cpp @@ -104,8 +104,7 @@ getTemplateLockGuardTypeLoc(const TypeSourceInfo *SourceInfo) { static SourceRange getLockGuardRange(const TypeSourceInfo *SourceInfo) { const TypeLoc LockGuardTypeLoc = SourceInfo->getTypeLoc(); - return SourceRange(LockGuardTypeLoc.getBeginLoc(), - LockGuardTypeLoc.getEndLoc()); + return {LockGuardTypeLoc.getBeginLoc(), LockGuardTypeLoc.getEndLoc()}; } // Find the exact source range of the 'lock_guard' name token @@ -115,8 +114,8 @@ static SourceRange getLockGuardNameRange(const TypeSourceInfo *SourceInfo) { if (!TemplateLoc) return {}; - return SourceRange(TemplateLoc.getTemplateNameLoc(), - TemplateLoc.getLAngleLoc().getLocWithOffset(-1)); + return {TemplateLoc.getTemplateNameLoc(), + TemplateLoc.getLAngleLoc().getLocWithOffset(-1)}; } const static StringRef UseScopedLockMessage = diff --git a/clang-tools-extra/clang-tidy/modernize/UseStdFormatCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseStdFormatCheck.cpp index cf2fa1955ca1b..5cabc6df21da9 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseStdFormatCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseStdFormatCheck.cpp @@ -33,7 +33,7 @@ UseStdFormatCheck::UseStdFormatCheck(StringRef Name, ClangTidyContext *Context) areDiagsSelfContained()), MaybeHeaderToInclude(Options.get("FormatHeader")) { if (StrFormatLikeFunctions.empty()) - StrFormatLikeFunctions.push_back("absl::StrFormat"); + 
StrFormatLikeFunctions.emplace_back("absl::StrFormat"); if (!MaybeHeaderToInclude && ReplacementFormatFunction == "std::format") MaybeHeaderToInclude = ""; diff --git a/clang-tools-extra/clang-tidy/objc/PropertyDeclarationCheck.cpp b/clang-tools-extra/clang-tidy/objc/PropertyDeclarationCheck.cpp index 3b847f51d2173..ffbdb025848d7 100644 --- a/clang-tools-extra/clang-tidy/objc/PropertyDeclarationCheck.cpp +++ b/clang-tools-extra/clang-tidy/objc/PropertyDeclarationCheck.cpp @@ -102,7 +102,7 @@ void PropertyDeclarationCheck::registerMatchers(MatchFinder *Finder) { void PropertyDeclarationCheck::check(const MatchFinder::MatchResult &Result) { const auto *MatchedDecl = Result.Nodes.getNodeAs("property"); - assert(MatchedDecl->getName().size() > 0); + assert(!MatchedDecl->getName().empty()); auto *DeclContext = MatchedDecl->getDeclContext(); auto *CategoryDecl = llvm::dyn_cast(DeclContext); diff --git a/clang-tools-extra/clang-tidy/readability/ConvertMemberFunctionsToStatic.cpp b/clang-tools-extra/clang-tidy/readability/ConvertMemberFunctionsToStatic.cpp index 30df40bda57d8..d6784d0e8fba8 100644 --- a/clang-tools-extra/clang-tidy/readability/ConvertMemberFunctionsToStatic.cpp +++ b/clang-tools-extra/clang-tidy/readability/ConvertMemberFunctionsToStatic.cpp @@ -71,7 +71,7 @@ AST_MATCHER(CXXMethodDecl, usesThis) { } UsageOfThis; // TraverseStmt does not modify its argument. 
- UsageOfThis.TraverseStmt(const_cast(Node.getBody())); + UsageOfThis.TraverseStmt(Node.getBody()); return UsageOfThis.Used; } diff --git a/clang-tools-extra/clang-tidy/readability/MakeMemberFunctionConstCheck.cpp b/clang-tools-extra/clang-tidy/readability/MakeMemberFunctionConstCheck.cpp index 85852c2c829a1..aace96f54c61c 100644 --- a/clang-tools-extra/clang-tidy/readability/MakeMemberFunctionConstCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/MakeMemberFunctionConstCheck.cpp @@ -211,7 +211,7 @@ AST_MATCHER(CXXMethodDecl, usesThisAsConst) { FindUsageOfThis UsageOfThis(Finder->getASTContext()); // TraverseStmt does not modify its argument. - UsageOfThis.TraverseStmt(const_cast(Node.getBody())); + UsageOfThis.TraverseStmt(Node.getBody()); return UsageOfThis.Usage == Const; } diff --git a/clang-tools-extra/clang-tidy/readability/NamedParameterCheck.cpp b/clang-tools-extra/clang-tidy/readability/NamedParameterCheck.cpp index ea6597dbdd617..6bb8c394f75cc 100644 --- a/clang-tools-extra/clang-tidy/readability/NamedParameterCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/NamedParameterCheck.cpp @@ -15,6 +15,17 @@ using namespace clang::ast_matchers; namespace clang::tidy::readability { +NamedParameterCheck::NamedParameterCheck(StringRef Name, + ClangTidyContext *Context) + : ClangTidyCheck(Name, Context), + InsertPlainNamesInForwardDecls( + Options.get("InsertPlainNamesInForwardDecls", false)) {} + +void NamedParameterCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { + Options.store(Opts, "InsertPlainNamesInForwardDecls", + InsertPlainNamesInForwardDecls); +} + void NamedParameterCheck::registerMatchers(ast_matchers::MatchFinder *Finder) { Finder->addMatcher(functionDecl().bind("decl"), this); } @@ -84,7 +95,8 @@ void NamedParameterCheck::check(const MatchFinder::MatchResult &Result) { for (auto P : UnnamedParams) { // Fallback to an unused marker. 
- StringRef NewName = "unused"; + static constexpr StringRef FallbackName = "unused"; + StringRef NewName = FallbackName; // If the method is overridden, try to copy the name from the base method // into the overrider. @@ -105,12 +117,25 @@ void NamedParameterCheck::check(const MatchFinder::MatchResult &Result) { NewName = Name; } - // Now insert the comment. Note that getLocation() points to the place + // Now insert the fix. Note that getLocation() points to the place // where the name would be, this allows us to also get complex cases like // function pointers right. const ParmVarDecl *Parm = P.first->getParamDecl(P.second); - D << FixItHint::CreateInsertion(Parm->getLocation(), - " /*" + NewName.str() + "*/"); + + // The fix depends on the InsertPlainNamesInForwardDecls option, + // whether this is a forward declaration and whether the parameter has + // a real name. + const bool IsForwardDeclaration = (!Definition || Function != Definition); + if (InsertPlainNamesInForwardDecls && IsForwardDeclaration && + NewName != FallbackName) { + // For forward declarations with InsertPlainNamesInForwardDecls enabled, + // insert the parameter name without comments. + D << FixItHint::CreateInsertion(Parm->getLocation(), + " " + NewName.str()); + } else { + D << FixItHint::CreateInsertion(Parm->getLocation(), + " /*" + NewName.str() + "*/"); + } } } } diff --git a/clang-tools-extra/clang-tidy/readability/NamedParameterCheck.h b/clang-tools-extra/clang-tidy/readability/NamedParameterCheck.h index 812d90ef7319c..f14a74d75eb49 100644 --- a/clang-tools-extra/clang-tidy/readability/NamedParameterCheck.h +++ b/clang-tools-extra/clang-tidy/readability/NamedParameterCheck.h @@ -26,13 +26,16 @@ namespace clang::tidy::readability { /// Corresponding cpplint.py check name: 'readability/function'. 
class NamedParameterCheck : public ClangTidyCheck { public: - NamedParameterCheck(StringRef Name, ClangTidyContext *Context) - : ClangTidyCheck(Name, Context) {} + NamedParameterCheck(StringRef Name, ClangTidyContext *Context); void registerMatchers(ast_matchers::MatchFinder *Finder) override; void check(const ast_matchers::MatchFinder::MatchResult &Result) override; + void storeOptions(ClangTidyOptions::OptionMap &Opts) override; std::optional getCheckTraversalKind() const override { return TK_IgnoreUnlessSpelledInSource; } + +private: + const bool InsertPlainNamesInForwardDecls; }; } // namespace clang::tidy::readability diff --git a/clang-tools-extra/clang-tidy/readability/SuspiciousCallArgumentCheck.cpp b/clang-tools-extra/clang-tidy/readability/SuspiciousCallArgumentCheck.cpp index 9db059c26d6f2..5a04029e4a6fa 100644 --- a/clang-tools-extra/clang-tidy/readability/SuspiciousCallArgumentCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/SuspiciousCallArgumentCheck.cpp @@ -217,7 +217,7 @@ static bool applyJaroWinklerHeuristic(StringRef Arg, StringRef Param, SmallVector ArgFlags(ArgLen); SmallVector ParamFlags(ParamLen); std::ptrdiff_t Range = - std::max(std::ptrdiff_t{0}, std::max(ArgLen, ParamLen) / 2 - 1); + std::max(std::ptrdiff_t{0}, (std::max(ArgLen, ParamLen) / 2) - 1); // Calculate matching characters. for (std::ptrdiff_t I = 0; I < ParamLen; ++I) @@ -260,7 +260,7 @@ static bool applyJaroWinklerHeuristic(StringRef Arg, StringRef Param, // Calculate common string prefix up to 4 chars. 
L = 0; for (std::ptrdiff_t I = 0; - I < std::min(std::min(ArgLen, ParamLen), std::ptrdiff_t{4}); ++I) + I < std::min({ArgLen, ParamLen, std::ptrdiff_t{4}}); ++I) if (tolower(Arg[I]) == tolower(Param[I])) ++L; diff --git a/clang-tools-extra/clang-tidy/utils/DesignatedInitializers.cpp b/clang-tools-extra/clang-tidy/utils/DesignatedInitializers.cpp index 6faeb7a0b76e1..6914ec2beb2fb 100644 --- a/clang-tools-extra/clang-tidy/utils/DesignatedInitializers.cpp +++ b/clang-tools-extra/clang-tidy/utils/DesignatedInitializers.cpp @@ -57,7 +57,7 @@ class AggregateDesignatorNames { } } // Returns false if the type was not an aggregate. - operator bool() { return Valid; } + operator bool() const { return Valid; } // Advance to the next element in the aggregate. void next() { if (IsArray) diff --git a/clang-tools-extra/clang-tidy/utils/HeaderGuard.cpp b/clang-tools-extra/clang-tidy/utils/HeaderGuard.cpp index 5b38ace13e2f2..53ce28e019f75 100644 --- a/clang-tools-extra/clang-tidy/utils/HeaderGuard.cpp +++ b/clang-tools-extra/clang-tidy/utils/HeaderGuard.cpp @@ -239,7 +239,9 @@ class HeaderGuardPPCallbacks : public PPCallbacks { Check->diag(StartLoc, "header is missing header guard") << FixItHint::CreateInsertion( - StartLoc, "#ifndef " + CPPVar + "\n#define " + CPPVar + "\n\n") + StartLoc, + (Twine("#ifndef ") + CPPVar + "\n#define " + CPPVar + "\n\n") + .str()) << FixItHint::CreateInsertion( SM.getLocForEndOfFile(FID), Check->shouldSuggestEndifComment(FileName) diff --git a/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp b/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp index 9104723c7f1c0..6cf38ddf3d914 100644 --- a/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp +++ b/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp @@ -30,11 +30,11 @@ template <> struct DenseMapInfo { using NamingCheckId = clang::tidy::RenamerClangTidyCheck::NamingCheckId; - static inline NamingCheckId getEmptyKey() { + static NamingCheckId getEmptyKey() { 
return {DenseMapInfo::getEmptyKey(), "EMPTY"}; } - static inline NamingCheckId getTombstoneKey() { + static NamingCheckId getTombstoneKey() { return {DenseMapInfo::getTombstoneKey(), "TOMBSTONE"}; } diff --git a/clang-tools-extra/clang-tidy/utils/UseRangesCheck.cpp b/clang-tools-extra/clang-tidy/utils/UseRangesCheck.cpp index 16ee7ee79d75e..e421c9f11b24b 100644 --- a/clang-tools-extra/clang-tidy/utils/UseRangesCheck.cpp +++ b/clang-tools-extra/clang-tidy/utils/UseRangesCheck.cpp @@ -43,7 +43,7 @@ static std::string getFullPrefix(ArrayRef Signature) { llvm::raw_string_ostream OS(Output); for (const UseRangesCheck::Indexes &Item : Signature) OS << Item.BeginArg << ":" << Item.EndArg << ":" - << (Item.ReplaceArg == Item.First ? '0' : '1'); + << (Item.ReplaceArg == UseRangesCheck::Indexes::First ? '0' : '1'); return Output; } @@ -194,7 +194,7 @@ static void removeFunctionArgs(DiagnosticBuilder &Diag, const CallExpr &Call, void UseRangesCheck::check(const MatchFinder::MatchResult &Result) { Replacer *Replacer = nullptr; const FunctionDecl *Function = nullptr; - for (auto [Node, Value] : Result.Nodes.getMap()) { + for (const auto &[Node, Value] : Result.Nodes.getMap()) { StringRef NodeStr(Node); if (!NodeStr.consume_front(FuncDecl)) continue; diff --git a/clang-tools-extra/clang-tidy/utils/UseRangesCheck.h b/clang-tools-extra/clang-tidy/utils/UseRangesCheck.h index 3a454bcf0cf07..a5ba6802dd89e 100644 --- a/clang-tools-extra/clang-tidy/utils/UseRangesCheck.h +++ b/clang-tools-extra/clang-tidy/utils/UseRangesCheck.h @@ -81,7 +81,7 @@ class UseRangesCheck : public ClangTidyCheck { void registerMatchers(ast_matchers::MatchFinder *Finder) final; void check(const ast_matchers::MatchFinder::MatchResult &Result) final; bool isLanguageVersionSupported(const LangOptions &LangOpts) const override; - void storeOptions(ClangTidyOptions::OptionMap &Options) override; + void storeOptions(ClangTidyOptions::OptionMap &Opts) override; std::optional getCheckTraversalKind() const override; 
private: diff --git a/clang-tools-extra/clangd/ModulesBuilder.cpp b/clang-tools-extra/clangd/ModulesBuilder.cpp index d88aa01aad05d..6658111d6c7b4 100644 --- a/clang-tools-extra/clangd/ModulesBuilder.cpp +++ b/clang-tools-extra/clangd/ModulesBuilder.cpp @@ -160,6 +160,16 @@ class ReusablePrerequisiteModules : public PrerequisiteModules { RequiredModule->getModuleFilePath().str()); } + std::string getAsString() const { + std::string Result; + llvm::raw_string_ostream OS(Result); + for (const auto &ModuleFile : RequiredModules) { + OS << "-fmodule-file=" << ModuleFile->getModuleName() << "=" + << ModuleFile->getModuleFilePath() << " "; + } + return Result; + } + bool canReuse(const CompilerInvocation &CI, llvm::IntrusiveRefCntPtr) const override; @@ -296,8 +306,27 @@ buildModuleFile(llvm::StringRef ModuleName, PathRef ModuleUnitFileName, GenerateReducedModuleInterfaceAction Action; Clang->ExecuteAction(Action); - if (Clang->getDiagnostics().hasErrorOccurred()) - return llvm::createStringError("Compilation failed"); + if (Clang->getDiagnostics().hasErrorOccurred()) { + std::string Cmds; + for (const auto &Arg : Inputs.CompileCommand.CommandLine) { + if (!Cmds.empty()) + Cmds += " "; + Cmds += Arg; + } + + clangd::vlog("Failed to compile {0} with command: {1}.", ModuleUnitFileName, + Cmds); + + std::string BuiltModuleFilesStr = BuiltModuleFiles.getAsString(); + if (!BuiltModuleFilesStr.empty()) + clangd::vlog("The actual used module files built by clangd is {0}", + BuiltModuleFilesStr); + + return llvm::createStringError( + llvm::formatv("Failed to compile {0}. 
Use '--log=verbose' to view " + "detailed failure reasons.", + ModuleUnitFileName)); + } return ModuleFile{ModuleName, Inputs.CompileCommand.Output}; } diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index ad869265a2db5..95e6ee1b51334 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -215,7 +215,7 @@ Changes in existing checks - Improved :doc:`cppcoreguidelines-missing-std-forward ` check by adding a flag to specify the function used for forwarding instead of ``std::forward``. - + - Improved :doc:`cppcoreguidelines-pro-bounds-pointer-arithmetic ` check by fixing false positives when calling indexing operators that do not perform @@ -342,6 +342,11 @@ Changes in existing checks false negatives where math expressions are the operand of assignment operators or comparison operators. +- Improved :doc:`readability-named-parameter + ` check by adding the option + `InsertPlainNamesInForwardDecls` to insert parameter names without comments + for forward declarations only. + - Improved :doc:`readability-qualified-auto ` check by adding the option `AllowedTypes`, that excludes specified types from adding qualifiers. diff --git a/clang-tools-extra/docs/clang-tidy/checks/readability/named-parameter.rst b/clang-tools-extra/docs/clang-tidy/checks/readability/named-parameter.rst index 73677a48605f4..48b7e84d38ec8 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/readability/named-parameter.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/readability/named-parameter.rst @@ -23,3 +23,12 @@ If a parameter is not utilized, its name can be commented out in a function defi } Corresponding cpplint.py check name: `readability/function`. + +Options +------- + +.. option:: InsertPlainNamesInForwardDecls + + If set to `true`, the check will insert parameter names without comments for + forward declarations only. 
Otherwise, the check will insert parameter names + as comments (e.g., ``/*param*/``). Default is `false`. diff --git a/clang-tools-extra/test/clang-doc/json/class-requires.cpp b/clang-tools-extra/test/clang-doc/json/class-requires.cpp index 2dd25771d6d8b..213da93a1adfa 100644 --- a/clang-tools-extra/test/clang-doc/json/class-requires.cpp +++ b/clang-tools-extra/test/clang-doc/json/class-requires.cpp @@ -1,6 +1,6 @@ // RUN: rm -rf %t && mkdir -p %t // RUN: clang-doc --extra-arg -std=c++20 --output=%t --format=json --executor=standalone %s -// RUN: FileCheck %s < %t/GlobalNamespace/MyClass.json +// RUN: FileCheck %s < %t/_ZTV7MyClass.json template concept Addable = requires(T a, T b) { diff --git a/clang-tools-extra/test/clang-doc/json/class-specialization.cpp b/clang-tools-extra/test/clang-doc/json/class-specialization.cpp new file mode 100644 index 0000000000000..e9259edad5cb8 --- /dev/null +++ b/clang-tools-extra/test/clang-doc/json/class-specialization.cpp @@ -0,0 +1,37 @@ +// RUN: rm -rf %t && mkdir -p %t +// RUN: clang-doc --output=%t --format=json --executor=standalone %s +// RUN: FileCheck %s < %t/_ZTV7MyClass.json --check-prefix=BASE +// RUN: FileCheck %s < %t/_ZTV7MyClassIiE.json --check-prefix=SPECIALIZATION + +template struct MyClass {}; + +template<> struct MyClass {}; + +// BASE: "MangledName": "_ZTV7MyClass", +// BASE-NEXT: "Name": "MyClass", +// BASE-NEXT: "Namespace": [ +// BASE-NEXT: "GlobalNamespace" +// BASE-NEXT: ], +// BASE-NEXT: "Path": "GlobalNamespace", +// BASE-NEXT: "TagType": "struct", +// BASE-NEXT: "Template": { +// BASE-NEXT: "Parameters": [ +// BASE-NEXT: "typename T" +// BASE-NEXT: ] +// BASE-NEXT: }, + +// SPECIALIZATION: "MangledName": "_ZTV7MyClassIiE", +// SPECIALIZATION-NEXT: "Name": "MyClass", +// SPECIALIZATION-NEXT: "Namespace": [ +// SPECIALIZATION-NEXT: "GlobalNamespace" +// SPECIALIZATION-NEXT: ], +// SPECIALIZATION-NEXT: "Path": "GlobalNamespace", +// SPECIALIZATION-NEXT: "TagType": "struct", +// SPECIALIZATION-NEXT: 
"Template": { +// SPECIALIZATION-NEXT: "Specialization": { +// SPECIALIZATION-NEXT: "Parameters": [ +// SPECIALIZATION-NEXT: "int" +// SPECIALIZATION-NEXT: ], +// SPECIALIZATION-NEXT: "SpecializationOf": "{{[0-9A-F]*}}" +// SPECIALIZATION-NEXT: } +// SPECIALIZATION-NEXT: }, diff --git a/clang-tools-extra/test/clang-doc/json/class-template.cpp b/clang-tools-extra/test/clang-doc/json/class-template.cpp index fb9c4c2f21c2e..6cdc3e9175278 100644 --- a/clang-tools-extra/test/clang-doc/json/class-template.cpp +++ b/clang-tools-extra/test/clang-doc/json/class-template.cpp @@ -1,6 +1,6 @@ // RUN: rm -rf %t && mkdir -p %t // RUN: clang-doc --output=%t --format=json --executor=standalone %s -// RUN: FileCheck %s < %t/GlobalNamespace/MyClass.json +// RUN: FileCheck %s < %t/_ZTV7MyClass.json template struct MyClass { T MemberTemplate; diff --git a/clang-tools-extra/test/clang-doc/json/class.cpp b/clang-tools-extra/test/clang-doc/json/class.cpp index ae47da75edccb..d8317eafea91a 100644 --- a/clang-tools-extra/test/clang-doc/json/class.cpp +++ b/clang-tools-extra/test/clang-doc/json/class.cpp @@ -1,6 +1,6 @@ // RUN: rm -rf %t && mkdir -p %t // RUN: clang-doc --output=%t --format=json --executor=standalone %s -// RUN: FileCheck %s < %t/GlobalNamespace/MyClass.json +// RUN: FileCheck %s < %t/_ZTV7MyClass.json struct Foo; @@ -134,6 +134,7 @@ struct MyClass { // CHECK-NEXT: "Filename": "{{.*}}class.cpp", // CHECK-NEXT: "LineNumber": 10 // CHECK-NEXT: }, +// CHECK-NEXT: "MangledName": "_ZTV7MyClass", // CHECK-NEXT: "Name": "MyClass", // CHECK-NEXT: "Namespace": [ // CHECK-NEXT: "GlobalNamespace" diff --git a/clang-tools-extra/test/clang-doc/json/compound-constraints.cpp b/clang-tools-extra/test/clang-doc/json/compound-constraints.cpp index b49dec5cc78c5..34acb6808409d 100644 --- a/clang-tools-extra/test/clang-doc/json/compound-constraints.cpp +++ b/clang-tools-extra/test/clang-doc/json/compound-constraints.cpp @@ -1,6 +1,6 @@ // RUN: rm -rf %t && mkdir -p %t // RUN: clang-doc 
--extra-arg -std=c++20 --output=%t --format=json --executor=standalone %s -// RUN: FileCheck %s < %t/GlobalNamespace/index.json +// RUN: FileCheck %s < %t/index.json template concept Incrementable = requires (T a) { a++; diff --git a/clang-tools-extra/test/clang-doc/json/concept.cpp b/clang-tools-extra/test/clang-doc/json/concept.cpp index 887c9d79146a0..b946393274c85 100644 --- a/clang-tools-extra/test/clang-doc/json/concept.cpp +++ b/clang-tools-extra/test/clang-doc/json/concept.cpp @@ -1,6 +1,6 @@ // RUN: rm -rf %t && mkdir -p %t // RUN: clang-doc --extra-arg -std=c++20 --output=%t --format=json --executor=standalone %s -// RUN: FileCheck %s < %t/GlobalNamespace/index.json +// RUN: FileCheck %s < %t/index.json // Requires that T suports post and pre-incrementing. template diff --git a/clang-tools-extra/test/clang-doc/json/function-requires.cpp b/clang-tools-extra/test/clang-doc/json/function-requires.cpp index 4e8432e088c4f..08ac4c7ed2ca3 100644 --- a/clang-tools-extra/test/clang-doc/json/function-requires.cpp +++ b/clang-tools-extra/test/clang-doc/json/function-requires.cpp @@ -1,6 +1,6 @@ // RUN: rm -rf %t && mkdir -p %t // RUN: clang-doc --extra-arg -std=c++20 --output=%t --format=json --executor=standalone %s -// RUN: FileCheck %s < %t/GlobalNamespace/index.json +// RUN: FileCheck %s < %t/index.json template concept Incrementable = requires(T x) { diff --git a/clang-tools-extra/test/clang-doc/json/function-specifiers.cpp b/clang-tools-extra/test/clang-doc/json/function-specifiers.cpp index 7005fb7b3e66e..b194e3371bf76 100644 --- a/clang-tools-extra/test/clang-doc/json/function-specifiers.cpp +++ b/clang-tools-extra/test/clang-doc/json/function-specifiers.cpp @@ -1,6 +1,6 @@ // RUN: rm -rf %t && mkdir -p %t // RUN: clang-doc --output=%t --format=json --executor=standalone %s -// RUN: FileCheck %s < %t/GlobalNamespace/index.json +// RUN: FileCheck %s < %t/index.json static void myFunction() {} diff --git 
a/clang-tools-extra/test/clang-doc/json/method-template.cpp b/clang-tools-extra/test/clang-doc/json/method-template.cpp index ea9110d6c2d1c..ac8450a72d3a7 100644 --- a/clang-tools-extra/test/clang-doc/json/method-template.cpp +++ b/clang-tools-extra/test/clang-doc/json/method-template.cpp @@ -1,6 +1,6 @@ // RUN: rm -rf %t && mkdir -p %t // RUN: clang-doc --output=%t --format=json --executor=standalone %s -// RUN: FileCheck %s < %t/GlobalNamespace/MyClass.json +// RUN: FileCheck %s < %t/_ZTV7MyClass.json struct MyClass { template T methodTemplate(T param) { diff --git a/clang-tools-extra/test/clang-doc/json/namespace.cpp b/clang-tools-extra/test/clang-doc/json/namespace.cpp index 6e4fc6938d856..779d7b49f5581 100644 --- a/clang-tools-extra/test/clang-doc/json/namespace.cpp +++ b/clang-tools-extra/test/clang-doc/json/namespace.cpp @@ -1,6 +1,6 @@ // RUN: rm -rf %t && mkdir -p %t // RUN: clang-doc --output=%t --format=json --executor=standalone %s -// RUN: FileCheck %s < %t/GlobalNamespace/index.json +// RUN: FileCheck %s < %t/index.json class MyClass {}; diff --git a/clang-tools-extra/test/clang-doc/json/nested-namespace.cpp b/clang-tools-extra/test/clang-doc/json/nested-namespace.cpp index 9b176feb67a00..54f95c4d041ca 100644 --- a/clang-tools-extra/test/clang-doc/json/nested-namespace.cpp +++ b/clang-tools-extra/test/clang-doc/json/nested-namespace.cpp @@ -1,7 +1,7 @@ // RUN: rm -rf %t && mkdir -p %t // RUN: clang-doc --output=%t --format=json --executor=standalone %s -// RUN: FileCheck %s < %t/nested/index.json --check-prefix=NESTED -// RUN: FileCheck %s < %t/nested/inner/index.json --check-prefix=INNER +// RUN: FileCheck %s < %t/nested.json --check-prefix=NESTED +// RUN: FileCheck %s < %t/inner.json --check-prefix=INNER namespace nested { int Global; diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/named-parameter.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/named-parameter.cpp index 50433d5d12ea9..8ae0d7055867b 100644 --- 
a/clang-tools-extra/test/clang-tidy/checkers/readability/named-parameter.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/readability/named-parameter.cpp @@ -1,29 +1,47 @@ // RUN: %check_clang_tidy %s readability-named-parameter %t +// RUN: %check_clang_tidy -check-suffix=PLAIN-NAMES %s readability-named-parameter %t -- \ +// RUN: -config="{CheckOptions: [{key: readability-named-parameter.InsertPlainNamesInForwardDecls, value: true}]}" void Method(char *) { /* */ } // CHECK-MESSAGES: :[[@LINE-1]]:19: warning: all parameters should be named in a function // CHECK-FIXES: void Method(char * /*unused*/) { /* */ } +// CHECK-MESSAGES-PLAIN-NAMES: :[[@LINE-3]]:19: warning: all parameters should be named in a function +// CHECK-FIXES-PLAIN-NAMES: void Method(char * /*unused*/) { /* */ } void Method2(char *) {} // CHECK-MESSAGES: :[[@LINE-1]]:20: warning: all parameters should be named in a function // CHECK-FIXES: void Method2(char * /*unused*/) {} +// CHECK-MESSAGES-PLAIN-NAMES: :[[@LINE-3]]:20: warning: all parameters should be named in a function +// CHECK-FIXES-PLAIN-NAMES: void Method2(char * /*unused*/) {} void Method3(char *, void *) {} // CHECK-MESSAGES: :[[@LINE-1]]:20: warning: all parameters should be named in a function // CHECK-FIXES: void Method3(char * /*unused*/, void * /*unused*/) {} +// CHECK-MESSAGES-PLAIN-NAMES: :[[@LINE-3]]:20: warning: all parameters should be named in a function +// CHECK-FIXES-PLAIN-NAMES: void Method3(char * /*unused*/, void * /*unused*/) {} void Method4(char *, int /*unused*/) {} // CHECK-MESSAGES: :[[@LINE-1]]:20: warning: all parameters should be named in a function // CHECK-FIXES: void Method4(char * /*unused*/, int /*unused*/) {} +// CHECK-MESSAGES-PLAIN-NAMES: :[[@LINE-3]]:20: warning: all parameters should be named in a function +// CHECK-FIXES-PLAIN-NAMES: void Method4(char * /*unused*/, int /*unused*/) {} void operator delete[](void *) throw() {} // CHECK-MESSAGES: :[[@LINE-1]]:30: warning: all parameters should be 
named in a function // CHECK-FIXES: void operator delete[](void * /*unused*/) throw() {} +// CHECK-MESSAGES-PLAIN-NAMES: :[[@LINE-3]]:30: warning: all parameters should be named in a function +// CHECK-FIXES-PLAIN-NAMES: void operator delete[](void * /*unused*/) throw() {} int Method5(int) { return 0; } // CHECK-MESSAGES: :[[@LINE-1]]:16: warning: all parameters should be named in a function // CHECK-FIXES: int Method5(int /*unused*/) { return 0; } +// CHECK-MESSAGES-PLAIN-NAMES: :[[@LINE-3]]:16: warning: all parameters should be named in a function +// CHECK-FIXES-PLAIN-NAMES: int Method5(int /*unused*/) { return 0; } void Method6(void (*)(void *)) {} // CHECK-MESSAGES: :[[@LINE-1]]:21: warning: all parameters should be named in a function // CHECK-FIXES: void Method6(void (* /*unused*/)(void *)) {} +// CHECK-MESSAGES-PLAIN-NAMES: :[[@LINE-3]]:21: warning: all parameters should be named in a function +// CHECK-FIXES-PLAIN-NAMES: void Method6(void (* /*unused*/)(void *)) {} template void Method7(T) {} // CHECK-MESSAGES: :[[@LINE-1]]:37: warning: all parameters should be named in a function // CHECK-FIXES: template void Method7(T /*unused*/) {} +// CHECK-MESSAGES-PLAIN-NAMES: :[[@LINE-3]]:37: warning: all parameters should be named in a function +// CHECK-FIXES-PLAIN-NAMES: template void Method7(T /*unused*/) {} // Don't warn in macros. 
#define M void MethodM(int) {} @@ -55,6 +73,8 @@ struct Y { void foo(T) {} // CHECK-MESSAGES: :[[@LINE-1]]:13: warning: all parameters should be named in a function // CHECK-FIXES: void foo(T /*unused*/) {} +// CHECK-MESSAGES-PLAIN-NAMES: :[[@LINE-3]]:13: warning: all parameters should be named in a function +// CHECK-FIXES-PLAIN-NAMES: void foo(T /*unused*/) {} }; Y y; @@ -69,19 +89,27 @@ struct Derived : public Base { void foo(int); // CHECK-MESSAGES: :[[@LINE-1]]:15: warning: all parameters should be named in a function // CHECK-FIXES: void foo(int /*argname*/); +// CHECK-MESSAGES-PLAIN-NAMES: :[[@LINE-3]]:15: warning: all parameters should be named in a function +// CHECK-FIXES-PLAIN-NAMES: void foo(int argname); }; void FDef(int); // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: all parameters should be named in a function // CHECK-FIXES: void FDef(int /*n*/); +// CHECK-MESSAGES-PLAIN-NAMES: :[[@LINE-3]]:14: warning: all parameters should be named in a function +// CHECK-FIXES-PLAIN-NAMES: void FDef(int n); void FDef(int n) {} void FDef2(int, int); // CHECK-MESSAGES: :[[@LINE-1]]:15: warning: all parameters should be named in a function // CHECK-FIXES: void FDef2(int /*n*/, int /*unused*/); +// CHECK-MESSAGES-PLAIN-NAMES: :[[@LINE-3]]:15: warning: all parameters should be named in a function +// CHECK-FIXES-PLAIN-NAMES: void FDef2(int n, int /*unused*/); void FDef2(int n, int) {} // CHECK-MESSAGES: :[[@LINE-1]]:22: warning: all parameters should be named in a function // CHECK-FIXES: void FDef2(int n, int /*unused*/) {} +// CHECK-MESSAGES-PLAIN-NAMES: :[[@LINE-3]]:22: warning: all parameters should be named in a function +// CHECK-FIXES-PLAIN-NAMES: void FDef2(int n, int /*unused*/) {} void FNoDef(int); @@ -91,18 +119,26 @@ Z the_z; Z &operator++(Z&) { return the_z; } // CHECK-MESSAGES: :[[@LINE-1]]:17: warning: all parameters should be named in a function // CHECK-FIXES: Z &operator++(Z& /*unused*/) { return the_z; } +// CHECK-MESSAGES-PLAIN-NAMES: 
:[[@LINE-3]]:17: warning: all parameters should be named in a function +// CHECK-FIXES-PLAIN-NAMES: Z &operator++(Z& /*unused*/) { return the_z; } Z &operator++(Z&, int) { return the_z; } // CHECK-MESSAGES: :[[@LINE-1]]:17: warning: all parameters should be named in a function // CHECK-FIXES: Z &operator++(Z& /*unused*/, int) { return the_z; } +// CHECK-MESSAGES-PLAIN-NAMES: :[[@LINE-3]]:17: warning: all parameters should be named in a function +// CHECK-FIXES-PLAIN-NAMES: Z &operator++(Z& /*unused*/, int) { return the_z; } Z &operator--(Z&) { return the_z; } // CHECK-MESSAGES: :[[@LINE-1]]:17: warning: all parameters should be named in a function // CHECK-FIXES: Z &operator--(Z& /*unused*/) { return the_z; } +// CHECK-MESSAGES-PLAIN-NAMES: :[[@LINE-3]]:17: warning: all parameters should be named in a function +// CHECK-FIXES-PLAIN-NAMES: Z &operator--(Z& /*unused*/) { return the_z; } Z &operator--(Z&, int) { return the_z; } // CHECK-MESSAGES: :[[@LINE-1]]:17: warning: all parameters should be named in a function // CHECK-FIXES: Z &operator--(Z& /*unused*/, int) { return the_z; } +// CHECK-MESSAGES-PLAIN-NAMES: :[[@LINE-3]]:17: warning: all parameters should be named in a function +// CHECK-FIXES-PLAIN-NAMES: Z &operator--(Z& /*unused*/, int) { return the_z; } namespace testing { namespace internal { diff --git a/clang-tools-extra/unittests/clang-doc/JSONGeneratorTest.cpp b/clang-tools-extra/unittests/clang-doc/JSONGeneratorTest.cpp index 09e522133d832..5927235b3bd93 100644 --- a/clang-tools-extra/unittests/clang-doc/JSONGeneratorTest.cpp +++ b/clang-tools-extra/unittests/clang-doc/JSONGeneratorTest.cpp @@ -67,6 +67,7 @@ TEST(JSONGeneratorTest, emitRecordJSON) { "IsParent": true, "IsTypedef": false, "IsVirtual": true, + "MangledName": "", "Name": "F", "Path": "path/to/F", "PublicFunctions": [ @@ -112,6 +113,7 @@ TEST(JSONGeneratorTest, emitRecordJSON) { "Filename": "main.cpp", "LineNumber": 1 }, + "MangledName": "", "Name": "Foo", "Namespace": [ "GlobalNamespace" 
diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst index c61c808831704..ab374c1886165 100644 --- a/clang/docs/ClangFormatStyleOptions.rst +++ b/clang/docs/ClangFormatStyleOptions.rst @@ -1554,9 +1554,9 @@ the configuration (without a prefix: ``Auto``). .. code-block:: c++ - #define A \ - int aaaa; \ - int b; \ + #define A \ + int aaaa; \ + int b; \ int dddddddddd; diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index b6af43c062013..e81a3d4976cf8 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -312,6 +312,15 @@ C23 Feature Support `WG14 N2975 `_ - Fixed a bug with handling the type operand form of ``typeof`` when it is used to specify a fixed underlying type for an enumeration. #GH146351 +- Fixed a rejects-valid bug where Clang would reject an enumeration with an + ``_Atomic`` underlying type. The underlying type is the non-atomic, + unqualified version of the specified type. Due to the perhaps surprising lack + of atomic behavior, this is diagnosed under + ``-Wunderlying-atomic-qualifier-ignored``, which defaults to an error. This + can be downgraded with ``-Wno-underlying-atomic-qualifier-ignored`` or + ``-Wno-error=underlying-atomic-qualifier-ignored``. Clang now also diagnoses + cv-qualifiers as being ignored, but that warning does not default to an error. + It can be controlled by ``-Wunderlying-cv-qualifier-ignore``. (#GH147736) C11 Feature Support ^^^^^^^^^^^^^^^^^^^ @@ -895,6 +904,7 @@ Bug Fixes to C++ Support - Fixed a Clang regression in C++20 mode where unresolved dependent call expressions were created inside non-dependent contexts (#GH122892) - Clang now emits the ``-Wunused-variable`` warning when some structured bindings are unused and the ``[[maybe_unused]]`` attribute is not applied. (#GH125810) +- Fixed ``static_cast`` not performing access or ambiguity checks when converting to an rvalue reference to a base class. 
(#GH121429) - Declarations using class template argument deduction with redundant parentheses around the declarator are no longer rejected. (#GH39811) - Fixed a crash caused by invalid declarations of ``std::initializer_list``. (#GH132256) @@ -932,6 +942,7 @@ Bug Fixes to C++ Support - Fix a bug where private access specifier of overloaded function not respected. (#GH107629) - Correctly handles calling an explicit object member function template overload set through its address (``(&Foo::bar)()``). +- Fix a crash when forming an invalid call to an operator with an explicit object member. (#GH147121) - Correctly handle allocations in the condition of a ``if constexpr``.(#GH120197) (#GH134820) - Fixed a crash when handling invalid member using-declaration in C++20+ mode. (#GH63254) - Fixed parsing of lambda expressions that appear after ``*`` or ``&`` in contexts where a declaration can appear. (#GH63880) @@ -1074,6 +1085,8 @@ RISC-V Support CUDA/HIP Language Changes ^^^^^^^^^^^^^^^^^^^^^^^^^ +* Provide a __device__ version of std::__glibcxx_assert_fail() in a header wrapper. + CUDA Support ^^^^^^^^^^^^ diff --git a/clang/include/clang/APINotes/APINotesManager.h b/clang/include/clang/APINotes/APINotesManager.h index 98592438e90ea..772fa5faa0f87 100644 --- a/clang/include/clang/APINotes/APINotesManager.h +++ b/clang/include/clang/APINotes/APINotesManager.h @@ -50,6 +50,13 @@ class APINotesManager { /// source file from which an entity was declared. bool ImplicitAPINotes; + /// Whether to apply all APINotes as optionally-applied versioned + /// entities. This means that when building a Clang module, + /// we capture every note on a given decl wrapped in a SwiftVersionedAttr + /// (with an empty version field for unversioned notes), and have the + /// client apply the relevant version's notes. + bool VersionIndependentSwift; + /// The Swift version to use when interpreting versioned API notes. 
llvm::VersionTuple SwiftVersion; @@ -167,6 +174,8 @@ class APINotesManager { /// Find the API notes readers that correspond to the given source location. llvm::SmallVector findAPINotes(SourceLocation Loc); + + bool captureVersionIndependentSwift() { return VersionIndependentSwift; } }; } // end namespace api_notes diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h index 5b2206af75bee..1118d3e062e68 100644 --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -1776,7 +1776,8 @@ class OMPAtClause final : public OMPClause { } }; -/// This represents 'severity' clause in the '#pragma omp error' directive +/// This represents the 'severity' clause in the '#pragma omp error' and the +/// '#pragma omp parallel' directives. /// /// \code /// #pragma omp error severity(fatal) @@ -1856,7 +1857,8 @@ class OMPSeverityClause final : public OMPClause { } }; -/// This represents 'message' clause in the '#pragma omp error' directive +/// This represents the 'message' clause in the '#pragma omp error' and the +/// '#pragma omp parallel' directives. /// /// \code /// #pragma omp error message("GNU compiler required.") diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety.h new file mode 100644 index 0000000000000..9998702a41cab --- /dev/null +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety.h @@ -0,0 +1,30 @@ +//===- LifetimeSafety.h - C++ Lifetime Safety Analysis -*----------- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the entry point for a dataflow-based static analysis +// that checks for C++ lifetime violations. 
+// +// The analysis is based on the concepts of "origins" and "loans" to track +// pointer lifetimes and detect issues like use-after-free and dangling +// pointers. See the RFC for more details: +// https://discourse.llvm.org/t/rfc-intra-procedural-lifetime-analysis-in-clang/86291 +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H +#include "clang/AST/DeclBase.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Analysis/CFG.h" +namespace clang { + +void runLifetimeSafetyAnalysis(const DeclContext &DC, const CFG &Cfg, + AnalysisDeclContext &AC); + +} // namespace clang + +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H diff --git a/clang/include/clang/Analysis/Analyses/UninitializedValues.h b/clang/include/clang/Analysis/Analyses/UninitializedValues.h index a2b37deddcec2..b151bc3f58321 100644 --- a/clang/include/clang/Analysis/Analyses/UninitializedValues.h +++ b/clang/include/clang/Analysis/Analyses/UninitializedValues.h @@ -47,6 +47,9 @@ class UninitUse { /// Does this use always see an uninitialized value? bool AlwaysUninit; + /// Is this use a const reference to this variable? + bool ConstRefUse = false; + /// This use is always uninitialized if it occurs after any of these branches /// is taken. SmallVector UninitBranches; @@ -61,10 +64,13 @@ class UninitUse { void setUninitAfterCall() { UninitAfterCall = true; } void setUninitAfterDecl() { UninitAfterDecl = true; } + void setConstRefUse() { ConstRefUse = true; } /// Get the expression containing the uninitialized use. const Expr *getUser() const { return User; } + bool isConstRefUse() const { return ConstRefUse; } + /// The kind of uninitialized use. enum Kind { /// The use might be uninitialized. 
@@ -110,10 +116,6 @@ class UninitVariablesHandler { virtual void handleUseOfUninitVariable(const VarDecl *vd, const UninitUse &use) {} - /// Called when the uninitialized variable is used as const refernce argument. - virtual void handleConstRefUseOfUninitVariable(const VarDecl *vd, - const UninitUse &use) {} - /// Called when the uninitialized variable analysis detects the /// idiom 'int x = x'. All other uses of 'x' within the initializer /// are handled by handleUseOfUninitVariable. diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 0912a004549ae..224cb6a32af28 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -3068,6 +3068,26 @@ def Regparm : TypeAttr { let ASTNode = 0; } +def SwiftType : Attr { + // This attribute has no spellings as it is only ever created implicitly + // from API notes. + let Spellings = []; + let Args = [StringArgument<"TypeString">]; + let SemaHandler = 0; + let Documentation = [InternalOnly]; +} + +def SwiftNullability : Attr { + // This attribute has no spellings as it is only ever created implicitly + // from API notes. 
+ let Spellings = []; + let Args = [EnumArgument<"Kind", "Kind", /*is_string=*/false, + ["non_null", "nullable", "unspecified", "nullable_result"], + ["NonNull", "Nullable", "Unspecified", "NullableResult"]>]; + let SemaHandler = 0; + let Documentation = [InternalOnly]; +} + def SwiftAsyncName : InheritableAttr { let Spellings = [GNU<"swift_async_name">]; let Args = [StringArgument<"Name">]; diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index a5ee8013adff6..4d371a9f7d6db 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -665,6 +665,9 @@ TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_barrier_arrive_rtn_b64, "LiLi*3Li", "n TARGET_BUILTIN(__builtin_amdgcn_s_setprio_inc_wg, "vIs", "n", "setprio-inc-wg-inst") TARGET_BUILTIN(__builtin_amdgcn_s_monitor_sleep, "vIs", "n", "gfx1250-insts") +TARGET_BUILTIN(__builtin_amdgcn_s_wait_asynccnt, "vIUs", "n", "gfx1250-insts") +TARGET_BUILTIN(__builtin_amdgcn_s_wait_tensorcnt, "vIUs", "n", "gfx1250-insts") + TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_fp8, "hiIi", "nc", "gfx1250-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_bf8, "hiIi", "nc", "gfx1250-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_fp8, "V2hs", "nc", "gfx1250-insts") diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index f54a830b0103e..9a7a308600763 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -532,6 +532,9 @@ def Dangling : DiagGroup<"dangling", [DanglingAssignment, DanglingInitializerList, DanglingGsl, ReturnStackAddress]>; + +def LifetimeSafety : DiagGroup<"experimental-lifetime-safety">; + def DistributedObjectModifiers : DiagGroup<"distributed-object-modifiers">; def DllexportExplicitInstantiationDecl : DiagGroup<"dllexport-explicit-instantiation-decl">; def ExcessInitializers : DiagGroup<"excess-initializers">; diff 
--git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 934f4453f02b9..3b8f396e37c48 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -9365,6 +9365,16 @@ def warn_atomic_implicit_seq_cst : Warning< InGroup>, DefaultIgnore; def err_atomic_unsupported : Error< "atomic types are not supported in '%0'">; +def warn_cv_stripped_in_enum : Warning< + "%enum_select{" + "%Both{'const' and 'volatile' qualifiers}|" + "%Const{'const' qualifier}|" + "%Volatile{'volatile' qualifier}}0 in enumeration underlying type ignored">, + InGroup>; +def warn_atomic_stripped_in_enum : Warning< + "'_Atomic' qualifier ignored; operations involving the enumeration type will " + "be non-atomic">, + InGroup>, DefaultError; def err_overflow_builtin_must_be_int : Error< "operand argument to %select{overflow builtin|checked integer operation}0 " @@ -10627,6 +10637,10 @@ def warn_dangling_reference_captured_by_unknown : Warning< "object whose reference is captured will be destroyed at the end of " "the full-expression">, InGroup; +def warn_experimental_lifetime_safety_dummy_warning : Warning< + "todo: remove this warning after we have atleast one warning based on the lifetime analysis">, + InGroup, DefaultIgnore; + // For non-floating point, expressions of the form x == x or x != x // should result in a warning, since these always evaluate to a constant. 
// Array comparisons have similar warnings diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index 72321c204ce96..e43238ba683f2 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -390,6 +390,7 @@ LANGOPT(RetainCommentsFromSystemHeaders, 1, 0, Compatible, "retain documentation LANGOPT(APINotes, 1, 0, NotCompatible, "use external API notes") LANGOPT(APINotesModules, 1, 0, NotCompatible, "use module-based external API notes") +LANGOPT(SwiftVersionIndependentAPINotes, 1, 0, NotCompatible, "use external API notes capturing all versions") LANGOPT(SanitizeAddressFieldPadding, 2, 0, NotCompatible, "controls how aggressive is ASan " "field padding (0: none, 1:least " diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index f0a8b32bf2f88..b4b94b8816d48 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -19,7 +19,7 @@ include "arm_sve_sme_incl.td" // Loads // Load one vector (scalar base) -def SVLD1 : MInst<"svld1[_{2}]", "dPc", "csilUcUsUiUlhfdm", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ld1">; +def SVLD1 : MInst<"svld1[_{2}]", "dPc", "csilUcUsUiUlhfdbm", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ld1">; def SVLD1SB : MInst<"svld1sb_{d}", "dPS", "silUsUiUl", [IsLoad, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_ld1">; def SVLD1UB : MInst<"svld1ub_{d}", "dPW", "silUsUiUl", [IsLoad, IsZExtReturn, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_ld1">; def SVLD1SH : MInst<"svld1sh_{d}", "dPT", "ilUiUl", [IsLoad, VerifyRuntimeMode], MemEltTyInt16, "aarch64_sve_ld1">; @@ -27,13 +27,8 @@ def SVLD1UH : MInst<"svld1uh_{d}", "dPX", "ilUiUl", [IsLoad, IsZExtRetu def SVLD1SW : MInst<"svld1sw_{d}", "dPU", "lUl", [IsLoad, VerifyRuntimeMode], MemEltTyInt32, "aarch64_sve_ld1">; def SVLD1UW : MInst<"svld1uw_{d}", "dPY", "lUl", [IsLoad, IsZExtReturn, VerifyRuntimeMode], 
MemEltTyInt32, "aarch64_sve_ld1">; -let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in { - def SVLD1_BF : MInst<"svld1[_{2}]", "dPc", "b", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ld1">; - def SVLD1_VNUM_BF : MInst<"svld1_vnum[_{2}]", "dPcl", "b", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ld1">; -} - // Load one vector (scalar base, VL displacement) -def SVLD1_VNUM : MInst<"svld1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfdm", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ld1">; +def SVLD1_VNUM : MInst<"svld1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfdbm", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ld1">; def SVLD1SB_VNUM : MInst<"svld1sb_vnum_{d}", "dPSl", "silUsUiUl", [IsLoad, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_ld1">; def SVLD1UB_VNUM : MInst<"svld1ub_vnum_{d}", "dPWl", "silUsUiUl", [IsLoad, IsZExtReturn, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_ld1">; def SVLD1SH_VNUM : MInst<"svld1sh_vnum_{d}", "dPTl", "ilUiUl", [IsLoad, VerifyRuntimeMode], MemEltTyInt16, "aarch64_sve_ld1">; @@ -121,7 +116,7 @@ def SVLD1UW_GATHER_INDEX_S : MInst<"svld1uw_gather[_{2}base]_index_{d}", "dPul // First-faulting load one vector (scalar base) -def SVLDFF1 : MInst<"svldff1[_{2}]", "dPc", "csilUcUsUiUlhfdm", [IsLoad], MemEltTyDefault, "aarch64_sve_ldff1">; +def SVLDFF1 : MInst<"svldff1[_{2}]", "dPc", "csilUcUsUiUlhfdbm", [IsLoad], MemEltTyDefault, "aarch64_sve_ldff1">; def SVLDFF1SB : MInst<"svldff1sb_{d}", "dPS", "silUsUiUl", [IsLoad], MemEltTyInt8, "aarch64_sve_ldff1">; def SVLDFF1UB : MInst<"svldff1ub_{d}", "dPW", "silUsUiUl", [IsLoad, IsZExtReturn], MemEltTyInt8, "aarch64_sve_ldff1">; def SVLDFF1SH : MInst<"svldff1sh_{d}", "dPT", "ilUiUl", [IsLoad], MemEltTyInt16, "aarch64_sve_ldff1">; @@ -130,7 +125,7 @@ def SVLDFF1SW : MInst<"svldff1sw_{d}", "dPU", "lUl", [IsLoad], def SVLDFF1UW : MInst<"svldff1uw_{d}", "dPY", "lUl", [IsLoad, IsZExtReturn], MemEltTyInt32, "aarch64_sve_ldff1">; // First-faulting load 
one vector (scalar base, VL displacement) -def SVLDFF1_VNUM : MInst<"svldff1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfdm", [IsLoad], MemEltTyDefault, "aarch64_sve_ldff1">; +def SVLDFF1_VNUM : MInst<"svldff1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfdbm", [IsLoad], MemEltTyDefault, "aarch64_sve_ldff1">; def SVLDFF1SB_VNUM : MInst<"svldff1sb_vnum_{d}", "dPSl", "silUsUiUl", [IsLoad], MemEltTyInt8, "aarch64_sve_ldff1">; def SVLDFF1UB_VNUM : MInst<"svldff1ub_vnum_{d}", "dPWl", "silUsUiUl", [IsLoad, IsZExtReturn], MemEltTyInt8, "aarch64_sve_ldff1">; def SVLDFF1SH_VNUM : MInst<"svldff1sh_vnum_{d}", "dPTl", "ilUiUl", [IsLoad], MemEltTyInt16, "aarch64_sve_ldff1">; @@ -139,11 +134,6 @@ def SVLDFF1SW_VNUM : MInst<"svldff1sw_vnum_{d}", "dPUl", "lUl", [IsL def SVLDFF1UW_VNUM : MInst<"svldff1uw_vnum_{d}", "dPYl", "lUl", [IsLoad, IsZExtReturn], MemEltTyInt32, "aarch64_sve_ldff1">; } // let SVETargetGuard = "sve", SMETargetGuard = InvalidMode -let SVETargetGuard = "sve,bf16", SMETargetGuard = InvalidMode in { - def SVLDFF1_BF : MInst<"svldff1[_{2}]", "dPc", "b", [IsLoad], MemEltTyDefault, "aarch64_sve_ldff1">; - def SVLDFF1_VNUM_BF : MInst<"svldff1_vnum[_{2}]", "dPcl", "b", [IsLoad], MemEltTyDefault, "aarch64_sve_ldff1">; -} - let SVETargetGuard = "sve", SMETargetGuard = InvalidMode in { // First-faulting load one vector (vector base) def SVLDFF1_GATHER_BASES_U : MInst<"svldff1_gather[_{2}base]_{d}", "dPu", "ilUiUlfd", [IsGatherLoad], MemEltTyDefault, "aarch64_sve_ldff1_gather_scalar_offset">; @@ -223,7 +213,7 @@ def SVLDFF1SW_GATHER_INDEX_S : MInst<"svldff1sw_gather[_{2}base]_index_{d}", "dP def SVLDFF1UW_GATHER_INDEX_S : MInst<"svldff1uw_gather[_{2}base]_index_{d}", "dPul", "lUl", [IsGatherLoad, IsZExtReturn], MemEltTyInt32, "aarch64_sve_ldff1_gather_scalar_offset">; // Non-faulting load one vector (scalar base) -def SVLDNF1 : MInst<"svldnf1[_{2}]", "dPc", "csilUcUsUiUlhfdm", [IsLoad], MemEltTyDefault, "aarch64_sve_ldnf1">; +def SVLDNF1 : MInst<"svldnf1[_{2}]", "dPc", "csilUcUsUiUlhfdbm", 
[IsLoad], MemEltTyDefault, "aarch64_sve_ldnf1">; def SVLDNF1SB : MInst<"svldnf1sb_{d}", "dPS", "silUsUiUl", [IsLoad], MemEltTyInt8, "aarch64_sve_ldnf1">; def SVLDNF1UB : MInst<"svldnf1ub_{d}", "dPW", "silUsUiUl", [IsLoad, IsZExtReturn], MemEltTyInt8, "aarch64_sve_ldnf1">; def SVLDNF1SH : MInst<"svldnf1sh_{d}", "dPT", "ilUiUl", [IsLoad], MemEltTyInt16, "aarch64_sve_ldnf1">; @@ -232,7 +222,7 @@ def SVLDNF1SW : MInst<"svldnf1sw_{d}", "dPU", "lUl", [IsLoad], def SVLDNF1UW : MInst<"svldnf1uw_{d}", "dPY", "lUl", [IsLoad, IsZExtReturn], MemEltTyInt32, "aarch64_sve_ldnf1">; // Non-faulting load one vector (scalar base, VL displacement) -def SVLDNF1_VNUM : MInst<"svldnf1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfdm", [IsLoad], MemEltTyDefault, "aarch64_sve_ldnf1">; +def SVLDNF1_VNUM : MInst<"svldnf1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfdbm", [IsLoad], MemEltTyDefault, "aarch64_sve_ldnf1">; def SVLDNF1SB_VNUM : MInst<"svldnf1sb_vnum_{d}", "dPSl", "silUsUiUl", [IsLoad], MemEltTyInt8, "aarch64_sve_ldnf1">; def SVLDNF1UB_VNUM : MInst<"svldnf1ub_vnum_{d}", "dPWl", "silUsUiUl", [IsLoad, IsZExtReturn], MemEltTyInt8, "aarch64_sve_ldnf1">; def SVLDNF1SH_VNUM : MInst<"svldnf1sh_vnum_{d}", "dPTl", "ilUiUl", [IsLoad], MemEltTyInt16, "aarch64_sve_ldnf1">; @@ -241,34 +231,17 @@ def SVLDNF1SW_VNUM : MInst<"svldnf1sw_vnum_{d}", "dPUl", "lUl", [IsL def SVLDNF1UW_VNUM : MInst<"svldnf1uw_vnum_{d}", "dPYl", "lUl", [IsLoad, IsZExtReturn], MemEltTyInt32, "aarch64_sve_ldnf1">; } // let SVETargetGuard = "sve", SMETargetGuard = InvalidMode -let SVETargetGuard = "sve,bf16", SMETargetGuard = InvalidMode in { - def SVLDNF1_BF : MInst<"svldnf1[_{2}]", "dPc", "b", [IsLoad], MemEltTyDefault, "aarch64_sve_ldnf1">; - def SVLDNF1_VNUM_BF : MInst<"svldnf1_vnum[_{2}]", "dPcl", "b", [IsLoad], MemEltTyDefault, "aarch64_sve_ldnf1">; -} - // Load one vector, unextended load, non-temporal (scalar base) -def SVLDNT1 : MInst<"svldnt1[_{2}]", "dPc", "csilUcUsUiUlhfdm", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, 
"aarch64_sve_ldnt1">; +def SVLDNT1 : MInst<"svldnt1[_{2}]", "dPc", "csilUcUsUiUlhfdbm", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ldnt1">; // Load one vector, unextended load, non-temporal (scalar base, VL displacement) -def SVLDNT1_VNUM : MInst<"svldnt1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfdm", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ldnt1">; - -let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in { - def SVLDNT1_BF : MInst<"svldnt1[_{2}]", "dPc", "b", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ldnt1">; - def SVLDNT1_VNUM_BF : MInst<"svldnt1_vnum[_{2}]", "dPcl", "b", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ldnt1">; -} +def SVLDNT1_VNUM : MInst<"svldnt1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfdbm", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ldnt1">; // Load one quadword and replicate (scalar base) -def SVLD1RQ : SInst<"svld1rq[_{2}]", "dPc", "csilUcUsUiUlhfdm", MergeNone, "aarch64_sve_ld1rq", [VerifyRuntimeMode]>; - -let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in { - def SVLD1RQ_BF : SInst<"svld1rq[_{2}]", "dPc", "b", MergeNone, "aarch64_sve_ld1rq", [VerifyRuntimeMode]>; -} +def SVLD1RQ : SInst<"svld1rq[_{2}]", "dPc", "csilUcUsUiUlhfdbm", MergeNone, "aarch64_sve_ld1rq", [VerifyRuntimeMode]>; multiclass StructLoad f = []> { - def : SInst; - let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in { - def: SInst; - } + def : SInst; } // Load N-element structure into N vectors (scalar base) @@ -283,10 +256,7 @@ defm SVLD4_VNUM : StructLoad<"svld4_vnum[_{2}]", "4Pcl", "aarch64_sve_ld4_sret", // Load one octoword and replicate (scalar base) let SVETargetGuard = "sve,f64mm", SMETargetGuard = InvalidMode in { - def SVLD1RO : SInst<"svld1ro[_{2}]", "dPc", "csilUcUsUiUlhfdm", MergeNone, "aarch64_sve_ld1ro">; -} -let SVETargetGuard = "sve,f64mm,bf16", SMETargetGuard = InvalidMode in { - def SVLD1RO_BF16 : SInst<"svld1ro[_{2}]", "dPc", "b", MergeNone, 
"aarch64_sve_ld1ro">; + def SVLD1RO : SInst<"svld1ro[_{2}]", "dPc", "csilUcUsUiUlhfdbm", MergeNone, "aarch64_sve_ld1ro">; } let SVETargetGuard = "sve,bf16", SMETargetGuard = InvalidMode in { @@ -343,7 +313,7 @@ let SVETargetGuard = "sve2p1|sme2p1", SMETargetGuard = "sve2p1|sme2p1" in { // Stores // Store one vector (scalar base) -def SVST1 : MInst<"svst1[_{d}]", "vPpd", "csilUcUsUiUlhfdm", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_st1">; +def SVST1 : MInst<"svst1[_{d}]", "vPpd", "csilUcUsUiUlhfdbm", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_st1">; def SVST1B_S : MInst<"svst1b[_{d}]", "vPAd", "sil", [IsStore, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_st1">; def SVST1B_U : MInst<"svst1b[_{d}]", "vPEd", "UsUiUl", [IsStore, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_st1">; def SVST1H_S : MInst<"svst1h[_{d}]", "vPBd", "il", [IsStore, VerifyRuntimeMode], MemEltTyInt16, "aarch64_sve_st1">; @@ -352,7 +322,7 @@ def SVST1W_S : MInst<"svst1w[_{d}]", "vPCd", "l", [IsStore, Verify def SVST1W_U : MInst<"svst1w[_{d}]", "vPGd", "Ul", [IsStore, VerifyRuntimeMode], MemEltTyInt32, "aarch64_sve_st1">; // Store one vector (scalar base, VL displacement) -def SVST1_VNUM : MInst<"svst1_vnum[_{d}]", "vPpld", "csilUcUsUiUlhfdm", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_st1">; +def SVST1_VNUM : MInst<"svst1_vnum[_{d}]", "vPpld", "csilUcUsUiUlhfdbm", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_st1">; def SVST1B_VNUM_S : MInst<"svst1b_vnum[_{d}]", "vPAld", "sil", [IsStore, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_st1">; def SVST1B_VNUM_U : MInst<"svst1b_vnum[_{d}]", "vPEld", "UsUiUl", [IsStore, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_st1">; def SVST1H_VNUM_S : MInst<"svst1h_vnum[_{d}]", "vPBld", "il", [IsStore, VerifyRuntimeMode], MemEltTyInt16, "aarch64_sve_st1">; @@ -360,11 +330,6 @@ def SVST1H_VNUM_U : MInst<"svst1h_vnum[_{d}]", "vPFld", "UiUl", [IsSt def SVST1W_VNUM_S : MInst<"svst1w_vnum[_{d}]", 
"vPCld", "l", [IsStore, VerifyRuntimeMode], MemEltTyInt32, "aarch64_sve_st1">; def SVST1W_VNUM_U : MInst<"svst1w_vnum[_{d}]", "vPGld", "Ul", [IsStore, VerifyRuntimeMode], MemEltTyInt32, "aarch64_sve_st1">; -let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in { - def SVST1_BF : MInst<"svst1[_{d}]", "vPpd", "b", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_st1">; - def SVST1_VNUM_BF : MInst<"svst1_vnum[_{d}]", "vPpld", "b", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_st1">; -} - let SVETargetGuard = "sve", SMETargetGuard = InvalidMode in { // Store one vector (vector base) def SVST1_SCATTER_BASES_U : MInst<"svst1_scatter[_{2}base_{d}]", "vPud", "ilUiUlfd", [IsScatterStore], MemEltTyDefault, "aarch64_sve_st1_scatter_scalar_offset">; @@ -437,11 +402,9 @@ def SVST1W_SCATTER_INDEX_S : MInst<"svst1w_scatter[_{2}base]_index[_{d}]", "v } // let SVETargetGuard = "sve" multiclass StructStore f = []> { - def : SInst; - let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in { - def: SInst; - } + def : SInst; } + // Store N vectors into N-element structure (scalar base) defm SVST2 : StructStore<"svst2[_{d}]", "vPp2", "aarch64_sve_st2", [VerifyRuntimeMode]>; defm SVST3 : StructStore<"svst3[_{d}]", "vPp3", "aarch64_sve_st3", [VerifyRuntimeMode]>; @@ -453,15 +416,10 @@ defm SVST3_VNUM : StructStore<"svst3_vnum[_{d}]", "vPpl3", "aarch64_sve_st3", [V defm SVST4_VNUM : StructStore<"svst4_vnum[_{d}]", "vPpl4", "aarch64_sve_st4", [VerifyRuntimeMode]>; // Store one vector, with no truncation, non-temporal (scalar base) -def SVSTNT1 : MInst<"svstnt1[_{d}]", "vPpd", "csilUcUsUiUlhfdm", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_stnt1">; +def SVSTNT1 : MInst<"svstnt1[_{d}]", "vPpd", "csilUcUsUiUlhfdbm", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_stnt1">; // Store one vector, with no truncation, non-temporal (scalar base, VL displacement) -def SVSTNT1_VNUM : MInst<"svstnt1_vnum[_{d}]", "vPpld", 
"csilUcUsUiUlhfdm", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_stnt1">; - -let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in { - def SVSTNT1_BF : MInst<"svstnt1[_{d}]", "vPpd", "b", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_stnt1">; - def SVSTNT1_VNUM_BF : MInst<"svstnt1_vnum[_{d}]", "vPpld", "b", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_stnt1">; -} +def SVSTNT1_VNUM : MInst<"svstnt1_vnum[_{d}]", "vPpld", "csilUcUsUiUlhfdbm", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_stnt1">; let SVETargetGuard = "sve2p1", SMETargetGuard = InvalidMode in { // Contiguous truncating store from quadword (single vector). @@ -563,18 +521,12 @@ def SVADRD : SInst<"svadrd[_{0}base]_[{2}]index", "uud", "ilUiUl", MergeNone, " // Scalar to vector def SVDUPQ_8 : SInst<"svdupq[_n]_{d}", "dssssssssssssssss", "cUc", MergeNone, "", [VerifyRuntimeMode]>; -def SVDUPQ_16 : SInst<"svdupq[_n]_{d}", "dssssssss", "sUsh", MergeNone, "", [VerifyRuntimeMode]>; -let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in { - def SVDUPQ_BF16 : SInst<"svdupq[_n]_{d}", "dssssssss", "b", MergeNone, "", [VerifyRuntimeMode]>; -} +def SVDUPQ_16 : SInst<"svdupq[_n]_{d}", "dssssssss", "sUshb", MergeNone, "", [VerifyRuntimeMode]>; def SVDUPQ_32 : SInst<"svdupq[_n]_{d}", "dssss", "iUif", MergeNone, "", [VerifyRuntimeMode]>; def SVDUPQ_64 : SInst<"svdupq[_n]_{d}", "dss", "lUld", MergeNone, "", [VerifyRuntimeMode]>; multiclass svdup_base { - def NAME : SInst; - let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in { - def _BF16: SInst; - } + def NAME : SInst; } defm SVDUP : svdup_base<"svdup[_n]_{d}", "ds", MergeNone, "aarch64_sve_dup_x">; @@ -700,10 +652,7 @@ def SVASRD_M : SInst<"svasrd[_n_{d}]", "dPdi", "csil", MergeOp1, "aa def SVASRD_X : SInst<"svasrd[_n_{d}]", "dPdi", "csil", MergeAny, "aarch64_sve_asrd", [VerifyRuntimeMode], [ImmCheck<2, ImmCheckShiftRight, 1>]>; def SVASRD_Z : SInst<"svasrd[_n_{d}]", "dPdi", 
"csil", MergeZero, "aarch64_sve_asrd", [VerifyRuntimeMode], [ImmCheck<2, ImmCheckShiftRight, 1>]>; -def SVINSR : SInst<"svinsr[_n_{d}]", "dds", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_insr", [VerifyRuntimeMode]>; -let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in { - def SVINSR_BF16 : SInst<"svinsr[_n_{d}]", "dds", "b", MergeNone, "aarch64_sve_insr", [VerifyRuntimeMode]>; -} +def SVINSR : SInst<"svinsr[_n_{d}]", "dds", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_insr", [VerifyRuntimeMode]>; //////////////////////////////////////////////////////////////////////////////// // Integer reductions @@ -786,13 +735,9 @@ multiclass SInstCLS def _Z : SInst; } -defm SVCLS : SInstCLS<"svcls", "csil", "aarch64_sve_cls", [VerifyRuntimeMode]>; -defm SVCLZ : SInstCLS<"svclz", "csilUcUsUiUl", "aarch64_sve_clz", [VerifyRuntimeMode]>; -defm SVCNT : SInstCLS<"svcnt", "csilUcUsUiUlhfd", "aarch64_sve_cnt", [VerifyRuntimeMode]>; - -let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in { - defm SVCNT_BF16 : SInstCLS<"svcnt", "b", "aarch64_sve_cnt", [VerifyRuntimeMode]>; -} +defm SVCLS : SInstCLS<"svcls", "csil", "aarch64_sve_cls", [VerifyRuntimeMode]>; +defm SVCLZ : SInstCLS<"svclz", "csilUcUsUiUl", "aarch64_sve_clz", [VerifyRuntimeMode]>; +defm SVCNT : SInstCLS<"svcnt", "csilUcUsUiUlhfdb", "aarch64_sve_cnt", [VerifyRuntimeMode]>; //////////////////////////////////////////////////////////////////////////////// // Conversion @@ -1034,10 +979,7 @@ def SVCVTXNT_F32_F64 : SInst<"svcvtxnt_f32[_f64]", "MMPd", "d", MergeOp1, "aar // Permutations and selection multiclass SVEPerm { - def : SInst; - let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in { - def: SInst; - } + def : SInst; } defm SVCLASTA : SVEPerm<"svclasta[_{d}]", "dPdd", "aarch64_sve_clasta">; @@ -1053,51 +995,26 @@ def SVCOMPACT : SInst<"svcompact[_{d}]", "dPd", "ilUiUlfd", MergeNo // splat of any possible lane. 
It is upto LLVM to pick a more efficient // instruction such as DUP (indexed) if the lane index fits the range of the // instruction's immediate. -def SVDUP_LANE : SInst<"svdup_lane[_{d}]", "ddL", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tbl", [VerifyRuntimeMode]>; -let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in { -def SVDUP_LANE_BF16 : - SInst<"svdup_lane[_{d}]", "ddL", "b", MergeNone, "aarch64_sve_tbl", [VerifyRuntimeMode]>; -} - -def SVDUPQ_LANE : SInst<"svdupq_lane[_{d}]", "ddn", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_dupq_lane", [VerifyRuntimeMode]>; -let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in { - def SVDUPQ_LANE_BF16 : SInst<"svdupq_lane[_{d}]", "ddn", "b", MergeNone, "aarch64_sve_dupq_lane", [VerifyRuntimeMode]>; -} -def SVEXT : SInst<"svext[_{d}]", "dddi", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ext", [VerifyRuntimeMode], [ImmCheck<2, ImmCheckExtract, 1>]>; +def SVDUP_LANE : SInst<"svdup_lane[_{d}]", "ddL", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_tbl", [VerifyRuntimeMode]>; +def SVDUPQ_LANE : SInst<"svdupq_lane[_{d}]", "ddn", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_dupq_lane", [VerifyRuntimeMode]>; +def SVEXT : SInst<"svext[_{d}]", "dddi", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_ext", [VerifyRuntimeMode], [ImmCheck<2, ImmCheckExtract, 1>]>; defm SVLASTA : SVEPerm<"svlasta[_{d}]", "sPd", "aarch64_sve_lasta">; defm SVLASTB : SVEPerm<"svlastb[_{d}]", "sPd", "aarch64_sve_lastb">; -def SVREV : SInst<"svrev[_{d}]", "dd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_rev", [VerifyRuntimeMode]>; -def SVSEL : SInst<"svsel[_{d}]", "dPdd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_sel", [VerifyRuntimeMode]>; -def SVSPLICE : SInst<"svsplice[_{d}]", "dPdd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_splice", [VerifyRuntimeMode]>; -def SVTBL : SInst<"svtbl[_{d}]", "ddu", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tbl", [VerifyRuntimeMode]>; - -let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in { - def 
SVTBL_BF16 : SInst<"svtbl[_{d}]", "ddu", "b", MergeNone, "aarch64_sve_tbl", [VerifyRuntimeMode]>; -} +def SVREV : SInst<"svrev[_{d}]", "dd", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_rev", [VerifyRuntimeMode]>; +def SVSEL : SInst<"svsel[_{d}]", "dPdd", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_sel", [VerifyRuntimeMode]>; +def SVSPLICE : SInst<"svsplice[_{d}]", "dPdd", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_splice", [VerifyRuntimeMode]>; +def SVTBL : SInst<"svtbl[_{d}]", "ddu", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_tbl", [VerifyRuntimeMode]>; -def SVTRN1 : SInst<"svtrn1[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_trn1", [VerifyRuntimeMode]>; -def SVTRN2 : SInst<"svtrn2[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_trn2", [VerifyRuntimeMode]>; +def SVTRN1 : SInst<"svtrn1[_{d}]", "ddd", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_trn1", [VerifyRuntimeMode]>; +def SVTRN2 : SInst<"svtrn2[_{d}]", "ddd", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_trn2", [VerifyRuntimeMode]>; def SVUNPKHI_S : SInst<"svunpkhi[_{d}]", "dh", "sil", MergeNone, "aarch64_sve_sunpkhi", [VerifyRuntimeMode]>; def SVUNPKHI_U : SInst<"svunpkhi[_{d}]", "dh", "UsUiUl", MergeNone, "aarch64_sve_uunpkhi", [VerifyRuntimeMode]>; def SVUNPKLO_S : SInst<"svunpklo[_{d}]", "dh", "sil", MergeNone, "aarch64_sve_sunpklo", [VerifyRuntimeMode]>; def SVUNPKLO_U : SInst<"svunpklo[_{d}]", "dh", "UsUiUl", MergeNone, "aarch64_sve_uunpklo", [VerifyRuntimeMode]>; -def SVUZP1 : SInst<"svuzp1[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_uzp1", [VerifyRuntimeMode]>; -def SVUZP2 : SInst<"svuzp2[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_uzp2", [VerifyRuntimeMode]>; -def SVZIP1 : SInst<"svzip1[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_zip1", [VerifyRuntimeMode]>; -def SVZIP2 : SInst<"svzip2[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_zip2", [VerifyRuntimeMode]>; - -let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in { 
-def SVEXT_BF16 : SInst<"svext[_{d}]", "dddi", "b", MergeNone, "aarch64_sve_ext", [VerifyRuntimeMode], [ImmCheck<2, ImmCheckExtract, 1>]>; -def SVREV_BF16 : SInst<"svrev[_{d}]", "dd", "b", MergeNone, "aarch64_sve_rev", [VerifyRuntimeMode]>; -def SVSEL_BF16 : SInst<"svsel[_{d}]", "dPdd", "b", MergeNone, "aarch64_sve_sel", [VerifyRuntimeMode]>; -def SVSPLICE_BF16 : SInst<"svsplice[_{d}]", "dPdd", "b", MergeNone, "aarch64_sve_splice", [VerifyRuntimeMode]>; -def SVTRN1_BF16 : SInst<"svtrn1[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_trn1", [VerifyRuntimeMode]>; -def SVTRN2_BF16 : SInst<"svtrn2[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_trn2", [VerifyRuntimeMode]>; -def SVUZP1_BF16 : SInst<"svuzp1[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_uzp1", [VerifyRuntimeMode]>; -def SVUZP2_BF16 : SInst<"svuzp2[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_uzp2", [VerifyRuntimeMode]>; -def SVZIP1_BF16 : SInst<"svzip1[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_zip1", [VerifyRuntimeMode]>; -def SVZIP2_BF16 : SInst<"svzip2[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_zip2", [VerifyRuntimeMode]>; -} +def SVUZP1 : SInst<"svuzp1[_{d}]", "ddd", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_uzp1", [VerifyRuntimeMode]>; +def SVUZP2 : SInst<"svuzp2[_{d}]", "ddd", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_uzp2", [VerifyRuntimeMode]>; +def SVZIP1 : SInst<"svzip1[_{d}]", "ddd", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_zip1", [VerifyRuntimeMode]>; +def SVZIP2 : SInst<"svzip2[_{d}]", "ddd", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_zip2", [VerifyRuntimeMode]>; def SVREV_B8 : SInst<"svrev_b8", "PP", "Pc", MergeNone, "aarch64_sve_rev", [VerifyRuntimeMode]>; def SVREV_B16 : SInst<"svrev_b16", "PP", "Pc", MergeNone, "aarch64_sve_rev_b16", [IsOverloadNone, VerifyRuntimeMode]>; @@ -1200,11 +1117,7 @@ def SVCNTW : SInst<"svcntw", "nv", "", MergeNone, "aarch64_sve_cntw", [IsAppendS def SVCNTD : SInst<"svcntd", "nv", "", MergeNone, "aarch64_sve_cntd", [IsAppendSVALL, IsOverloadNone, 
VerifyRuntimeMode]>; def SVCNTP : SInst<"svcntp_{d}", "nPP", "PcPsPiPl", MergeNone, "aarch64_sve_cntp", [VerifyRuntimeMode]>; -def SVLEN : SInst<"svlen[_{d}]", "nd", "csilUcUsUiUlhfd", MergeNone, "", [VerifyRuntimeMode]>; - -let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in { -def SVLEN_BF16 : SInst<"svlen[_{d}]", "nd", "b", MergeNone, "", [VerifyRuntimeMode]>; -} +def SVLEN : SInst<"svlen[_{d}]", "nd", "csilUcUsUiUlhfdb", MergeNone, "", [VerifyRuntimeMode]>; //////////////////////////////////////////////////////////////////////////////// // Saturating scalar arithmetic @@ -1290,44 +1203,24 @@ def SVMLLA_F32 : SInst<"svmmla[_f32]", "dddd","f", MergeNone, "aarch64_sve_fmmla let SVETargetGuard = "sve,f64mm", SMETargetGuard = InvalidMode in { def SVMLLA_F64 : SInst<"svmmla[_f64]", "dddd","d", MergeNone, "aarch64_sve_fmmla">; -def SVTRN1Q : SInst<"svtrn1q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_trn1q">; -def SVTRN2Q : SInst<"svtrn2q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_trn2q">; -def SVUZP1Q : SInst<"svuzp1q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_uzp1q">; -def SVUZP2Q : SInst<"svuzp2q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_uzp2q">; -def SVZIP1Q : SInst<"svzip1q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_zip1q">; -def SVZIP2Q : SInst<"svzip2q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_zip2q">; -} - -let SVETargetGuard = "sve,bf16,f64mm", SMETargetGuard = InvalidMode in { -def SVTRN1Q_BF16 : SInst<"svtrn1q[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_trn1q">; -def SVTRN2Q_BF16 : SInst<"svtrn2q[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_trn2q">; -def SVUZP1Q_BF16 : SInst<"svuzp1q[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_uzp1q">; -def SVUZP2Q_BF16 : SInst<"svuzp2q[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_uzp2q">; -def SVZIP1Q_BF16 : SInst<"svzip1q[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_zip1q">; -def SVZIP2Q_BF16 : SInst<"svzip2q[_{d}]", "ddd", 
"b", MergeNone, "aarch64_sve_zip2q">; +def SVTRN1Q : SInst<"svtrn1q[_{d}]", "ddd", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_trn1q">; +def SVTRN2Q : SInst<"svtrn2q[_{d}]", "ddd", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_trn2q">; +def SVUZP1Q : SInst<"svuzp1q[_{d}]", "ddd", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_uzp1q">; +def SVUZP2Q : SInst<"svuzp2q[_{d}]", "ddd", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_uzp2q">; +def SVZIP1Q : SInst<"svzip1q[_{d}]", "ddd", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_zip1q">; +def SVZIP2Q : SInst<"svzip2q[_{d}]", "ddd", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_zip2q">; } //////////////////////////////////////////////////////////////////////////////// // Vector creation -def SVUNDEF_1 : SInst<"svundef_{d}", "dv", "csilUcUsUiUlhfdm", MergeNone, "", [IsUndef, VerifyRuntimeMode]>; -def SVUNDEF_2 : SInst<"svundef2_{d}", "2v", "csilUcUsUiUlhfdm", MergeNone, "", [IsUndef, VerifyRuntimeMode]>; -def SVUNDEF_3 : SInst<"svundef3_{d}", "3v", "csilUcUsUiUlhfdm", MergeNone, "", [IsUndef, VerifyRuntimeMode]>; -def SVUNDEF_4 : SInst<"svundef4_{d}", "4v", "csilUcUsUiUlhfdm", MergeNone, "", [IsUndef, VerifyRuntimeMode]>; +def SVUNDEF_1 : SInst<"svundef_{d}", "dv", "csilUcUsUiUlhfdbm", MergeNone, "", [IsUndef, VerifyRuntimeMode]>; +def SVUNDEF_2 : SInst<"svundef2_{d}", "2v", "csilUcUsUiUlhfdbm", MergeNone, "", [IsUndef, VerifyRuntimeMode]>; +def SVUNDEF_3 : SInst<"svundef3_{d}", "3v", "csilUcUsUiUlhfdbm", MergeNone, "", [IsUndef, VerifyRuntimeMode]>; +def SVUNDEF_4 : SInst<"svundef4_{d}", "4v", "csilUcUsUiUlhfdbm", MergeNone, "", [IsUndef, VerifyRuntimeMode]>; -def SVCREATE_2 : SInst<"svcreate2[_{d}]", "2dd", "csilUcUsUiUlhfdm", MergeNone, "", [IsTupleCreate, VerifyRuntimeMode]>; -def SVCREATE_3 : SInst<"svcreate3[_{d}]", "3ddd", "csilUcUsUiUlhfdm", MergeNone, "", [IsTupleCreate, VerifyRuntimeMode]>; -def SVCREATE_4 : SInst<"svcreate4[_{d}]", "4dddd", "csilUcUsUiUlhfdm", MergeNone, "", [IsTupleCreate, VerifyRuntimeMode]>; - -let 
SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in { -def SVUNDEF_1_BF16 : SInst<"svundef_{d}", "dv", "b", MergeNone, "", [IsUndef, VerifyRuntimeMode]>; -def SVUNDEF_2_BF16 : SInst<"svundef2_{d}", "2v", "b", MergeNone, "", [IsUndef, VerifyRuntimeMode]>; -def SVUNDEF_3_BF16 : SInst<"svundef3_{d}", "3v", "b", MergeNone, "", [IsUndef, VerifyRuntimeMode]>; -def SVUNDEF_4_BF16 : SInst<"svundef4_{d}", "4v", "b", MergeNone, "", [IsUndef, VerifyRuntimeMode]>; - -def SVCREATE_2_BF16 : SInst<"svcreate2[_{d}]", "2dd", "b", MergeNone, "", [IsTupleCreate, VerifyRuntimeMode]>; -def SVCREATE_3_BF16 : SInst<"svcreate3[_{d}]", "3ddd", "b", MergeNone, "", [IsTupleCreate, VerifyRuntimeMode]>; -def SVCREATE_4_BF16 : SInst<"svcreate4[_{d}]", "4dddd", "b", MergeNone, "", [IsTupleCreate, VerifyRuntimeMode]>; -} +def SVCREATE_2 : SInst<"svcreate2[_{d}]", "2dd", "csilUcUsUiUlhfdbm", MergeNone, "", [IsTupleCreate, VerifyRuntimeMode]>; +def SVCREATE_3 : SInst<"svcreate3[_{d}]", "3ddd", "csilUcUsUiUlhfdbm", MergeNone, "", [IsTupleCreate, VerifyRuntimeMode]>; +def SVCREATE_4 : SInst<"svcreate4[_{d}]", "4dddd", "csilUcUsUiUlhfdbm", MergeNone, "", [IsTupleCreate, VerifyRuntimeMode]>; let SVETargetGuard = "sve2p1|sme2", SMETargetGuard = "sve2p1|sme2" in { def SVCREATE_2_B : SInst<"svcreate2[_b]", "2dd", "Pc", MergeNone, "", [IsTupleCreate, VerifyRuntimeMode]>; @@ -1336,23 +1229,13 @@ let SVETargetGuard = "sve2p1|sme2", SMETargetGuard = "sve2p1|sme2" in { //////////////////////////////////////////////////////////////////////////////// // Vector insertion and extraction -def SVGET_2 : SInst<"svget2[_{d}]", "d2i", "csilUcUsUiUlhfdm", MergeNone, "", [IsTupleGet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_1>]>; -def SVGET_3 : SInst<"svget3[_{d}]", "d3i", "csilUcUsUiUlhfdm", MergeNone, "", [IsTupleGet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_2>]>; -def SVGET_4 : SInst<"svget4[_{d}]", "d4i", "csilUcUsUiUlhfdm", MergeNone, "", [IsTupleGet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_3>]>; +def 
SVGET_2 : SInst<"svget2[_{d}]", "d2i", "csilUcUsUiUlhfdbm", MergeNone, "", [IsTupleGet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_1>]>; +def SVGET_3 : SInst<"svget3[_{d}]", "d3i", "csilUcUsUiUlhfdbm", MergeNone, "", [IsTupleGet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_2>]>; +def SVGET_4 : SInst<"svget4[_{d}]", "d4i", "csilUcUsUiUlhfdbm", MergeNone, "", [IsTupleGet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_3>]>; -def SVSET_2 : SInst<"svset2[_{d}]", "22id", "csilUcUsUiUlhfdm", MergeNone, "", [IsTupleSet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_1>]>; -def SVSET_3 : SInst<"svset3[_{d}]", "33id", "csilUcUsUiUlhfdm", MergeNone, "", [IsTupleSet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_2>]>; -def SVSET_4 : SInst<"svset4[_{d}]", "44id", "csilUcUsUiUlhfdm", MergeNone, "", [IsTupleSet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_3>]>; - -let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in { -def SVGET_2_BF16 : SInst<"svget2[_{d}]", "d2i", "b", MergeNone, "", [IsTupleGet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_1>]>; -def SVGET_3_BF16 : SInst<"svget3[_{d}]", "d3i", "b", MergeNone, "", [IsTupleGet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_2>]>; -def SVGET_4_BF16 : SInst<"svget4[_{d}]", "d4i", "b", MergeNone, "", [IsTupleGet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_3>]>; - -def SVSET_2_BF16 : SInst<"svset2[_{d}]", "22id", "b", MergeNone, "", [IsTupleSet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_1>]>; -def SVSET_3_BF16 : SInst<"svset3[_{d}]", "33id", "b", MergeNone, "", [IsTupleSet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_2>]>; -def SVSET_4_BF16 : SInst<"svset4[_{d}]", "44id", "b", MergeNone, "", [IsTupleSet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_3>]>; -} +def SVSET_2 : SInst<"svset2[_{d}]", "22id", "csilUcUsUiUlhfdbm", MergeNone, "", [IsTupleSet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_1>]>; +def SVSET_3 : SInst<"svset3[_{d}]", "33id", "csilUcUsUiUlhfdbm", MergeNone, "", [IsTupleSet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_2>]>; 
+def SVSET_4 : SInst<"svset4[_{d}]", "44id", "csilUcUsUiUlhfdbm", MergeNone, "", [IsTupleSet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_3>]>; let SVETargetGuard = "sve2p1|sme2", SMETargetGuard = "sve2p1|sme2" in { def SVGET_2_B : SInst<"svget2[_b]", "d2i", "Pc", MergeNone, "", [IsTupleGet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_1>]>; @@ -1922,49 +1805,33 @@ def SVNMATCH : SInst<"svnmatch[_{d}]", "PPdd", "csUcUs", MergeNone, "aarch64_sve // SVE2 - Contiguous conflict detection let SVETargetGuard = "sve2", SMETargetGuard = "sme" in { def SVWHILERW_B : SInst<"svwhilerw[_{1}]", "Pcc", "cUc", MergeNone, "aarch64_sve_whilerw_b", [IsOverloadWhileRW, VerifyRuntimeMode]>; -def SVWHILERW_H : SInst<"svwhilerw[_{1}]", "Pcc", "sUsh", MergeNone, "aarch64_sve_whilerw_h", [IsOverloadWhileRW, VerifyRuntimeMode]>; +def SVWHILERW_H : SInst<"svwhilerw[_{1}]", "Pcc", "sUshb", MergeNone, "aarch64_sve_whilerw_h", [IsOverloadWhileRW, VerifyRuntimeMode]>; def SVWHILERW_S : SInst<"svwhilerw[_{1}]", "Pcc", "iUif", MergeNone, "aarch64_sve_whilerw_s", [IsOverloadWhileRW, VerifyRuntimeMode]>; def SVWHILERW_D : SInst<"svwhilerw[_{1}]", "Pcc", "lUld", MergeNone, "aarch64_sve_whilerw_d", [IsOverloadWhileRW, VerifyRuntimeMode]>; def SVWHILEWR_B : SInst<"svwhilewr[_{1}]", "Pcc", "cUc", MergeNone, "aarch64_sve_whilewr_b", [IsOverloadWhileRW, VerifyRuntimeMode]>; -def SVWHILEWR_H : SInst<"svwhilewr[_{1}]", "Pcc", "sUsh", MergeNone, "aarch64_sve_whilewr_h", [IsOverloadWhileRW, VerifyRuntimeMode]>; +def SVWHILEWR_H : SInst<"svwhilewr[_{1}]", "Pcc", "sUshb", MergeNone, "aarch64_sve_whilewr_h", [IsOverloadWhileRW, VerifyRuntimeMode]>; def SVWHILEWR_S : SInst<"svwhilewr[_{1}]", "Pcc", "iUif", MergeNone, "aarch64_sve_whilewr_s", [IsOverloadWhileRW, VerifyRuntimeMode]>; def SVWHILEWR_D : SInst<"svwhilewr[_{1}]", "Pcc", "lUld", MergeNone, "aarch64_sve_whilewr_d", [IsOverloadWhileRW, VerifyRuntimeMode]>; } -let SVETargetGuard = "sve2,bf16", SMETargetGuard = "sme,bf16" in { -def SVWHILERW_H_BF16 : 
SInst<"svwhilerw[_{1}]", "Pcc", "b", MergeNone, "aarch64_sve_whilerw_h", [IsOverloadWhileRW, VerifyRuntimeMode]>; -def SVWHILEWR_H_BF16 : SInst<"svwhilewr[_{1}]", "Pcc", "b", MergeNone, "aarch64_sve_whilewr_h", [IsOverloadWhileRW, VerifyRuntimeMode]>; -} - //////////////////////////////////////////////////////////////////////////////// // SVE2 - Extended table lookup/permute let SVETargetGuard = "sve2", SMETargetGuard = "sme" in { -def SVTBL2 : SInst<"svtbl2[_{d}]", "d2u", "csilUcUsUiUlhfd", MergeNone, "", [VerifyRuntimeMode]>; -def SVTBX : SInst<"svtbx[_{d}]", "dddu", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tbx", [VerifyRuntimeMode]>; -} - -let SVETargetGuard = "sve2,bf16", SMETargetGuard = "sme,bf16" in { -def SVTBL2_BF16 : SInst<"svtbl2[_{d}]", "d2u", "b", MergeNone, "", [VerifyRuntimeMode]>; -def SVTBX_BF16 : SInst<"svtbx[_{d}]", "dddu", "b", MergeNone, "aarch64_sve_tbx", [VerifyRuntimeMode]>; +def SVTBL2 : SInst<"svtbl2[_{d}]", "d2u", "csilUcUsUiUlhfdb", MergeNone, "", [VerifyRuntimeMode]>; +def SVTBX : SInst<"svtbx[_{d}]", "dddu", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_tbx", [VerifyRuntimeMode]>; } //////////////////////////////////////////////////////////////////////////////// // SVE2 - Lookup table let SVETargetGuard = "sve2,lut", SMETargetGuard = "sme2,lut" in { def SVLUTI2_B : SInst<"svluti2_lane[_{d}]", "dd[i", "cUc", MergeNone, "aarch64_sve_luti2_lane", [VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_3>]>; - def SVLUTI2_H : SInst<"svluti2_lane[_{d}]", "dd[i", "sUsh", MergeNone, "aarch64_sve_luti2_lane", [VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_7>]>; + def SVLUTI2_H : SInst<"svluti2_lane[_{d}]", "dd[i", "sUshb", MergeNone, "aarch64_sve_luti2_lane", [VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_7>]>; def SVLUTI4_B : SInst<"svluti4_lane[_{d}]", "dd[i", "cUc", MergeNone, "aarch64_sve_luti4_lane", [VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_1>]>; - def SVLUTI4_H : SInst<"svluti4_lane[_{d}]", "dd[i", "sUsh", MergeNone, "aarch64_sve_luti4_lane", 
[VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_3>]>; + def SVLUTI4_H : SInst<"svluti4_lane[_{d}]", "dd[i", "sUshb", MergeNone, "aarch64_sve_luti4_lane", [VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_3>]>; - def SVLUTI4_x2 : SInst<"svluti4_lane[_{d}_x2]", "d2.d[i", "sUsh", MergeNone, "aarch64_sve_luti4_lane_x2", [VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_3>]>; -} - -let SVETargetGuard = "sve2,lut,bf16", SMETargetGuard = "sme2,lut,bf16" in { - def SVLUTI2_BF16 : SInst<"svluti2_lane[_{d}]", "dd[i", "b", MergeNone, "aarch64_sve_luti2_lane", [ VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_7>]>; - def SVLUTI4_BF16 : SInst<"svluti4_lane[_{d}]", "dd[i", "b", MergeNone, "aarch64_sve_luti4_lane", [ VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_3>]>; - def SVLUTI4_BF16_x2 : SInst<"svluti4_lane[_{d}_x2]", "d2.d[i", "b", MergeNone, "aarch64_sve_luti4_lane_x2", [ VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_3>]>; + def SVLUTI4_x2 : SInst<"svluti4_lane[_{d}_x2]", "d2.d[i", "sUshb", MergeNone, "aarch64_sve_luti4_lane_x2", [VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_3>]>; } //////////////////////////////////////////////////////////////////////////////// @@ -2322,15 +2189,11 @@ let SVETargetGuard = "sve2p1|sme2p1", SMETargetGuard = "sve2p1|sme2p1" in { let SVETargetGuard = "sve2p1|sme2p1", SMETargetGuard = "sve2p1|sme2p1" in { // DUPQ def SVDUP_LANEQ_B : SInst<"svdup_laneq[_{d}]", "ddi", "cUcm", MergeNone, "aarch64_sve_dup_laneq", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_15>]>; - def SVDUP_LANEQ_H : SInst<"svdup_laneq[_{d}]", "ddi", "sUsh", MergeNone, "aarch64_sve_dup_laneq", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_7>]>; + def SVDUP_LANEQ_H : SInst<"svdup_laneq[_{d}]", "ddi", "sUshb", MergeNone, "aarch64_sve_dup_laneq", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_7>]>; def SVDUP_LANEQ_S : SInst<"svdup_laneq[_{d}]", "ddi", "iUif", MergeNone, "aarch64_sve_dup_laneq", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_3>]>; def SVDUP_LANEQ_D : SInst<"svdup_laneq[_{d}]", "ddi", "lUld", MergeNone, 
"aarch64_sve_dup_laneq", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_1>]>; } -let SVETargetGuard = "(sve2p1|sme2p1),bf16", SMETargetGuard = "(sve2p1|sme2p1),bf16" in { - def SVDUP_LANEQ_BF16 : SInst<"svdup_laneq[_{d}]", "ddi", "b", MergeNone, "aarch64_sve_dup_laneq", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_7>]>; -} - // // Multi-vector convert to/from floating-point. // diff --git a/clang/include/clang/Basic/riscv_vector.td b/clang/include/clang/Basic/riscv_vector.td index d800028cdcee5..275bb2b9924dd 100644 --- a/clang/include/clang/Basic/riscv_vector.td +++ b/clang/include/clang/Basic/riscv_vector.td @@ -499,7 +499,7 @@ multiclass RVVPseudoVWCVTBuiltin(ResultType)->getElementType(); + auto ElemTy = cast(Ops[1]->getType())->getElementType(); Ops.insert(Ops.begin() + 2, llvm::Constant::getNullValue(ElemTy)); if (IsMasked) { Ops.push_back(ConstantInt::get(Ops.back()->getType(), PolicyAttrs)); diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index 6529f1386599c..99fcb322a42d5 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -1669,6 +1669,94 @@ def GetGlobalOp : CIR_Op<"get_global", }]; } +//===----------------------------------------------------------------------===// +// SetBitfieldOp +//===----------------------------------------------------------------------===// + +def SetBitfieldOp : CIR_Op<"set_bitfield"> { + let summary = "Set the value of a bitfield member"; + let description = [{ + The `cir.set_bitfield` operation provides a store-like access to + a bit field of a record. + + A bitfield info attribute must be provided to describe the location of + the bitfield within the memory referenced by the $addr argument. + The $src argument is inserted at the appropriate place in the memory and + the value that was stored. Returns the value being stored. + + A unit attribute `volatile` can be used to indicate a volatile store of the + bitfield. 
+ ```mlir + cir.set_bitfield(#bfi, %0 : !cir.ptr, %1 : !s32i) {is_volatile} + -> !s32i + ``` + + Example. + Suppose we have a struct with multiple bitfields stored in + different storages. The `cir.set_bitfield` operation sets the value + of the bitfield. + ```C++ + typedef struct { + int a : 4; + int b : 27; + int c : 17; + int d : 2; + int e : 15; + } S; + + void store_bitfield(S& s) { + s.e = 3; + } + ``` + + ```mlir + // 'e' is in the storage with the index 1 + !record_type = !cir.record} #cir.record.decl.ast> + #bfi_e = #cir.bitfield_info + + %1 = cir.const #cir.int<3> : !s32i + %2 = cir.load %0 : !cir.ptr>, !cir.ptr + %3 = cir.get_member %2[1] {name = "e"} : !cir.ptr + -> !cir.ptr + %4 = cir.set_bitfield(#bfi_e, %3 : !cir.ptr, %1 : !s32i) -> !s32i + ``` + }]; + + let arguments = (ins + Arg:$addr, + CIR_AnyType:$src, + BitfieldInfoAttr:$bitfield_info, + UnitAttr:$is_volatile + ); + + let results = (outs CIR_IntType:$result); + + let assemblyFormat = [{ `(`$bitfield_info`,` $addr`:`qualified(type($addr))`,` + $src`:`type($src) `)` attr-dict `->` type($result) }]; + + let builders = [ + OpBuilder<(ins "mlir::Type":$type, + "mlir::Value":$addr, + "mlir::Type":$storage_type, + "mlir::Value":$src, + "llvm::StringRef":$name, + "unsigned":$size, + "unsigned":$offset, + "bool":$is_signed, + "bool":$is_volatile + ), + [{ + BitfieldInfoAttr info = + BitfieldInfoAttr::get($_builder.getContext(), + name, storage_type, + size, offset, is_signed); + build($_builder, $_state, type, addr, src, info, is_volatile); + }]> + ]; +} + //===----------------------------------------------------------------------===// // GetBitfieldOp //===----------------------------------------------------------------------===// @@ -1685,6 +1773,9 @@ def GetBitfieldOp : CIR_Op<"get_bitfield"> { A unit attribute `volatile` can be used to indicate a volatile load of the bitfield. 
+ ```mlir + cir.get_bitfield(#bfi, %0 {is_volatile} : !cir.ptr) -> !s32i + ``` Example: Suppose we have a struct with multiple bitfields stored in @@ -2521,6 +2612,62 @@ def ComplexImagOp : CIR_Op<"complex.imag", [Pure]> { let hasFolder = 1; } +//===----------------------------------------------------------------------===// +// ComplexRealPtrOp +//===----------------------------------------------------------------------===// + +def ComplexRealPtrOp : CIR_Op<"complex.real_ptr", [Pure]> { + let summary = "Derive a pointer to the real part of a complex value"; + let description = [{ + `cir.complex.real_ptr` operation takes a pointer operand that points to a + complex value of type `!cir.complex` and yields a pointer to the real part + of the operand. + + Example: + + ```mlir + %1 = cir.complex.real_ptr %0 : !cir.ptr> + -> !cir.ptr + ``` + }]; + + let results = (outs CIR_PtrToIntOrFloatType:$result); + let arguments = (ins CIR_PtrToComplexType:$operand); + + let assemblyFormat = [{ + $operand `:` + qualified(type($operand)) `->` qualified(type($result)) attr-dict + }]; + + let hasVerifier = 1; +} + +//===----------------------------------------------------------------------===// +// ComplexAddOp +//===----------------------------------------------------------------------===// + +def ComplexAddOp : CIR_Op<"complex.add", [Pure, SameOperandsAndResultType]> { + let summary = "Complex addition"; + let description = [{ + The `cir.complex.add` operation takes two complex numbers and returns + their sum. 
+ + Example: + + ```mlir + %2 = cir.complex.add %0, %1 : !cir.complex + ``` + }]; + + let arguments = (ins CIR_ComplexType:$lhs, CIR_ComplexType:$rhs); + + let results = (outs CIR_ComplexType:$result); + + let assemblyFormat = [{ + $lhs `,` $rhs `:` qualified(type($result)) attr-dict + }]; +} + //===----------------------------------------------------------------------===// // Bit Manipulation Operations //===----------------------------------------------------------------------===// diff --git a/clang/include/clang/CIR/Dialect/IR/CIRTypeConstraints.td b/clang/include/clang/CIR/Dialect/IR/CIRTypeConstraints.td index bcd516e27cc76..2bf77583465a6 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIRTypeConstraints.td +++ b/clang/include/clang/CIR/Dialect/IR/CIRTypeConstraints.td @@ -159,6 +159,12 @@ def CIR_AnyIntOrFloatType : AnyTypeOf<[CIR_AnyFloatType, CIR_AnyIntType], let cppFunctionName = "isAnyIntegerOrFloatingPointType"; } +//===----------------------------------------------------------------------===// +// Complex Type predicates +//===----------------------------------------------------------------------===// + +def CIR_AnyComplexType : CIR_TypeBase<"::cir::ComplexType", "complex type">; + //===----------------------------------------------------------------------===// // Pointer Type predicates //===----------------------------------------------------------------------===// @@ -180,6 +186,17 @@ class CIR_PtrToPtrTo : CIR_ConfinedType], "pointer to pointer to " # summary>; +// Pointee type constraint bases +class CIR_PointeePred : SubstLeaves<"$_self", + "::mlir::cast<::cir::PointerType>($_self).getPointee()", pred>; + +class CIR_PtrToAnyOf types, string summary = ""> +: CIR_ConfinedType)>], + !if(!empty(summary), + "pointer to " # CIR_TypeSummaries.value, + summary)>; + // Void pointer type constraints def CIR_VoidPtrType : CIR_PtrTo<"::cir::VoidType", "void type">, @@ -192,6 +209,13 @@ def CIR_PtrToVoidPtrType "$_builder.getType<" # cppType # ">(" 
"cir::VoidType::get($_builder.getContext())))">; +class CIR_PtrToType : CIR_PtrToAnyOf<[type]>; + +// Pointer to type constraints +def CIR_PtrToIntOrFloatType : CIR_PtrToType; + +def CIR_PtrToComplexType : CIR_PtrToType; + //===----------------------------------------------------------------------===// // Vector Type predicates //===----------------------------------------------------------------------===// diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 928a37785ee16..b1314f2c53a79 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1891,6 +1891,12 @@ defm apinotes_modules : BoolOption<"f", "apinotes-modules", NegFlag, BothFlags<[], [ClangOption, CC1Option], " module-based external API notes support">>, Group; +defm swift_version_independent_apinotes : BoolOption<"f", "swift-version-independent-apinotes", + LangOpts<"SwiftVersionIndependentAPINotes">, DefaultFalse, + PosFlag, + NegFlag, + BothFlags<[], [ClangOption, CC1Option], " version-independent external API notes support">>, + Group; def fapinotes_swift_version : Joined<["-"], "fapinotes-swift-version=">, Group, Visibility<[ClangOption, CC1Option]>, MetaVarName<"">, diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h index 5d1fdb153b26e..74b516fe4f071 100644 --- a/clang/include/clang/Format/Format.h +++ b/clang/include/clang/Format/Format.h @@ -513,9 +513,9 @@ struct FormatStyle { ENAS_LeftWithLastLine, /// Align escaped newlines in the right-most column. 
/// \code - /// #define A \ - /// int aaaa; \ - /// int b; \ + /// #define A \ + /// int aaaa; \ + /// int b; \ /// int dddddddddd; /// \endcode ENAS_Right, diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 443a389ab6a18..b331acbe606b7 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -1614,7 +1614,17 @@ class Sema final : public SemaBase { /// /// Triggered by declaration-attribute processing. void ProcessAPINotes(Decl *D); - + /// Apply the 'Nullability:' annotation to the specified declaration + void ApplyNullability(Decl *D, NullabilityKind Nullability); + /// Apply the 'Type:' annotation to the specified declaration + void ApplyAPINotesType(Decl *D, StringRef TypeString); + + /// Whether APINotes should be gathered for all applicable Swift language + /// versions, without being applied. Leaving clients of the current module + /// to select and apply the correct version. + bool captureSwiftVersionIndependentAPINotes() { + return APINotes.captureVersionIndependentSwift(); + } ///@} // diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h index ee24e5d1543d3..c3601a4e73e1f 100644 --- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h +++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h @@ -57,6 +57,10 @@ struct TranslationUnitDeps { /// determined that the differences are benign for this compilation. std::vector ClangModuleDeps; + /// A list of module names that are visible to this translation unit. This + /// includes both direct and transitive module dependencies. + std::vector VisibleModules; + /// A list of the C++20 named modules this translation unit depends on. 
std::vector NamedModuleDeps; @@ -150,7 +154,7 @@ class DependencyScanningTool { /// Given a compilation context specified via the Clang driver command-line, /// gather modular dependencies of module with the given name, and return the /// information needed for explicit build. - llvm::Expected getModuleDependencies( + llvm::Expected getModuleDependencies( StringRef ModuleName, const std::vector &CommandLine, StringRef CWD, const llvm::DenseSet &AlreadySeen, LookupModuleOutputCallback LookupModuleOutput); @@ -188,6 +192,10 @@ class FullDependencyConsumer : public DependencyConsumer { DirectModuleDeps.push_back(ID); } + void handleVisibleModule(std::string ModuleName) override { + VisibleModules.push_back(ModuleName); + } + void handleContextHash(std::string Hash) override { ContextHash = std::move(Hash); } @@ -201,7 +209,6 @@ class FullDependencyConsumer : public DependencyConsumer { } TranslationUnitDeps takeTranslationUnitDeps(); - ModuleDepsGraph takeModuleGraphDeps(); private: std::vector Dependencies; @@ -210,6 +217,7 @@ class FullDependencyConsumer : public DependencyConsumer { std::string ModuleName; std::vector NamedModuleDeps; std::vector DirectModuleDeps; + std::vector VisibleModules; std::vector Commands; std::string ContextHash; std::vector OutputPaths; diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h index 3e232c79397ce..6060e4b43312e 100644 --- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h +++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h @@ -59,6 +59,8 @@ class DependencyConsumer { virtual void handleDirectModuleDependency(ModuleID MD) = 0; + virtual void handleVisibleModule(std::string ModuleName) = 0; + virtual void handleContextHash(std::string Hash) = 0; }; diff --git a/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h 
b/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h index e96c49883d3c6..4136cb73f7043 100644 --- a/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h +++ b/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h @@ -323,6 +323,11 @@ class ModuleDepCollector final : public DependencyCollector { llvm::MapVector DirectPrebuiltModularDeps; /// Working set of direct modular dependencies. llvm::SetVector DirectModularDeps; + /// Working set of direct modular dependencies, as they were imported. + llvm::SmallPtrSet DirectImports; + /// All direct and transitive visible modules. + llvm::StringSet<> VisibleModules; + /// Options that control the dependency output generation. std::unique_ptr Opts; /// A Clang invocation that's based on the original TU invocation and that has @@ -337,6 +342,9 @@ class ModuleDepCollector final : public DependencyCollector { /// Checks whether the module is known as being prebuilt. bool isPrebuiltModule(const Module *M); + /// Computes all visible modules resolved from direct imports. + void addVisibleModules(); + /// Adds \p Path to \c FileDeps, making it absolute if necessary. void addFileDep(StringRef Path); /// Adds \p Path to \c MD.FileDeps, making it absolute if necessary. diff --git a/clang/lib/APINotes/APINotesManager.cpp b/clang/lib/APINotes/APINotesManager.cpp index 4dc6ffd66bd53..60868ab104c46 100644 --- a/clang/lib/APINotes/APINotesManager.cpp +++ b/clang/lib/APINotes/APINotesManager.cpp @@ -49,7 +49,8 @@ class PrettyStackTraceDoubleString : public llvm::PrettyStackTraceEntry { } // namespace APINotesManager::APINotesManager(SourceManager &SM, const LangOptions &LangOpts) - : SM(SM), ImplicitAPINotes(LangOpts.APINotes) {} + : SM(SM), ImplicitAPINotes(LangOpts.APINotes), + VersionIndependentSwift(LangOpts.SwiftVersionIndependentAPINotes) {} APINotesManager::~APINotesManager() { // Free the API notes readers. 
diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index e7c085750b7ad..afa3b7ea7de7e 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -6532,14 +6532,13 @@ bool Compiler::visitDeclRef(const ValueDecl *D, const Expr *E) { if (DiscardResult) return true; - if (const auto *ECD = dyn_cast(D)) { + if (const auto *ECD = dyn_cast(D)) return this->emitConst(ECD->getInitVal(), E); - } else if (const auto *BD = dyn_cast(D)) { - return this->visit(BD->getBinding()); - } else if (const auto *FuncDecl = dyn_cast(D)) { + if (const auto *FuncDecl = dyn_cast(D)) { const Function *F = getFunction(FuncDecl); return F && this->emitGetFnPtr(F, E); - } else if (const auto *TPOD = dyn_cast(D)) { + } + if (const auto *TPOD = dyn_cast(D)) { if (std::optional Index = P.getOrCreateGlobal(D)) { if (!this->emitGetPtrGlobal(*Index, E)) return false; @@ -6560,13 +6559,15 @@ bool Compiler::visitDeclRef(const ValueDecl *D, const Expr *E) { // value. bool IsReference = D->getType()->isReferenceType(); - // Check for local/global variables and parameters. + // Local variables. if (auto It = Locals.find(D); It != Locals.end()) { const unsigned Offset = It->second.Offset; if (IsReference) return this->emitGetLocal(classifyPrim(E), Offset, E); return this->emitGetPtrLocal(Offset, E); - } else if (auto GlobalIndex = P.getGlobal(D)) { + } + // Global variables. + if (auto GlobalIndex = P.getGlobal(D)) { if (IsReference) { if (!Ctx.getLangOpts().CPlusPlus11) return this->emitGetGlobal(classifyPrim(E), *GlobalIndex, E); @@ -6574,7 +6575,9 @@ bool Compiler::visitDeclRef(const ValueDecl *D, const Expr *E) { } return this->emitGetPtrGlobal(*GlobalIndex, E); - } else if (const auto *PVD = dyn_cast(D)) { + } + // Function parameters. 
+ if (const auto *PVD = dyn_cast(D)) { if (auto It = this->Params.find(PVD); It != this->Params.end()) { if (IsReference || !It->second.IsPtr) return this->emitGetParam(classifyPrim(E), It->second.Offset, E); @@ -6600,7 +6603,7 @@ bool Compiler::visitDeclRef(const ValueDecl *D, const Expr *E) { return this->visitDeclRef(D, E); }; - // Handle lambda captures. + // Lambda captures. if (auto It = this->LambdaCaptures.find(D); It != this->LambdaCaptures.end()) { auto [Offset, IsPtr] = It->second; @@ -6608,12 +6611,17 @@ bool Compiler::visitDeclRef(const ValueDecl *D, const Expr *E) { if (IsPtr) return this->emitGetThisFieldPtr(Offset, E); return this->emitGetPtrThisField(Offset, E); - } else if (const auto *DRE = dyn_cast(E); - DRE && DRE->refersToEnclosingVariableOrCapture()) { + } + + if (const auto *DRE = dyn_cast(E); + DRE && DRE->refersToEnclosingVariableOrCapture()) { if (const auto *VD = dyn_cast(D); VD && VD->isInitCapture()) return revisit(VD); } + if (const auto *BD = dyn_cast(D)) + return this->visit(BD->getBinding()); + // Avoid infinite recursion. 
if (D == InitializingDecl) return this->emitDummyPtr(D, E); @@ -6666,7 +6674,7 @@ bool Compiler::visitDeclRef(const ValueDecl *D, const Expr *E) { if (VD->evaluateValue()) return revisit(VD); - if (!D->getType()->isReferenceType()) + if (!IsReference) return this->emitDummyPtr(D, E); return this->emitInvalidDeclRef(cast(E), diff --git a/clang/lib/AST/ByteCode/Interp.cpp b/clang/lib/AST/ByteCode/Interp.cpp index be77657acabcc..457de2bed37d6 100644 --- a/clang/lib/AST/ByteCode/Interp.cpp +++ b/clang/lib/AST/ByteCode/Interp.cpp @@ -445,13 +445,7 @@ bool CheckConstant(InterpState &S, CodePtr OpPC, const Descriptor *Desc) { assert(Desc); const auto *D = Desc->asVarDecl(); - if (!D || !D->hasGlobalStorage()) - return true; - - if (D == S.EvaluatingDecl) - return true; - - if (D->isConstexpr()) + if (!D || D == S.EvaluatingDecl || D->isConstexpr()) return true; // If we're evaluating the initializer for a constexpr variable in C23, we may @@ -576,23 +570,14 @@ bool CheckConst(InterpState &S, CodePtr OpPC, const Pointer &Ptr) { if (!Ptr.isConst() || Ptr.isMutable()) return true; - // The This pointer is writable in constructors and destructors, - // even if isConst() returns true. - // TODO(perf): We could be hitting this code path quite a lot in complex - // constructors. Is there a better way to do this? - if (S.Current->getFunction()) { - for (const InterpFrame *Frame = S.Current; Frame; Frame = Frame->Caller) { - if (const Function *Func = Frame->getFunction(); - Func && (Func->isConstructor() || Func->isDestructor()) && - Ptr.block() == Frame->getThis().block()) { - return true; - } - } - } - if (!Ptr.isBlockPointer()) return false; + // The This pointer is writable in constructors and destructors, + // even if isConst() returns true. 
+ if (llvm::find(S.InitializingBlocks, Ptr.block())) + return true; + const QualType Ty = Ptr.getType(); const SourceInfo &Loc = S.Current->getSource(OpPC); S.FFDiag(Loc, diag::note_constexpr_modify_const_type) << Ty; @@ -1524,6 +1509,9 @@ bool Call(InterpState &S, CodePtr OpPC, const Function *Func, return false; if (Func->isDestructor() && !CheckDestructor(S, OpPC, ThisPtr)) return false; + + if (Func->isConstructor() || Func->isDestructor()) + S.InitializingBlocks.push_back(ThisPtr.block()); } if (!Func->isFullyCompiled()) @@ -1550,16 +1538,21 @@ bool Call(InterpState &S, CodePtr OpPC, const Function *Func, // Note that we cannot assert(CallResult.hasValue()) here since // Ret() above only sets the APValue if the curent frame doesn't // have a caller set. - if (Interpret(S)) { - NewFrame.release(); // Frame was delete'd already. - assert(S.Current == FrameBefore); - return true; + bool Success = Interpret(S); + // Remove initializing block again. + if (Func->isConstructor() || Func->isDestructor()) + S.InitializingBlocks.pop_back(); + + if (!Success) { + // Interpreting the function failed somehow. Reset to + // previous state. + S.Current = FrameBefore; + return false; } - // Interpreting the function failed somehow. Reset to - // previous state. - S.Current = FrameBefore; - return false; + NewFrame.release(); // Frame was delete'd already. + assert(S.Current == FrameBefore); + return true; } bool CallVirt(InterpState &S, CodePtr OpPC, const Function *Func, diff --git a/clang/lib/AST/ByteCode/InterpState.h b/clang/lib/AST/ByteCode/InterpState.h index 08765561985e2..861e4c38049ab 100644 --- a/clang/lib/AST/ByteCode/InterpState.h +++ b/clang/lib/AST/ByteCode/InterpState.h @@ -190,6 +190,10 @@ class InterpState final : public State, public SourceMapper { std::pair> SeenGlobalTemporaries; + /// List of blocks we're currently running either constructors or destructors + /// for. 
+ llvm::SmallVector InitializingBlocks; + mutable llvm::BumpPtrAllocator Allocator; }; diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index e23e84368516a..9092668c67d92 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11537,12 +11537,12 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { switch (E->getBuiltinCallee()) { case Builtin::BI__builtin_elementwise_add_sat: ResultElements.push_back(APValue( - APSInt(LHS.isSigned() ? LHS.sadd_sat(RHS) : RHS.uadd_sat(RHS), + APSInt(LHS.isSigned() ? LHS.sadd_sat(RHS) : LHS.uadd_sat(RHS), DestEltTy->isUnsignedIntegerOrEnumerationType()))); break; case Builtin::BI__builtin_elementwise_sub_sat: ResultElements.push_back(APValue( - APSInt(LHS.isSigned() ? LHS.ssub_sat(RHS) : RHS.usub_sat(RHS), + APSInt(LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS), DestEltTy->isUnsignedIntegerOrEnumerationType()))); break; } diff --git a/clang/lib/Analysis/CMakeLists.txt b/clang/lib/Analysis/CMakeLists.txt index 8cd3990db4c3e..0523d92480cb3 100644 --- a/clang/lib/Analysis/CMakeLists.txt +++ b/clang/lib/Analysis/CMakeLists.txt @@ -21,6 +21,7 @@ add_clang_library(clangAnalysis FixitUtil.cpp IntervalPartition.cpp IssueHash.cpp + LifetimeSafety.cpp LiveVariables.cpp MacroExpansionContext.cpp ObjCNoReturn.cpp diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp new file mode 100644 index 0000000000000..1f18952ce96da --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety.cpp @@ -0,0 +1,510 @@ +//===- LifetimeSafety.cpp - C++ Lifetime Safety Analysis -*--------- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "clang/Analysis/Analyses/LifetimeSafety.h" +#include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" +#include "clang/AST/StmtVisitor.h" +#include "clang/AST/Type.h" +#include "clang/Analysis/Analyses/PostOrderCFGView.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Analysis/CFG.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/PointerUnion.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/TimeProfiler.h" +#include + +namespace clang { +namespace { + +/// Represents the storage location being borrowed, e.g., a specific stack +/// variable. +/// TODO: Model access paths of other types, e.g., s.field, heap and globals. +struct AccessPath { + const clang::ValueDecl *D; + + AccessPath(const clang::ValueDecl *D) : D(D) {} +}; + +/// A generic, type-safe wrapper for an ID, distinguished by its `Tag` type. +/// Used for giving ID to loans and origins. +template struct ID { + uint32_t Value = 0; + + bool operator==(const ID &Other) const { return Value == Other.Value; } + bool operator!=(const ID &Other) const { return !(*this == Other); } + bool operator<(const ID &Other) const { return Value < Other.Value; } + ID operator++(int) { + ID Tmp = *this; + ++Value; + return Tmp; + } + void Profile(llvm::FoldingSetNodeID &IDBuilder) const { + IDBuilder.AddInteger(Value); + } +}; + +template +inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, ID ID) { + return OS << ID.Value; +} + +using LoanID = ID; +using OriginID = ID; + +/// Information about a single borrow, or "Loan". A loan is created when a +/// reference or pointer is created. +struct Loan { + /// TODO: Represent opaque loans. + /// TODO: Represent nullptr: loans to no path. Accessing it UB! 
Currently it + /// is represented as empty LoanSet + LoanID ID; + AccessPath Path; + SourceLocation IssueLoc; + + Loan(LoanID id, AccessPath path, SourceLocation loc) + : ID(id), Path(path), IssueLoc(loc) {} +}; + +/// An Origin is a symbolic identifier that represents the set of possible +/// loans a pointer-like object could hold at any given time. +/// TODO: Enhance the origin model to handle complex types, pointer +/// indirection and reborrowing. The plan is to move from a single origin per +/// variable/expression to a "list of origins" governed by the Type. +/// For example, the type 'int**' would have two origins. +/// See discussion: +/// https://github.com/llvm/llvm-project/pull/142313/commits/0cd187b01e61b200d92ca0b640789c1586075142#r2137644238 +struct Origin { + OriginID ID; + /// A pointer to the AST node that this origin represents. This union + /// distinguishes between origins from declarations (variables or parameters) + /// and origins from expressions. + llvm::PointerUnion Ptr; + + Origin(OriginID ID, const clang::ValueDecl *D) : ID(ID), Ptr(D) {} + Origin(OriginID ID, const clang::Expr *E) : ID(ID), Ptr(E) {} + + const clang::ValueDecl *getDecl() const { + return Ptr.dyn_cast(); + } + const clang::Expr *getExpr() const { + return Ptr.dyn_cast(); + } +}; + +/// Manages the creation, storage and retrieval of loans. +class LoanManager { +public: + LoanManager() = default; + + Loan &addLoan(AccessPath Path, SourceLocation Loc) { + AllLoans.emplace_back(getNextLoanID(), Path, Loc); + return AllLoans.back(); + } + + const Loan &getLoan(LoanID ID) const { + assert(ID.Value < AllLoans.size()); + return AllLoans[ID.Value]; + } + llvm::ArrayRef getLoans() const { return AllLoans; } + +private: + LoanID getNextLoanID() { return NextLoanID++; } + + LoanID NextLoanID{0}; + /// TODO(opt): Profile and evaluate the usefullness of small buffer + /// optimisation. 
+ llvm::SmallVector AllLoans; +}; + +/// Manages the creation, storage, and retrieval of origins for pointer-like +/// variables and expressions. +class OriginManager { +public: + OriginManager() = default; + + Origin &addOrigin(OriginID ID, const clang::ValueDecl &D) { + AllOrigins.emplace_back(ID, &D); + return AllOrigins.back(); + } + Origin &addOrigin(OriginID ID, const clang::Expr &E) { + AllOrigins.emplace_back(ID, &E); + return AllOrigins.back(); + } + + OriginID get(const Expr &E) { + // Origin of DeclRefExpr is that of the declaration it refers to. + if (const auto *DRE = dyn_cast(&E)) + return get(*DRE->getDecl()); + auto It = ExprToOriginID.find(&E); + // TODO: This should be an assert(It != ExprToOriginID.end()). The current + // implementation falls back to getOrCreate to avoid crashing on + // yet-unhandled pointer expressions, creating an empty origin for them. + if (It == ExprToOriginID.end()) + return getOrCreate(E); + + return It->second; + } + + OriginID get(const ValueDecl &D) { + auto It = DeclToOriginID.find(&D); + // TODO: This should be an assert(It != DeclToOriginID.end()). The current + // implementation falls back to getOrCreate to avoid crashing on + // yet-unhandled pointer expressions, creating an empty origin for them. + if (It == DeclToOriginID.end()) + return getOrCreate(D); + + return It->second; + } + + OriginID getOrCreate(const Expr &E) { + auto It = ExprToOriginID.find(&E); + if (It != ExprToOriginID.end()) + return It->second; + + if (const auto *DRE = dyn_cast(&E)) { + // Origin of DeclRefExpr is that of the declaration it refers to. 
+ return getOrCreate(*DRE->getDecl()); + } + OriginID NewID = getNextOriginID(); + addOrigin(NewID, E); + ExprToOriginID[&E] = NewID; + return NewID; + } + + const Origin &getOrigin(OriginID ID) const { + assert(ID.Value < AllOrigins.size()); + return AllOrigins[ID.Value]; + } + + llvm::ArrayRef getOrigins() const { return AllOrigins; } + + OriginID getOrCreate(const ValueDecl &D) { + auto It = DeclToOriginID.find(&D); + if (It != DeclToOriginID.end()) + return It->second; + OriginID NewID = getNextOriginID(); + addOrigin(NewID, D); + DeclToOriginID[&D] = NewID; + return NewID; + } + +private: + OriginID getNextOriginID() { return NextOriginID++; } + + OriginID NextOriginID{0}; + /// TODO(opt): Profile and evaluate the usefullness of small buffer + /// optimisation. + llvm::SmallVector AllOrigins; + llvm::DenseMap DeclToOriginID; + llvm::DenseMap ExprToOriginID; +}; + +/// An abstract base class for a single, atomic lifetime-relevant event. +class Fact { + +public: + enum class Kind : uint8_t { + /// A new loan is issued from a borrow expression (e.g., &x). + Issue, + /// A loan expires as its underlying storage is freed (e.g., variable goes + /// out of scope). + Expire, + /// An origin is propagated from a source to a destination (e.g., p = q). + AssignOrigin, + /// An origin escapes the function by flowing into the return value. 
+ ReturnOfOrigin + }; + +private: + Kind K; + +protected: + Fact(Kind K) : K(K) {} + +public: + virtual ~Fact() = default; + Kind getKind() const { return K; } + + template const T *getAs() const { + if (T::classof(this)) + return static_cast(this); + return nullptr; + } + + virtual void dump(llvm::raw_ostream &OS) const { + OS << "Fact (Kind: " << static_cast(K) << ")\n"; + } +}; + +class IssueFact : public Fact { + LoanID LID; + OriginID OID; + +public: + static bool classof(const Fact *F) { return F->getKind() == Kind::Issue; } + + IssueFact(LoanID LID, OriginID OID) : Fact(Kind::Issue), LID(LID), OID(OID) {} + LoanID getLoanID() const { return LID; } + OriginID getOriginID() const { return OID; } + void dump(llvm::raw_ostream &OS) const override { + OS << "Issue (LoanID: " << getLoanID() << ", OriginID: " << getOriginID() + << ")\n"; + } +}; + +class ExpireFact : public Fact { + LoanID LID; + +public: + static bool classof(const Fact *F) { return F->getKind() == Kind::Expire; } + + ExpireFact(LoanID LID) : Fact(Kind::Expire), LID(LID) {} + LoanID getLoanID() const { return LID; } + void dump(llvm::raw_ostream &OS) const override { + OS << "Expire (LoanID: " << getLoanID() << ")\n"; + } +}; + +class AssignOriginFact : public Fact { + OriginID OIDDest; + OriginID OIDSrc; + +public: + static bool classof(const Fact *F) { + return F->getKind() == Kind::AssignOrigin; + } + + AssignOriginFact(OriginID OIDDest, OriginID OIDSrc) + : Fact(Kind::AssignOrigin), OIDDest(OIDDest), OIDSrc(OIDSrc) {} + OriginID getDestOriginID() const { return OIDDest; } + OriginID getSrcOriginID() const { return OIDSrc; } + void dump(llvm::raw_ostream &OS) const override { + OS << "AssignOrigin (DestID: " << getDestOriginID() + << ", SrcID: " << getSrcOriginID() << ")\n"; + } +}; + +class ReturnOfOriginFact : public Fact { + OriginID OID; + +public: + static bool classof(const Fact *F) { + return F->getKind() == Kind::ReturnOfOrigin; + } + + ReturnOfOriginFact(OriginID OID) : 
Fact(Kind::ReturnOfOrigin), OID(OID) {} + OriginID getReturnedOriginID() const { return OID; } + void dump(llvm::raw_ostream &OS) const override { + OS << "ReturnOfOrigin (OriginID: " << getReturnedOriginID() << ")\n"; + } +}; + +class FactManager { +public: + llvm::ArrayRef getFacts(const CFGBlock *B) const { + auto It = BlockToFactsMap.find(B); + if (It != BlockToFactsMap.end()) + return It->second; + return {}; + } + + void addBlockFacts(const CFGBlock *B, llvm::ArrayRef NewFacts) { + if (!NewFacts.empty()) + BlockToFactsMap[B].assign(NewFacts.begin(), NewFacts.end()); + } + + template + FactType *createFact(Args &&...args) { + void *Mem = FactAllocator.Allocate(); + return new (Mem) FactType(std::forward(args)...); + } + + void dump(const CFG &Cfg, AnalysisDeclContext &AC) const { + llvm::dbgs() << "==========================================\n"; + llvm::dbgs() << " Lifetime Analysis Facts:\n"; + llvm::dbgs() << "==========================================\n"; + if (const Decl *D = AC.getDecl()) + if (const auto *ND = dyn_cast(D)) + llvm::dbgs() << "Function: " << ND->getQualifiedNameAsString() << "\n"; + // Print blocks in the order as they appear in code for a stable ordering. 
+ for (const CFGBlock *B : *AC.getAnalysis()) { + llvm::dbgs() << " Block B" << B->getBlockID() << ":\n"; + auto It = BlockToFactsMap.find(B); + if (It != BlockToFactsMap.end()) { + for (const Fact *F : It->second) { + llvm::dbgs() << " "; + F->dump(llvm::dbgs()); + } + } + llvm::dbgs() << " End of Block\n"; + } + } + + LoanManager &getLoanMgr() { return LoanMgr; } + OriginManager &getOriginMgr() { return OriginMgr; } + +private: + LoanManager LoanMgr; + OriginManager OriginMgr; + llvm::DenseMap> + BlockToFactsMap; + llvm::BumpPtrAllocator FactAllocator; +}; + +class FactGenerator : public ConstStmtVisitor { + +public: + FactGenerator(FactManager &FactMgr, AnalysisDeclContext &AC) + : FactMgr(FactMgr), AC(AC) {} + + void run() { + llvm::TimeTraceScope TimeProfile("FactGenerator"); + // Iterate through the CFG blocks in reverse post-order to ensure that + // initializations and destructions are processed in the correct sequence. + for (const CFGBlock *Block : *AC.getAnalysis()) { + CurrentBlockFacts.clear(); + for (unsigned I = 0; I < Block->size(); ++I) { + const CFGElement &Element = Block->Elements[I]; + if (std::optional CS = Element.getAs()) + Visit(CS->getStmt()); + else if (std::optional DtorOpt = + Element.getAs()) + handleDestructor(*DtorOpt); + } + FactMgr.addBlockFacts(Block, CurrentBlockFacts); + } + } + + void VisitDeclStmt(const DeclStmt *DS) { + for (const Decl *D : DS->decls()) + if (const auto *VD = dyn_cast(D)) + if (hasOrigin(VD->getType())) + if (const Expr *InitExpr = VD->getInit()) + addAssignOriginFact(*VD, *InitExpr); + } + + void VisitCXXNullPtrLiteralExpr(const CXXNullPtrLiteralExpr *N) { + /// TODO: Handle nullptr expr as a special 'null' loan. Uninitialized + /// pointers can use the same type of loan. 
+ FactMgr.getOriginMgr().getOrCreate(*N); + } + + void VisitImplicitCastExpr(const ImplicitCastExpr *ICE) { + if (!hasOrigin(ICE->getType())) + return; + Visit(ICE->getSubExpr()); + // An ImplicitCastExpr node itself gets an origin, which flows from the + // origin of its sub-expression (after stripping its own parens/casts). + // TODO: Consider if this is actually useful in practice. Alternatively, we + // could directly use the sub-expression's OriginID instead of creating a + // new one. + addAssignOriginFact(*ICE, *ICE->getSubExpr()); + } + + void VisitUnaryOperator(const UnaryOperator *UO) { + if (UO->getOpcode() == UO_AddrOf) { + const Expr *SubExpr = UO->getSubExpr(); + if (const auto *DRE = dyn_cast(SubExpr)) { + if (const auto *VD = dyn_cast(DRE->getDecl())) { + // Check if it's a local variable. + if (VD->hasLocalStorage()) { + OriginID OID = FactMgr.getOriginMgr().getOrCreate(*UO); + AccessPath AddrOfLocalVarPath(VD); + const Loan &L = FactMgr.getLoanMgr().addLoan(AddrOfLocalVarPath, + UO->getOperatorLoc()); + CurrentBlockFacts.push_back( + FactMgr.createFact(L.ID, OID)); + } + } + } + } + } + + void VisitReturnStmt(const ReturnStmt *RS) { + if (const Expr *RetExpr = RS->getRetValue()) { + if (hasOrigin(RetExpr->getType())) { + OriginID OID = FactMgr.getOriginMgr().getOrCreate(*RetExpr); + CurrentBlockFacts.push_back( + FactMgr.createFact(OID)); + } + } + } + + void VisitBinaryOperator(const BinaryOperator *BO) { + if (BO->isAssignmentOp()) { + const Expr *LHSExpr = BO->getLHS(); + const Expr *RHSExpr = BO->getRHS(); + + // We are interested in assignments like `ptr1 = ptr2` or `ptr = &var` + // LHS must be a pointer/reference type that can be an origin. + // RHS must also represent an origin (either another pointer/ref or an + // address-of). 
+ if (const auto *DRE_LHS = dyn_cast(LHSExpr)) + if (const auto *VD_LHS = + dyn_cast(DRE_LHS->getDecl()->getCanonicalDecl()); + VD_LHS && hasOrigin(VD_LHS->getType())) + addAssignOriginFact(*VD_LHS, *RHSExpr); + } + } + +private: + // Check if a type has an origin. + bool hasOrigin(QualType QT) { return QT->isPointerOrReferenceType(); } + + template + void addAssignOriginFact(const Destination &D, const Source &S) { + OriginID DestOID = FactMgr.getOriginMgr().getOrCreate(D); + OriginID SrcOID = FactMgr.getOriginMgr().get(S); + CurrentBlockFacts.push_back( + FactMgr.createFact(DestOID, SrcOID)); + } + + void handleDestructor(const CFGAutomaticObjDtor &DtorOpt) { + /// TODO: Also handle trivial destructors (e.g., for `int` + /// variables) which will never have a CFGAutomaticObjDtor node. + /// TODO: Handle loans to temporaries. + /// TODO: Consider using clang::CFG::BuildOptions::AddLifetime to reuse the + /// lifetime ends. + const VarDecl *DestructedVD = DtorOpt.getVarDecl(); + if (!DestructedVD) + return; + // Iterate through all loans to see if any expire. + /// TODO(opt): Do better than a linear search to find loans associated with + /// 'DestructedVD'. + for (const Loan &L : FactMgr.getLoanMgr().getLoans()) { + const AccessPath &LoanPath = L.Path; + // Check if the loan is for a stack variable and if that variable + // is the one being destructed. + if (LoanPath.D == DestructedVD) + CurrentBlockFacts.push_back(FactMgr.createFact(L.ID)); + } + } + + FactManager &FactMgr; + AnalysisDeclContext &AC; + llvm::SmallVector CurrentBlockFacts; +}; + +// ========================================================================= // +// TODO: Run dataflow analysis to propagate loans, analyse and error reporting. 
+// ========================================================================= // +} // anonymous namespace + +void runLifetimeSafetyAnalysis(const DeclContext &DC, const CFG &Cfg, + AnalysisDeclContext &AC) { + llvm::TimeTraceScope TimeProfile("LifetimeSafetyAnalysis"); + DEBUG_WITH_TYPE("PrintCFG", Cfg.dump(AC.getASTContext().getLangOpts(), + /*ShowColors=*/true)); + FactManager FactMgr; + FactGenerator FactGen(FactMgr, AC); + FactGen.run(); + DEBUG_WITH_TYPE("LifetimeFacts", FactMgr.dump(Cfg, AC)); +} +} // namespace clang diff --git a/clang/lib/Analysis/UninitializedValues.cpp b/clang/lib/Analysis/UninitializedValues.cpp index b2a68b6c39a7e..8c9cf8dac79ed 100644 --- a/clang/lib/Analysis/UninitializedValues.cpp +++ b/clang/lib/Analysis/UninitializedValues.cpp @@ -161,8 +161,7 @@ class CFGBlockValues { ValueVector::reference operator[](const VarDecl *vd); - Value getValue(const CFGBlock *block, const CFGBlock *dstBlock, - const VarDecl *vd) { + Value getValue(const CFGBlock *block, const VarDecl *vd) { std::optional idx = declToIndex.getValueIndex(vd); return getValueVector(block)[*idx]; } @@ -589,12 +588,12 @@ class TransferFunctions : public StmtVisitor { if (!Pred) continue; - Value AtPredExit = vals.getValue(Pred, B, vd); + Value AtPredExit = vals.getValue(Pred, vd); if (AtPredExit == Initialized) // This block initializes the variable. continue; if (AtPredExit == MayUninitialized && - vals.getValue(B, nullptr, vd) == Uninitialized) { + vals.getValue(B, vd) == Uninitialized) { // This block declares the variable (uninitialized), and is reachable // from a block that initializes the variable. We can't guarantee to // give an earlier location for the diagnostic (and it appears that @@ -625,6 +624,8 @@ class TransferFunctions : public StmtVisitor { // Scan the frontier, looking for blocks where the variable was // uninitialized. 
for (const auto *Block : cfg) { + if (vals.getValue(Block, vd) != Uninitialized) + continue; unsigned BlockID = Block->getBlockID(); const Stmt *Term = Block->getTerminatorStmt(); if (SuccsVisited[BlockID] && SuccsVisited[BlockID] < Block->succ_size() && @@ -635,8 +636,7 @@ class TransferFunctions : public StmtVisitor { for (CFGBlock::const_succ_iterator I = Block->succ_begin(), E = Block->succ_end(); I != E; ++I) { const CFGBlock *Succ = *I; - if (Succ && SuccsVisited[Succ->getBlockID()] >= Succ->succ_size() && - vals.getValue(Block, Succ, vd) == Uninitialized) { + if (Succ && SuccsVisited[Succ->getBlockID()] >= Succ->succ_size()) { // Switch cases are a special case: report the label to the caller // as the 'terminator', not the switch statement itself. Suppress // situations where no label matched: we can't be sure that's @@ -675,8 +675,11 @@ void TransferFunctions::reportUse(const Expr *ex, const VarDecl *vd) { void TransferFunctions::reportConstRefUse(const Expr *ex, const VarDecl *vd) { Value v = vals[vd]; - if (isAlwaysUninit(v)) - handler.handleConstRefUseOfUninitVariable(vd, getUninitUse(ex, vd, v)); + if (isAlwaysUninit(v)) { + auto use = getUninitUse(ex, vd, v); + use.setConstRefUse(); + handler.handleUseOfUninitVariable(vd, use); + } } void TransferFunctions::VisitObjCForCollectionStmt(ObjCForCollectionStmt *FS) { @@ -891,12 +894,6 @@ struct PruneBlocksHandler : public UninitVariablesHandler { hadAnyUse = true; } - void handleConstRefUseOfUninitVariable(const VarDecl *vd, - const UninitUse &use) override { - hadUse[currentBlock] = true; - hadAnyUse = true; - } - /// Called when the uninitialized variable analysis detects the /// idiom 'int x = x'. All other uses of 'x' within the initializer /// are handled by handleUseOfUninitVariable. 
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuilder.h b/clang/lib/CIR/CodeGen/CIRGenBuilder.h index 0b33f6c7d03b7..d0b6b6918f0e2 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuilder.h +++ b/clang/lib/CIR/CodeGen/CIRGenBuilder.h @@ -364,6 +364,20 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy { return create(loc, operandTy.getElementType(), operand); } + /// Create a cir.complex.real_ptr operation that derives a pointer to the real + /// part of the complex value pointed to by the specified pointer value. + mlir::Value createComplexRealPtr(mlir::Location loc, mlir::Value value) { + auto srcPtrTy = mlir::cast(value.getType()); + auto srcComplexTy = mlir::cast(srcPtrTy.getPointee()); + return create( + loc, getPointerTo(srcComplexTy.getElementType()), value); + } + + Address createComplexRealPtr(mlir::Location loc, Address addr) { + return Address{createComplexRealPtr(loc, addr.getPointer()), + addr.getAlignment()}; + } + /// Create a cir.ptr_stride operation to get access to an array element. /// \p idx is the index of the element to access, \p shouldDecay is true if /// the result should decay to a pointer to the element type. 
@@ -394,6 +408,15 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy { return createGlobal(module, loc, uniqueName, type, linkage); } + mlir::Value createSetBitfield(mlir::Location loc, mlir::Type resultType, + mlir::Value dstAddr, mlir::Type storageType, + mlir::Value src, const CIRGenBitFieldInfo &info, + bool isLvalueVolatile, bool useVolatile) { + return create(loc, resultType, dstAddr, storageType, + src, info.name, info.size, info.offset, + info.isSigned, isLvalueVolatile); + } + mlir::Value createGetBitfield(mlir::Location loc, mlir::Type resultType, mlir::Value addr, mlir::Type storageType, const CIRGenBitFieldInfo &info, diff --git a/clang/lib/CIR/CodeGen/CIRGenClass.cpp b/clang/lib/CIR/CodeGen/CIRGenClass.cpp index da8166a596d42..cc4a615dc392e 100644 --- a/clang/lib/CIR/CodeGen/CIRGenClass.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenClass.cpp @@ -117,6 +117,75 @@ static void emitMemberInitializer(CIRGenFunction &cgf, cgf.emitInitializerForField(field, lhs, memberInit->getInit()); } +static bool isInitializerOfDynamicClass(const CXXCtorInitializer *baseInit) { + const Type *baseType = baseInit->getBaseClass(); + const auto *baseClassDecl = + cast(baseType->castAs()->getDecl()); + return baseClassDecl->isDynamicClass(); +} + +/// Gets the address of a direct base class within a complete object. +/// This should only be used for (1) non-virtual bases or (2) virtual bases +/// when the type is known to be complete (e.g. in complete destructors). +/// +/// The object pointed to by 'thisAddr' is assumed to be non-null. +Address CIRGenFunction::getAddressOfDirectBaseInCompleteClass( + mlir::Location loc, Address thisAddr, const CXXRecordDecl *derived, + const CXXRecordDecl *base, bool baseIsVirtual) { + // 'thisAddr' must be a pointer (in some address space) to Derived. + assert(thisAddr.getElementType() == convertType(derived)); + + // Compute the offset of the virtual base. 
+ CharUnits offset; + const ASTRecordLayout &layout = getContext().getASTRecordLayout(derived); + if (baseIsVirtual) + offset = layout.getVBaseClassOffset(base); + else + offset = layout.getBaseClassOffset(base); + + return builder.createBaseClassAddr(loc, thisAddr, convertType(base), + offset.getQuantity(), + /*assumeNotNull=*/true); +} + +void CIRGenFunction::emitBaseInitializer(mlir::Location loc, + const CXXRecordDecl *classDecl, + CXXCtorInitializer *baseInit) { + assert(curFuncDecl && "loading 'this' without a func declaration?"); + assert(isa(curFuncDecl)); + + assert(baseInit->isBaseInitializer() && "Must have base initializer!"); + + Address thisPtr = loadCXXThisAddress(); + + const Type *baseType = baseInit->getBaseClass(); + const auto *baseClassDecl = + cast(baseType->castAs()->getDecl()); + + bool isBaseVirtual = baseInit->isBaseVirtual(); + + // If the initializer for the base (other than the constructor + // itself) accesses 'this' in any way, we need to initialize the + // vtables. + if (classDecl->isDynamicClass()) { + cgm.errorNYI(loc, "emitBaseInitializer: dynamic class"); + return; + } + + // We can pretend to be a complete class because it only matters for + // virtual bases, and we only do virtual bases for complete ctors. + Address v = getAddressOfDirectBaseInCompleteClass( + loc, thisPtr, classDecl, baseClassDecl, isBaseVirtual); + assert(!cir::MissingFeatures::aggValueSlotGC()); + AggValueSlot aggSlot = AggValueSlot::forAddr( + v, Qualifiers(), AggValueSlot::IsDestructed, AggValueSlot::IsNotAliased, + getOverlapForBaseInit(classDecl, baseClassDecl, isBaseVirtual)); + + emitAggExpr(baseInit->getInit(), aggSlot); + + assert(!cir::MissingFeatures::requiresCleanups()); +} + /// This routine generates necessary code to initialize base classes and /// non-static data members belonging to this constructor. 
void CIRGenFunction::emitCtorPrologue(const CXXConstructorDecl *cd, @@ -154,12 +223,29 @@ void CIRGenFunction::emitCtorPrologue(const CXXConstructorDecl *cd, return; } - if ((*b)->isBaseInitializer()) { + const mlir::Value oldThisValue = cxxThisValue; + if (!constructVBases && (*b)->isBaseInitializer() && (*b)->isBaseVirtual()) { cgm.errorNYI(cd->getSourceRange(), - "emitCtorPrologue: non-virtual base initializer"); + "emitCtorPrologue: virtual base initializer"); return; } + // Handle non-virtual base initializers. + for (; b != e && (*b)->isBaseInitializer(); b++) { + assert(!(*b)->isBaseVirtual()); + + if (cgm.getCodeGenOpts().StrictVTablePointers && + cgm.getCodeGenOpts().OptimizationLevel > 0 && + isInitializerOfDynamicClass(*b)) { + cgm.errorNYI(cd->getSourceRange(), + "emitCtorPrologue: strict vtable pointers"); + return; + } + emitBaseInitializer(getLoc(cd->getBeginLoc()), classDecl, *b); + } + + cxxThisValue = oldThisValue; + if (classDecl->isDynamicClass()) { cgm.errorNYI(cd->getSourceRange(), "emitCtorPrologue: initialize vtable pointers"); diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp index 300ba7a456e4b..b1d6b8047a0ab 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp @@ -224,6 +224,10 @@ void CIRGenFunction::emitStoreThroughLValue(RValue src, LValue dst, return; } + assert(dst.isBitField() && "Unknown LValue type"); + emitStoreThroughBitfieldLValue(src, dst); + return; + cgm.errorNYI(dst.getPointer().getLoc(), "emitStoreThroughLValue: non-simple lvalue"); return; @@ -321,9 +325,21 @@ void CIRGenFunction::emitStoreOfScalar(mlir::Value value, Address addr, mlir::Value CIRGenFunction::emitStoreThroughBitfieldLValue(RValue src, LValue dst) { - assert(!cir::MissingFeatures::bitfields()); - cgm.errorNYI("bitfields"); - return {}; + + assert(!cir::MissingFeatures::armComputeVolatileBitfields()); + + const CIRGenBitFieldInfo &info = dst.getBitFieldInfo(); + mlir::Type resLTy 
= convertTypeForMem(dst.getType()); + Address ptr = dst.getBitFieldAddress(); + + assert(!cir::MissingFeatures::armComputeVolatileBitfields()); + const bool useVolatile = false; + + mlir::Value dstAddr = dst.getAddress().getPointer(); + + return builder.createSetBitfield(dstAddr.getLoc(), resLTy, dstAddr, + ptr.getElementType(), src.getValue(), info, + dst.isVolatileQualified(), useVolatile); } RValue CIRGenFunction::emitLoadOfBitfieldLValue(LValue lv, SourceLocation loc) { @@ -621,8 +637,30 @@ LValue CIRGenFunction::emitUnaryOpLValue(const UnaryOperator *e) { } case UO_Real: case UO_Imag: { - cgm.errorNYI(e->getSourceRange(), "UnaryOp real/imag"); - return LValue(); + if (op == UO_Imag) { + cgm.errorNYI(e->getSourceRange(), "UnaryOp real/imag"); + return LValue(); + } + + LValue lv = emitLValue(e->getSubExpr()); + assert(lv.isSimple() && "real/imag on non-ordinary l-value"); + + // __real is valid on scalars. This is a faster way of testing that. + // __imag can only produce an rvalue on scalars. 
+ if (e->getOpcode() == UO_Real && + !mlir::isa(lv.getAddress().getElementType())) { + assert(e->getSubExpr()->getType()->isArithmeticType()); + return lv; + } + + QualType exprTy = getContext().getCanonicalType(e->getSubExpr()->getType()); + QualType elemTy = exprTy->castAs()->getElementType(); + mlir::Location loc = getLoc(e->getExprLoc()); + Address component = builder.createComplexRealPtr(loc, lv.getAddress()); + assert(!cir::MissingFeatures::opTBAA()); + LValue elemLV = makeAddrLValue(component, elemTy); + elemLV.getQuals().addQualifiers(lv.getQuals()); + return elemLV; } case UO_PreInc: case UO_PreDec: { @@ -1062,11 +1100,10 @@ LValue CIRGenFunction::emitBinaryOperatorLValue(const BinaryOperator *e) { LValue lv = emitLValue(e->getLHS()); SourceLocRAIIObject loc{*this, getLoc(e->getSourceRange())}; - if (lv.isBitField()) { - cgm.errorNYI(e->getSourceRange(), "bitfields"); - return {}; - } - emitStoreThroughLValue(rv, lv); + if (lv.isBitField()) + emitStoreThroughBitfieldLValue(rv, lv); + else + emitStoreThroughLValue(rv, lv); if (getLangOpts().OpenMP) { cgm.errorNYI(e->getSourceRange(), "openmp"); @@ -1578,10 +1615,15 @@ void CIRGenFunction::emitCXXConstructExpr(const CXXConstructExpr *e, delegating = true; break; case CXXConstructionKind::VirtualBase: - case CXXConstructionKind::NonVirtualBase: + // This should just set 'forVirtualBase' to true and fall through, but + // virtual base class support is otherwise missing, so this needs to wait + // until it can be tested. 
cgm.errorNYI(e->getSourceRange(), - "emitCXXConstructExpr: other construction kind"); + "emitCXXConstructExpr: virtual base constructor"); return; + case CXXConstructionKind::NonVirtualBase: + type = Ctor_Base; + break; } emitCXXConstructorCall(cd, type, forVirtualBase, delegating, dest, e); diff --git a/clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp b/clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp index ffe1b701b244e..0d12c5c3edded 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp @@ -16,6 +16,7 @@ #include "clang/CIR/Dialect/IR/CIRAttrs.h" #include "clang/AST/Expr.h" +#include "clang/AST/RecordLayout.h" #include "clang/AST/StmtVisitor.h" #include @@ -362,6 +363,28 @@ void AggExprEmitter::visitCXXParenListOrInitListExpr( "visitCXXParenListOrInitListExpr Record or VariableSizeArray type"); } +// TODO(cir): This could be shared with classic codegen. +AggValueSlot::Overlap_t CIRGenFunction::getOverlapForBaseInit( + const CXXRecordDecl *rd, const CXXRecordDecl *baseRD, bool isVirtual) { + // If the most-derived object is a field declared with [[no_unique_address]], + // the tail padding of any virtual base could be reused for other subobjects + // of that field's class. + if (isVirtual) + return AggValueSlot::MayOverlap; + + // If the base class is laid out entirely within the nvsize of the derived + // class, its tail padding cannot yet be initialized, so we can issue + // stores at the full width of the base class. + const ASTRecordLayout &layout = getContext().getASTRecordLayout(rd); + if (layout.getBaseClassOffset(baseRD) + + getContext().getASTRecordLayout(baseRD).getSize() <= + layout.getNonVirtualSize()) + return AggValueSlot::DoesNotOverlap; + + // The tail padding may contain values we need to preserve. 
+ return AggValueSlot::MayOverlap; +} + void CIRGenFunction::emitAggExpr(const Expr *e, AggValueSlot slot) { AggExprEmitter(*this, slot).Visit(const_cast(e)); } diff --git a/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp b/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp index 84fad959ebf49..cb83691b4452d 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp @@ -57,6 +57,55 @@ class ComplexExprEmitter : public StmtVisitor { mlir::Value VisitSubstNonTypeTemplateParmExpr(SubstNonTypeTemplateParmExpr *e); mlir::Value VisitUnaryDeref(const Expr *e); + + struct BinOpInfo { + mlir::Location loc; + mlir::Value lhs{}; + mlir::Value rhs{}; + QualType ty{}; // Computation Type. + FPOptions fpFeatures{}; + }; + + BinOpInfo emitBinOps(const BinaryOperator *e, + QualType promotionTy = QualType()); + + mlir::Value emitPromoted(const Expr *e, QualType promotionTy); + + mlir::Value emitPromotedComplexOperand(const Expr *e, QualType promotionTy); + + mlir::Value emitBinAdd(const BinOpInfo &op); + + QualType getPromotionType(QualType ty, bool isDivOpCode = false) { + if (auto *complexTy = ty->getAs()) { + QualType elementTy = complexTy->getElementType(); + if (isDivOpCode && elementTy->isFloatingType() && + cgf.getLangOpts().getComplexRange() == + LangOptions::ComplexRangeKind::CX_Promoted) { + cgf.cgm.errorNYI("HigherPrecisionTypeForComplexArithmetic"); + return QualType(); + } + + if (elementTy.UseExcessPrecision(cgf.getContext())) + return cgf.getContext().getComplexType(cgf.getContext().FloatTy); + } + + if (ty.UseExcessPrecision(cgf.getContext())) + return cgf.getContext().FloatTy; + return QualType(); + } + +#define HANDLEBINOP(OP) \ + mlir::Value VisitBin##OP(const BinaryOperator *e) { \ + QualType promotionTy = getPromotionType( \ + e->getType(), e->getOpcode() == BinaryOperatorKind::BO_Div); \ + mlir::Value result = emitBin##OP(emitBinOps(e, promotionTy)); \ + if (!promotionTy.isNull()) \ + cgf.cgm.errorNYI("Binop 
emitUnPromotedValue"); \ + return result; \ + } + + HANDLEBINOP(Add) +#undef HANDLEBINOP }; } // namespace @@ -242,12 +291,8 @@ mlir::Value ComplexExprEmitter::VisitInitListExpr(const InitListExpr *e) { } assert(e->getNumInits() == 0 && "Unexpected number of inits"); - QualType complexElemTy = - e->getType()->castAs()->getElementType(); - mlir::Type complexElemLLVMTy = cgf.convertType(complexElemTy); - mlir::TypedAttr defaultValue = builder.getZeroInitAttr(complexElemLLVMTy); - auto complexAttr = cir::ConstComplexAttr::get(defaultValue, defaultValue); - return builder.create(loc, complexAttr); + mlir::Type complexTy = cgf.convertType(e->getType()); + return builder.getNullValue(complexTy, loc); } mlir::Value @@ -291,6 +336,60 @@ mlir::Value ComplexExprEmitter::VisitUnaryDeref(const Expr *e) { return emitLoadOfLValue(e); } +mlir::Value ComplexExprEmitter::emitPromoted(const Expr *e, + QualType promotionTy) { + e = e->IgnoreParens(); + if (const auto *bo = dyn_cast(e)) { + switch (bo->getOpcode()) { +#define HANDLE_BINOP(OP) \ + case BO_##OP: \ + return emitBin##OP(emitBinOps(bo, promotionTy)); + HANDLE_BINOP(Add) +#undef HANDLE_BINOP + default: + break; + } + } else if (isa(e)) { + cgf.cgm.errorNYI("emitPromoted UnaryOperator"); + return {}; + } + + mlir::Value result = Visit(const_cast(e)); + if (!promotionTy.isNull()) + cgf.cgm.errorNYI("emitPromoted emitPromotedValue"); + + return result; +} + +mlir::Value +ComplexExprEmitter::emitPromotedComplexOperand(const Expr *e, + QualType promotionTy) { + if (e->getType()->isAnyComplexType()) { + if (!promotionTy.isNull()) + return cgf.emitPromotedComplexExpr(e, promotionTy); + return Visit(const_cast(e)); + } + + cgf.cgm.errorNYI("emitPromotedComplexOperand non-complex type"); + return {}; +} + +ComplexExprEmitter::BinOpInfo +ComplexExprEmitter::emitBinOps(const BinaryOperator *e, QualType promotionTy) { + BinOpInfo binOpInfo{cgf.getLoc(e->getExprLoc())}; + binOpInfo.lhs = emitPromotedComplexOperand(e->getLHS(), 
promotionTy); + binOpInfo.rhs = emitPromotedComplexOperand(e->getRHS(), promotionTy); + binOpInfo.ty = promotionTy.isNull() ? e->getType() : promotionTy; + binOpInfo.fpFeatures = e->getFPFeaturesInEffect(cgf.getLangOpts()); + return binOpInfo; +} + +mlir::Value ComplexExprEmitter::emitBinAdd(const BinOpInfo &op) { + assert(!cir::MissingFeatures::fastMathFlags()); + assert(!cir::MissingFeatures::cgFPOptionsRAII()); + return builder.create(op.loc, op.lhs, op.rhs); +} + LValue CIRGenFunction::emitComplexAssignmentLValue(const BinaryOperator *e) { assert(e->getOpcode() == BO_Assign && "Expected assign op"); @@ -313,3 +412,8 @@ void CIRGenFunction::emitStoreOfComplex(mlir::Location loc, mlir::Value v, LValue dest, bool isInit) { ComplexExprEmitter(*this).emitStoreOfComplex(loc, v, dest, isInit); } + +mlir::Value CIRGenFunction::emitPromotedComplexExpr(const Expr *e, + QualType promotionType) { + return ComplexExprEmitter(*this).emitPromoted(e, promotionType); +} diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index 76353bae68e21..5feb5fc94d983 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -562,6 +562,19 @@ class CIRGenFunction : public CIRGenTypeCache { } Address loadCXXThisAddress(); + /// Convert the given pointer to a complete class to the given direct base. + Address getAddressOfDirectBaseInCompleteClass(mlir::Location loc, + Address value, + const CXXRecordDecl *derived, + const CXXRecordDecl *base, + bool baseIsVirtual); + + /// Determine whether a base class initialization may overlap some other + /// object. + AggValueSlot::Overlap_t getOverlapForBaseInit(const CXXRecordDecl *rd, + const CXXRecordDecl *baseRD, + bool isVirtual); + /// Get an appropriate 'undef' rvalue for the given type. /// TODO: What's the equivalent for MLIR? 
Currently we're only using this for /// void types so it just returns RValue::get(nullptr) but it'll need @@ -762,6 +775,9 @@ class CIRGenFunction : public CIRGenTypeCache { void emitAutoVarCleanups(const AutoVarEmission &emission); void emitAutoVarInit(const AutoVarEmission &emission); + void emitBaseInitializer(mlir::Location loc, const CXXRecordDecl *classDecl, + CXXCtorInitializer *baseInit); + LValue emitBinaryOperatorLValue(const BinaryOperator *e); mlir::LogicalResult emitBreakStmt(const clang::BreakStmt &s); @@ -886,6 +902,8 @@ class CIRGenFunction : public CIRGenTypeCache { void emitInitializerForField(clang::FieldDecl *field, LValue lhs, clang::Expr *init); + mlir::Value emitPromotedComplexExpr(const Expr *e, QualType promotionType); + mlir::Value emitPromotedScalarExpr(const Expr *e, QualType promotionType); /// Emit the computation of the specified expression of scalar type. diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index c1434ee697f4c..8b2883b50d2e2 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -1258,6 +1258,8 @@ void CIRGenModule::emitTopLevelDecl(Decl *decl) { case Decl::Enum: case Decl::Using: // using X; [C++] case Decl::UsingDirective: // using namespace X; [C++] + case Decl::UsingEnum: // using enum X; [C++] + case Decl::NamespaceAlias: case Decl::Typedef: case Decl::TypeAlias: // using foo = bar; [C++11] case Decl::Record: diff --git a/clang/lib/CIR/CodeGen/CIRGenValue.h b/clang/lib/CIR/CodeGen/CIRGenValue.h index e1b0f805a7b21..0a6dba5e80a62 100644 --- a/clang/lib/CIR/CodeGen/CIRGenValue.h +++ b/clang/lib/CIR/CodeGen/CIRGenValue.h @@ -186,6 +186,8 @@ class LValue { bool isBitField() const { return lvType == BitField; } bool isVolatile() const { return quals.hasVolatile(); } + bool isVolatileQualified() const { return quals.hasVolatile(); } + unsigned getVRQualifiers() const { return quals.getCVRQualifiers() & ~clang::Qualifiers::Const; } 
diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp index 8512b229c2663..5fe5ac827fff9 100644 --- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp @@ -2066,6 +2066,10 @@ LogicalResult cir::ComplexRealOp::verify() { } OpFoldResult cir::ComplexRealOp::fold(FoldAdaptor adaptor) { + if (auto complexCreateOp = + dyn_cast_or_null(getOperand().getDefiningOp())) + return complexCreateOp.getOperand(0); + auto complex = mlir::cast_if_present(adaptor.getOperand()); return complex ? complex.getReal() : nullptr; @@ -2084,11 +2088,33 @@ LogicalResult cir::ComplexImagOp::verify() { } OpFoldResult cir::ComplexImagOp::fold(FoldAdaptor adaptor) { + if (auto complexCreateOp = + dyn_cast_or_null(getOperand().getDefiningOp())) + return complexCreateOp.getOperand(1); + auto complex = mlir::cast_if_present(adaptor.getOperand()); return complex ? complex.getImag() : nullptr; } +//===----------------------------------------------------------------------===// +// ComplexRealPtrOp +//===----------------------------------------------------------------------===// + +LogicalResult cir::ComplexRealPtrOp::verify() { + mlir::Type resultPointeeTy = getType().getPointee(); + cir::PointerType operandPtrTy = getOperand().getType(); + auto operandPointeeTy = + mlir::cast(operandPtrTy.getPointee()); + + if (resultPointeeTy != operandPointeeTy.getElementType()) { + emitOpError() << ": result type does not match operand type"; + return failure(); + } + + return success(); +} + //===----------------------------------------------------------------------===// // TableGen'd op method definitions //===----------------------------------------------------------------------===// diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index af307f6ad673d..3446265769a2c 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ 
b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -2048,9 +2048,11 @@ void ConvertCIRToLLVMPass::runOnOperation() { CIRToLLVMBrOpLowering, CIRToLLVMCallOpLowering, CIRToLLVMCmpOpLowering, + CIRToLLVMComplexAddOpLowering, CIRToLLVMComplexCreateOpLowering, CIRToLLVMComplexImagOpLowering, CIRToLLVMComplexRealOpLowering, + CIRToLLVMComplexRealPtrOpLowering, CIRToLLVMConstantOpLowering, CIRToLLVMExpectOpLowering, CIRToLLVMFuncOpLowering, @@ -2058,6 +2060,7 @@ void ConvertCIRToLLVMPass::runOnOperation() { CIRToLLVMGetGlobalOpLowering, CIRToLLVMGetMemberOpLowering, CIRToLLVMSelectOpLowering, + CIRToLLVMSetBitfieldOpLowering, CIRToLLVMShiftOpLowering, CIRToLLVMStackRestoreOpLowering, CIRToLLVMStackSaveOpLowering, @@ -2357,6 +2360,55 @@ mlir::LogicalResult CIRToLLVMVecTernaryOpLowering::matchAndRewrite( return mlir::success(); } +mlir::LogicalResult CIRToLLVMComplexAddOpLowering::matchAndRewrite( + cir::ComplexAddOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const { + mlir::Value lhs = adaptor.getLhs(); + mlir::Value rhs = adaptor.getRhs(); + mlir::Location loc = op.getLoc(); + + auto complexType = mlir::cast(op.getLhs().getType()); + mlir::Type complexElemTy = + getTypeConverter()->convertType(complexType.getElementType()); + auto lhsReal = + rewriter.create(loc, complexElemTy, lhs, 0); + auto lhsImag = + rewriter.create(loc, complexElemTy, lhs, 1); + auto rhsReal = + rewriter.create(loc, complexElemTy, rhs, 0); + auto rhsImag = + rewriter.create(loc, complexElemTy, rhs, 1); + + mlir::Value newReal; + mlir::Value newImag; + if (complexElemTy.isInteger()) { + newReal = rewriter.create(loc, complexElemTy, lhsReal, + rhsReal); + newImag = rewriter.create(loc, complexElemTy, lhsImag, + rhsImag); + } else { + assert(!cir::MissingFeatures::fastMathFlags()); + assert(!cir::MissingFeatures::fpConstraints()); + newReal = rewriter.create(loc, complexElemTy, lhsReal, + rhsReal); + newImag = rewriter.create(loc, complexElemTy, lhsImag, + rhsImag); + } + 
+ mlir::Type complexLLVMTy = + getTypeConverter()->convertType(op.getResult().getType()); + auto initialComplex = + rewriter.create(op->getLoc(), complexLLVMTy); + + auto realComplex = rewriter.create( + op->getLoc(), initialComplex, newReal, 0); + + rewriter.replaceOpWithNewOp(op, realComplex, + newImag, 1); + + return mlir::success(); +} + mlir::LogicalResult CIRToLLVMComplexCreateOpLowering::matchAndRewrite( cir::ComplexCreateOp op, OpAdaptor adaptor, mlir::ConversionPatternRewriter &rewriter) const { @@ -2393,6 +2445,105 @@ mlir::LogicalResult CIRToLLVMComplexImagOpLowering::matchAndRewrite( return mlir::success(); } +mlir::IntegerType computeBitfieldIntType(mlir::Type storageType, + mlir::MLIRContext *context, + unsigned &storageSize) { + return TypeSwitch(storageType) + .Case([&](cir::ArrayType atTy) { + storageSize = atTy.getSize() * 8; + return mlir::IntegerType::get(context, storageSize); + }) + .Case([&](cir::IntType intTy) { + storageSize = intTy.getWidth(); + return mlir::IntegerType::get(context, storageSize); + }) + .Default([](mlir::Type) -> mlir::IntegerType { + llvm_unreachable( + "Either ArrayType or IntType expected for bitfields storage"); + }); +} + +mlir::LogicalResult CIRToLLVMSetBitfieldOpLowering::matchAndRewrite( + cir::SetBitfieldOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const { + mlir::OpBuilder::InsertionGuard guard(rewriter); + rewriter.setInsertionPoint(op); + + cir::BitfieldInfoAttr info = op.getBitfieldInfo(); + uint64_t size = info.getSize(); + uint64_t offset = info.getOffset(); + mlir::Type storageType = info.getStorageType(); + mlir::MLIRContext *context = storageType.getContext(); + + unsigned storageSize = 0; + + mlir::IntegerType intType = + computeBitfieldIntType(storageType, context, storageSize); + + mlir::Value srcVal = createIntCast(rewriter, adaptor.getSrc(), intType); + unsigned srcWidth = storageSize; + mlir::Value resultVal = srcVal; + + if (storageSize != size) { + assert(storageSize > 
size && "Invalid bitfield size."); + + mlir::Value val = rewriter.create( + op.getLoc(), intType, adaptor.getAddr(), /* alignment */ 0, + op.getIsVolatile()); + + srcVal = + createAnd(rewriter, srcVal, llvm::APInt::getLowBitsSet(srcWidth, size)); + resultVal = srcVal; + srcVal = createShL(rewriter, srcVal, offset); + + // Mask out the original value. + val = createAnd(rewriter, val, + ~llvm::APInt::getBitsSet(srcWidth, offset, offset + size)); + + // Or together the unchanged values and the source value. + srcVal = rewriter.create(op.getLoc(), val, srcVal); + } + + rewriter.create(op.getLoc(), srcVal, adaptor.getAddr(), + /* alignment */ 0, op.getIsVolatile()); + + mlir::Type resultTy = getTypeConverter()->convertType(op.getType()); + + if (info.getIsSigned()) { + assert(size <= storageSize); + unsigned highBits = storageSize - size; + + if (highBits) { + resultVal = createShL(rewriter, resultVal, highBits); + resultVal = createAShR(rewriter, resultVal, highBits); + } + } + + resultVal = createIntCast(rewriter, resultVal, + mlir::cast(resultTy), + info.getIsSigned()); + + rewriter.replaceOp(op, resultVal); + return mlir::success(); +} + +mlir::LogicalResult CIRToLLVMComplexRealPtrOpLowering::matchAndRewrite( + cir::ComplexRealPtrOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const { + cir::PointerType operandTy = op.getOperand().getType(); + mlir::Type resultLLVMTy = getTypeConverter()->convertType(op.getType()); + mlir::Type elementLLVMTy = + getTypeConverter()->convertType(operandTy.getPointee()); + + mlir::LLVM::GEPArg gepIndices[2] = {0, 0}; + mlir::LLVM::GEPNoWrapFlags inboundsNuw = + mlir::LLVM::GEPNoWrapFlags::inbounds | mlir::LLVM::GEPNoWrapFlags::nuw; + rewriter.replaceOpWithNewOp( + op, resultLLVMTy, elementLLVMTy, adaptor.getOperand(), gepIndices, + inboundsNuw); + return mlir::success(); +} + mlir::LogicalResult CIRToLLVMGetBitfieldOpLowering::matchAndRewrite( cir::GetBitfieldOp op, OpAdaptor adaptor, 
mlir::ConversionPatternRewriter &rewriter) const { @@ -2408,19 +2559,7 @@ mlir::LogicalResult CIRToLLVMGetBitfieldOpLowering::matchAndRewrite( unsigned storageSize = 0; mlir::IntegerType intType = - TypeSwitch(storageType) - .Case([&](cir::ArrayType atTy) { - storageSize = atTy.getSize() * 8; - return mlir::IntegerType::get(context, storageSize); - }) - .Case([&](cir::IntType intTy) { - storageSize = intTy.getWidth(); - return mlir::IntegerType::get(context, storageSize); - }) - .Default([](mlir::Type) -> mlir::IntegerType { - llvm_unreachable( - "Either ArrayType or IntType expected for bitfields storage"); - }); + computeBitfieldIntType(storageType, context, storageSize); mlir::Value val = rewriter.create( op.getLoc(), intType, adaptor.getAddr(), 0, op.getIsVolatile()); diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h index d9fb91066317b..ed158eb7289dd 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h @@ -513,6 +513,36 @@ class CIRToLLVMComplexImagOpLowering mlir::ConversionPatternRewriter &) const override; }; +class CIRToLLVMComplexRealPtrOpLowering + : public mlir::OpConversionPattern { +public: + using mlir::OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(cir::ComplexRealPtrOp op, OpAdaptor, + mlir::ConversionPatternRewriter &) const override; +}; + +class CIRToLLVMComplexAddOpLowering + : public mlir::OpConversionPattern { +public: + using mlir::OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(cir::ComplexAddOp op, OpAdaptor, + mlir::ConversionPatternRewriter &) const override; +}; + +class CIRToLLVMSetBitfieldOpLowering + : public mlir::OpConversionPattern { +public: + using mlir::OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(cir::SetBitfieldOp op, OpAdaptor, + mlir::ConversionPatternRewriter &) const override; +}; + 
class CIRToLLVMGetBitfieldOpLowering : public mlir::OpConversionPattern { public: diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 48c91eb4a5b4f..5f2eb76e7bacb 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -5987,8 +5987,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, auto *Zero = llvm::ConstantInt::get(IntTy, 0); for (unsigned I = First; I < NumArgs; ++I) { auto *Index = llvm::ConstantInt::get(IntTy, I - First); - auto *GEP = Builder.CreateGEP(Tmp.getElementType(), TmpPtr, - {Zero, Index}); + auto *GEP = + Builder.CreateGEP(Tmp.getElementType(), Alloca, {Zero, Index}); if (I == First) ElemPtr = GEP; auto *V = diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index b76163afc8aa4..fe1865888bdd0 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -7047,6 +7047,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-fapinotes-modules"); Args.AddLastArg(CmdArgs, options::OPT_fapinotes_swift_version); + if (Args.hasFlag(options::OPT_fswift_version_independent_apinotes, + options::OPT_fno_swift_version_independent_apinotes, false)) + CmdArgs.push_back("-fswift-version-independent-apinotes"); + // -fblocks=0 is default. if (Args.hasFlag(options::OPT_fblocks, options::OPT_fno_blocks, TC.IsBlocksDefault()) || @@ -9152,7 +9156,9 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA, // specific architecture via -Xarch_ will not be forwarded. 
ArgStringList CompilerArgs; ArgStringList LinkerArgs; - for (Arg *A : C.getArgsForToolChain(TC, /*BoundArch=*/"", Kind)) { + const DerivedArgList &ToolChainArgs = + C.getArgsForToolChain(TC, /*BoundArch=*/"", Kind); + for (Arg *A : ToolChainArgs) { if (A->getOption().matches(OPT_Zlinker_input)) LinkerArgs.emplace_back(A->getValue()); else if (ShouldForward(CompilerOptions, A)) @@ -9161,6 +9167,11 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA, A->render(Args, LinkerArgs); } + // If the user explicitly requested it via `--offload-arch` we should + // extract it from any static libraries if present. + for (StringRef Arg : ToolChainArgs.getAllArgValues(OPT_offload_arch_EQ)) + CmdArgs.emplace_back(Args.MakeArgString("--should-extract=" + Arg)); + // If this is OpenMP the device linker will need `-lompdevice`. if (Kind == Action::OFK_OpenMP && !Args.hasArg(OPT_no_offloadlib) && (TC->getTriple().isAMDGPU() || TC->getTriple().isNVPTX())) diff --git a/clang/lib/Format/BreakableToken.cpp b/clang/lib/Format/BreakableToken.cpp index def0d73e77539..24912c25ef8c6 100644 --- a/clang/lib/Format/BreakableToken.cpp +++ b/clang/lib/Format/BreakableToken.cpp @@ -26,18 +26,6 @@ namespace clang { namespace format { static constexpr StringRef Blanks = " \t\v\f\r"; -static bool IsBlank(char C) { - switch (C) { - case ' ': - case '\t': - case '\v': - case '\f': - case '\r': - return true; - default: - return false; - } -} static StringRef getLineCommentIndentPrefix(StringRef Comment, const FormatStyle &Style) { @@ -193,7 +181,7 @@ getStringSplit(StringRef Text, unsigned UsedColumns, unsigned ColumnLimit, if (Chars > MaxSplit || Text.size() <= Advance) break; - if (IsBlank(Text[0])) + if (Blanks.contains(Text[0])) SpaceOffset = SplitPoint; if (Text[0] == '/') SlashOffset = SplitPoint; diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp index 40b62b2a993d8..d8ee5cb90aaa4 100644 --- a/clang/lib/Format/FormatTokenLexer.cpp +++ 
b/clang/lib/Format/FormatTokenLexer.cpp @@ -1329,6 +1329,8 @@ FormatToken *FormatTokenLexer::getNextToken() { if (FormatTok->is(tok::unknown)) FormatTok->setType(TT_ImplicitStringLiteral); + const bool IsCpp = Style.isCpp(); + // JavaScript and Java do not allow to escape the end of the line with a // backslash. Backslashes are syntax errors in plain source, but can occur in // comments. When a single line comment ends with a \, it'll cause the next @@ -1336,16 +1338,17 @@ FormatToken *FormatTokenLexer::getNextToken() { // finds comments that contain a backslash followed by a line break, truncates // the comment token at the backslash, and resets the lexer to restart behind // the backslash. - if ((Style.isJavaScript() || Style.isJava()) && FormatTok->is(tok::comment) && - FormatTok->TokenText.starts_with("//")) { - size_t BackslashPos = FormatTok->TokenText.find('\\'); - while (BackslashPos != StringRef::npos) { - if (BackslashPos + 1 < FormatTok->TokenText.size() && - FormatTok->TokenText[BackslashPos + 1] == '\n') { - truncateToken(BackslashPos + 1); + if (const auto Text = FormatTok->TokenText; + Text.starts_with("//") && + (IsCpp || Style.isJavaScript() || Style.isJava())) { + assert(FormatTok->is(tok::comment)); + for (auto Pos = Text.find('\\'); Pos++ != StringRef::npos; + Pos = Text.find('\\', Pos)) { + if (Pos < Text.size() && Text[Pos] == '\n' && + (!IsCpp || Text.substr(Pos + 1).ltrim().starts_with("//"))) { + truncateToken(Pos); break; } - BackslashPos = FormatTok->TokenText.find('\\', BackslashPos + 1); } } @@ -1450,7 +1453,7 @@ FormatToken *FormatTokenLexer::getNextToken() { Column = FormatTok->LastLineColumnWidth; } - if (Style.isCpp()) { + if (IsCpp) { auto *Identifier = FormatTok->Tok.getIdentifierInfo(); auto it = Macros.find(Identifier); if ((Tokens.empty() || !Tokens.back()->Tok.getIdentifierInfo() || diff --git a/clang/lib/Parse/ParseHLSLRootSignature.cpp b/clang/lib/Parse/ParseHLSLRootSignature.cpp index cf86c62f3b671..dc5f6faefbab4 100644 --- 
a/clang/lib/Parse/ParseHLSLRootSignature.cpp +++ b/clang/lib/Parse/ParseHLSLRootSignature.cpp @@ -25,44 +25,41 @@ RootSignatureParser::RootSignatureParser( Lexer(Signature->getString()), PP(PP), CurToken(0) {} bool RootSignatureParser::parse() { - // Iterate as many RootElements as possible - do { + // Iterate as many RootSignatureElements as possible, until we hit the + // end of the stream + while (!peekExpectedToken(TokenKind::end_of_stream)) { if (tryConsumeExpectedToken(TokenKind::kw_RootFlags)) { auto Flags = parseRootFlags(); if (!Flags.has_value()) return true; Elements.push_back(*Flags); - } - - if (tryConsumeExpectedToken(TokenKind::kw_RootConstants)) { + } else if (tryConsumeExpectedToken(TokenKind::kw_RootConstants)) { auto Constants = parseRootConstants(); if (!Constants.has_value()) return true; Elements.push_back(*Constants); - } - - if (tryConsumeExpectedToken(TokenKind::kw_DescriptorTable)) { + } else if (tryConsumeExpectedToken(TokenKind::kw_DescriptorTable)) { auto Table = parseDescriptorTable(); if (!Table.has_value()) return true; Elements.push_back(*Table); - } - - if (tryConsumeExpectedToken( - {TokenKind::kw_CBV, TokenKind::kw_SRV, TokenKind::kw_UAV})) { + } else if (tryConsumeExpectedToken( + {TokenKind::kw_CBV, TokenKind::kw_SRV, TokenKind::kw_UAV})) { auto Descriptor = parseRootDescriptor(); if (!Descriptor.has_value()) return true; Elements.push_back(*Descriptor); - } - - if (tryConsumeExpectedToken(TokenKind::kw_StaticSampler)) { + } else if (tryConsumeExpectedToken(TokenKind::kw_StaticSampler)) { auto Sampler = parseStaticSampler(); if (!Sampler.has_value()) return true; Elements.push_back(*Sampler); } - } while (tryConsumeExpectedToken(TokenKind::pu_comma)); + + // ',' denotes another element, otherwise, expected to be at end of stream + if (!tryConsumeExpectedToken(TokenKind::pu_comma)) + break; + } return consumeExpectedToken(TokenKind::end_of_stream, diag::err_hlsl_unexpected_end_of_params, @@ -139,6 +136,11 @@ std::optional 
RootSignatureParser::parseRootConstants() { if (!Params.has_value()) return std::nullopt; + if (consumeExpectedToken(TokenKind::pu_r_paren, + diag::err_hlsl_unexpected_end_of_params, + /*param of=*/TokenKind::kw_RootConstants)) + return std::nullopt; + // Check mandatory parameters where provided if (!Params->Num32BitConstants.has_value()) { reportDiag(diag::err_hlsl_rootsig_missing_param) @@ -162,11 +164,6 @@ std::optional RootSignatureParser::parseRootConstants() { if (Params->Space.has_value()) Constants.Space = Params->Space.value(); - if (consumeExpectedToken(TokenKind::pu_r_paren, - diag::err_hlsl_unexpected_end_of_params, - /*param of=*/TokenKind::kw_RootConstants)) - return std::nullopt; - return Constants; } @@ -206,6 +203,11 @@ std::optional RootSignatureParser::parseRootDescriptor() { if (!Params.has_value()) return std::nullopt; + if (consumeExpectedToken(TokenKind::pu_r_paren, + diag::err_hlsl_unexpected_end_of_params, + /*param of=*/DescriptorKind)) + return std::nullopt; + // Check mandatory parameters were provided if (!Params->Reg.has_value()) { reportDiag(diag::err_hlsl_rootsig_missing_param) << ExpectedReg; @@ -224,11 +226,6 @@ std::optional RootSignatureParser::parseRootDescriptor() { if (Params->Flags.has_value()) Descriptor.Flags = Params->Flags.value(); - if (consumeExpectedToken(TokenKind::pu_r_paren, - diag::err_hlsl_unexpected_end_of_params, - /*param of=*/TokenKind::kw_RootConstants)) - return std::nullopt; - return Descriptor; } @@ -243,18 +240,18 @@ std::optional RootSignatureParser::parseDescriptorTable() { DescriptorTable Table; std::optional Visibility; - // Iterate as many Clauses as possible - do { + // Iterate as many Clauses as possible, until we hit ')' + while (!peekExpectedToken(TokenKind::pu_r_paren)) { if (tryConsumeExpectedToken({TokenKind::kw_CBV, TokenKind::kw_SRV, TokenKind::kw_UAV, TokenKind::kw_Sampler})) { + // DescriptorTableClause - CBV, SRV, UAV, or Sampler auto Clause = parseDescriptorTableClause(); if 
(!Clause.has_value()) return std::nullopt; Elements.push_back(*Clause); Table.NumClauses++; - } - - if (tryConsumeExpectedToken(TokenKind::kw_visibility)) { + } else if (tryConsumeExpectedToken(TokenKind::kw_visibility)) { + // visibility = SHADER_VISIBILITY if (Visibility.has_value()) { reportDiag(diag::err_hlsl_rootsig_repeat_param) << CurToken.TokKind; return std::nullopt; @@ -267,17 +264,21 @@ std::optional RootSignatureParser::parseDescriptorTable() { if (!Visibility.has_value()) return std::nullopt; } - } while (tryConsumeExpectedToken(TokenKind::pu_comma)); - // Fill in optional visibility - if (Visibility.has_value()) - Table.Visibility = Visibility.value(); + // ',' denotes another element, otherwise, expected to be at ')' + if (!tryConsumeExpectedToken(TokenKind::pu_comma)) + break; + } if (consumeExpectedToken(TokenKind::pu_r_paren, diag::err_hlsl_unexpected_end_of_params, /*param of=*/TokenKind::kw_DescriptorTable)) return std::nullopt; + // Fill in optional visibility + if (Visibility.has_value()) + Table.Visibility = Visibility.value(); + return Table; } @@ -323,6 +324,11 @@ RootSignatureParser::parseDescriptorTableClause() { if (!Params.has_value()) return std::nullopt; + if (consumeExpectedToken(TokenKind::pu_r_paren, + diag::err_hlsl_unexpected_end_of_params, + /*param of=*/ParamKind)) + return std::nullopt; + // Check mandatory parameters were provided if (!Params->Reg.has_value()) { reportDiag(diag::err_hlsl_rootsig_missing_param) << ExpectedReg; @@ -344,11 +350,6 @@ RootSignatureParser::parseDescriptorTableClause() { if (Params->Flags.has_value()) Clause.Flags = Params->Flags.value(); - if (consumeExpectedToken(TokenKind::pu_r_paren, - diag::err_hlsl_unexpected_end_of_params, - /*param of=*/ParamKind)) - return std::nullopt; - return Clause; } @@ -366,6 +367,11 @@ std::optional RootSignatureParser::parseStaticSampler() { if (!Params.has_value()) return std::nullopt; + if (consumeExpectedToken(TokenKind::pu_r_paren, + 
diag::err_hlsl_unexpected_end_of_params, + /*param of=*/TokenKind::kw_StaticSampler)) + return std::nullopt; + // Check mandatory parameters were provided if (!Params->Reg.has_value()) { reportDiag(diag::err_hlsl_rootsig_missing_param) << TokenKind::sReg; @@ -411,11 +417,6 @@ std::optional RootSignatureParser::parseStaticSampler() { if (Params->Visibility.has_value()) Sampler.Visibility = Params->Visibility.value(); - if (consumeExpectedToken(TokenKind::pu_r_paren, - diag::err_hlsl_unexpected_end_of_params, - /*param of=*/TokenKind::kw_StaticSampler)) - return std::nullopt; - return Sampler; } @@ -428,9 +429,9 @@ RootSignatureParser::parseRootConstantParams() { "Expects to only be invoked starting at given token"); ParsedConstantParams Params; - do { - // `num32BitConstants` `=` POS_INT + while (!peekExpectedToken(TokenKind::pu_r_paren)) { if (tryConsumeExpectedToken(TokenKind::kw_num32BitConstants)) { + // `num32BitConstants` `=` POS_INT if (Params.Num32BitConstants.has_value()) { reportDiag(diag::err_hlsl_rootsig_repeat_param) << CurToken.TokKind; return std::nullopt; @@ -443,10 +444,8 @@ RootSignatureParser::parseRootConstantParams() { if (!Num32BitConstants.has_value()) return std::nullopt; Params.Num32BitConstants = Num32BitConstants; - } - - // `b` POS_INT - if (tryConsumeExpectedToken(TokenKind::bReg)) { + } else if (tryConsumeExpectedToken(TokenKind::bReg)) { + // `b` POS_INT if (Params.Reg.has_value()) { reportDiag(diag::err_hlsl_rootsig_repeat_param) << CurToken.TokKind; return std::nullopt; @@ -455,10 +454,8 @@ RootSignatureParser::parseRootConstantParams() { if (!Reg.has_value()) return std::nullopt; Params.Reg = Reg; - } - - // `space` `=` POS_INT - if (tryConsumeExpectedToken(TokenKind::kw_space)) { + } else if (tryConsumeExpectedToken(TokenKind::kw_space)) { + // `space` `=` POS_INT if (Params.Space.has_value()) { reportDiag(diag::err_hlsl_rootsig_repeat_param) << CurToken.TokKind; return std::nullopt; @@ -471,10 +468,8 @@ 
RootSignatureParser::parseRootConstantParams() { if (!Space.has_value()) return std::nullopt; Params.Space = Space; - } - - // `visibility` `=` SHADER_VISIBILITY - if (tryConsumeExpectedToken(TokenKind::kw_visibility)) { + } else if (tryConsumeExpectedToken(TokenKind::kw_visibility)) { + // `visibility` `=` SHADER_VISIBILITY if (Params.Visibility.has_value()) { reportDiag(diag::err_hlsl_rootsig_repeat_param) << CurToken.TokKind; return std::nullopt; @@ -488,7 +483,11 @@ RootSignatureParser::parseRootConstantParams() { return std::nullopt; Params.Visibility = Visibility; } - } while (tryConsumeExpectedToken(TokenKind::pu_comma)); + + // ',' denotes another element, otherwise, expected to be at ')' + if (!tryConsumeExpectedToken(TokenKind::pu_comma)) + break; + } return Params; } @@ -499,9 +498,9 @@ RootSignatureParser::parseRootDescriptorParams(TokenKind RegType) { "Expects to only be invoked starting at given token"); ParsedRootDescriptorParams Params; - do { - // ( `b` | `t` | `u`) POS_INT + while (!peekExpectedToken(TokenKind::pu_r_paren)) { if (tryConsumeExpectedToken(RegType)) { + // ( `b` | `t` | `u`) POS_INT if (Params.Reg.has_value()) { reportDiag(diag::err_hlsl_rootsig_repeat_param) << CurToken.TokKind; return std::nullopt; @@ -510,10 +509,8 @@ RootSignatureParser::parseRootDescriptorParams(TokenKind RegType) { if (!Reg.has_value()) return std::nullopt; Params.Reg = Reg; - } - - // `space` `=` POS_INT - if (tryConsumeExpectedToken(TokenKind::kw_space)) { + } else if (tryConsumeExpectedToken(TokenKind::kw_space)) { + // `space` `=` POS_INT if (Params.Space.has_value()) { reportDiag(diag::err_hlsl_rootsig_repeat_param) << CurToken.TokKind; return std::nullopt; @@ -526,10 +523,8 @@ RootSignatureParser::parseRootDescriptorParams(TokenKind RegType) { if (!Space.has_value()) return std::nullopt; Params.Space = Space; - } - - // `visibility` `=` SHADER_VISIBILITY - if (tryConsumeExpectedToken(TokenKind::kw_visibility)) { + } else if 
(tryConsumeExpectedToken(TokenKind::kw_visibility)) { + // `visibility` `=` SHADER_VISIBILITY if (Params.Visibility.has_value()) { reportDiag(diag::err_hlsl_rootsig_repeat_param) << CurToken.TokKind; return std::nullopt; @@ -542,10 +537,8 @@ RootSignatureParser::parseRootDescriptorParams(TokenKind RegType) { if (!Visibility.has_value()) return std::nullopt; Params.Visibility = Visibility; - } - - // `flags` `=` ROOT_DESCRIPTOR_FLAGS - if (tryConsumeExpectedToken(TokenKind::kw_flags)) { + } else if (tryConsumeExpectedToken(TokenKind::kw_flags)) { + // `flags` `=` ROOT_DESCRIPTOR_FLAGS if (Params.Flags.has_value()) { reportDiag(diag::err_hlsl_rootsig_repeat_param) << CurToken.TokKind; return std::nullopt; @@ -559,7 +552,11 @@ RootSignatureParser::parseRootDescriptorParams(TokenKind RegType) { return std::nullopt; Params.Flags = Flags; } - } while (tryConsumeExpectedToken(TokenKind::pu_comma)); + + // ',' denotes another element, otherwise, expected to be at ')' + if (!tryConsumeExpectedToken(TokenKind::pu_comma)) + break; + } return Params; } @@ -570,9 +567,9 @@ RootSignatureParser::parseDescriptorTableClauseParams(TokenKind RegType) { "Expects to only be invoked starting at given token"); ParsedClauseParams Params; - do { - // ( `b` | `t` | `u` | `s`) POS_INT + while (!peekExpectedToken(TokenKind::pu_r_paren)) { if (tryConsumeExpectedToken(RegType)) { + // ( `b` | `t` | `u` | `s`) POS_INT if (Params.Reg.has_value()) { reportDiag(diag::err_hlsl_rootsig_repeat_param) << CurToken.TokKind; return std::nullopt; @@ -581,10 +578,8 @@ RootSignatureParser::parseDescriptorTableClauseParams(TokenKind RegType) { if (!Reg.has_value()) return std::nullopt; Params.Reg = Reg; - } - - // `numDescriptors` `=` POS_INT | unbounded - if (tryConsumeExpectedToken(TokenKind::kw_numDescriptors)) { + } else if (tryConsumeExpectedToken(TokenKind::kw_numDescriptors)) { + // `numDescriptors` `=` POS_INT | unbounded if (Params.NumDescriptors.has_value()) { 
reportDiag(diag::err_hlsl_rootsig_repeat_param) << CurToken.TokKind; return std::nullopt; @@ -603,10 +598,8 @@ RootSignatureParser::parseDescriptorTableClauseParams(TokenKind RegType) { } Params.NumDescriptors = NumDescriptors; - } - - // `space` `=` POS_INT - if (tryConsumeExpectedToken(TokenKind::kw_space)) { + } else if (tryConsumeExpectedToken(TokenKind::kw_space)) { + // `space` `=` POS_INT if (Params.Space.has_value()) { reportDiag(diag::err_hlsl_rootsig_repeat_param) << CurToken.TokKind; return std::nullopt; @@ -619,10 +612,8 @@ RootSignatureParser::parseDescriptorTableClauseParams(TokenKind RegType) { if (!Space.has_value()) return std::nullopt; Params.Space = Space; - } - - // `offset` `=` POS_INT | DESCRIPTOR_RANGE_OFFSET_APPEND - if (tryConsumeExpectedToken(TokenKind::kw_offset)) { + } else if (tryConsumeExpectedToken(TokenKind::kw_offset)) { + // `offset` `=` POS_INT | DESCRIPTOR_RANGE_OFFSET_APPEND if (Params.Offset.has_value()) { reportDiag(diag::err_hlsl_rootsig_repeat_param) << CurToken.TokKind; return std::nullopt; @@ -641,10 +632,8 @@ RootSignatureParser::parseDescriptorTableClauseParams(TokenKind RegType) { } Params.Offset = Offset; - } - - // `flags` `=` DESCRIPTOR_RANGE_FLAGS - if (tryConsumeExpectedToken(TokenKind::kw_flags)) { + } else if (tryConsumeExpectedToken(TokenKind::kw_flags)) { + // `flags` `=` DESCRIPTOR_RANGE_FLAGS if (Params.Flags.has_value()) { reportDiag(diag::err_hlsl_rootsig_repeat_param) << CurToken.TokKind; return std::nullopt; @@ -659,7 +648,10 @@ RootSignatureParser::parseDescriptorTableClauseParams(TokenKind RegType) { Params.Flags = Flags; } - } while (tryConsumeExpectedToken(TokenKind::pu_comma)); + // ',' denotes another element, otherwise, expected to be at ')' + if (!tryConsumeExpectedToken(TokenKind::pu_comma)) + break; + } return Params; } @@ -670,9 +662,9 @@ RootSignatureParser::parseStaticSamplerParams() { "Expects to only be invoked starting at given token"); ParsedStaticSamplerParams Params; - do { - // `s` 
POS_INT + while (!peekExpectedToken(TokenKind::pu_r_paren)) { if (tryConsumeExpectedToken(TokenKind::sReg)) { + // `s` POS_INT if (Params.Reg.has_value()) { reportDiag(diag::err_hlsl_rootsig_repeat_param) << CurToken.TokKind; return std::nullopt; @@ -681,10 +673,8 @@ RootSignatureParser::parseStaticSamplerParams() { if (!Reg.has_value()) return std::nullopt; Params.Reg = Reg; - } - - // `filter` `=` FILTER - if (tryConsumeExpectedToken(TokenKind::kw_filter)) { + } else if (tryConsumeExpectedToken(TokenKind::kw_filter)) { + // `filter` `=` FILTER if (Params.Filter.has_value()) { reportDiag(diag::err_hlsl_rootsig_repeat_param) << CurToken.TokKind; return std::nullopt; @@ -697,10 +687,8 @@ RootSignatureParser::parseStaticSamplerParams() { if (!Filter.has_value()) return std::nullopt; Params.Filter = Filter; - } - - // `addressU` `=` TEXTURE_ADDRESS - if (tryConsumeExpectedToken(TokenKind::kw_addressU)) { + } else if (tryConsumeExpectedToken(TokenKind::kw_addressU)) { + // `addressU` `=` TEXTURE_ADDRESS if (Params.AddressU.has_value()) { reportDiag(diag::err_hlsl_rootsig_repeat_param) << CurToken.TokKind; return std::nullopt; @@ -713,10 +701,8 @@ RootSignatureParser::parseStaticSamplerParams() { if (!AddressU.has_value()) return std::nullopt; Params.AddressU = AddressU; - } - - // `addressV` `=` TEXTURE_ADDRESS - if (tryConsumeExpectedToken(TokenKind::kw_addressV)) { + } else if (tryConsumeExpectedToken(TokenKind::kw_addressV)) { + // `addressV` `=` TEXTURE_ADDRESS if (Params.AddressV.has_value()) { reportDiag(diag::err_hlsl_rootsig_repeat_param) << CurToken.TokKind; return std::nullopt; @@ -729,10 +715,8 @@ RootSignatureParser::parseStaticSamplerParams() { if (!AddressV.has_value()) return std::nullopt; Params.AddressV = AddressV; - } - - // `addressW` `=` TEXTURE_ADDRESS - if (tryConsumeExpectedToken(TokenKind::kw_addressW)) { + } else if (tryConsumeExpectedToken(TokenKind::kw_addressW)) { + // `addressW` `=` TEXTURE_ADDRESS if (Params.AddressW.has_value()) { 
reportDiag(diag::err_hlsl_rootsig_repeat_param) << CurToken.TokKind; return std::nullopt; @@ -745,10 +729,8 @@ RootSignatureParser::parseStaticSamplerParams() { if (!AddressW.has_value()) return std::nullopt; Params.AddressW = AddressW; - } - - // `mipLODBias` `=` NUMBER - if (tryConsumeExpectedToken(TokenKind::kw_mipLODBias)) { + } else if (tryConsumeExpectedToken(TokenKind::kw_mipLODBias)) { + // `mipLODBias` `=` NUMBER if (Params.MipLODBias.has_value()) { reportDiag(diag::err_hlsl_rootsig_repeat_param) << CurToken.TokKind; return std::nullopt; @@ -761,10 +743,8 @@ RootSignatureParser::parseStaticSamplerParams() { if (!MipLODBias.has_value()) return std::nullopt; Params.MipLODBias = MipLODBias; - } - - // `maxAnisotropy` `=` POS_INT - if (tryConsumeExpectedToken(TokenKind::kw_maxAnisotropy)) { + } else if (tryConsumeExpectedToken(TokenKind::kw_maxAnisotropy)) { + // `maxAnisotropy` `=` POS_INT if (Params.MaxAnisotropy.has_value()) { reportDiag(diag::err_hlsl_rootsig_repeat_param) << CurToken.TokKind; return std::nullopt; @@ -777,10 +757,8 @@ RootSignatureParser::parseStaticSamplerParams() { if (!MaxAnisotropy.has_value()) return std::nullopt; Params.MaxAnisotropy = MaxAnisotropy; - } - - // `comparisonFunc` `=` COMPARISON_FUNC - if (tryConsumeExpectedToken(TokenKind::kw_comparisonFunc)) { + } else if (tryConsumeExpectedToken(TokenKind::kw_comparisonFunc)) { + // `comparisonFunc` `=` COMPARISON_FUNC if (Params.CompFunc.has_value()) { reportDiag(diag::err_hlsl_rootsig_repeat_param) << CurToken.TokKind; return std::nullopt; @@ -793,10 +771,8 @@ RootSignatureParser::parseStaticSamplerParams() { if (!CompFunc.has_value()) return std::nullopt; Params.CompFunc = CompFunc; - } - - // `borderColor` `=` STATIC_BORDER_COLOR - if (tryConsumeExpectedToken(TokenKind::kw_borderColor)) { + } else if (tryConsumeExpectedToken(TokenKind::kw_borderColor)) { + // `borderColor` `=` STATIC_BORDER_COLOR if (Params.BorderColor.has_value()) { 
reportDiag(diag::err_hlsl_rootsig_repeat_param) << CurToken.TokKind; return std::nullopt; @@ -809,10 +785,8 @@ RootSignatureParser::parseStaticSamplerParams() { if (!BorderColor.has_value()) return std::nullopt; Params.BorderColor = BorderColor; - } - - // `minLOD` `=` NUMBER - if (tryConsumeExpectedToken(TokenKind::kw_minLOD)) { + } else if (tryConsumeExpectedToken(TokenKind::kw_minLOD)) { + // `minLOD` `=` NUMBER if (Params.MinLOD.has_value()) { reportDiag(diag::err_hlsl_rootsig_repeat_param) << CurToken.TokKind; return std::nullopt; @@ -825,10 +799,8 @@ RootSignatureParser::parseStaticSamplerParams() { if (!MinLOD.has_value()) return std::nullopt; Params.MinLOD = MinLOD; - } - - // `maxLOD` `=` NUMBER - if (tryConsumeExpectedToken(TokenKind::kw_maxLOD)) { + } else if (tryConsumeExpectedToken(TokenKind::kw_maxLOD)) { + // `maxLOD` `=` NUMBER if (Params.MaxLOD.has_value()) { reportDiag(diag::err_hlsl_rootsig_repeat_param) << CurToken.TokKind; return std::nullopt; @@ -841,10 +813,8 @@ RootSignatureParser::parseStaticSamplerParams() { if (!MaxLOD.has_value()) return std::nullopt; Params.MaxLOD = MaxLOD; - } - - // `space` `=` POS_INT - if (tryConsumeExpectedToken(TokenKind::kw_space)) { + } else if (tryConsumeExpectedToken(TokenKind::kw_space)) { + // `space` `=` POS_INT if (Params.Space.has_value()) { reportDiag(diag::err_hlsl_rootsig_repeat_param) << CurToken.TokKind; return std::nullopt; @@ -857,10 +827,8 @@ RootSignatureParser::parseStaticSamplerParams() { if (!Space.has_value()) return std::nullopt; Params.Space = Space; - } - - // `visibility` `=` SHADER_VISIBILITY - if (tryConsumeExpectedToken(TokenKind::kw_visibility)) { + } else if (tryConsumeExpectedToken(TokenKind::kw_visibility)) { + // `visibility` `=` SHADER_VISIBILITY if (Params.Visibility.has_value()) { reportDiag(diag::err_hlsl_rootsig_repeat_param) << CurToken.TokKind; return std::nullopt; @@ -874,7 +842,11 @@ RootSignatureParser::parseStaticSamplerParams() { return std::nullopt; Params.Visibility 
= Visibility; } - } while (tryConsumeExpectedToken(TokenKind::pu_comma)); + + // ',' denotes another element, otherwise, expected to be at ')' + if (!tryConsumeExpectedToken(TokenKind::pu_comma)) + break; + } return Params; } diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index 7420ba2d461c6..ec8acbdff3b49 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -29,6 +29,7 @@ #include "clang/Analysis/Analyses/CFGReachabilityAnalysis.h" #include "clang/Analysis/Analyses/CalledOnceCheck.h" #include "clang/Analysis/Analyses/Consumed.h" +#include "clang/Analysis/Analyses/LifetimeSafety.h" #include "clang/Analysis/Analyses/ReachableCode.h" #include "clang/Analysis/Analyses/ThreadSafety.h" #include "clang/Analysis/Analyses/UninitializedValues.h" @@ -49,6 +50,7 @@ #include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/Debug.h" #include #include #include @@ -985,11 +987,10 @@ static void DiagUninitUse(Sema &S, const VarDecl *VD, const UninitUse &Use, } /// Diagnose uninitialized const reference usages. -static bool DiagnoseUninitializedConstRefUse(Sema &S, const VarDecl *VD, +static void DiagnoseUninitializedConstRefUse(Sema &S, const VarDecl *VD, const UninitUse &Use) { S.Diag(Use.getUser()->getBeginLoc(), diag::warn_uninit_const_reference) << VD->getDeclName() << Use.getUser()->getSourceRange(); - return true; } /// DiagnoseUninitializedUse -- Helper function for diagnosing uses of an @@ -1531,14 +1532,13 @@ class UninitValsDiagReporter : public UninitVariablesHandler { // order of diagnostics when calling flushDiagnostics(). 
typedef llvm::MapVector UsesMap; UsesMap uses; - UsesMap constRefUses; public: UninitValsDiagReporter(Sema &S) : S(S) {} ~UninitValsDiagReporter() override { flushDiagnostics(); } - MappedType &getUses(UsesMap &um, const VarDecl *vd) { - MappedType &V = um[vd]; + MappedType &getUses(const VarDecl *vd) { + MappedType &V = uses[vd]; if (!V.getPointer()) V.setPointer(new UsesVec()); return V; @@ -1546,18 +1546,10 @@ class UninitValsDiagReporter : public UninitVariablesHandler { void handleUseOfUninitVariable(const VarDecl *vd, const UninitUse &use) override { - getUses(uses, vd).getPointer()->push_back(use); - } - - void handleConstRefUseOfUninitVariable(const VarDecl *vd, - const UninitUse &use) override { - getUses(constRefUses, vd).getPointer()->push_back(use); + getUses(vd).getPointer()->push_back(use); } - void handleSelfInit(const VarDecl *vd) override { - getUses(uses, vd).setInt(true); - getUses(constRefUses, vd).setInt(true); - } + void handleSelfInit(const VarDecl *vd) override { getUses(vd).setInt(true); } void flushDiagnostics() { for (const auto &P : uses) { @@ -1580,6 +1572,9 @@ class UninitValsDiagReporter : public UninitVariablesHandler { // guaranteed to produce them in line/column order, this will provide // a stable ordering. llvm::sort(*vec, [](const UninitUse &a, const UninitUse &b) { + // Move ConstRef uses to the back. + if (a.isConstRefUse() != b.isConstRefUse()) + return b.isConstRefUse(); // Prefer a more confident report over a less confident one. if (a.getKind() != b.getKind()) return a.getKind() > b.getKind(); @@ -1587,6 +1582,11 @@ class UninitValsDiagReporter : public UninitVariablesHandler { }); for (const auto &U : *vec) { + if (U.isConstRefUse()) { + DiagnoseUninitializedConstRefUse(S, vd, U); + break; + } + // If we have self-init, downgrade all uses to 'may be uninitialized'. UninitUse Use = hasSelfInit ? 
UninitUse(U.getUser(), false) : U; @@ -1602,32 +1602,6 @@ class UninitValsDiagReporter : public UninitVariablesHandler { } uses.clear(); - - // Flush all const reference uses diags. - for (const auto &P : constRefUses) { - const VarDecl *vd = P.first; - const MappedType &V = P.second; - - UsesVec *vec = V.getPointer(); - bool hasSelfInit = V.getInt(); - - if (!vec->empty() && hasSelfInit && hasAlwaysUninitializedUse(vec)) - DiagnoseUninitializedUse(S, vd, - UninitUse(vd->getInit()->IgnoreParenCasts(), - /* isAlwaysUninit */ true), - /* alwaysReportSelfInit */ true); - else { - for (const auto &U : *vec) { - if (DiagnoseUninitializedConstRefUse(S, vd, U)) - break; - } - } - - // Release the uses vector. - delete vec; - } - - constRefUses.clear(); } private: @@ -2744,6 +2718,8 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings( .setAlwaysAdd(Stmt::UnaryOperatorClass); } + bool EnableLifetimeSafetyAnalysis = !Diags.isIgnored( + diag::warn_experimental_lifetime_safety_dummy_warning, D->getBeginLoc()); // Install the logical handler. std::optional LEH; if (LogicalErrorHandler::hasActiveDiagnostics(Diags, D->getBeginLoc())) { @@ -2866,6 +2842,12 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings( } } + // TODO: Enable lifetime safety analysis for other languages once it is + // stable. + if (EnableLifetimeSafetyAnalysis && S.getLangOpts().CPlusPlus) { + if (CFG *cfg = AC.getCFG()) + runLifetimeSafetyAnalysis(*cast(D), *cfg, AC); + } // Check for violations of "called once" parameter properties. 
if (S.getLangOpts().ObjC && !S.getLangOpts().CPlusPlus && shouldAnalyzeCalledOnceParameters(Diags, D->getBeginLoc())) { diff --git a/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp b/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp index e5c6220bfb47d..87f9ae07550c2 100644 --- a/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp +++ b/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp @@ -697,7 +697,9 @@ BuiltinTypeDeclBuilder &BuiltinTypeDeclBuilder::addArraySubscriptOperators() { AST.DeclarationNames.getCXXOperatorName(OO_Subscript); addHandleAccessFunction(Subscript, /*IsConst=*/true, /*IsRef=*/true); - addHandleAccessFunction(Subscript, /*IsConst=*/false, /*IsRef=*/true); + if (getResourceAttrs().ResourceClass == llvm::dxil::ResourceClass::UAV) + addHandleAccessFunction(Subscript, /*IsConst=*/false, /*IsRef=*/true); + return *this; } @@ -714,7 +716,7 @@ BuiltinTypeDeclBuilder &BuiltinTypeDeclBuilder::addLoadMethods() { return *this; } -FieldDecl *BuiltinTypeDeclBuilder::getResourceHandleField() { +FieldDecl *BuiltinTypeDeclBuilder::getResourceHandleField() const { auto I = Fields.find("__handle"); assert(I != Fields.end() && I->second->getType()->isHLSLAttributedResourceType() && @@ -738,6 +740,12 @@ QualType BuiltinTypeDeclBuilder::getHandleElementType() { return SemaRef.getASTContext().Char8Ty; } +HLSLAttributedResourceType::Attributes +BuiltinTypeDeclBuilder::getResourceAttrs() const { + QualType HandleType = getResourceHandleField()->getType(); + return cast(HandleType)->getAttrs(); +} + // BuiltinTypeDeclBuilder &BuiltinTypeDeclBuilder::startDefinition() { // assert(!Record->isCompleteDefinition() && "record is already complete"); // Record->startDefinition(); diff --git a/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.h b/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.h index a52e2938104c7..36c4add20b225 100644 --- a/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.h +++ b/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.h @@ -91,10 +91,11 @@ class BuiltinTypeDeclBuilder { 
BuiltinTypeDeclBuilder &addConsumeMethod(); private: - FieldDecl *getResourceHandleField(); + FieldDecl *getResourceHandleField() const; QualType getFirstTemplateTypeParam(); QualType getHandleElementType(); Expr *getConstantIntExpr(int value); + HLSLAttributedResourceType::Attributes getResourceAttrs() const; }; } // namespace hlsl diff --git a/clang/lib/Sema/SemaAPINotes.cpp b/clang/lib/Sema/SemaAPINotes.cpp index f21cbbbdb44ee..044abb0ee08a8 100644 --- a/clang/lib/Sema/SemaAPINotes.cpp +++ b/clang/lib/Sema/SemaAPINotes.cpp @@ -52,63 +52,58 @@ static bool isIndirectPointerType(QualType Type) { Pointee->isMemberPointerType(); } -/// Apply nullability to the given declaration. -static void applyNullability(Sema &S, Decl *D, NullabilityKind Nullability, - VersionedInfoMetadata Metadata) { - if (!Metadata.IsActive) - return; +static void applyAPINotesType(Sema &S, Decl *decl, StringRef typeString, + VersionedInfoMetadata metadata) { + if (typeString.empty()) - auto GetModified = - [&](Decl *D, QualType QT, - NullabilityKind Nullability) -> std::optional { - QualType Original = QT; - S.CheckImplicitNullabilityTypeSpecifier(QT, Nullability, D->getLocation(), - isa(D), - /*OverrideExisting=*/true); - return (QT.getTypePtr() != Original.getTypePtr()) ? std::optional(QT) - : std::nullopt; - }; - - if (auto Function = dyn_cast(D)) { - if (auto Modified = - GetModified(D, Function->getReturnType(), Nullability)) { - const FunctionType *FnType = Function->getType()->castAs(); - if (const FunctionProtoType *proto = dyn_cast(FnType)) - Function->setType(S.Context.getFunctionType( - *Modified, proto->getParamTypes(), proto->getExtProtoInfo())); - else - Function->setType( - S.Context.getFunctionNoProtoType(*Modified, FnType->getExtInfo())); - } - } else if (auto Method = dyn_cast(D)) { - if (auto Modified = GetModified(D, Method->getReturnType(), Nullability)) { - Method->setReturnType(*Modified); + return; - // Make it a context-sensitive keyword if we can. 
- if (!isIndirectPointerType(*Modified)) - Method->setObjCDeclQualifier(Decl::ObjCDeclQualifier( - Method->getObjCDeclQualifier() | Decl::OBJC_TQ_CSNullability)); - } - } else if (auto Value = dyn_cast(D)) { - if (auto Modified = GetModified(D, Value->getType(), Nullability)) { - Value->setType(*Modified); + // Version-independent APINotes add "type" annotations + // with a versioned attribute for the client to select and apply. + if (S.captureSwiftVersionIndependentAPINotes()) { + auto *typeAttr = SwiftTypeAttr::CreateImplicit(S.Context, typeString); + auto *versioned = SwiftVersionedAdditionAttr::CreateImplicit( + S.Context, metadata.Version, typeAttr, metadata.IsReplacement); + decl->addAttr(versioned); + } else { + if (!metadata.IsActive) + return; + S.ApplyAPINotesType(decl, typeString); + } +} - // Make it a context-sensitive keyword if we can. - if (auto Parm = dyn_cast(D)) { - if (Parm->isObjCMethodParameter() && !isIndirectPointerType(*Modified)) - Parm->setObjCDeclQualifier(Decl::ObjCDeclQualifier( - Parm->getObjCDeclQualifier() | Decl::OBJC_TQ_CSNullability)); - } +/// Apply nullability to the given declaration. +static void applyNullability(Sema &S, Decl *decl, NullabilityKind nullability, + VersionedInfoMetadata metadata) { + // Version-independent APINotes add "nullability" annotations + // with a versioned attribute for the client to select and apply. 
+ if (S.captureSwiftVersionIndependentAPINotes()) { + SwiftNullabilityAttr::Kind attrNullabilityKind; + switch (nullability) { + case NullabilityKind::NonNull: + attrNullabilityKind = SwiftNullabilityAttr::Kind::NonNull; + break; + case NullabilityKind::Nullable: + attrNullabilityKind = SwiftNullabilityAttr::Kind::Nullable; + break; + case NullabilityKind::Unspecified: + attrNullabilityKind = SwiftNullabilityAttr::Kind::Unspecified; + break; + case NullabilityKind::NullableResult: + attrNullabilityKind = SwiftNullabilityAttr::Kind::NullableResult; + break; } - } else if (auto Property = dyn_cast(D)) { - if (auto Modified = GetModified(D, Property->getType(), Nullability)) { - Property->setType(*Modified, Property->getTypeSourceInfo()); + auto *nullabilityAttr = + SwiftNullabilityAttr::CreateImplicit(S.Context, attrNullabilityKind); + auto *versioned = SwiftVersionedAdditionAttr::CreateImplicit( + S.Context, metadata.Version, nullabilityAttr, metadata.IsReplacement); + decl->addAttr(versioned); + return; + } else { + if (!metadata.IsActive) + return; - // Make it a property attribute if we can. - if (!isIndirectPointerType(*Modified)) - Property->setPropertyAttributes( - ObjCPropertyAttribute::kind_null_resettable); - } + S.ApplyNullability(decl, nullability); } } @@ -361,42 +356,99 @@ static bool checkAPINotesReplacementType(Sema &S, SourceLocation Loc, return false; } -/// Process API notes for a variable or property. -static void ProcessAPINotes(Sema &S, Decl *D, - const api_notes::VariableInfo &Info, - VersionedInfoMetadata Metadata) { - // Type override. 
- if (Metadata.IsActive && !Info.getType().empty() && - S.ParseTypeFromStringCallback) { - auto ParsedType = S.ParseTypeFromStringCallback( - Info.getType(), "", D->getLocation()); +void Sema::ApplyAPINotesType(Decl *D, StringRef TypeString) { + if (!TypeString.empty() && ParseTypeFromStringCallback) { + auto ParsedType = ParseTypeFromStringCallback(TypeString, "", + D->getLocation()); if (ParsedType.isUsable()) { QualType Type = Sema::GetTypeFromParser(ParsedType.get()); - auto TypeInfo = - S.Context.getTrivialTypeSourceInfo(Type, D->getLocation()); - + auto TypeInfo = Context.getTrivialTypeSourceInfo(Type, D->getLocation()); if (auto Var = dyn_cast(D)) { // Make adjustments to parameter types. if (isa(Var)) { - Type = S.ObjC().AdjustParameterTypeForObjCAutoRefCount( + Type = ObjC().AdjustParameterTypeForObjCAutoRefCount( Type, D->getLocation(), TypeInfo); - Type = S.Context.getAdjustedParameterType(Type); + Type = Context.getAdjustedParameterType(Type); } - if (!checkAPINotesReplacementType(S, Var->getLocation(), Var->getType(), - Type)) { + if (!checkAPINotesReplacementType(*this, Var->getLocation(), + Var->getType(), Type)) { Var->setType(Type); Var->setTypeSourceInfo(TypeInfo); } - } else if (auto Property = dyn_cast(D)) { - if (!checkAPINotesReplacementType(S, Property->getLocation(), - Property->getType(), Type)) - Property->setType(Type, TypeInfo); - - } else + } else if (auto property = dyn_cast(D)) { + if (!checkAPINotesReplacementType(*this, property->getLocation(), + property->getType(), Type)) { + property->setType(Type, TypeInfo); + } + } else { llvm_unreachable("API notes allowed a type on an unknown declaration"); + } + } + } +} + +void Sema::ApplyNullability(Decl *D, NullabilityKind Nullability) { + auto GetModified = + [&](class Decl *D, QualType QT, + NullabilityKind Nullability) -> std::optional { + QualType Original = QT; + CheckImplicitNullabilityTypeSpecifier(QT, Nullability, D->getLocation(), + isa(D), + /*OverrideExisting=*/true); + return 
(QT.getTypePtr() != Original.getTypePtr()) ? std::optional(QT) + : std::nullopt; + }; + + if (auto Function = dyn_cast(D)) { + if (auto Modified = + GetModified(D, Function->getReturnType(), Nullability)) { + const FunctionType *FnType = Function->getType()->castAs(); + if (const FunctionProtoType *proto = dyn_cast(FnType)) + Function->setType(Context.getFunctionType( + *Modified, proto->getParamTypes(), proto->getExtProtoInfo())); + else + Function->setType( + Context.getFunctionNoProtoType(*Modified, FnType->getExtInfo())); + } + } else if (auto Method = dyn_cast(D)) { + if (auto Modified = GetModified(D, Method->getReturnType(), Nullability)) { + Method->setReturnType(*Modified); + + // Make it a context-sensitive keyword if we can. + if (!isIndirectPointerType(*Modified)) + Method->setObjCDeclQualifier(Decl::ObjCDeclQualifier( + Method->getObjCDeclQualifier() | Decl::OBJC_TQ_CSNullability)); + } + } else if (auto Value = dyn_cast(D)) { + if (auto Modified = GetModified(D, Value->getType(), Nullability)) { + Value->setType(*Modified); + + // Make it a context-sensitive keyword if we can. + if (auto Parm = dyn_cast(D)) { + if (Parm->isObjCMethodParameter() && !isIndirectPointerType(*Modified)) + Parm->setObjCDeclQualifier(Decl::ObjCDeclQualifier( + Parm->getObjCDeclQualifier() | Decl::OBJC_TQ_CSNullability)); + } + } + } else if (auto Property = dyn_cast(D)) { + if (auto Modified = GetModified(D, Property->getType(), Nullability)) { + Property->setType(*Modified, Property->getTypeSourceInfo()); + + // Make it a property attribute if we can. + if (!isIndirectPointerType(*Modified)) + Property->setPropertyAttributes( + ObjCPropertyAttribute::kind_null_resettable); } } +} + +/// Process API notes for a variable or property. +static void ProcessAPINotes(Sema &S, Decl *D, + const api_notes::VariableInfo &Info, + VersionedInfoMetadata Metadata) { + // Type override. + applyAPINotesType(S, D, Info.getType(), Metadata); // Nullability. 
if (auto Nullability = Info.getNullability()) @@ -814,7 +866,8 @@ static void ProcessVersionedAPINotes( Sema &S, SpecificDecl *D, const api_notes::APINotesReader::VersionedInfo Info) { - maybeAttachUnversionedSwiftName(S, D, Info); + if (!S.captureSwiftVersionIndependentAPINotes()) + maybeAttachUnversionedSwiftName(S, D, Info); unsigned Selected = Info.getSelected().value_or(Info.size()); @@ -824,10 +877,18 @@ static void ProcessVersionedAPINotes( std::tie(Version, InfoSlice) = Info[i]; auto Active = (i == Selected) ? IsActive_t::Active : IsActive_t::Inactive; auto Replacement = IsSubstitution_t::Original; - if (Active == IsActive_t::Inactive && Version.empty()) { + + // When collection all APINotes as version-independent, + // capture all as inactive and defer to the client select the + // right one. + if (S.captureSwiftVersionIndependentAPINotes()) { + Active = IsActive_t::Inactive; + Replacement = IsSubstitution_t::Original; + } else if (Active == IsActive_t::Inactive && Version.empty()) { Replacement = IsSubstitution_t::Replacement; Version = Info[Selected].first; } + ProcessAPINotes(S, D, InfoSlice, VersionedInfoMetadata(Version, Active, Replacement)); } diff --git a/clang/lib/Sema/SemaCast.cpp b/clang/lib/Sema/SemaCast.cpp index e27ed8fd4de14..01252a4bc69c6 100644 --- a/clang/lib/Sema/SemaCast.cpp +++ b/clang/lib/Sema/SemaCast.cpp @@ -263,7 +263,7 @@ static void DiagnoseCastQual(Sema &Self, const ExprResult &SrcExpr, // %2: Destination Type static TryCastResult TryLValueToRValueCast(Sema &Self, Expr *SrcExpr, QualType DestType, bool CStyle, - CastKind &Kind, + SourceRange OpRange, CastKind &Kind, CXXCastPath &BasePath, unsigned &msg); static TryCastResult @@ -1425,8 +1425,8 @@ static TryCastResult TryStaticCast(Sema &Self, ExprResult &SrcExpr, // C++11 [expr.static.cast]p3: // A glvalue of type "cv1 T1" can be cast to type "rvalue reference to cv2 // T2" if "cv2 T2" is reference-compatible with "cv1 T1". 
- tcr = TryLValueToRValueCast(Self, SrcExpr.get(), DestType, CStyle, Kind, - BasePath, msg); + tcr = TryLValueToRValueCast(Self, SrcExpr.get(), DestType, CStyle, OpRange, + Kind, BasePath, msg); if (tcr != TC_NotApplicable) return tcr; @@ -1602,8 +1602,8 @@ static TryCastResult TryStaticCast(Sema &Self, ExprResult &SrcExpr, /// Tests whether a conversion according to N2844 is valid. TryCastResult TryLValueToRValueCast(Sema &Self, Expr *SrcExpr, QualType DestType, bool CStyle, - CastKind &Kind, CXXCastPath &BasePath, - unsigned &msg) { + SourceRange OpRange, CastKind &Kind, + CXXCastPath &BasePath, unsigned &msg) { // C++11 [expr.static.cast]p3: // A glvalue of type "cv1 T1" can be cast to type "rvalue reference to // cv2 T2" if "cv2 T2" is reference-compatible with "cv1 T1". @@ -1616,7 +1616,6 @@ TryCastResult TryLValueToRValueCast(Sema &Self, Expr *SrcExpr, // Because we try the reference downcast before this function, from now on // this is the only cast possibility, so we issue an error if we fail now. - // FIXME: Should allow casting away constness if CStyle. 
QualType FromType = SrcExpr->getType(); QualType ToType = R->getPointeeType(); if (CStyle) { @@ -1640,13 +1639,12 @@ TryCastResult TryLValueToRValueCast(Sema &Self, Expr *SrcExpr, if (RefConv & Sema::ReferenceConversions::DerivedToBase) { Kind = CK_DerivedToBase; - CXXBasePaths Paths(/*FindAmbiguities=*/true, /*RecordPaths=*/true, - /*DetectVirtual=*/true); - if (!Self.IsDerivedFrom(SrcExpr->getBeginLoc(), SrcExpr->getType(), - R->getPointeeType(), Paths)) - return TC_NotApplicable; - - Self.BuildBasePathArray(Paths, BasePath); + if (Self.CheckDerivedToBaseConversion(FromType, ToType, + SrcExpr->getBeginLoc(), OpRange, + &BasePath, CStyle)) { + msg = 0; + return TC_Failed; + } } else Kind = CK_NoOp; diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 11cbda412667f..d7234e269f645 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -17155,6 +17155,30 @@ bool Sema::CheckEnumUnderlyingType(TypeSourceInfo *TI) { if (T->isDependentType()) return false; + // C++0x 7.2p2: The type-specifier-seq of an enum-base shall name an + // integral type; any cv-qualification is ignored. + // C23 6.7.3.3p5: The underlying type of the enumeration is the unqualified, + // non-atomic version of the type specified by the type specifiers in the + // specifier qualifier list. + // Because of how odd C's rule is, we'll let the user know that operations + // involving the enumeration type will be non-atomic. 
+ if (T->isAtomicType()) + Diag(UnderlyingLoc, diag::warn_atomic_stripped_in_enum); + + Qualifiers Q = T.getQualifiers(); + std::optional QualSelect; + if (Q.hasConst() && Q.hasVolatile()) + QualSelect = diag::CVQualList::Both; + else if (Q.hasConst()) + QualSelect = diag::CVQualList::Const; + else if (Q.hasVolatile()) + QualSelect = diag::CVQualList::Volatile; + + if (QualSelect) + Diag(UnderlyingLoc, diag::warn_cv_stripped_in_enum) << *QualSelect; + + T = T.getAtomicUnqualifiedType(); + // This doesn't use 'isIntegralType' despite the error message mentioning // integral type because isIntegralType would also allow enum types in C. if (const BuiltinType *BT = T->getAs()) @@ -17551,6 +17575,9 @@ Sema::ActOnTag(Scope *S, unsigned TagSpec, TagUseKind TUK, SourceLocation KWLoc, } else if (UnderlyingType.get()) { // C++0x 7.2p2: The type-specifier-seq of an enum-base shall name an // integral type; any cv-qualification is ignored. + // C23 6.7.3.3p5: The underlying type of the enumeration is the + // unqualified, non-atomic version of the type specified by the type + // specifiers in the specifier qualifier list. TypeSourceInfo *TI = nullptr; GetTypeFromParser(UnderlyingType.get(), &TI); EnumUnderlying = TI; @@ -17563,6 +17590,18 @@ Sema::ActOnTag(Scope *S, unsigned TagSpec, TagUseKind TUK, SourceLocation KWLoc, UPPC_FixedUnderlyingType)) EnumUnderlying = Context.IntTy.getTypePtr(); + // If the underlying type is atomic, we need to adjust the type before + // continuing. This only happens in the case we stored a TypeSourceInfo + // into EnumUnderlying because the other cases are error recovery up to + // this point. But because it's not possible to gin up a TypeSourceInfo + // for a non-atomic type from an atomic one, we'll store into the Type + // field instead. FIXME: it would be nice to have an easy way to get a + // derived TypeSourceInfo which strips qualifiers including the weird + // ones like _Atomic where it forms a different type. 
+ if (TypeSourceInfo *TI = dyn_cast(EnumUnderlying); + TI && TI->getType()->isAtomicType()) + EnumUnderlying = TI->getType().getAtomicUnqualifiedType().getTypePtr(); + } else if (Context.getTargetInfo().getTriple().isWindowsMSVCEnvironment()) { // For MSVC ABI compatibility, unfixed enums must use an underlying type // of 'int'. However, if this is an unfixed forward declaration, don't set diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 7ebb53318702c..099207727c8c8 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -2912,18 +2912,14 @@ static void handleWarnUnusedResult(Sema &S, Decl *D, const ParsedAttr &AL) { // If this is spelled as the standard C++17 attribute, but not in C++17, // warn about using it as an extension. If there are attribute arguments, - // then claim it's a C++20 extension instead. - // FIXME: If WG14 does not seem likely to adopt the same feature, add an - // extension warning for C23 mode. + // then claim it's a C++20 extension instead. C23 supports this attribute + // with the message; no extension warning is needed there beyond the one + // already issued for accepting attributes in older modes. const LangOptions &LO = S.getLangOpts(); if (AL.getNumArgs() == 1) { if (LO.CPlusPlus && !LO.CPlusPlus20) S.Diag(AL.getLoc(), diag::ext_cxx20_attr) << AL; - // Since this is spelled [[nodiscard]], get the optional string - // literal. If in C++ mode, but not in C++20 mode, diagnose as an - // extension. - // FIXME: C23 should support this feature as well, even as an extension. 
if (!S.checkStringLiteralArgumentAttr(AL, 0, Str, nullptr)) return; } else if (LO.CPlusPlus && !LO.CPlusPlus17) diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index a30acbe9a4bca..4ecc9b0d4c5c8 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -6620,6 +6620,8 @@ StmtResult SemaOpenMP::ActOnOpenMPExecutableDirective( case OMPC_affinity: case OMPC_bind: case OMPC_filter: + case OMPC_severity: + case OMPC_message: continue; case OMPC_allocator: case OMPC_flush: @@ -6637,8 +6639,6 @@ StmtResult SemaOpenMP::ActOnOpenMPExecutableDirective( case OMPC_match: case OMPC_when: case OMPC_at: - case OMPC_severity: - case OMPC_message: default: llvm_unreachable("Unexpected clause"); } diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 7af3acacb5ba6..1b54628c5e564 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -13131,7 +13131,8 @@ CompleteNonViableCandidate(Sema &S, OverloadCandidate *Cand, ParamTypes = Cand->Function->getType()->castAs()->getParamTypes(); if (isa(Cand->Function) && - !isa(Cand->Function) && !Reversed) { + !isa(Cand->Function) && !Reversed && + !Cand->Function->hasCXXExplicitFunctionObjectParameter()) { // Conversion 0 is 'this', which doesn't have a corresponding parameter. ConvIdx = 1; if (CSK == OverloadCandidateSet::CSK_Operator && @@ -13149,9 +13150,8 @@ CompleteNonViableCandidate(Sema &S, OverloadCandidate *Cand, // Fill in the rest of the conversions. for (unsigned ParamIdx = Reversed ? ParamTypes.size() - 1 : 0; - ConvIdx != ConvCount; + ConvIdx != ConvCount && ArgIdx < Args.size(); ++ConvIdx, ++ArgIdx, ParamIdx += (Reversed ? -1 : 1)) { - assert(ArgIdx < Args.size() && "no argument for this arg conversion"); if (Cand->Conversions[ConvIdx].isInitialized()) { // We've already checked this conversion. 
} else if (ParamIdx < ParamTypes.size()) { diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index 1a98b3583185e..b76619fc50268 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -8968,8 +8968,10 @@ Sema::ActOnFinishConceptDefinition(Scope *S, ConceptDecl *C, Expr *ConstraintExpr, const ParsedAttributesView &Attrs) { assert(!C->hasDefinition() && "Concept already defined"); - if (DiagnoseUnexpandedParameterPack(ConstraintExpr)) + if (DiagnoseUnexpandedParameterPack(ConstraintExpr)) { + C->setInvalidDecl(); return nullptr; + } C->setDefinition(ConstraintExpr); ProcessDeclAttributeList(S, C, Attrs); diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index 70a4c159f9805..e2c3cdcd536bc 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -2022,8 +2022,17 @@ Decl *TemplateDeclInstantiator::VisitEnumDecl(EnumDecl *D) { DeclarationName()); if (!NewTI || SemaRef.CheckEnumUnderlyingType(NewTI)) Enum->setIntegerType(SemaRef.Context.IntTy); - else - Enum->setIntegerTypeSourceInfo(NewTI); + else { + // If the underlying type is atomic, we need to adjust the type before + // continuing. See C23 6.7.3.3p5 and Sema::ActOnTag(). FIXME: same as + // within ActOnTag(), it would be nice to have an easy way to get a + // derived TypeSourceInfo which strips qualifiers including the weird + // ones like _Atomic where it forms a different type. 
+ if (NewTI->getType()->isAtomicType()) + Enum->setIntegerType(NewTI->getType().getAtomicUnqualifiedType()); + else + Enum->setIntegerTypeSourceInfo(NewTI); + } // C++23 [conv.prom]p4 // if integral promotion can be applied to its underlying type, a prvalue diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp index 2437b2d3595e5..b641e4a0f0abb 100644 --- a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp +++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp @@ -117,7 +117,6 @@ DependencyScanningFilesystemSharedCache::getOutOfDateEntries( std::lock_guard LockGuard(Shard.CacheLock); for (const auto &[Path, CachedPair] : Shard.CacheByFilename) { const CachedFileSystemEntry *Entry = CachedPair.first; - llvm::ErrorOr Status = UnderlyingFS.status(Path); if (Status) { if (Entry->getError()) { @@ -128,12 +127,22 @@ DependencyScanningFilesystemSharedCache::getOutOfDateEntries( InvalidDiagInfo.emplace_back(Path.data()); } else { llvm::vfs::Status CachedStatus = Entry->getStatus(); - uint64_t CachedSize = CachedStatus.getSize(); - uint64_t ActualSize = Status->getSize(); - if (CachedSize != ActualSize) { - // This is the case where the cached file has a different size - // from the actual file that comes from the underlying FS. - InvalidDiagInfo.emplace_back(Path.data(), CachedSize, ActualSize); + if (Status->getType() == llvm::sys::fs::file_type::regular_file && + Status->getType() == CachedStatus.getType()) { + // We only check regular files. Directory files sizes could change + // due to content changes, and reporting directory size changes can + // lead to false positives. + // TODO: At the moment, we do not detect symlinks to files whose + // size may change. We need to decide if we want to detect cached + // symlink size changes. We can also expand this to detect file + // type changes. 
+ uint64_t CachedSize = CachedStatus.getSize(); + uint64_t ActualSize = Status->getSize(); + if (CachedSize != ActualSize) { + // This is the case where the cached file has a different size + // from the actual file that comes from the underlying FS. + InvalidDiagInfo.emplace_back(Path.data(), CachedSize, ActualSize); + } } } } diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp index 515211d47b348..27734ffd0e20b 100644 --- a/clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp +++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp @@ -40,6 +40,7 @@ class MakeDependencyPrinterConsumer : public DependencyConsumer { void handlePrebuiltModuleDependency(PrebuiltModuleDep PMD) override {} void handleModuleDependency(ModuleDeps MD) override {} void handleDirectModuleDependency(ModuleID ID) override {} + void handleVisibleModule(std::string ModuleName) override {} void handleContextHash(std::string Hash) override {} void printDependencies(std::string &S) { @@ -154,7 +155,8 @@ DependencyScanningTool::getTranslationUnitDependencies( return Consumer.takeTranslationUnitDeps(); } -llvm::Expected DependencyScanningTool::getModuleDependencies( +llvm::Expected +DependencyScanningTool::getModuleDependencies( StringRef ModuleName, const std::vector &CommandLine, StringRef CWD, const llvm::DenseSet &AlreadySeen, LookupModuleOutputCallback LookupModuleOutput) { @@ -164,7 +166,7 @@ llvm::Expected DependencyScanningTool::getModuleDependencies( Controller, ModuleName); if (Result) return std::move(Result); - return Consumer.takeModuleGraphDeps(); + return Consumer.takeTranslationUnitDeps(); } TranslationUnitDeps FullDependencyConsumer::takeTranslationUnitDeps() { @@ -175,6 +177,7 @@ TranslationUnitDeps FullDependencyConsumer::takeTranslationUnitDeps() { TU.NamedModuleDeps = std::move(NamedModuleDeps); TU.FileDeps = std::move(Dependencies); TU.PrebuiltModuleDeps = 
std::move(PrebuiltModuleDeps); + TU.VisibleModules = std::move(VisibleModules); TU.Commands = std::move(Commands); for (auto &&M : ClangModuleDeps) { @@ -190,19 +193,4 @@ TranslationUnitDeps FullDependencyConsumer::takeTranslationUnitDeps() { return TU; } -ModuleDepsGraph FullDependencyConsumer::takeModuleGraphDeps() { - ModuleDepsGraph ModuleGraph; - - for (auto &&M : ClangModuleDeps) { - auto &MD = M.second; - // TODO: Avoid handleModuleDependency even being called for modules - // we've already seen. - if (AlreadySeen.count(M.first)) - continue; - ModuleGraph.push_back(std::move(MD)); - } - - return ModuleGraph; -} - CallbackActionController::~CallbackActionController() {} diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp index fa86d714ff69a..37f8b945d785e 100644 --- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp +++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp @@ -673,8 +673,10 @@ void ModuleDepCollectorPP::handleImport(const Module *Imported) { if (MDC.isPrebuiltModule(TopLevelModule)) MDC.DirectPrebuiltModularDeps.insert( {TopLevelModule, PrebuiltModuleDep{TopLevelModule}}); - else + else { MDC.DirectModularDeps.insert(TopLevelModule); + MDC.DirectImports.insert(Imported); + } } void ModuleDepCollectorPP::EndOfMainFile() { @@ -706,6 +708,8 @@ void ModuleDepCollectorPP::EndOfMainFile() { if (!MDC.isPrebuiltModule(M)) MDC.DirectModularDeps.insert(M); + MDC.addVisibleModules(); + for (const Module *M : MDC.DirectModularDeps) handleTopLevelModule(M); @@ -727,6 +731,9 @@ void ModuleDepCollectorPP::EndOfMainFile() { MDC.Consumer.handleDirectModuleDependency(It->second->ID); } + for (auto &&I : MDC.VisibleModules) + MDC.Consumer.handleVisibleModule(std::string(I.getKey())); + for (auto &&I : MDC.FileDeps) MDC.Consumer.handleFileDependency(I); @@ -993,6 +1000,29 @@ bool ModuleDepCollector::isPrebuiltModule(const Module *M) { return true; } +void 
ModuleDepCollector::addVisibleModules() { + llvm::DenseSet ImportedModules; + auto InsertVisibleModules = [&](const Module *M) { + if (ImportedModules.contains(M)) + return; + + VisibleModules.insert(M->getTopLevelModuleName()); + SmallVector Stack; + M->getExportedModules(Stack); + while (!Stack.empty()) { + const Module *CurrModule = Stack.pop_back_val(); + if (ImportedModules.contains(CurrModule)) + continue; + ImportedModules.insert(CurrModule); + VisibleModules.insert(CurrModule->getTopLevelModuleName()); + CurrModule->getExportedModules(Stack); + } + }; + + for (const Module *Import : DirectImports) + InsertVisibleModules(Import); +} + static StringRef makeAbsoluteAndPreferred(CompilerInstance &CI, StringRef Path, SmallVectorImpl &Storage) { if (llvm::sys::path::is_absolute(Path) && diff --git a/clang/test/APINotes/versioned-version-independent.m b/clang/test/APINotes/versioned-version-independent.m new file mode 100644 index 0000000000000..da8b34a1d9ba3 --- /dev/null +++ b/clang/test/APINotes/versioned-version-independent.m @@ -0,0 +1,36 @@ +// RUN: rm -rf %t && mkdir -p %t + +// Build and check the module file in version-independent mode. 
+// RUN: %clang_cc1 -fswift-version-independent-apinotes -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache/Versioned -fdisable-module-hash -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s +// RUN: %clang_cc1 -fswift-version-independent-apinotes -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache/Versioned -fdisable-module-hash -fapinotes-modules -I %S/Inputs/Headers -F %S/Inputs/Frameworks %s -ast-dump -ast-dump-filter 'DUMP' &> %t/VersionedKit_AST_Dump.txt +// RUN: cat %t/VersionedKit_AST_Dump.txt | FileCheck -check-prefix=CHECK-VERSIONED-DUMP %s + +#import + +// CHECK-VERSIONED-DUMP-LABEL: Dumping moveToPointDUMP +// CHECK-VERSIONED-DUMP: SwiftNameAttr {{.+}} "moveTo(x:y:)" +// CHECK-VERSIONED-DUMP-NEXT: SwiftVersionedAdditionAttr {{.+}} Implicit 3.0 +// CHECK-VERSIONED-DUMP-NEXT: SwiftNameAttr {{.+}} <> "moveTo(a:b:)" + +// CHECK-VERSIONED-DUMP-LABEL: Dumping unversionedRenameDUMP +// CHECK-VERSIONED-DUMP: SwiftNameAttr {{.+}} "unversionedRename_HEADER()" +// CHECK-VERSIONED-DUMP-NEXT: SwiftVersionedAdditionAttr {{.+}} Implicit 0 +// CHECK-VERSIONED-DUMP-NEXT: SwiftNameAttr {{.+}} "unversionedRename_NOTES()" + +// CHECK-VERSIONED-DUMP-LABEL: Dumping TestGenericDUMP +// CHECK-VERSIONED-DUMP: SwiftVersionedAdditionAttr {{.+}} Implicit 3.0 +// CHECK-VERSIONED-DUMP-NEXT: SwiftImportAsNonGenericAttr {{.+}} <> + +// CHECK-VERSIONED-DUMP: Swift3RenamedOnlyDUMP +// CHECK-VERSIONED-DUMP: SwiftVersionedAdditionAttr {{.+}} Implicit 3.0 +// CHECK-VERSIONED-DUMP-NEXT: SwiftNameAttr {{.+}} "SpecialSwift3Name" + +// CHECK-VERSIONED-DUMP: Swift3RenamedAlsoDUMP +// CHECK-VERSIONED-DUMP: SwiftNameAttr {{.+}} "Swift4Name" +// CHECK-VERSIONED-DUMP-NEXT: SwiftVersionedAdditionAttr {{.+}} Implicit 3.0 +// CHECK-VERSIONED-DUMP-NEXT: SwiftNameAttr {{.+}} "SpecialSwift3Also" + +// CHECK-VERSIONED-DUMP: Swift4RenamedDUMP +// CHECK-VERSIONED-DUMP: SwiftVersionedAdditionAttr {{.+}} Implicit 4 +// 
CHECK-VERSIONED-DUMP-NEXT: SwiftNameAttr {{.+}} "SpecialSwift4Name" + diff --git a/clang/test/AST/HLSL/StructuredBuffers-AST.hlsl b/clang/test/AST/HLSL/StructuredBuffers-AST.hlsl index b74e183eec9cc..1c8b9c10f5a98 100644 --- a/clang/test/AST/HLSL/StructuredBuffers-AST.hlsl +++ b/clang/test/AST/HLSL/StructuredBuffers-AST.hlsl @@ -12,7 +12,7 @@ // // RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library -x hlsl -ast-dump \ // RUN: -DRESOURCE=RWStructuredBuffer %s | FileCheck -DRESOURCE=RWStructuredBuffer \ -// RUN: -check-prefixes=CHECK,CHECK-UAV,CHECK-SUBSCRIPT,CHECK-COUNTER,CHECK-LOAD %s +// RUN: -check-prefixes=CHECK,CHECK-UAV,CHECK-SUBSCRIPT,CHECK-SUBSCRIPT-UAV,CHECK-COUNTER,CHECK-LOAD %s // // RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library -x hlsl -ast-dump -DEMPTY \ // RUN: -DRESOURCE=AppendStructuredBuffer %s | FileCheck -DRESOURCE=AppendStructuredBuffer \ @@ -36,7 +36,7 @@ // // RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library -x hlsl -ast-dump \ // RUN: -DRESOURCE=RasterizerOrderedStructuredBuffer %s | FileCheck -DRESOURCE=RasterizerOrderedStructuredBuffer \ -// RUN: -check-prefixes=CHECK,CHECK-UAV,CHECK-ROV,CHECK-SUBSCRIPT,CHECK-LOAD %s +// RUN: -check-prefixes=CHECK,CHECK-UAV,CHECK-ROV,CHECK-SUBSCRIPT,CHECK-SUBSCRIPT-UAV,CHECK-LOAD %s // This test tests two different AST generations for each structured buffer. 
// The "EMPTY" test mode verifies the AST generated by forward declaration @@ -170,22 +170,22 @@ RESOURCE Buffer; // CHECK-SUBSCRIPT-NEXT: DeclRefExpr {{.*}} 'unsigned int' ParmVar {{.*}} 'Index' 'unsigned int' // CHECK-SUBSCRIPT-NEXT: AlwaysInlineAttr {{.*}} Implicit always_inline -// CHECK-SUBSCRIPT-NEXT: CXXMethodDecl {{.*}} operator[] 'hlsl_device element_type &(unsigned int)' -// CHECK-SUBSCRIPT-NEXT: ParmVarDecl {{.*}} Index 'unsigned int' -// CHECK-SUBSCRIPT-NEXT: CompoundStmt -// CHECK-SUBSCRIPT-NEXT: ReturnStmt -// CHECK-SUBSCRIPT-NEXT: UnaryOperator {{.*}} 'hlsl_device element_type' prefix '*' cannot overflow -// CHECK-SUBSCRIPT-NEXT: CallExpr {{.*}} 'hlsl_device element_type *' -// CHECK-SUBSCRIPT-NEXT: ImplicitCastExpr {{.*}} -// CHECK-SUBSCRIPT-NEXT: DeclRefExpr {{.*}} '' Function {{.*}} '__builtin_hlsl_resource_getpointer' 'void (...) noexcept' -// CHECK-SUBSCRIPT-NEXT: MemberExpr {{.*}} '__hlsl_resource_t -// CHECK-SUBSCRIPT-SAME{LITERAL}: [[hlsl::resource_class( -// CHECK-SUBSCRIPT-SAME{LITERAL}: [[hlsl::raw_buffer]] -// CHECK-SUBSCRIPT-SAME{LITERAL}: [[hlsl::contained_type(element_type)]] -// CHECK-SUBSCRIPT-SAME: ' lvalue .__handle {{.*}} -// CHECK-SUBSCRIPT-NEXT: CXXThisExpr {{.*}} '[[RESOURCE]]' lvalue implicit this -// CHECK-SUBSCRIPT-NEXT: DeclRefExpr {{.*}} 'unsigned int' ParmVar {{.*}} 'Index' 'unsigned int' -// CHECK-SUBSCRIPT-NEXT: AlwaysInlineAttr {{.*}} Implicit always_inline +// CHECK-SUBSCRIPT-UAV-NEXT: CXXMethodDecl {{.*}} operator[] 'hlsl_device element_type &(unsigned int)' +// CHECK-SUBSCRIPT-UAV-NEXT: ParmVarDecl {{.*}} Index 'unsigned int' +// CHECK-SUBSCRIPT-UAV-NEXT: CompoundStmt +// CHECK-SUBSCRIPT-UAV-NEXT: ReturnStmt +// CHECK-SUBSCRIPT-UAV-NEXT: UnaryOperator {{.*}} 'hlsl_device element_type' prefix '*' cannot overflow +// CHECK-SUBSCRIPT-UAV-NEXT: CallExpr {{.*}} 'hlsl_device element_type *' +// CHECK-SUBSCRIPT-UAV-NEXT: ImplicitCastExpr {{.*}} +// CHECK-SUBSCRIPT-UAV-NEXT: DeclRefExpr {{.*}} '' Function {{.*}} 
'__builtin_hlsl_resource_getpointer' 'void (...) noexcept' +// CHECK-SUBSCRIPT-UAV-NEXT: MemberExpr {{.*}} '__hlsl_resource_t +// CHECK-SUBSCRIPT-UAV-SAME{LITERAL}: [[hlsl::resource_class( +// CHECK-SUBSCRIPT-UAV-SAME{LITERAL}: [[hlsl::raw_buffer]] +// CHECK-SUBSCRIPT-UAV-SAME{LITERAL}: [[hlsl::contained_type(element_type)]] +// CHECK-SUBSCRIPT-UAV-SAME: ' lvalue .__handle {{.*}} +// CHECK-SUBSCRIPT-UAV-NEXT: CXXThisExpr {{.*}} '[[RESOURCE]]' lvalue implicit this +// CHECK-SUBSCRIPT-UAV-NEXT: DeclRefExpr {{.*}} 'unsigned int' ParmVar {{.*}} 'Index' 'unsigned int' +// CHECK-SUBSCRIPT-UAV-NEXT: AlwaysInlineAttr {{.*}} Implicit always_inline // CHECK-NOSUBSCRIPT-NOT: CXXMethodDecl {{.*}} operator[] 'const hlsl_device element_type &(unsigned int) const' // CHECK-NOSUBSCRIPT-NOT: CXXMethodDecl {{.*}} operator[] 'hlsl_device element_type &(unsigned int)' diff --git a/clang/test/AST/HLSL/TypedBuffers-AST.hlsl b/clang/test/AST/HLSL/TypedBuffers-AST.hlsl index d098e5a323ca7..d6b88e276762e 100644 --- a/clang/test/AST/HLSL/TypedBuffers-AST.hlsl +++ b/clang/test/AST/HLSL/TypedBuffers-AST.hlsl @@ -126,7 +126,7 @@ RESOURCE Buffer; // CHECK-NEXT: DeclRefExpr {{.*}} 'const char *' ParmVar {{.*}} 'name' 'const char *' // CHECK-NEXT: AlwaysInlineAttr -// Subsctript operators +// Subscript operators // CHECK: CXXMethodDecl {{.*}} operator[] 'const hlsl_device element_type &(unsigned int) const' // CHECK-NEXT: ParmVarDecl {{.*}} Index 'unsigned int' @@ -145,22 +145,21 @@ RESOURCE Buffer; // CHECK-NEXT: DeclRefExpr {{.*}} 'unsigned int' ParmVar {{.*}} 'Index' 'unsigned int' // CHECK-NEXT: AlwaysInlineAttr {{.*}} Implicit always_inline -// CHECK-NEXT: CXXMethodDecl {{.*}} operator[] 'hlsl_device element_type &(unsigned int)' -// CHECK-NEXT: ParmVarDecl {{.*}} Index 'unsigned int' -// CHECK-NEXT: CompoundStmt -// CHECK-NEXT: ReturnStmt -// CHECK-NEXT: UnaryOperator {{.*}} 'hlsl_device element_type' prefix '*' cannot overflow -// CHECK-NEXT: CallExpr {{.*}} 'hlsl_device element_type *' 
-// CHECK-NEXT: ImplicitCastExpr {{.*}} -// CHECK-NEXT: DeclRefExpr {{.*}} '' Function {{.*}} '__builtin_hlsl_resource_getpointer' 'void (...) noexcept' -// CHECK-NEXT: MemberExpr {{.*}} '__hlsl_resource_t +// CHECK-UAV-NEXT: CXXMethodDecl {{.*}} operator[] 'hlsl_device element_type &(unsigned int)' +// CHECK-UAV-NEXT: ParmVarDecl {{.*}} Index 'unsigned int' +// CHECK-UAV-NEXT: CompoundStmt +// CHECK-UAV-NEXT: ReturnStmt +// CHECK-UAV-NEXT: UnaryOperator {{.*}} 'hlsl_device element_type' prefix '*' cannot overflow +// CHECK-UAV-NEXT: CallExpr {{.*}} 'hlsl_device element_type *' +// CHECK-UAV-NEXT: ImplicitCastExpr {{.*}} +// CHECK-UAV-NEXT: DeclRefExpr {{.*}} '' Function {{.*}} '__builtin_hlsl_resource_getpointer' 'void (...) noexcept' +// CHECK-UAV-NEXT: MemberExpr {{.*}} '__hlsl_resource_t // CHECK-UAV-SAME{LITERAL}: [[hlsl::resource_class(UAV)]] -// CHECK-SRV-SAME{LITERAL}: [[hlsl::resource_class(SRV)]] -// CHECK-SAME{LITERAL}: [[hlsl::contained_type(element_type)]] -// CHECK-SAME: ' lvalue .__handle {{.*}} -// CHECK-NEXT: CXXThisExpr {{.*}} '[[RESOURCE]]' lvalue implicit this -// CHECK-NEXT: DeclRefExpr {{.*}} 'unsigned int' ParmVar {{.*}} 'Index' 'unsigned int' -// CHECK-NEXT: AlwaysInlineAttr {{.*}} Implicit always_inline +// CHECK-UAV-SAME{LITERAL}: [[hlsl::contained_type(element_type)]] +// CHECK-UAV-SAME: ' lvalue .__handle {{.*}} +// CHECK-UAV-NEXT: CXXThisExpr {{.*}} '[[RESOURCE]]' lvalue implicit this +// CHECK-UAV-NEXT: DeclRefExpr {{.*}} 'unsigned int' ParmVar {{.*}} 'Index' 'unsigned int' +// CHECK-UAV-NEXT: AlwaysInlineAttr {{.*}} Implicit always_inline // Load method diff --git a/clang/test/C/C23/n3030.c b/clang/test/C/C23/n3030.c index 17084bbb55f50..94ea7037edd11 100644 --- a/clang/test/C/C23/n3030.c +++ b/clang/test/C/C23/n3030.c @@ -91,3 +91,19 @@ enum e : short f = 0; // expected-error {{non-defining declaration of enumeratio enum g : short { yyy } h = yyy; enum ee2 : typeof ((enum ee3 : short { A })0, (short)0); + +enum not_actually_atomic : 
_Atomic(short) { // expected-error {{'_Atomic' qualifier ignored; operations involving the enumeration type will be non-atomic}} + Surprise +}; + +enum not_actually_const : const int { // expected-warning {{'const' qualifier in enumeration underlying type ignored}} + SurpriseAgain +}; + +enum not_actually_volatile : volatile int { // expected-warning {{'volatile' qualifier in enumeration underlying type ignored}} + SurpriseOnceMore +}; + +enum not_acually_const_or_volatile : const volatile int { // expected-warning {{'const' and 'volatile' qualifiers in enumeration underlying type ignored}} + WhyTheSurprise +}; diff --git a/clang/test/C/C23/n3030_1.c b/clang/test/C/C23/n3030_1.c new file mode 100644 index 0000000000000..1afc9855767f0 --- /dev/null +++ b/clang/test/C/C23/n3030_1.c @@ -0,0 +1,13 @@ +// RUN: %clang_cc1 -std=c23 -Wno-underlying-atomic-qualifier-ignored -ast-dump %s | FileCheck %s + +// The underlying type is the unqualified, non-atomic version of the type +// specified. +enum const_enum : const short { ConstE }; +// CHECK: EnumDecl {{.*}} const_enum 'short' + +// These were previously being diagnosed as invalid underlying types. They +// are valid; the _Atomic is stripped from the underlying type. 
+enum atomic_enum1 : _Atomic(int) { AtomicE1 }; +// CHECK: EnumDecl {{.*}} atomic_enum1 'int' +enum atomic_enum2 : _Atomic long long { AtomicE2 }; +// CHECK: EnumDecl {{.*}} atomic_enum2 'long long' diff --git a/clang/test/CIR/CodeGen/bitfields.c b/clang/test/CIR/CodeGen/bitfields.c index ee69db22b4a20..fc688fb4cdcaa 100644 --- a/clang/test/CIR/CodeGen/bitfields.c +++ b/clang/test/CIR/CodeGen/bitfields.c @@ -134,3 +134,136 @@ unsigned int load_field_unsigned(A* s) { //OGCG: [[TMP4:%.*]] = lshr i16 [[TMP3]], 3 //OGCG: [[TMP5:%.*]] = and i16 [[TMP4]], 15 //OGCG: [[TMP6:%.*]] = zext i16 [[TMP5]] to i32 + +void store_field() { + S s; + s.e = 3; +} +// CIR: cir.func {{.*@store_field}} +// CIR: [[TMP0:%.*]] = cir.alloca !rec_S, !cir.ptr +// CIR: [[TMP1:%.*]] = cir.const #cir.int<3> : !s32i +// CIR: [[TMP2:%.*]] = cir.get_member [[TMP0]][1] {name = "e"} : !cir.ptr -> !cir.ptr +// CIR: cir.set_bitfield(#bfi_e, [[TMP2]] : !cir.ptr, [[TMP1]] : !s32i) + +// LLVM: define dso_local void @store_field() +// LLVM: [[TMP0:%.*]] = alloca %struct.S, i64 1, align 4 +// LLVM: [[TMP1:%.*]] = getelementptr %struct.S, ptr [[TMP0]], i32 0, i32 1 +// LLVM: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// LLVM: [[TMP3:%.*]] = and i16 [[TMP2]], -32768 +// LLVM: [[TMP4:%.*]] = or i16 [[TMP3]], 3 +// LLVM: store i16 [[TMP4]], ptr [[TMP1]], align 2 + +// OGCG: define dso_local void @store_field() +// OGCG: [[TMP0:%.*]] = alloca %struct.S, align 4 +// OGCG: [[TMP1:%.*]] = getelementptr inbounds nuw %struct.S, ptr [[TMP0]], i32 0, i32 1 +// OGCG: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 4 +// OGCG: [[TMP3:%.*]] = and i16 [[TMP2]], -32768 +// OGCG: [[TMP4:%.*]] = or i16 [[TMP3]], 3 +// OGCG: store i16 [[TMP4]], ptr [[TMP1]], align 4 + +void store_bitfield_to_bitfield() { + S s; + s.a = s.c; +} + +// CIR: cir.func {{.*@store_bitfield_to_bitfield}} +// CIR: [[TMP0:%.*]] = cir.alloca !rec_S, !cir.ptr, ["s"] {alignment = 4 : i64} +// CIR: [[TMP1:%.*]] = cir.get_member [[TMP0]][0] {name = "c"} : 
!cir.ptr -> !cir.ptr +// CIR: [[TMP2:%.*]] = cir.get_bitfield(#bfi_c, [[TMP1]] : !cir.ptr) -> !s32i +// CIR: [[TMP3:%.*]] = cir.get_member [[TMP0]][0] {name = "a"} : !cir.ptr -> !cir.ptr +// CIR: [[TMP4:%.*]] = cir.set_bitfield(#bfi_a, [[TMP3]] : !cir.ptr, [[TMP2]] : !s32i) -> !s32i + +// LLVM: define dso_local void @store_bitfield_to_bitfield() +// LLVM: [[TMP0:%.*]] = alloca %struct.S, i64 1, align 4 +// LLVM: [[TMP1:%.*]] = getelementptr %struct.S, ptr [[TMP0]], i32 0, i32 0 +// LLVM: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// LLVM: [[TMP3:%.*]] = shl i64 [[TMP2]], 15 +// LLVM: [[TMP4:%.*]] = ashr i64 [[TMP3]], 47 +// LLVM: [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32 +// LLVM: [[TMP6:%.*]] = getelementptr %struct.S, ptr [[TMP0]], i32 0, i32 0 +// LLVM: [[TMP7:%.*]] = zext i32 [[TMP5]] to i64 +// LLVM: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 +// LLVM: [[TMP9:%.*]] = and i64 [[TMP7]], 15 +// LLVM: [[TMP10:%.*]] = and i64 [[TMP8]], -16 +// LLVM: [[TMP11:%.*]] = or i64 [[TMP10]], [[TMP9]] +// LLVM: store i64 [[TMP11]], ptr [[TMP6]], align 8 +// LLVM: [[TMP12:%.*]] = shl i64 [[TMP9]], 60 +// LLVM: [[TMP13:%.*]] = ashr i64 [[TMP12]], 60 +// LLVM: [[TMP15:%.*]] = trunc i64 [[TMP13]] to i32 + +// OGCG: define dso_local void @store_bitfield_to_bitfield() +// OGCG: [[TMP0:%.*]] = alloca %struct.S, align 4 +// OGCG: [[TMP1:%.*]] = load i64, ptr [[TMP0]], align 4 +// OGCG: [[TMP2:%.*]] = shl i64 [[TMP1]], 15 +// OGCG: [[TMP3:%.*]] = ashr i64 [[TMP2]], 47 +// OGCG: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 +// OGCG: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64 +// OGCG: [[TMP6:%.*]] = load i64, ptr [[TMP0]], align 4 +// OGCG: [[TMP7:%.*]] = and i64 [[TMP5]], 15 +// OGCG: [[TMP8:%.*]] = and i64 [[TMP6]], -16 +// OGCG: [[TMP9:%.*]] = or i64 [[TMP8]], [[TMP7]] +// OGCG: store i64 [[TMP9]], ptr [[TMP0]], align 4 +// OGCG: [[TMP10:%.*]] = shl i64 %bf.value, 60 +// OGCG: [[TMP11:%.*]] = ashr i64 [[TMP10]], 60 +// OGCG: [[TMP12:%.*]] = trunc i64 [[TMP11]] to i32 + +typedef 
struct { + int a : 30; + int volatile b : 8; + int c; +} V; + +void get_volatile(V* v) { + v->b = 3; +} + +// CIR: cir.func dso_local @get_volatile +// CIR: [[TMP0:%.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["v", init] {alignment = 8 : i64} +// CIR: [[TMP1:%.*]] = cir.const #cir.int<3> : !s32i +// CIR: [[TMP2:%.*]] = cir.load align(8) [[TMP0]] : !cir.ptr>, !cir.ptr +// CIR: [[TMP3:%.*]] = cir.get_member [[TMP2]][0] {name = "b"} : !cir.ptr -> !cir.ptr +// CIR: [[TMP4:%.*]] = cir.set_bitfield(#bfi_b, [[TMP3]] : !cir.ptr, [[TMP1]] : !s32i) {is_volatile} -> !s32i + +// LLVM: define dso_local void @get_volatile +// LLVM: [[TMP0:%.*]] = alloca ptr, i64 1, align 8 +// LLVM: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 +// LLVM: [[TMP2:%.*]] = getelementptr %struct.V, ptr [[TMP1]], i32 0, i32 0 +// LLVM: [[TMP3:%.*]] = load volatile i64, ptr [[TMP2]], align 8 +// LLVM: [[TMP4:%.*]] = and i64 [[TMP3]], -1095216660481 +// LLVM: [[TMP5:%.*]] = or i64 [[TMP4]], 12884901888 +// LLVM: store volatile i64 [[TMP5]], ptr [[TMP2]], align 8 + +// OGCG: define dso_local void @get_volatile +// OGCG: [[TMP0:%.*]] = alloca ptr, align 8 +// OGCG: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 +// OGCG: [[TMP2:%.*]] = load volatile i64, ptr [[TMP1]], align 4 +// OGCG: [[TMP3:%.*]] = and i64 [[TMP2]], -1095216660481 +// OGCG: [[TMP4:%.*]] = or i64 [[TMP3]], 12884901888 +// OGCG: store volatile i64 [[TMP4]], ptr [[TMP1]], align 4 + +void set_volatile(V* v) { + v->b = 3; +} +//CIR: cir.func dso_local @set_volatile +//CIR: [[TMP0:%.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["v", init] {alignment = 8 : i64} +//CIR: [[TMP1:%.*]] = cir.const #cir.int<3> : !s32i +//CIR: [[TMP2:%.*]] = cir.load align(8) [[TMP0]] : !cir.ptr>, !cir.ptr +//CIR: [[TMP3:%.*]] = cir.get_member [[TMP2]][0] {name = "b"} : !cir.ptr -> !cir.ptr +//CIR: [[TMP4:%.*]] = cir.set_bitfield(#bfi_b, [[TMP3]] : !cir.ptr, [[TMP1]] : !s32i) {is_volatile} -> !s32i + +// LLVM: define dso_local void @set_volatile +// LLVM: [[TMP0:%.*]] = alloca
ptr, i64 1, align 8 +// LLVM: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 +// LLVM: [[TMP2:%.*]] = getelementptr %struct.V, ptr [[TMP1]], i32 0, i32 0 +// LLVM: [[TMP3:%.*]] = load volatile i64, ptr [[TMP2]], align 8 +// LLVM: [[TMP4:%.*]] = and i64 [[TMP3]], -1095216660481 +// LLVM: [[TMP5:%.*]] = or i64 [[TMP4]], 12884901888 +// LLVM: store volatile i64 [[TMP5]], ptr [[TMP2]], align 8 + +// OGCG: define dso_local void @set_volatile +// OGCG: [[TMP0:%.*]] = alloca ptr, align 8 +// OGCG: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 +// OGCG: [[TMP2:%.*]] = load volatile i64, ptr [[TMP1]], align 4 +// OGCG: [[TMP3:%.*]] = and i64 [[TMP2]], -1095216660481 +// OGCG: [[TMP4:%.*]] = or i64 [[TMP3]], 12884901888 +// OGCG: store volatile i64 [[TMP4]], ptr [[TMP1]], align 4 diff --git a/clang/test/CIR/CodeGen/bitfields.cpp b/clang/test/CIR/CodeGen/bitfields.cpp index 7372acaeb9e06..6715ebf1f48b6 100644 --- a/clang/test/CIR/CodeGen/bitfields.cpp +++ b/clang/test/CIR/CodeGen/bitfields.cpp @@ -58,3 +58,70 @@ int load_field(S* s) { // OGCG: [[TMP3:%.*]] = shl i64 [[TMP2]], 15 // OGCG: [[TMP4:%.*]] = ashr i64 [[TMP3]], 47 // OGCG: [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32 + +void store_field() { + S s; + s.a = 3; +} +// CIR: cir.func dso_local @_Z11store_field +// CIR: [[TMP0:%.*]] = cir.alloca !rec_S, !cir.ptr +// CIR: [[TMP1:%.*]] = cir.const #cir.int<3> : !s32i +// CIR: [[TMP2:%.*]] = cir.get_member [[TMP0]][0] {name = "a"} : !cir.ptr -> !cir.ptr +// CIR: cir.set_bitfield(#bfi_a, [[TMP2]] : !cir.ptr, [[TMP1]] : !s32i) + +// LLVM: define dso_local void @_Z11store_fieldv +// LLVM: [[TMP0:%.*]] = alloca %struct.S, i64 1, align 4 +// LLVM: [[TMP1:%.*]] = getelementptr %struct.S, ptr [[TMP0]], i32 0, i32 0 +// LLVM: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// LLVM: [[TMP3:%.*]] = and i64 [[TMP2]], -16 +// LLVM: [[TMP4:%.*]] = or i64 [[TMP3]], 3 +// LLVM: store i64 [[TMP4]], ptr [[TMP1]], align 8 + +// OGCG: define dso_local void @_Z11store_fieldv() +// OGCG: 
[[TMP0:%.*]] = alloca %struct.S, align 4 +// OGCG: [[TMP1:%.*]] = load i64, ptr [[TMP0]], align 4 +// OGCG: [[TMP2:%.*]] = and i64 [[TMP1]], -16 +// OGCG: [[TMP3:%.*]] = or i64 [[TMP2]], 3 +// OGCG: store i64 [[TMP3]], ptr [[TMP0]], align 4 + +void store_bitfield_to_bitfield(S* s) { + s->a = s->b = 3; +} + +// CIR: cir.func dso_local @_Z26store_bitfield_to_bitfieldP1S +// CIR: [[TMP0:%.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["s", init] {alignment = 8 : i64} +// CIR: [[TMP1:%.*]] = cir.const #cir.int<3> : !s32i +// CIR: [[TMP2:%.*]] = cir.load align(8) [[TMP0]] : !cir.ptr>, !cir.ptr +// CIR: [[TMP3:%.*]] = cir.get_member [[TMP2]][0] {name = "b"} : !cir.ptr -> !cir.ptr +// CIR: [[TMP4:%.*]] = cir.set_bitfield(#bfi_b, [[TMP3]] : !cir.ptr, [[TMP1]] : !s32i) -> !s32i +// CIR: [[TMP5:%.*]] = cir.load align(8) [[TMP0]] : !cir.ptr>, !cir.ptr +// CIR: [[TMP6:%.*]] = cir.get_member [[TMP5]][0] {name = "a"} : !cir.ptr -> !cir.ptr +// CIR: [[TMP7:%.*]] = cir.set_bitfield(#bfi_a, [[TMP6]] : !cir.ptr, [[TMP4]] : !s32i) -> !s32i + +// LLVM: define dso_local void @_Z26store_bitfield_to_bitfieldP1S +// LLVM: [[TMP0:%.*]] = alloca ptr, i64 1, align 8 +// LLVM: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 +// LLVM: [[TMP2:%.*]] = getelementptr %struct.S, ptr [[TMP1]], i32 0, i32 0 +// LLVM: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 +// LLVM: [[TMP4:%.*]] = and i64 [[TMP3]], -2147483633 +// LLVM: [[TMP5:%.*]] = or i64 [[TMP4]], 48 +// LLVM: store i64 [[TMP5]], ptr [[TMP2]], align 8 +// LLVM: [[TMP6:%.*]] = load ptr, ptr [[TMP0]], align 8 +// LLVM: [[TMP7:%.*]] = getelementptr %struct.S, ptr [[TMP6]], i32 0, i32 0 +// LLVM: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// LLVM: [[TMP9:%.*]] = and i64 [[TMP8]], -16 +// LLVM: [[TMP10:%.*]] = or i64 [[TMP9]], 3 +// LLVM: store i64 [[TMP10]], ptr [[TMP7]], align 8 + +// OGCG: define dso_local void @_Z26store_bitfield_to_bitfieldP1S +// OGCG: [[TMP0:%.*]] = alloca ptr, align 8 +// OGCG: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 
+// OGCG: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 4 +// OGCG: [[TMP3:%.*]] = and i64 [[TMP2]], -2147483633 +// OGCG: [[TMP4:%.*]] = or i64 [[TMP3]], 48 +// OGCG: store i64 [[TMP4]], ptr [[TMP1]], align 4 +// OGCG: [[TMP5:%.*]] = load ptr, ptr [[TMP0]], align 8 +// OGCG: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 4 +// OGCG: [[TMP7:%.*]] = and i64 [[TMP6]], -16 +// OGCG: [[TMP8:%.*]] = or i64 [[TMP7]], 3 +// OGCG: store i64 [[TMP8]], ptr [[TMP5]], align 4 diff --git a/clang/test/CIR/CodeGen/bitfields_be.c b/clang/test/CIR/CodeGen/bitfields_be.c index e839bc2b9698d..6133927b67d21 100644 --- a/clang/test/CIR/CodeGen/bitfields_be.c +++ b/clang/test/CIR/CodeGen/bitfields_be.c @@ -42,3 +42,73 @@ int init(S* s) { //OGCG: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 //OGCG: [[TMP3:%.*]] = shl i32 [[TMP2]], 15 //OGCG: [[TMP4:%.*]] = ashr i32 [[TMP3]], 15 + + +void load(S* s) { + s->a = -4; + s->b = 42; + s->c = -12345; +} + +// field 'a' +// CIR: cir.func dso_local @load +// CIR: %[[PTR0:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["s", init] {alignment = 8 : i64} loc(#loc35) +// CIR: %[[CONST1:.*]] = cir.const #cir.int<4> : !s32i +// CIR: %[[MIN1:.*]] = cir.unary(minus, %[[CONST1]]) nsw : !s32i, !s32i +// CIR: %[[VAL0:.*]] = cir.load align(8) %[[PTR0]] : !cir.ptr>, !cir.ptr +// CIR: %[[GET0:.*]] = cir.get_member %[[VAL0]][0] {name = "a"} : !cir.ptr -> !cir.ptr +// CIR: %[[SET0:.*]] = cir.set_bitfield(#bfi_a, %[[GET0]] : !cir.ptr, %[[MIN1]] : !s32i) -> !s32i + +// LLVM: define dso_local void @load +// LLVM: %[[PTR0:.*]] = load ptr +// LLVM: %[[GET0:.*]] = getelementptr %struct.S, ptr %[[PTR0]], i32 0, i32 0 +// LLVM: %[[VAL0:.*]] = load i32, ptr %[[GET0]], align 4 +// LLVM: %[[AND0:.*]] = and i32 %[[VAL0]], 268435455 +// LLVM: %[[OR0:.*]] = or i32 %[[AND0]], -1073741824 +// LLVM: store i32 %[[OR0]], ptr %[[GET0]] + +// OGCG: define dso_local void @load +// OGCG: %[[PTR0:.*]] = load ptr +// OGCG: %[[VAL0:.*]] = load i32, ptr %[[PTR0]] +// OGCG: %[[AND0:.*]] = and i32 
%[[VAL0]], 268435455 +// OGCG: %[[OR0:.*]] = or i32 %[[AND0]], -1073741824 +// OGCG: store i32 %[[OR0]], ptr %[[PTR0]] + +// field 'b' +// CIR: %[[CONST2:.*]] = cir.const #cir.int<42> : !s32i +// CIR: %[[VAL1:.*]] = cir.load align(8) %[[PTR0]] : !cir.ptr>, !cir.ptr +// CIR: %[[GET1:.*]] = cir.get_member %[[VAL1]][0] {name = "b"} : !cir.ptr -> !cir.ptr +// CIR: %[[SET1:.*]] = cir.set_bitfield(#bfi_b, %[[GET1]] : !cir.ptr, %[[CONST2]] : !s32i) -> !s32i + +// LLVM: %[[PTR1:.*]] = load ptr +// LLVM: %[[GET1:.*]] = getelementptr %struct.S, ptr %[[PTR1]], i32 0, i32 0 +// LLVM: %[[VAL1:.*]] = load i32, ptr %[[GET1]], align 4 +// LLVM: %[[AND1:.*]] = and i32 %[[VAL1]], -268304385 +// LLVM: %[[OR1:.*]] = or i32 %[[AND1]], 5505024 +// LLVM: store i32 %[[OR1]], ptr %[[GET1]] + +// OGCG: %[[PTR1:.*]] = load ptr +// OGCG: %[[VAL1:.*]] = load i32, ptr %[[PTR1]] +// OGCG: %[[AND1:.*]] = and i32 %[[VAL1]], -268304385 +// OGCG: %[[OR1:.*]] = or i32 %[[AND1]], 5505024 +// OGCG: store i32 %[[OR1]], ptr %[[PTR1]] + +// field 'c' +// CIR: %[[CONST3:.*]] = cir.const #cir.int<12345> : !s32i +// CIR: %[[MIN2:.*]] = cir.unary(minus, %[[CONST3]]) nsw : !s32i, !s32i +// CIR: %[[VAL2:.*]] = cir.load align(8) %[[PTR0]] : !cir.ptr>, !cir.ptr +// CIR: %[[GET2:.*]] = cir.get_member %[[VAL2]][0] {name = "c"} : !cir.ptr -> !cir.ptr +// CIR: %[[SET2:.*]] = cir.set_bitfield(#bfi_c, %[[GET2]] : !cir.ptr, %[[MIN2]] : !s32i) -> !s32i + +// LLVM: %[[PTR2:.*]] = load ptr +// LLVM: %[[GET2:.*]] = getelementptr %struct.S, ptr %[[PTR2]], i32 0, i32 0 +// LLVM: %[[VAL2:.*]] = load i32, ptr %[[GET2]], align 4 +// LLVM: %[[AND2:.*]] = and i32 %[[VAL2]], -131072 +// LLVM: %[[OR2:.*]] = or i32 %[[AND2]], 118727 +// LLVM: store i32 %[[OR2]], ptr %[[GET2]] + +// OGCG: %[[PTR2:.*]] = load ptr +// OGCG: %[[VAL2:.*]] = load i32, ptr %[[PTR2]] +// OGCG: %[[AND2:.*]] = and i32 %[[VAL2]], -131072 +// OGCG: %[[OR2:.*]] = or i32 %[[AND2]], 118727 +// OGCG: store i32 %[[OR2]], ptr %[[PTR2]] diff --git 
a/clang/test/CIR/CodeGen/complex-arithmetic.cpp b/clang/test/CIR/CodeGen/complex-arithmetic.cpp new file mode 100644 index 0000000000000..5131c075744c8 --- /dev/null +++ b/clang/test/CIR/CodeGen/complex-arithmetic.cpp @@ -0,0 +1,160 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-llvm %s -o %t-cir.ll +// RUN: FileCheck --input-file=%t-cir.ll %s -check-prefix=LLVM +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -Wno-unused-value -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG + +void foo() { + int _Complex a; + int _Complex b; + int _Complex c = a + b; +} + +// CIR: %[[COMPLEX_A:.*]] = cir.alloca !cir.complex, !cir.ptr>, ["a"] +// CIR: %[[COMPLEX_B:.*]] = cir.alloca !cir.complex, !cir.ptr>, ["b"] +// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[COMPLEX_A]] : !cir.ptr>, !cir.complex +// CIR: %[[TMP_B:.*]] = cir.load{{.*}} %[[COMPLEX_B]] : !cir.ptr>, !cir.complex +// CIR: %[[ADD:.*]] = cir.complex.add %[[TMP_A]], %[[TMP_B]] : !cir.complex + +// LLVM: %[[COMPLEX_A:.*]] = alloca { i32, i32 }, i64 1, align 4 +// LLVM: %[[COMPLEX_B:.*]] = alloca { i32, i32 }, i64 1, align 4 +// LLVM: %[[TMP_A:.*]] = load { i32, i32 }, ptr %[[COMPLEX_A]], align 4 +// LLVM: %[[TMP_B:.*]] = load { i32, i32 }, ptr %[[COMPLEX_B]], align 4 +// LLVM: %[[A_REAL:.*]] = extractvalue { i32, i32 } %[[TMP_A]], 0 +// LLVM: %[[A_IMAG:.*]] = extractvalue { i32, i32 } %[[TMP_A]], 1 +// LLVM: %[[B_REAL:.*]] = extractvalue { i32, i32 } %[[TMP_B]], 0 +// LLVM: %[[B_IMAG:.*]] = extractvalue { i32, i32 } %[[TMP_B]], 1 +// LLVM: %[[ADD_REAL:.*]] = add i32 %[[A_REAL]], %[[B_REAL]] +// LLVM: %[[ADD_IMAG:.*]] = add i32 %[[A_IMAG]], %[[B_IMAG]] +// LLVM: %[[RESULT:.*]] = insertvalue { i32, i32 } poison, i32 %[[ADD_REAL]], 0 +// LLVM: 
%[[RESULT_2:.*]] = insertvalue { i32, i32 } %[[RESULT]], i32 %[[ADD_IMAG]], 1 + +// OGCG: %[[COMPLEX_A:.*]] = alloca { i32, i32 }, align 4 +// OGCG: %[[COMPLEX_B:.*]] = alloca { i32, i32 }, align 4 +// OGCG: %[[RESULT:.*]] = alloca { i32, i32 }, align 4 +// OGCG: %[[A_REAL_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[COMPLEX_A]], i32 0, i32 0 +// OGCG: %[[A_REAL:.*]] = load i32, ptr %[[A_REAL_PTR]], align 4 +// OGCG: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[COMPLEX_A]], i32 0, i32 1 +// OGCG: %[[A_IMAG:.*]] = load i32, ptr %[[A_IMAG_PTR]], align 4 +// OGCG: %[[B_REAL_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[COMPLEX_B]], i32 0, i32 0 +// OGCG: %[[B_REAL:.*]] = load i32, ptr %[[B_REAL_PTR]], align 4 +// OGCG: %[[B_IMAG_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[COMPLEX_B]], i32 0, i32 1 +// OGCG: %[[B_IMAG:.*]] = load i32, ptr %[[B_IMAG_PTR]], align 4 +// OGCG: %[[ADD_REAL:.*]] = add i32 %[[A_REAL]], %[[B_REAL]] +// OGCG: %[[ADD_IMAG:.*]] = add i32 %[[A_IMAG]], %[[B_IMAG]] +// OGCG: %[[RESULT_REAL_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[RESULT]], i32 0, i32 0 +// OGCG: %[[RESULT_IMAG_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[RESULT]], i32 0, i32 1 +// OGCG: store i32 %[[ADD_REAL]], ptr %[[RESULT_REAL_PTR]], align 4 +// OGCG: store i32 %[[ADD_IMAG]], ptr %[[RESULT_IMAG_PTR]], align 4 + +void foo2() { + float _Complex a; + float _Complex b; + float _Complex c = a + b; +} + +// CIR: %[[COMPLEX_A:.*]] = cir.alloca !cir.complex, !cir.ptr>, ["a"] +// CIR: %[[COMPLEX_B:.*]] = cir.alloca !cir.complex, !cir.ptr>, ["b"] +// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[COMPLEX_A]] : !cir.ptr>, !cir.complex +// CIR: %[[TMP_B:.*]] = cir.load{{.*}} %[[COMPLEX_B]] : !cir.ptr>, !cir.complex +// CIR: %[[ADD:.*]] = cir.complex.add %[[TMP_A]], %[[TMP_B]] : !cir.complex + +// LLVM: %[[COMPLEX_A:.*]] = alloca { float, float }, i64 1, align 4 +// LLVM: %[[COMPLEX_B:.*]] = alloca { 
float, float }, i64 1, align 4 +// LLVM: %[[TMP_A:.*]] = load { float, float }, ptr %[[COMPLEX_A]], align 4 +// LLVM: %[[TMP_B:.*]] = load { float, float }, ptr %[[COMPLEX_B]], align 4 +// LLVM: %[[A_REAL:.*]] = extractvalue { float, float } %[[TMP_A]], 0 +// LLVM: %[[A_IMAG:.*]] = extractvalue { float, float } %[[TMP_A]], 1 +// LLVM: %[[B_REAL:.*]] = extractvalue { float, float } %[[TMP_B]], 0 +// LLVM: %[[B_IMAG:.*]] = extractvalue { float, float } %[[TMP_B]], 1 +// LLVM: %[[ADD_REAL:.*]] = fadd float %[[A_REAL]], %[[B_REAL]] +// LLVM: %[[ADD_IMAG:.*]] = fadd float %[[A_IMAG]], %[[B_IMAG]] +// LLVM: %[[RESULT:.*]] = insertvalue { float, float } poison, float %[[ADD_REAL]], 0 +// LLVM: %[[RESULT_2:.*]] = insertvalue { float, float } %[[RESULT]], float %[[ADD_IMAG]], 1 + +// OGCG: %[[COMPLEX_A:.*]] = alloca { float, float }, align 4 +// OGCG: %[[COMPLEX_B:.*]] = alloca { float, float }, align 4 +// OGCG: %[[RESULT:.*]] = alloca { float, float }, align 4 +// OGCG: %[[A_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[COMPLEX_A]], i32 0, i32 0 +// OGCG: %[[A_REAL:.*]] = load float, ptr %[[A_REAL_PTR]], align 4 +// OGCG: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[COMPLEX_A]], i32 0, i32 1 +// OGCG: %[[A_IMAG:.*]] = load float, ptr %[[A_IMAG_PTR]], align 4 +// OGCG: %[[B_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[COMPLEX_B]], i32 0, i32 0 +// OGCG: %[[B_REAL:.*]] = load float, ptr %[[B_REAL_PTR]], align 4 +// OGCG: %[[B_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[COMPLEX_B]], i32 0, i32 1 +// OGCG: %[[B_IMAG:.*]] = load float, ptr %[[B_IMAG_PTR]], align 4 +// OGCG: %[[ADD_REAL:.*]] = fadd float %[[A_REAL]], %[[B_REAL]] +// OGCG: %[[ADD_IMAG:.*]] = fadd float %[[A_IMAG]], %[[B_IMAG]] +// OGCG: %[[RESULT_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[RESULT]], i32 0, i32 0 +// OGCG: %[[RESULT_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, 
ptr %[[RESULT]], i32 0, i32 1 +// OGCG: store float %[[ADD_REAL]], ptr %[[RESULT_REAL_PTR]], align 4 +// OGCG: store float %[[ADD_IMAG]], ptr %[[RESULT_IMAG_PTR]], align 4 + +void foo3() { + float _Complex a; + float _Complex b; + float _Complex c; + float _Complex d = (a + b) + c; +} + +// CIR: %[[COMPLEX_A:.*]] = cir.alloca !cir.complex, !cir.ptr>, ["a"] +// CIR: %[[COMPLEX_B:.*]] = cir.alloca !cir.complex, !cir.ptr>, ["b"] +// CIR: %[[COMPLEX_C:.*]] = cir.alloca !cir.complex, !cir.ptr>, ["c"] +// CIR: %[[RESULT:.*]] = cir.alloca !cir.complex, !cir.ptr>, ["d", init] +// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[COMPLEX_A]] : !cir.ptr>, !cir.complex +// CIR: %[[TMP_B:.*]] = cir.load{{.*}} %[[COMPLEX_B]] : !cir.ptr>, !cir.complex +// CIR: %[[ADD_A_B:.*]] = cir.complex.add %[[TMP_A]], %[[TMP_B]] : !cir.complex +// CIR: %[[TMP_C:.*]] = cir.load{{.*}} %[[COMPLEX_C]] : !cir.ptr>, !cir.complex +// CIR: %[[ADD_A_B_C:.*]] = cir.complex.add %[[ADD_A_B]], %[[TMP_C]] : !cir.complex +// CIR: cir.store{{.*}} %[[ADD_A_B_C]], %[[RESULT]] : !cir.complex, !cir.ptr> + +// LLVM: %[[COMPLEX_A:.*]] = alloca { float, float }, i64 1, align 4 +// LLVM: %[[COMPLEX_B:.*]] = alloca { float, float }, i64 1, align 4 +// LLVM: %[[COMPLEX_C:.*]] = alloca { float, float }, i64 1, align 4 +// LLVM: %[[RESULT:.*]] = alloca { float, float }, i64 1, align 4 +// LLVM: %[[TMP_A:.*]] = load { float, float }, ptr %[[COMPLEX_A]], align 4 +// LLVM: %[[TMP_B:.*]] = load { float, float }, ptr %[[COMPLEX_B]], align 4 +// LLVM: %[[A_REAL:.*]] = extractvalue { float, float } %[[TMP_A]], 0 +// LLVM: %[[A_IMAG:.*]] = extractvalue { float, float } %[[TMP_A]], 1 +// LLVM: %[[B_REAL:.*]] = extractvalue { float, float } %[[TMP_B]], 0 +// LLVM: %[[B_IMAG:.*]] = extractvalue { float, float } %[[TMP_B]], 1 +// LLVM: %[[ADD_REAL_A_B:.*]] = fadd float %[[A_REAL]], %[[B_REAL]] +// LLVM: %[[ADD_IMAG_A_B:.*]] = fadd float %[[A_IMAG]], %[[B_IMAG]] +// LLVM: %[[A_B:.*]] = insertvalue { float, float } poison, float 
%[[ADD_REAL_A_B]], 0 +// LLVM: %[[TMP_A_B:.*]] = insertvalue { float, float } %[[A_B]], float %[[ADD_IMAG_A_B]], 1 +// LLVM: %[[TMP_C:.*]] = load { float, float }, ptr %[[COMPLEX_C]], align 4 +// LLVM: %[[A_B_REAL:.*]] = extractvalue { float, float } %[[TMP_A_B]], 0 +// LLVM: %[[A_B_IMAG:.*]] = extractvalue { float, float } %[[TMP_A_B]], 1 +// LLVM: %[[C_REAL:.*]] = extractvalue { float, float } %[[TMP_C]], 0 +// LLVM: %[[C_IMAG:.*]] = extractvalue { float, float } %[[TMP_C]], 1 +// LLVM: %[[ADD_REAL_A_B_C:.*]] = fadd float %[[A_B_REAL]], %[[C_REAL]] +// LLVM: %[[ADD_IMAG_A_B_C:.*]] = fadd float %[[A_B_IMAG]], %[[C_IMAG]] +// LLVM: %[[A_B_C:.*]] = insertvalue { float, float } poison, float %[[ADD_REAL_A_B_C]], 0 +// LLVM: %[[TMP_A_B_C:.*]] = insertvalue { float, float } %[[A_B_C]], float %[[ADD_IMAG_A_B_C]], 1 +// LLVM: store { float, float } %[[TMP_A_B_C]], ptr %[[RESULT]], align 4 + +// OGCG: %[[COMPLEX_A:.*]] = alloca { float, float }, align 4 +// OGCG: %[[COMPLEX_B:.*]] = alloca { float, float }, align 4 +// OGCG: %[[COMPLEX_C:.*]] = alloca { float, float }, align 4 +// OGCG: %[[RESULT:.*]] = alloca { float, float }, align 4 +// OGCG: %[[A_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[COMPLEX_A]], i32 0, i32 0 +// OGCG: %[[A_REAL:.*]] = load float, ptr %[[A_REAL_PTR]], align 4 +// OGCG: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[COMPLEX_A]], i32 0, i32 1 +// OGCG: %[[A_IMAG:.*]] = load float, ptr %[[A_IMAG_PTR]], align 4 +// OGCG: %[[B_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[COMPLEX_B]], i32 0, i32 0 +// OGCG: %[[B_REAL:.*]] = load float, ptr %[[B_REAL_PTR]], align 4 +// OGCG: %[[B_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[COMPLEX_B]], i32 0, i32 1 +// OGCG: %[[B_IMAG:.*]] = load float, ptr %[[B_IMAG_PTR]], align 4 +// OGCG: %[[ADD_REAL_A_B:.*]] = fadd float %[[A_REAL]], %[[B_REAL]] +// OGCG: %[[ADD_IMAG_A_B:.*]] = fadd float %[[A_IMAG]], %[[B_IMAG]] +// 
OGCG: %[[C_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[COMPLEX_C]], i32 0, i32 0 +// OGCG: %[[C_REAL:.*]] = load float, ptr %[[C_REAL_PTR]], align 4 +// OGCG: %[[C_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[COMPLEX_C]], i32 0, i32 1 +// OGCG: %[[C_IMAG:.*]] = load float, ptr %[[C_IMAG_PTR]], align 4 +// OGCG: %[[ADD_REAL_A_B_C:.*]] = fadd float %[[ADD_REAL_A_B]], %[[C_REAL]] +// OGCG: %[[ADD_IMAG_A_B_C:.*]] = fadd float %[[ADD_IMAG_A_B]], %[[C_IMAG]] +// OGCG: %[[RESULT_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[RESULT]], i32 0, i32 0 +// OGCG: %[[RESULT_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[RESULT]], i32 0, i32 1 +// OGCG: store float %[[ADD_REAL_A_B_C]], ptr %[[RESULT_REAL_PTR]], align 4 +// OGCG: store float %[[ADD_IMAG_A_B_C]], ptr %[[RESULT_IMAG_PTR]], align 4 diff --git a/clang/test/CIR/CodeGen/complex.cpp b/clang/test/CIR/CodeGen/complex.cpp index 6e7e889df146f..88df771e6f272 100644 --- a/clang/test/CIR/CodeGen/complex.cpp +++ b/clang/test/CIR/CodeGen/complex.cpp @@ -31,7 +31,7 @@ float _Complex cf2 = { 1.0f, 2.0f }; void foo() { int _Complex c = {}; } // CIR: %[[INIT:.*]] = cir.alloca !cir.complex, !cir.ptr>, ["c", init] -// CIR: %[[COMPLEX:.*]] = cir.const #cir.const_complex<#cir.int<0> : !s32i, #cir.int<0> : !s32i> : !cir.complex +// CIR: %[[COMPLEX:.*]] = cir.const #cir.zero : !cir.complex // CIR: cir.store{{.*}} %[[COMPLEX]], %[[INIT]] : !cir.complex, !cir.ptr> // LLVM: %[[INIT:.*]] = alloca { i32, i32 }, i64 1, align 4 @@ -216,6 +216,20 @@ void foo9(double a, double b) { // OGCG: store double %[[TMP_A]], ptr %[[C_REAL_PTR]], align 8 // OGCG: store double %[[TMP_B]], ptr %[[C_IMAG_PTR]], align 8 +void foo10() { + double _Complex c; + double *realPtr = &__real__ c; +} + +// CIR: %[[COMPLEX:.*]] = cir.alloca !cir.complex, !cir.ptr>, ["c"] +// CIR: %[[REAL_PTR:.*]] = cir.complex.real_ptr %[[COMPLEX]] : !cir.ptr> -> !cir.ptr + +// LLVM: %[[COMPLEX:.*]] = 
alloca { double, double }, i64 1, align 8 +// LLVM: %[[REAL_PTR:.*]] = getelementptr inbounds nuw { double, double }, ptr %[[COMPLEX]], i32 0, i32 0 + +// OGCG: %[[COMPLEX:.*]] = alloca { double, double }, align 8 +// OGCG: %[[REAL_PTR:.*]] = getelementptr inbounds nuw { double, double }, ptr %[[COMPLEX]], i32 0, i32 0 + void foo12() { double _Complex c; double imag = __imag__ c; @@ -741,7 +755,7 @@ void foo29() { } // CIR: %[[INIT:.*]] = cir.alloca !cir.complex, !cir.ptr>, ["a", init] -// CIR: %[[COMPLEX:.*]] = cir.const #cir.const_complex<#cir.int<0> : !s32i, #cir.int<0> : !s32i> : !cir.complex +// CIR: %[[COMPLEX:.*]] = cir.const #cir.zero : !cir.complex // CIR: cir.store{{.*}} %[[COMPLEX]], %[[INIT]] : !cir.complex, !cir.ptr> // LLVM: %[[INIT:.*]] = alloca { i32, i32 }, i64 1, align 4 @@ -751,4 +765,4 @@ void foo29() { // OGCG: %[[INIT_REAL_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[INIT]], i32 0, i32 0 // OGCG: %[[INIT_IMAG_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[INIT]], i32 0, i32 1 // OGCG: store i32 0, ptr %[[INIT_REAL_PTR]], align 4 -// OGCG: store i32 0, ptr %[[INIT_IMAG_PTR]], align 4 \ No newline at end of file +// OGCG: store i32 0, ptr %[[INIT_IMAG_PTR]], align 4 diff --git a/clang/test/CIR/CodeGen/ctor.cpp b/clang/test/CIR/CodeGen/ctor.cpp index 4c2877f8460d0..2b06bb0f7cb08 100644 --- a/clang/test/CIR/CodeGen/ctor.cpp +++ b/clang/test/CIR/CodeGen/ctor.cpp @@ -219,3 +219,130 @@ void init_union() { // CHECK-NEXT: %[[S_ADDR:.*]] = cir.alloca {{.*}} ["s", init] // CHECK-NEXT: cir.call @_ZN14UnionInitStrukC1Ev(%[[S_ADDR]]) // CHECK-NEXT: cir.return + +struct Base { + int a; + Base(int val) : a(val) {} +}; + +struct Derived : Base { + Derived(int val) : Base(val) {} +}; + +void test_derived() { + Derived d(1); +} + +// CHECK: cir.func{{.*}} @_ZN4BaseC2Ei(%arg0: !cir.ptr {{.*}}, %arg1: !s32i +// CHECK-NEXT: %[[THIS_ADDR:.*]] = cir.alloca {{.*}} ["this", init] +// CHECK-NEXT: %[[VAL_ADDR:.*]] = cir.alloca {{.*}} ["val", 
init] +// CHECK-NEXT: cir.store %arg0, %[[THIS_ADDR]] +// CHECK-NEXT: cir.store %arg1, %[[VAL_ADDR]] +// CHECK-NEXT: %[[THIS:.*]] = cir.load{{.*}} %[[THIS_ADDR]] +// CHECK-NEXT: %[[A_ADDR:.*]] = cir.get_member %[[THIS]][0] {name = "a"} +// CHECK-NEXT: %[[VAL:.*]] = cir.load{{.*}} %[[VAL_ADDR]] +// CHECK-NEXT: cir.store{{.*}} %[[VAL]], %[[A_ADDR]] +// CHECK-NEXT: cir.return + +// CHECK: cir.func{{.*}} @_ZN7DerivedC2Ei(%arg0: !cir.ptr {{.*}}, %arg1: !s32i +// CHECK-NEXT: %[[THIS_ADDR:.*]] = cir.alloca {{.*}} ["this", init] +// CHECK-NEXT: %[[VAL_ADDR:.*]] = cir.alloca {{.*}} ["val", init] +// CHECK-NEXT: cir.store %arg0, %[[THIS_ADDR]] +// CHECK-NEXT: cir.store %arg1, %[[VAL_ADDR]] +// CHECK-NEXT: %[[THIS:.*]] = cir.load{{.*}} %[[THIS_ADDR]] +// CHECK-NEXT: %[[BASE:.*]] = cir.base_class_addr %[[THIS]] : !cir.ptr nonnull [0] -> !cir.ptr +// CHECK-NEXT: %[[VAL:.*]] = cir.load{{.*}} %[[VAL_ADDR]] +// CHECK-NEXT: cir.call @_ZN4BaseC2Ei(%[[BASE]], %[[VAL]]) +// CHECK-NEXT: cir.return + +// CHECK: cir.func{{.*}} @_ZN7DerivedC1Ei(%arg0: !cir.ptr {{.*}}, %arg1: !s32i +// CHECK-NEXT: %[[THIS_ADDR:.*]] = cir.alloca {{.*}} ["this", init] +// CHECK-NEXT: %[[VAL_ADDR:.*]] = cir.alloca {{.*}} ["val", init] +// CHECK-NEXT: cir.store %arg0, %[[THIS_ADDR]] +// CHECK-NEXT: cir.store %arg1, %[[VAL_ADDR]] +// CHECK-NEXT: %[[THIS:.*]] = cir.load{{.*}} %[[THIS_ADDR]] +// CHECK-NEXT: %[[VAL:.*]] = cir.load{{.*}} %[[VAL_ADDR]] +// CHECK-NEXT: cir.call @_ZN7DerivedC2Ei(%[[THIS]], %[[VAL]]) +// CHECK-NEXT: cir.return + +// CHECK: cir.func{{.*}} @_Z12test_derivedv +// CHECK-NEXT: %[[D_ADDR:.*]] = cir.alloca {{.*}} ["d", init] +// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i +// CHECK-NEXT: cir.call @_ZN7DerivedC1Ei(%[[D_ADDR]], %[[ONE]]) +// CHECK-NEXT: cir.return + +struct Base2 { + int b; + Base2(int val) : b(val) {} +}; + +struct Derived2 : Base, Base2 { + int c; + Derived2(int val1, int val2, int val3) : Base(val1), Base2(val2), c(val3) {} +}; + +void test_derived2() { + 
Derived2 d(1, 2, 3); +} + +// CHECK: cir.func{{.*}} @_ZN5Base2C2Ei(%arg0: !cir.ptr {{.*}}, %arg1: !s32i +// CHECK-NEXT: %[[THIS_ADDR:.*]] = cir.alloca {{.*}} ["this", init] +// CHECK-NEXT: %[[VAL_ADDR:.*]] = cir.alloca {{.*}} ["val", init] +// CHECK-NEXT: cir.store %arg0, %[[THIS_ADDR]] +// CHECK-NEXT: cir.store %arg1, %[[VAL_ADDR]] +// CHECK-NEXT: %[[THIS:.*]] = cir.load{{.*}} %[[THIS_ADDR]] +// CHECK-NEXT: %[[B_ADDR:.*]] = cir.get_member %[[THIS]][0] {name = "b"} +// CHECK-NEXT: %[[VAL:.*]] = cir.load{{.*}} %[[VAL_ADDR]] +// CHECK-NEXT: cir.store{{.*}} %[[VAL]], %[[B_ADDR]] +// CHECK-NEXT: cir.return + +// CHECK: cir.func{{.*}} @_ZN8Derived2C2Eiii(%arg0: !cir.ptr +// CHECK-SAME: %arg1: !s32i +// CHECK-SAME: %arg2: !s32i +// CHECK-SAME: %arg3: !s32i +// CHECK-NEXT: %[[THIS_ADDR:.*]] = cir.alloca {{.*}} ["this", init] +// CHECK-NEXT: %[[VAL1_ADDR:.*]] = cir.alloca {{.*}} ["val1", init] +// CHECK-NEXT: %[[VAL2_ADDR:.*]] = cir.alloca {{.*}} ["val2", init] +// CHECK-NEXT: %[[VAL3_ADDR:.*]] = cir.alloca {{.*}} ["val3", init] +// CHECK-NEXT: cir.store %arg0, %[[THIS_ADDR]] +// CHECK-NEXT: cir.store %arg1, %[[VAL1_ADDR]] +// CHECK-NEXT: cir.store %arg2, %[[VAL2_ADDR]] +// CHECK-NEXT: cir.store %arg3, %[[VAL3_ADDR]] +// CHECK-NEXT: %[[THIS:.*]] = cir.load{{.*}} %[[THIS_ADDR]] +// CHECK-NEXT: %[[BASE:.*]] = cir.base_class_addr %[[THIS]] : !cir.ptr nonnull [0] -> !cir.ptr +// CHECK-NEXT: %[[VAL1:.*]] = cir.load{{.*}} %[[VAL1_ADDR]] +// CHECK-NEXT: cir.call @_ZN4BaseC2Ei(%[[BASE]], %[[VAL1]]) +// CHECK-NEXT: %[[BASE2:.*]] = cir.base_class_addr %[[THIS]] : !cir.ptr nonnull [4] -> !cir.ptr +// CHECK-NEXT: %[[VAL2:.*]] = cir.load{{.*}} %[[VAL2_ADDR]] +// CHECK-NEXT: cir.call @_ZN5Base2C2Ei(%[[BASE2]], %[[VAL2]]) +// CHECK-NEXT: %[[C_ADDR:.*]] = cir.get_member %[[THIS]][2] {name = "c"} +// CHECK-NEXT: %[[VAL3:.*]] = cir.load{{.*}} %[[VAL3_ADDR]] +// CHECK-NEXT: cir.store{{.*}} %[[VAL3]], %[[C_ADDR]] +// CHECK-NEXT: cir.return + +// CHECK: cir.func{{.*}} 
@_ZN8Derived2C1Eiii(%arg0: !cir.ptr +// CHECK-SAME: %arg1: !s32i +// CHECK-SAME: %arg2: !s32i +// CHECK-SAME: %arg3: !s32i +// CHECK-NEXT: %[[THIS_ADDR:.*]] = cir.alloca {{.*}} ["this", init] +// CHECK-NEXT: %[[VAL1_ADDR:.*]] = cir.alloca {{.*}} ["val1", init] +// CHECK-NEXT: %[[VAL2_ADDR:.*]] = cir.alloca {{.*}} ["val2", init] +// CHECK-NEXT: %[[VAL3_ADDR:.*]] = cir.alloca {{.*}} ["val3", init] +// CHECK-NEXT: cir.store %arg0, %[[THIS_ADDR]] +// CHECK-NEXT: cir.store %arg1, %[[VAL1_ADDR]] +// CHECK-NEXT: cir.store %arg2, %[[VAL2_ADDR]] +// CHECK-NEXT: cir.store %arg3, %[[VAL3_ADDR]] +// CHECK-NEXT: %[[THIS:.*]] = cir.load{{.*}} %[[THIS_ADDR]] +// CHECK-NEXT: %[[VAL1:.*]] = cir.load{{.*}} %[[VAL1_ADDR]] +// CHECK-NEXT: %[[VAL2:.*]] = cir.load{{.*}} %[[VAL2_ADDR]] +// CHECK-NEXT: %[[VAL3:.*]] = cir.load{{.*}} %[[VAL3_ADDR]] +// CHECK-NEXT: cir.call @_ZN8Derived2C2Eiii(%[[THIS]], %[[VAL1]], %[[VAL2]], %[[VAL3]]) +// CHECK-NEXT: cir.return + +// CHECK: cir.func{{.*}} @_Z13test_derived2v +// CHECK-NEXT: %[[D_ADDR:.*]] = cir.alloca {{.*}} ["d", init] +// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i +// CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i +// CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i +// CHECK-NEXT: cir.call @_ZN8Derived2C1Eiii(%[[D_ADDR]], %[[ONE]], %[[TWO]], %[[THREE]]) +// CHECK-NEXT: cir.return diff --git a/clang/test/CIR/CodeGen/enum.cpp b/clang/test/CIR/CodeGen/enum.cpp index 5d9b1057aaa14..247fa0a3bfd43 100644 --- a/clang/test/CIR/CodeGen/enum.cpp +++ b/clang/test/CIR/CodeGen/enum.cpp @@ -14,3 +14,14 @@ int f() { // CHECK: cir.func{{.*}} @_Z1fv // CHECK: cir.const #cir.int<1> : !u32i + +namespace test { + using enum Numbers; +}; + +int f2() { + return test::Two; +} + +// CHECK: cir.func{{.*}} @_Z2f2v +// CHECK: cir.const #cir.int<2> : !u32i diff --git a/clang/test/CIR/CodeGen/namespace.cpp b/clang/test/CIR/CodeGen/namespace.cpp index efae1f2f2f236..4c7812c61bfe4 100644 --- a/clang/test/CIR/CodeGen/namespace.cpp +++ 
b/clang/test/CIR/CodeGen/namespace.cpp @@ -93,3 +93,11 @@ void f7() { } // CHECK: cir.func{{.*}} @_Z2f7v() + +namespace test_alias = test; + +int f8() { + return test_alias::g2; +} + +// CHECK: cir.func{{.*}} @_Z2f8v() diff --git a/clang/test/CIR/IR/invalid-complex.cir b/clang/test/CIR/IR/invalid-complex.cir index 2414809f7dbca..3a11b631a2ac7 100644 --- a/clang/test/CIR/IR/invalid-complex.cir +++ b/clang/test/CIR/IR/invalid-complex.cir @@ -45,3 +45,15 @@ module { cir.return } } + + +// ----- + +module { + cir.func @complex_real_ptr_invalid_result_type() -> !cir.double { + %0 = cir.alloca !cir.complex, !cir.ptr>, ["c"] + // expected-error @below {{result type does not match operand type}} + %1 = cir.complex.real_ptr %0 : !cir.ptr> -> !cir.ptr + cir.return + } +} diff --git a/clang/test/CIR/Transforms/complex-imag-fold.cir b/clang/test/CIR/Transforms/complex-imag-fold.cir index 0d9a4e43142a3..56e062d5285a2 100644 --- a/clang/test/CIR/Transforms/complex-imag-fold.cir +++ b/clang/test/CIR/Transforms/complex-imag-fold.cir @@ -1,4 +1,4 @@ -// RUN: cir-opt %s -cir-canonicalize -o - | FileCheck %s +// RUN: cir-opt %s -cir-canonicalize -split-input-file -o - | FileCheck %s !s32i = !cir.int @@ -21,3 +21,19 @@ module { // CHECK: } } + +// ----- + +!s32i = !cir.int + +module { + cir.func dso_local @fold_complex_imag_from_create_test(%arg0: !s32i, %arg1: !s32i) -> !s32i { + %0 = cir.complex.create %arg0, %arg1 : !s32i -> !cir.complex + %1 = cir.complex.imag %0 : !cir.complex -> !s32i + cir.return %1 : !s32i + } + + // CHECK: cir.func dso_local @fold_complex_imag_from_create_test(%[[ARG_0:.*]]: !s32i, %[[ARG_1:.*]]: !s32i) -> !s32i { + // CHECK: cir.return %[[ARG_1]] : !s32i + // CHECK: } +} diff --git a/clang/test/CIR/Transforms/complex-real-fold.cir b/clang/test/CIR/Transforms/complex-real-fold.cir index 1cab9be616af0..29b03276f822d 100644 --- a/clang/test/CIR/Transforms/complex-real-fold.cir +++ b/clang/test/CIR/Transforms/complex-real-fold.cir @@ -1,4 +1,4 @@ -// RUN: 
cir-opt %s -cir-canonicalize -o - | FileCheck %s +// RUN: cir-opt %s -cir-canonicalize -split-input-file -o - | FileCheck %s !s32i = !cir.int @@ -21,3 +21,19 @@ module { // CHECK: } } + +// ----- + +!s32i = !cir.int + +module { + cir.func dso_local @fold_complex_real_from_create_test(%arg0: !s32i, %arg1: !s32i) -> !s32i { + %0 = cir.complex.create %arg0, %arg1 : !s32i -> !cir.complex + %1 = cir.complex.real %0 : !cir.complex -> !s32i + cir.return %1 : !s32i + } + + // CHECK: cir.func dso_local @fold_complex_real_from_create_test(%[[ARG_0:.*]]: !s32i, %[[ARG_1:.*]]: !s32i) -> !s32i { + // CHECK: cir.return %[[ARG_0]] : !s32i + // CHECK: } +} diff --git a/clang/test/CXX/basic/basic.link/p3.cpp b/clang/test/CXX/basic/basic.link/p3.cpp index 01202264d2591..e6633a777ddef 100644 --- a/clang/test/CXX/basic/basic.link/p3.cpp +++ b/clang/test/CXX/basic/basic.link/p3.cpp @@ -1,35 +1,18 @@ -// RUN: %clang_cc1 -std=c++2a -verify %s -// RUN: %clang_cc1 -std=c++2a -verify %s -DIMPORT_ERROR=1 -// RUN: %clang_cc1 -std=c++2a -verify %s -DIMPORT_ERROR=2 +// RUN: rm -rf %t +// RUN: split-file %s %t +// RUN: %clang_cc1 -std=c++20 -verify %t/M.cpp +// RUN: %clang_cc1 -std=c++20 -verify %t/ImportError1.cpp +// RUN: %clang_cc1 -std=c++20 -verify %t/ImportError2.cpp + +//--- M.cpp module; -#if IMPORT_ERROR != 2 struct import { struct inner {}; }; -#endif struct module { struct inner {}; }; - constexpr int n = 123; export module m; // #1 - -// Import errors are fatal, so we test them in isolation. -#if IMPORT_ERROR == 1 -import x = {}; // expected-error {{expected ';' after module name}} - // expected-error@-1 {{module 'x' not found}} - -#elif IMPORT_ERROR == 2 -struct X; -template struct import; -template<> struct import { - static X y; -}; - -// This is not valid because the 'import ' is a pp-import, even though it -// grammatically can't possibly be an import declaration. 
-struct X {} import::y; // expected-error {{'n' file not found}} - -#else module y = {}; // expected-error {{multiple module declarations}} expected-error 2{{}} // expected-note@#1 {{previous module declaration}} @@ -51,4 +34,36 @@ template module module_var_template; // This is a variable named 'import' that shadows the type 'import' above. struct X {} import; -#endif + +//--- ImportError1.cpp +module; + +struct import { struct inner {}; }; +struct module { struct inner {}; }; + +constexpr int n = 123; + +export module m; // #1 + +import x = {}; // expected-error {{expected ';' after module name}} + // expected-error@-1 {{module 'x' not found}} + +//--- ImportError2.cpp +module; + +struct module { struct inner {}; }; + +constexpr int n = 123; + +export module m; // #1 + +struct X; +template struct import; +template<> struct import { + static X y; +}; + +// This is not valid because the 'import ' is a pp-import, even though it +// grammatically can't possibly be an import declaration. +struct X {} import::y; // expected-error {{'n' file not found}} + diff --git a/clang/test/CXX/dcl.dcl/dcl.enum/p2.cpp b/clang/test/CXX/dcl.dcl/dcl.enum/p2.cpp index de826d0570422..7b69358687a2f 100644 --- a/clang/test/CXX/dcl.dcl/dcl.enum/p2.cpp +++ b/clang/test/CXX/dcl.dcl/dcl.enum/p2.cpp @@ -1,6 +1,5 @@ // RUN: %clang_cc1 -std=c++11 -verify %s -// expected-no-diagnostics -enum class E : int const volatile { }; +enum class E : int const volatile { }; // expected-warning {{'const' and 'volatile' qualifiers in enumeration underlying type ignored}} using T = __underlying_type(E); using T = int; diff --git a/clang/test/CXX/expr/expr.post/expr.static.cast/p3-0x.cpp b/clang/test/CXX/expr/expr.post/expr.static.cast/p3-0x.cpp index 830ccda245baa..9e089557d7088 100644 --- a/clang/test/CXX/expr/expr.post/expr.static.cast/p3-0x.cpp +++ b/clang/test/CXX/expr/expr.post/expr.static.cast/p3-0x.cpp @@ -1,5 +1,4 @@ -// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify %s -// expected-no-diagnostics 
+// RUN: %clang_cc1 -std=c++14 -Wno-unused-value -verify %s // A glvalue of type "cv1 T1" can be cast to type "rvalue reference to // cv2 T2" if "cv2 T2" is reference-compatible with "cv1 T1" (8.5.3). @@ -23,3 +22,70 @@ void test(A &a, B &b) { const A &&ar10 = static_cast(xvalue()); const A &&ar11 = static_cast(xvalue()); } + +namespace GH121429 { + +struct C : private A { // expected-note 4 {{declared private here}} + C&& that(); + + void f() { + static_cast(*this); + static_cast(*this); + + static_cast(that()); + static_cast(that()); + } +}; +C c; +const C cc; + +void f() { + static_cast(c); // expected-error {{cannot cast 'C' to its private base class 'A'}} + static_cast(c.that()); // expected-error {{cannot cast 'C' to its private base class 'A'}} + + static_cast(c); // expected-error {{cannot cast 'C' to its private base class 'const A'}} + static_cast(c.that()); // expected-error {{cannot cast 'C' to its private base class 'const A'}} +} + +constexpr bool g() { + (A&&)c; + (A&&)(C&&)c; + (A&&)cc; + (A&&)(const C&&)c; + (const A&&)c; + (const A&&)(C&&)c; + (const A&&)cc; + (const A&&)(const C&&)c; + return true; +} +static_assert(g(), ""); + +struct D : A, B { // expected-warning {{direct base 'A' is inaccessible due to ambiguity}} + D&& rv(); +}; +D d; + +void h(const D cd) { + static_cast(d); // expected-error {{ambiguous conversion from derived class 'D' to base class 'A'}} + static_cast(d.rv()); // expected-error {{ambiguous conversion from derived class 'D' to base class 'A'}} + + static_cast(d); // expected-error {{ambiguous conversion from derived class 'D' to base class 'const A'}} + static_cast(d.rv()); // expected-error {{ambiguous conversion from derived class 'D' to base class 'const A'}} + + (A&&)d; // expected-error {{ambiguous conversion from derived class 'D' to base class 'A'}} + (A&&)(D&&)d; // expected-error {{ambiguous conversion from derived class 'D' to base class 'A'}} + (A&&)cd; // expected-error {{ambiguous conversion from derived 
class 'D' to base class 'A'}} + (A&&)(const D&&)d; // expected-error {{ambiguous conversion from derived class 'D' to base class 'A'}} + (const A&&)d; // expected-error {{ambiguous conversion from derived class 'D' to base class 'A'}} + (const A&&)(D&&)d; // expected-error {{ambiguous conversion from derived class 'D' to base class 'A'}} + (const A&&)cd; // expected-error {{ambiguous conversion from derived class 'D' to base class 'A'}} + (const A&&)(const D&&)d; // expected-error {{ambiguous conversion from derived class 'D' to base class 'A'}} +} + +template +auto s(U u = {}) -> decltype(static_cast(u)); // expected-note 2 {{substitution failure}} + +int i = s(); // expected-error {{no matching function}} +int j = s(); // expected-error {{no matching function}} + +} diff --git a/clang/test/ClangScanDeps/visible-modules.c b/clang/test/ClangScanDeps/visible-modules.c new file mode 100644 index 0000000000000..77716a4956f00 --- /dev/null +++ b/clang/test/ClangScanDeps/visible-modules.c @@ -0,0 +1,116 @@ +// This test verifies that the modules visible to the translation unit are computed in dependency scanning. +// "client" in the first scan represents the translation unit that imports an explicit submodule, +// that only exports one other module. +// In the second scan, the translation unit that imports an explicit submodule, +// that exports an additional module. +// Thus, the dependencies of the top level module for the submodule always differ from what is visible to the TU. + +// RUN: rm -rf %t +// RUN: split-file %s %t +// RUN: sed -e "s|DIR|%/t|g" %t/compile-commands.json.in > %t/compile-commands.json +// RUN: clang-scan-deps -emit-visible-modules -compilation-database %t/compile-commands.json \ +// RUN: -j 1 -format experimental-full 2>&1 > %t/result-first-scan.json +// RUN: cat %t/result-first-scan.json | sed 's:\\\\\?:/:g' | FileCheck %s -DPREFIX=%/t --check-prefix=SINGLE + +/// Re-run scan with different module map for direct dependency. 
+// RUN: mv %t/A_with_visible_export.modulemap %t/Sysroot/usr/include/A/module.modulemap +// RUN: clang-scan-deps -emit-visible-modules -compilation-database %t/compile-commands.json \ +// RUN: -j 1 -format experimental-full 2>&1 > %t/result.json +// RUN: cat %t/result.json | sed 's:\\\\\?:/:g' | FileCheck %s -DPREFIX=%/t --check-prefix=MULTIPLE + +// RUN: %deps-to-rsp %t/result.json --module-name=transitive > %t/transitive.rsp +// RUN: %deps-to-rsp %t/result.json --module-name=visible > %t/visible.rsp +// RUN: %deps-to-rsp %t/result.json --module-name=invisible > %t/invisible.rsp +// RUN: %deps-to-rsp %t/result.json --module-name=A > %t/A.rsp +// RUN: %deps-to-rsp %t/result.json --tu-index=0 > %t/tu.rsp + +// RUN: %clang @%t/transitive.rsp +// RUN: %clang @%t/visible.rsp +// RUN: %clang @%t/invisible.rsp +// RUN: %clang @%t/A.rsp + +/// Verify compilation & scan agree with each other. +// RUN: not %clang @%t/tu.rsp 2>&1 | FileCheck %s --check-prefix=COMPILE + +// SINGLE: "visible-clang-modules": [ +// SINGLE-NEXT: "A" +// SINGLE-NEXT: ] + +// MULTIPLE: "visible-clang-modules": [ +// MULTIPLE-NEXT: "A", +// MULTIPLE-NEXT: "visible" +// MULTIPLE-NEXT: ] + +// COMPILE-NOT: 'visible_t' must be declared before it is used +// COMPILE: 'transitive_t' must be declared before it is used +// COMPILE: 'invisible_t' must be declared before it is used + +//--- compile-commands.json.in +[ +{ + "directory": "DIR", + "command": "clang -c DIR/client.c -isysroot DIR/Sysroot -IDIR/Sysroot/usr/include -fmodules -fmodules-cache-path=DIR/module-cache -fimplicit-module-maps", + "file": "DIR/client.c" +} +] + +//--- Sysroot/usr/include/A/module.modulemap +module A { + explicit module visibleToTU { + header "visibleToTU.h" + } + explicit module invisibleToTU { + header "invisibleToTU.h" + } +} + +//--- A_with_visible_export.modulemap +module A { + explicit module visibleToTU { + header "visibleToTU.h" + export visible + } + explicit module invisibleToTU { + header "invisibleToTU.h" + } +} 
+ +//--- Sysroot/usr/include/A/visibleToTU.h +#include +typedef int A_visibleToTU; + +//--- Sysroot/usr/include/A/invisibleToTU.h +#include +typedef int A_invisibleToTU; + +//--- Sysroot/usr/include/invisible/module.modulemap +module invisible { + umbrella "." +} + +//--- Sysroot/usr/include/invisible/invisible.h +typedef int invisible_t; + +//--- Sysroot/usr/include/visible/module.modulemap +module visible { + umbrella "." +} + +//--- Sysroot/usr/include/visible/visible.h +#include +typedef int visible_t; + +//--- Sysroot/usr/include/transitive/module.modulemap +module transitive { + umbrella "." +} + +//--- Sysroot/usr/include/transitive/transitive.h +typedef int transitive_t; + +//--- client.c +#include +visible_t foo_v(void); +// Both decls are not visible, thus should fail to actually compile. +transitive_t foo_t(void); +invisible_t foo_i(void); diff --git a/clang/test/CodeGen/AArch64/sme-intrinsics/aarch64-sme-attrs.cpp b/clang/test/CodeGen/AArch64/sme-intrinsics/aarch64-sme-attrs.cpp index c734c6953e5d1..5b74042329968 100644 --- a/clang/test/CodeGen/AArch64/sme-intrinsics/aarch64-sme-attrs.cpp +++ b/clang/test/CodeGen/AArch64/sme-intrinsics/aarch64-sme-attrs.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +bf16 \ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme \ // RUN: -disable-O0-optnone -Werror -emit-llvm -o - %s \ // RUN: | opt -S -passes=mem2reg \ // RUN: | opt -S -passes=inline \ @@ -300,19 +300,19 @@ int test_variadic_template() __arm_inout("za") { preserves_za_decl); } -// CHECK: attributes #[[SM_ENABLED]] = { mustprogress noinline nounwind vscale_range(1,16) "aarch64_pstate_sm_enabled" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" } -// CHECK: attributes #[[NORMAL_DECL]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" } -// CHECK: attributes #[[SM_ENABLED_DECL]] = { 
"aarch64_pstate_sm_enabled" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" } -// CHECK: attributes #[[SM_COMPATIBLE]] = { mustprogress noinline nounwind "aarch64_pstate_sm_compatible" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" } -// CHECK: attributes #[[SM_COMPATIBLE_DECL]] = { "aarch64_pstate_sm_compatible" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" } -// CHECK: attributes #[[SM_BODY]] = { mustprogress noinline nounwind vscale_range(1,16) "aarch64_pstate_sm_body" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" } -// CHECK: attributes #[[ZA_SHARED]] = { mustprogress noinline nounwind "aarch64_inout_za" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" } -// CHECK: attributes #[[ZA_SHARED_DECL]] = { "aarch64_inout_za" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" } -// CHECK: attributes #[[ZA_PRESERVED]] = { mustprogress noinline nounwind "aarch64_preserves_za" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" } -// CHECK: attributes #[[ZA_PRESERVED_DECL]] = { "aarch64_preserves_za" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" } -// CHECK: attributes #[[ZA_NEW]] = { mustprogress noinline nounwind "aarch64_new_za" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" } -// CHECK: attributes #[[ZA_AGNOSTIC]] = { mustprogress noinline nounwind "aarch64_za_state_agnostic" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" } -// CHECK: attributes #[[NORMAL_DEF]] = { mustprogress noinline nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" } +// CHECK: attributes #[[SM_ENABLED]] = { mustprogress 
noinline nounwind vscale_range(1,16) "aarch64_pstate_sm_enabled" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+sme" } +// CHECK: attributes #[[NORMAL_DECL]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+sme" } +// CHECK: attributes #[[SM_ENABLED_DECL]] = { "aarch64_pstate_sm_enabled" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+sme" } +// CHECK: attributes #[[SM_COMPATIBLE]] = { mustprogress noinline nounwind "aarch64_pstate_sm_compatible" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+sme" } +// CHECK: attributes #[[SM_COMPATIBLE_DECL]] = { "aarch64_pstate_sm_compatible" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+sme" } +// CHECK: attributes #[[SM_BODY]] = { mustprogress noinline nounwind vscale_range(1,16) "aarch64_pstate_sm_body" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+sme" } +// CHECK: attributes #[[ZA_SHARED]] = { mustprogress noinline nounwind "aarch64_inout_za" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+sme" } +// CHECK: attributes #[[ZA_SHARED_DECL]] = { "aarch64_inout_za" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+sme" } +// CHECK: attributes #[[ZA_PRESERVED]] = { mustprogress noinline nounwind "aarch64_preserves_za" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+sme" } +// CHECK: attributes #[[ZA_PRESERVED_DECL]] = { "aarch64_preserves_za" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+sme" } +// CHECK: attributes #[[ZA_NEW]] = { mustprogress noinline nounwind "aarch64_new_za" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+sme" } +// CHECK: attributes #[[ZA_AGNOSTIC]] = { mustprogress noinline nounwind "aarch64_za_state_agnostic" "no-trapping-math"="true" 
"stack-protector-buffer-size"="8" "target-features"="+sme" } +// CHECK: attributes #[[NORMAL_DEF]] = { mustprogress noinline nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+sme" } // CHECK: attributes #[[SM_ENABLED_CALL]] = { "aarch64_pstate_sm_enabled" } // CHECK: attributes #[[SM_COMPATIBLE_CALL]] = { "aarch64_pstate_sm_compatible" } // CHECK: attributes #[[SM_BODY_CALL]] = { "aarch64_pstate_sm_body" } diff --git a/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_add-i64.c b/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_add-i64.c index c341ff9bb29e6..2889249662ae1 100644 --- a/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_add-i64.c +++ b/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_add-i64.c @@ -1,10 +1,10 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 3 // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSME_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSME_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 
-target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -S -O1 -Werror -o /dev/null %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX +// RUN: %clang_cc1 -fclang-abi-compat=latest -DSME_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C +// RUN: %clang_cc1 -fclang-abi-compat=latest -DSME_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -S -O1 -Werror -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_mopa-za32.c b/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_mopa-za32.c index 824c43e6d247c..992c6f0c62ce7 100644 --- a/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_mopa-za32.c +++ b/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_mopa-za32.c @@ -1,10 +1,10 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 3 // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s 
-check-prefixes=CHECK,CHECK-CXX -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSME_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +bf16 -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSME_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +bf16 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -O1 -Werror -o /dev/null %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX +// RUN: %clang_cc1 -fclang-abi-compat=latest -DSME_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C +// RUN: %clang_cc1 -fclang-abi-compat=latest -DSME_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -O1 -Werror -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_mopa-za64.c b/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_mopa-za64.c index 61c41450d6457..3e22c77a467f9 100644 --- a/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_mopa-za64.c +++ b/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_mopa-za64.c @@ -1,10 +1,10 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 3 // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme2 
-target-feature +sme-f64f64 -target-feature +sme-i16i64 -target-feature +bf16 -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f64f64 -target-feature +sme-i16i64 -target-feature +bf16 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSME_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f64f64 -target-feature +sme-i16i64 -target-feature +bf16 -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSME_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f64f64 -target-feature +sme-i16i64 -target-feature +bf16 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f64f64 -target-feature +sme-i16i64 -target-feature +bf16 -S -O1 -Werror -o /dev/null %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f64f64 -target-feature +sme-i16i64 -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f64f64 -target-feature +sme-i16i64 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX +// RUN: %clang_cc1 -fclang-abi-compat=latest -DSME_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f64f64 -target-feature +sme-i16i64 -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C +// RUN: %clang_cc1 -fclang-abi-compat=latest 
-DSME_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f64f64 -target-feature +sme-i16i64 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f64f64 -target-feature +sme-i16i64 -S -O1 -Werror -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_mops-za32.c b/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_mops-za32.c index 509ad9ec17f73..40d2c6d0cc865 100644 --- a/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_mops-za32.c +++ b/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_mops-za32.c @@ -1,10 +1,10 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 3 // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSME_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +bf16 -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSME_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +bf16 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -O1 -Werror -o /dev/null %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C +// RUN: 
%clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX +// RUN: %clang_cc1 -fclang-abi-compat=latest -DSME_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C +// RUN: %clang_cc1 -fclang-abi-compat=latest -DSME_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -O1 -Werror -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_mops-za64.c b/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_mops-za64.c index 9d205beb05f28..3c7b18da14620 100644 --- a/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_mops-za64.c +++ b/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_mops-za64.c @@ -1,10 +1,10 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 3 // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f64f64 -target-feature +sme-i16i64 -target-feature +bf16 -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f64f64 -target-feature +sme-i16i64 -target-feature +bf16 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSME_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f64f64 -target-feature +sme-i16i64 -target-feature +bf16 -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C -// RUN: %clang_cc1 -fclang-abi-compat=latest 
-DSME_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f64f64 -target-feature +sme-i16i64 -target-feature +bf16 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f64f64 -target-feature +sme-i16i64 -target-feature +bf16 -S -O1 -Werror -o /dev/null %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f64f64 -target-feature +sme-i16i64 -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f64f64 -target-feature +sme-i16i64 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX +// RUN: %clang_cc1 -fclang-abi-compat=latest -DSME_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f64f64 -target-feature +sme-i16i64 -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C +// RUN: %clang_cc1 -fclang-abi-compat=latest -DSME_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f64f64 -target-feature +sme-i16i64 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f64f64 -target-feature +sme-i16i64 -S -O1 -Werror -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/aarch64-sme2-attrs.cpp b/clang/test/CodeGen/AArch64/sme2-intrinsics/aarch64-sme2-attrs.cpp index 15b9ac42cbcf3..b1649efa0adf6 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/aarch64-sme2-attrs.cpp +++ 
b/clang/test/CodeGen/AArch64/sme2-intrinsics/aarch64-sme2-attrs.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 \ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 \ // RUN: -disable-O0-optnone -Werror -emit-llvm -o - %s \ // RUN: | opt -S -passes=mem2reg \ // RUN: | opt -S -passes=inline \ diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_add.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_add.c index 7e7597f82136c..ad70f6de9e66b 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_add.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_add.c @@ -2,11 +2,11 @@ // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt 
-S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_add_sub_za16.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_add_sub_za16.c index 6bd9eab5f1846..9cc9963840921 100644 --- 
a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_add_sub_za16.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_add_sub_za16.c @@ -1,10 +1,10 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme-f16f16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme-f8f16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme-f8f16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme-f16f16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme-f16f16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme-f8f16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme-f8f16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -x c++ 
-fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme-f16f16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme-f16f16 -O2 -S -Werror -Wall -o /dev/null +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme-f16f16 -O2 -S -Werror -Wall -o /dev/null // REQUIRES: aarch64-registered-target diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_bmop.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_bmop.c index 84521e7e47644..408772d7feebe 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_bmop.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_bmop.c @@ -2,11 +2,11 @@ // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 
-triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_clamp.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_clamp.c index 1297185c4b50e..c78a3e4485379 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_clamp.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_clamp.c @@ -1,14 +1,14 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 \ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: 
%clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 \ +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 \ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 \ +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 \ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 \ // RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_cvt.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_cvt.c index 2851ea9ccd22c..d8d07365fa6e8 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_cvt.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_cvt.c @@ -2,11 +2,11 @@ // REQUIRES: 
aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -D__SVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -D__SVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -D__SVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -D__SVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | 
FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_cvtn.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_cvtn.c index b38bf6fd35084..7e40de7d88616 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_cvtn.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_cvtn.c @@ -2,11 +2,11 @@ // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -D__SVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -D__SVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | 
opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -D__SVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -D__SVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_fmlas16.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_fmlas16.c index 4338ea030b504..77f9aaaa17767 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_fmlas16.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_fmlas16.c @@ -1,10 +1,10 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme-f16f16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme-f16f16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s --check-prefix CHECK-CXX -// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme-f16f16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 
-target-feature +sme -target-feature +sme-b16b16 -target-feature +sme-f16f16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s --check-prefix CHECK-CXX +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme-f16f16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme-f16f16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s --check-prefix CHECK-CXX +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme-f16f16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme-f16f16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s --check-prefix CHECK-CXX -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme2p1 -target-feature +sme-f16f16 -O2 -S -Werror -Wall %s -o /dev/null +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme2p1 -target-feature +sme-f16f16 -O2 -S -Werror -Wall %s -o /dev/null // REQUIRES: aarch64-registered-target #include diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_fp_dots.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_fp_dots.c index 00cbfdbe7ca34..22af781de3d54 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_fp_dots.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_fp_dots.c @@ -1,11 +1,11 @@ // NOTE: Assertions have been autogenerated by 
utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include #ifdef SVE_OVERLOADED_FORMS diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_frint.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_frint.c index 8ab450587fc70..64bb8ed369cf4 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_frint.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_frint.c @@ -2,11 +2,11 @@ // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_int_dots.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_int_dots.c index 961133b89eb29..190438b2c7a02 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_int_dots.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_int_dots.c @@ -1,11 +1,11 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: 
%clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include #ifdef SVE_OVERLOADED_FORMS diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_luti2_lane_zt.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_luti2_lane_zt.c index 1ab02afbe0904..51737587ff3d7 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_luti2_lane_zt.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_luti2_lane_zt.c 
@@ -2,9 +2,9 @@ // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c index e97075703b185..8a3fcd34291a8 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c @@ -2,9 +2,9 @@ // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o 
- -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c index 0730812b1f06f..8b8dc97b1314c 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c @@ -2,9 +2,9 @@ // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_luti4_lane_zt.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_luti4_lane_zt.c index b687b580b15a6..0ef2fdcb4486c 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_luti4_lane_zt.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_luti4_lane_zt.c @@ -2,9 +2,9 @@ // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git 
a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_luti4_lane_zt_x2.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_luti4_lane_zt_x2.c index 1a9e9d84c6359..16c293d7d4255 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_luti4_lane_zt_x2.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_luti4_lane_zt_x2.c @@ -2,9 +2,9 @@ // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_luti4_lane_zt_x4.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_luti4_lane_zt_x4.c index c4c89358c16f8..36ae4d24be372 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_luti4_lane_zt_x4.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_luti4_lane_zt_x4.c @@ -2,9 +2,9 @@ // REQUIRES: aarch64-registered-target -// RUN: 
%clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_max.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_max.c index 5d57ffb9bdf8c..dd06b232de01d 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_max.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_max.c @@ -1,9 +1,9 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // REQUIRES: aarch64-registered-target #include diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_maxnm.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_maxnm.c index 1d47abe8d487c..42d12b3130c1b 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_maxnm.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_maxnm.c @@ -1,11 +1,11 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -S -disable-O0-optnone -Werror -Wall -o 
/dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include #ifdef SVE_OVERLOADED_FORMS diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_min.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_min.c index 4e70a39311664..f583f639edbd5 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_min.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_min.c @@ -1,9 +1,9 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 
-target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ 
%s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // REQUIRES: aarch64-registered-target #include diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_minnm.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_minnm.c index 838cb644e5e39..037484596098e 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_minnm.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_minnm.c @@ -1,11 +1,11 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -S -disable-O0-optnone 
-Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include #ifdef SVE_OVERLOADED_FORMS diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mla.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mla.c index 74859c0a23bbb..484a7135e95d0 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mla.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mla.c @@ -1,11 +1,11 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 
-target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-f64f64 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature 
+sme-f64f64 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include #ifdef SVE_OVERLOADED_FORMS diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mlal.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mlal.c index 9d8b1351debc2..6f7c31fd8963a 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mlal.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mlal.c @@ -1,11 +1,11 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | 
opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include #ifdef SVE_OVERLOADED_FORMS diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mlall.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mlall.c index 4efc226c10e68..34686fff93792 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mlall.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mlall.c @@ -1,11 +1,11 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 
-target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include #ifdef SVE_OVERLOADED_FORMS diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mls.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mls.c index 2b1554cd9d8b0..171d6ca75de3d 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mls.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mls.c @@ -1,11 +1,11 @@ // NOTE: Assertions have been autogenerated by 
utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-f64f64 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f64f64 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include #ifdef SVE_OVERLOADED_FORMS diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mlsl.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mlsl.c index e56ffaa1db03e..9d771d651002f 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mlsl.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mlsl.c @@ -1,11 +1,11 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 
-target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include #ifdef SVE_OVERLOADED_FORMS diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop.c index d25e923c24400..198d5e2fcf762 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop.c @@ -2,11 +2,11 @@ // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 
-DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_1x2.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_1x2.c index 36e0b754b1d35..d6e766f7b182a 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_1x2.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_1x2.c @@ -1,11 +1,11 @@ 
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme-mop4 
-target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_2x1.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_2x1.c index cd12aa523f7cb..03738c083892a 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_2x1.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_2x1.c @@ -1,9 +1,9 @@ // NOTE: 
Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature 
+sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_2x2.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_2x2.c index f5b6c566c2602..af81d71105763 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_2x2.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_2x2.c @@ -1,11 +1,11 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme 
-target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mopa_nonwide.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mopa_nonwide.c index 0eb391c7d79a0..3c7e14c7bddcf 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mopa_nonwide.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mopa_nonwide.c @@ -1,10 +1,10 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme2p1 -target-feature +sme-f16f16 -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK -// RUN: %clang_cc1 -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme2p1 -target-feature +sme-f16f16 -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX -// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme2p1 -target-feature +sme-f16f16 -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK -// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme2p1 -target-feature +sme-f16f16 -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme2p1 -target-feature +sme-f16f16 -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK +// RUN: %clang_cc1 -x c++ -fclang-abi-compat=latest -triple 
aarch64-none-linux-gnu -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme2p1 -target-feature +sme-f16f16 -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme2p1 -target-feature +sme-f16f16 -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme2p1 -target-feature +sme-f16f16 -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX -// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme2p1 -target-feature +sme-f16f16 -S -O2 -Werror -o /dev/null %s +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme2p1 -target-feature +sme-f16f16 -S -O2 -Werror -o /dev/null %s // REQUIRES: aarch64-registered-target diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_read.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_read.c index c9d532d5fce45..1ae82365c6590 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_read.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_read.c @@ -1,9 +1,9 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 
-target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_reinterpret_svcount_svbool.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_reinterpret_svcount_svbool.c index 5b61102751494..4c059e350a182 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_reinterpret_svcount_svbool.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_reinterpret_svcount_svbool.c @@ -4,10 +4,10 @@ // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: 
%clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK #include diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_sqdmulh.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_sqdmulh.c index 5ff801666df88..0df25e76167ef 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_sqdmulh.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_sqdmulh.c @@ -1,11 +1,11 @@ // NOTE: Assertions have been 
autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | 
opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include #ifdef SVE_OVERLOADED_FORMS diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_sub.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_sub.c index aca66e0d78d66..a44150ec95df1 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_sub.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_sub.c @@ -2,11 +2,11 @@ // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 
-target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_unpkx2.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_unpkx2.c index d3b09f071c58f..a78e68726058d 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_unpkx2.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_unpkx2.c @@ -2,12 +2,12 @@ // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 
-target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include #ifdef SVE_OVERLOADED_FORMS diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_unpkx4.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_unpkx4.c index 45bc83eac7339..323493e9f2299 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_unpkx4.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_unpkx4.c @@ -2,12 +2,12 @@ // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -triple 
aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include #ifdef SVE_OVERLOADED_FORMS diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vdot.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vdot.c index d9445ef03b8c1..03ce662fae0de 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vdot.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vdot.c @@ -1,10 +1,10 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple 
aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include #ifdef SVE_OVERLOADED_FORMS diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_add.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_add.c index de983bcf79309..da85a0dad64af 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_add.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_add.c @@ -2,11 +2,11 @@ // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_qrshr.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_qrshr.c index 3e47a3ecc17c0..28a3d71c66cf6 100644 
--- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_qrshr.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_qrshr.c @@ -1,11 +1,11 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-f64f64 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone 
-Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f64f64 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_rshl.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_rshl.c index af5a389c7f736..21220db3d2dbd 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_rshl.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_rshl.c @@ -1,11 +1,11 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 
-target-feature +bf16 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include #ifdef SVE_OVERLOADED_FORMS diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_selx2.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_selx2.c index 893cc7519a1d0..f3fbae052ffe0 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_selx2.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_selx2.c @@ -1,11 +1,11 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include #ifdef SVE_OVERLOADED_FORMS diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_selx4.c
b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_selx4.c index d4e77d998e3c2..7d2ef9a694ad4 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_selx4.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_selx4.c @@ -1,11 +1,11 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 
-DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include #ifdef SVE_OVERLOADED_FORMS diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_uzpx2.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_uzpx2.c index ec3a2952b2ac6..5f020081de521 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_uzpx2.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_uzpx2.c @@ -1,11 +1,11 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt 
-S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_uzpx4.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_uzpx4.c index aeac2ae78f6e6..4085e84a460dd 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_uzpx4.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_uzpx4.c @@ -1,11 +1,11 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 
-target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_zipx2.c 
b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_zipx2.c index 735b3697f150b..5b4b971f6b29e 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_zipx2.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_zipx2.c @@ -1,10 +1,10 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 
-target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_zipx4.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_zipx4.c index 341ae290e9b0e..721465fdaa306 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_zipx4.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_zipx4.c @@ -1,10 +1,10 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: 
%clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_write.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_write.c index 7d0fbc9479a87..e05a60f7b23f0 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_write.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_write.c @@ -1,11 +1,11 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_write_lane_zt.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_write_lane_zt.c index f295bfb2d18b4..e82273294f173 100644 --- 
a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_write_lane_zt.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_write_lane_zt.c @@ -1,10 +1,10 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme2 -target-feature +sme -target-feature +sme-lutv2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme2 -target-feature +sme -target-feature +sme-lutv2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-lutv2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-lutv2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-lutv2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme2 -target-feature +sme -target-feature +sme-lutv2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme2 -target-feature +sme -target-feature +sme-lutv2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: 
%clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-lutv2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-lutv2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-lutv2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // REQUIRES: aarch64-registered-target diff --git a/clang/test/CodeGen/AArch64/sme2p1-intrinsics/acle_sme2p1_movaz.c b/clang/test/CodeGen/AArch64/sme2p1-intrinsics/acle_sme2p1_movaz.c index 98324e78b16bc..bdfc6c15c5a79 100644 --- a/clang/test/CodeGen/AArch64/sme2p1-intrinsics/acle_sme2p1_movaz.c +++ b/clang/test/CodeGen/AArch64/sme2p1-intrinsics/acle_sme2p1_movaz.c @@ -1,8 +1,8 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 // REQUIRES: aarch64-registered-target -//RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +sme -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +sme -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +//RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +sme -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s 
| opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +sme -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sme2p1-intrinsics/acle_sme2p1_zero.c b/clang/test/CodeGen/AArch64/sme2p1-intrinsics/acle_sme2p1_zero.c index 7053f056e8eba..3902ca69c6973 100644 --- a/clang/test/CodeGen/AArch64/sme2p1-intrinsics/acle_sme2p1_zero.c +++ b/clang/test/CodeGen/AArch64/sme2p1-intrinsics/acle_sme2p1_zero.c @@ -1,9 +1,9 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 
-target-feature +sme2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_clasta-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_clasta-bfloat.c deleted file mode 100644 index 51f035906b21e..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_clasta-bfloat.c +++ /dev/null @@ -1,58 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK - -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S 
-disable-O0-optnone -Werror -Wall -o /dev/null %s - -#include - -#if defined __ARM_FEATURE_SME -#define MODE_ATTR __arm_streaming -#else -#define MODE_ATTR -#endif - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. -#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4 -#endif - -// CHECK-LABEL: @test_svclasta_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.clasta.nxv8bf16( [[TMP0]], [[FALLBACK:%.*]], [[DATA:%.*]]) -// CHECK-NEXT: ret [[TMP1]] -// -// CPP-CHECK-LABEL: @_Z18test_svclasta_bf16u10__SVBool_tu14__SVBfloat16_tS0_( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.clasta.nxv8bf16( [[TMP0]], [[FALLBACK:%.*]], [[DATA:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] -// -svbfloat16_t test_svclasta_bf16(svbool_t pg, svbfloat16_t fallback, svbfloat16_t data) MODE_ATTR { - // expected-warning@+1 {{implicit declaration of function 'svclasta_bf16'}} - return SVE_ACLE_FUNC(svclasta, _bf16, , )(pg, fallback, data); -} - -// CHECK-LABEL: @test_svclasta_n_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call bfloat @llvm.aarch64.sve.clasta.n.nxv8bf16( [[TMP0]], bfloat [[FALLBACK:%.*]], [[DATA:%.*]]) -// CHECK-NEXT: ret bfloat [[TMP1]] -// -// CPP-CHECK-LABEL: @_Z20test_svclasta_n_bf16u10__SVBool_tu6__bf16u14__SVBfloat16_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call bfloat @llvm.aarch64.sve.clasta.n.nxv8bf16( [[TMP0]], bfloat 
[[FALLBACK:%.*]], [[DATA:%.*]]) -// CPP-CHECK-NEXT: ret bfloat [[TMP1]] -// -bfloat16_t test_svclasta_n_bf16(svbool_t pg, bfloat16_t fallback, svbfloat16_t data) MODE_ATTR { - // expected-warning@+1 {{implicit declaration of function 'svclasta_n_bf16'}} - return SVE_ACLE_FUNC(svclasta, _n_bf16, , )(pg, fallback, data); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_clasta.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_clasta.c index 4712d57be729b..f11decddf05bb 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_clasta.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_clasta.c @@ -423,3 +423,37 @@ float64_t test_svclasta_n_f64(svbool_t pg, float64_t fallback, svfloat64_t data) { return SVE_ACLE_FUNC(svclasta,_n_f64,,)(pg, fallback, data); } + +// CHECK-LABEL: @test_svclasta_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.clasta.nxv8bf16( [[TMP0]], [[FALLBACK:%.*]], [[DATA:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z18test_svclasta_bf16u10__SVBool_tu14__SVBfloat16_tS0_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.clasta.nxv8bf16( [[TMP0]], [[FALLBACK:%.*]], [[DATA:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svbfloat16_t test_svclasta_bf16(svbool_t pg, svbfloat16_t fallback, svbfloat16_t data) MODE_ATTR { + // expected-warning@+1 {{implicit declaration of function 'svclasta_bf16'}} + return SVE_ACLE_FUNC(svclasta, _bf16, , )(pg, fallback, data); +} + +// CHECK-LABEL: @test_svclasta_n_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call bfloat @llvm.aarch64.sve.clasta.n.nxv8bf16( [[TMP0]], bfloat 
[[FALLBACK:%.*]], [[DATA:%.*]]) +// CHECK-NEXT: ret bfloat [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z20test_svclasta_n_bf16u10__SVBool_tu6__bf16u14__SVBfloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call bfloat @llvm.aarch64.sve.clasta.n.nxv8bf16( [[TMP0]], bfloat [[FALLBACK:%.*]], [[DATA:%.*]]) +// CPP-CHECK-NEXT: ret bfloat [[TMP1]] +// +bfloat16_t test_svclasta_n_bf16(svbool_t pg, bfloat16_t fallback, svbfloat16_t data) MODE_ATTR { + // expected-warning@+1 {{implicit declaration of function 'svclasta_n_bf16'}} + return SVE_ACLE_FUNC(svclasta, _n_bf16, , )(pg, fallback, data); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_clastb-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_clastb-bfloat.c deleted file mode 100644 index 2ee31baf476a0..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_clastb-bfloat.c +++ /dev/null @@ -1,58 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK - -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s - -#include - -#if defined __ARM_FEATURE_SME -#define MODE_ATTR __arm_streaming -#else -#define MODE_ATTR -#endif - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. -#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4 -#endif - -// CHECK-LABEL: @test_svclastb_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.clastb.nxv8bf16( [[TMP0]], [[FALLBACK:%.*]], [[DATA:%.*]]) -// CHECK-NEXT: ret [[TMP1]] -// -// CPP-CHECK-LABEL: @_Z18test_svclastb_bf16u10__SVBool_tu14__SVBfloat16_tS0_( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.clastb.nxv8bf16( [[TMP0]], [[FALLBACK:%.*]], [[DATA:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] -// -svbfloat16_t test_svclastb_bf16(svbool_t pg, svbfloat16_t fallback, svbfloat16_t data) MODE_ATTR { - // expected-warning@+1 {{implicit declaration of function 'svclastb_bf16'}} - return SVE_ACLE_FUNC(svclastb, _bf16, , )(pg, fallback, data); -} - -// CHECK-LABEL: @test_svclastb_n_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call bfloat @llvm.aarch64.sve.clastb.n.nxv8bf16( [[TMP0]], bfloat [[FALLBACK:%.*]], [[DATA:%.*]]) 
-// CHECK-NEXT: ret bfloat [[TMP1]] -// -// CPP-CHECK-LABEL: @_Z20test_svclastb_n_bf16u10__SVBool_tu6__bf16u14__SVBfloat16_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call bfloat @llvm.aarch64.sve.clastb.n.nxv8bf16( [[TMP0]], bfloat [[FALLBACK:%.*]], [[DATA:%.*]]) -// CPP-CHECK-NEXT: ret bfloat [[TMP1]] -// -bfloat16_t test_svclastb_n_bf16(svbool_t pg, bfloat16_t fallback, svbfloat16_t data) MODE_ATTR { - // expected-warning@+1 {{implicit declaration of function 'svclastb_n_bf16'}} - return SVE_ACLE_FUNC(svclastb, _n_bf16, , )(pg, fallback, data); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_clastb.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_clastb.c index caa5dd9381ab2..394ca9fd0d50d 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_clastb.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_clastb.c @@ -423,3 +423,37 @@ float64_t test_svclastb_n_f64(svbool_t pg, float64_t fallback, svfloat64_t data) { return SVE_ACLE_FUNC(svclastb,_n_f64,,)(pg, fallback, data); } + +// CHECK-LABEL: @test_svclastb_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.clastb.nxv8bf16( [[TMP0]], [[FALLBACK:%.*]], [[DATA:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z18test_svclastb_bf16u10__SVBool_tu14__SVBfloat16_tS0_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.clastb.nxv8bf16( [[TMP0]], [[FALLBACK:%.*]], [[DATA:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svbfloat16_t test_svclastb_bf16(svbool_t pg, svbfloat16_t fallback, svbfloat16_t data) MODE_ATTR { + // expected-warning@+1 {{implicit declaration of 
function 'svclastb_bf16'}} + return SVE_ACLE_FUNC(svclastb, _bf16, , )(pg, fallback, data); +} + +// CHECK-LABEL: @test_svclastb_n_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call bfloat @llvm.aarch64.sve.clastb.n.nxv8bf16( [[TMP0]], bfloat [[FALLBACK:%.*]], [[DATA:%.*]]) +// CHECK-NEXT: ret bfloat [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z20test_svclastb_n_bf16u10__SVBool_tu6__bf16u14__SVBfloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call bfloat @llvm.aarch64.sve.clastb.n.nxv8bf16( [[TMP0]], bfloat [[FALLBACK:%.*]], [[DATA:%.*]]) +// CPP-CHECK-NEXT: ret bfloat [[TMP1]] +// +bfloat16_t test_svclastb_n_bf16(svbool_t pg, bfloat16_t fallback, svbfloat16_t data) MODE_ATTR { + // expected-warning@+1 {{implicit declaration of function 'svclastb_n_bf16'}} + return SVE_ACLE_FUNC(svclastb, _n_bf16, , )(pg, fallback, data); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_cnt-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_cnt-bfloat.c deleted file mode 100644 index 9c28182166702..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_cnt-bfloat.c +++ /dev/null @@ -1,74 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 
-fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK - -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s - -#include - -#if defined __ARM_FEATURE_SME -#define MODE_ATTR __arm_streaming -#else -#define MODE_ATTR -#endif - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. 
-#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4 -#endif - -// CHECK-LABEL: @test_svcnt_bf16_z( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.cnt.nxv8bf16( zeroinitializer, [[TMP0]], [[OP:%.*]]) -// CHECK-NEXT: ret [[TMP1]] -// -// CPP-CHECK-LABEL: @_Z17test_svcnt_bf16_zu10__SVBool_tu14__SVBfloat16_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.cnt.nxv8bf16( zeroinitializer, [[TMP0]], [[OP:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] -// -svuint16_t test_svcnt_bf16_z(svbool_t pg, svbfloat16_t op) MODE_ATTR { - // expected-warning@+1 {{implicit declaration of function 'svcnt_bf16_z'}} - return SVE_ACLE_FUNC(svcnt, _bf16, _z, )(pg, op); -} - -// CHECK-LABEL: @test_svcnt_bf16_m( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.cnt.nxv8bf16( [[INACTIVE:%.*]], [[TMP0]], [[OP:%.*]]) -// CHECK-NEXT: ret [[TMP1]] -// -// CPP-CHECK-LABEL: @_Z17test_svcnt_bf16_mu12__SVUint16_tu10__SVBool_tu14__SVBfloat16_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.cnt.nxv8bf16( [[INACTIVE:%.*]], [[TMP0]], [[OP:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] -// -svuint16_t test_svcnt_bf16_m(svuint16_t inactive, svbool_t pg, svbfloat16_t op) MODE_ATTR { - // expected-warning@+1 {{implicit declaration of function 'svcnt_bf16_m'}} - return SVE_ACLE_FUNC(svcnt, _bf16, _m, )(inactive, pg, op); -} -// CHECK-LABEL: @test_svcnt_bf16_x( -// CHECK-NEXT: entry: -// CHECK-NEXT: 
[[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.cnt.nxv8bf16( undef, [[TMP0]], [[OP:%.*]]) -// CHECK-NEXT: ret [[TMP1]] -// -// CPP-CHECK-LABEL: @_Z17test_svcnt_bf16_xu10__SVBool_tu14__SVBfloat16_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.cnt.nxv8bf16( undef, [[TMP0]], [[OP:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] -// -svuint16_t test_svcnt_bf16_x(svbool_t pg, svbfloat16_t op) MODE_ATTR { - // expected-warning@+1 {{implicit declaration of function 'svcnt_bf16_x'}} - return SVE_ACLE_FUNC(svcnt, _bf16, _x, )(pg, op); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_cnt.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_cnt.c index fe545adbd6a10..a45ebdef63d4b 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_cnt.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_cnt.c @@ -570,3 +570,53 @@ svuint64_t test_svcnt_f64_x(svbool_t pg, svfloat64_t op) MODE_ATTR { return SVE_ACLE_FUNC(svcnt,_f64,_x,)(pg, op); } + +// CHECK-LABEL: @test_svcnt_bf16_z( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.cnt.nxv8bf16( zeroinitializer, [[TMP0]], [[OP:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svcnt_bf16_zu10__SVBool_tu14__SVBfloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.cnt.nxv8bf16( zeroinitializer, [[TMP0]], [[OP:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svuint16_t test_svcnt_bf16_z(svbool_t pg, svbfloat16_t op) MODE_ATTR { + // expected-warning@+1 {{implicit declaration of 
function 'svcnt_bf16_z'}} + return SVE_ACLE_FUNC(svcnt, _bf16, _z, )(pg, op); +} + +// CHECK-LABEL: @test_svcnt_bf16_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.cnt.nxv8bf16( [[INACTIVE:%.*]], [[TMP0]], [[OP:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svcnt_bf16_mu12__SVUint16_tu10__SVBool_tu14__SVBfloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.cnt.nxv8bf16( [[INACTIVE:%.*]], [[TMP0]], [[OP:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svuint16_t test_svcnt_bf16_m(svuint16_t inactive, svbool_t pg, svbfloat16_t op) MODE_ATTR { + // expected-warning@+1 {{implicit declaration of function 'svcnt_bf16_m'}} + return SVE_ACLE_FUNC(svcnt, _bf16, _m, )(inactive, pg, op); +} +// CHECK-LABEL: @test_svcnt_bf16_x( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.cnt.nxv8bf16( undef, [[TMP0]], [[OP:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svcnt_bf16_xu10__SVBool_tu14__SVBfloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.cnt.nxv8bf16( undef, [[TMP0]], [[OP:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svuint16_t test_svcnt_bf16_x(svbool_t pg, svbfloat16_t op) MODE_ATTR { + // expected-warning@+1 {{implicit declaration of function 'svcnt_bf16_x'}} + return SVE_ACLE_FUNC(svcnt, _bf16, _x, )(pg, op); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_create2-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_create2-bfloat.c 
deleted file mode 100644 index 4691172b14a69..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_create2-bfloat.c +++ /dev/null @@ -1,40 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s - -#include - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. 
-#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 -#endif - -#ifdef __ARM_FEATURE_SME -#define ATTR __arm_streaming -#else -#define ATTR -#endif - -// CHECK-LABEL: @test_svcreate2_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[X0:%.*]], 0 -// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[X1:%.*]], 1 -// CHECK-NEXT: ret { , } [[TMP1]] -// -// CPP-CHECK-LABEL: @_Z19test_svcreate2_bf16u14__SVBfloat16_tS_( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[X0:%.*]], 0 -// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[X1:%.*]], 1 -// CPP-CHECK-NEXT: ret { , } [[TMP1]] -// -svbfloat16x2_t test_svcreate2_bf16(svbfloat16_t x0, svbfloat16_t x1) ATTR -{ - return SVE_ACLE_FUNC(svcreate2,_bf16,,)(x0, x1); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_create2.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_create2.c index 0809250370a68..ca023e6501504 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_create2.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_create2.c @@ -225,3 +225,20 @@ svmfloat8x2_t test_svcreate2_mf8(svmfloat8_t x0, svmfloat8_t x1) ATTR { return SVE_ACLE_FUNC(svcreate2,_mf8,,)(x0, x1); } + +// CHECK-LABEL: @test_svcreate2_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[X0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[X1:%.*]], 1 +// CHECK-NEXT: ret { , } [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z19test_svcreate2_bf16u14__SVBfloat16_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[X0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[X1:%.*]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP1]] +// +svbfloat16x2_t test_svcreate2_bf16(svbfloat16_t x0, svbfloat16_t x1) ATTR +{ + return SVE_ACLE_FUNC(svcreate2,_bf16,,)(x0, x1); +} 
diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_create3-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_create3-bfloat.c deleted file mode 100644 index 3e2bd259e5c7c..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_create3-bfloat.c +++ /dev/null @@ -1,42 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s - -#include - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. 
-#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 -#endif - -#ifdef __ARM_FEATURE_SME -#define ATTR __arm_streaming -#else -#define ATTR -#endif - -// CHECK-LABEL: @test_svcreate3_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[X0:%.*]], 0 -// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[X1:%.*]], 1 -// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[X2:%.*]], 2 -// CHECK-NEXT: ret { , , } [[TMP2]] -// -// CPP-CHECK-LABEL: @_Z19test_svcreate3_bf16u14__SVBfloat16_tS_S_( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[X0:%.*]], 0 -// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[X1:%.*]], 1 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[X2:%.*]], 2 -// CPP-CHECK-NEXT: ret { , , } [[TMP2]] -// -svbfloat16x3_t test_svcreate3_bf16(svbfloat16_t x0, svbfloat16_t x1, svbfloat16_t x2) ATTR -{ - return SVE_ACLE_FUNC(svcreate3,_bf16,,)(x0, x1, x2); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_create3.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_create3.c index 3b003bd534b25..c46ee2a057c73 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_create3.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_create3.c @@ -249,3 +249,22 @@ svmfloat8x3_t test_svcreate3_mf8(svmfloat8_t x0, svmfloat8_t x1, svmfloat8_t x2) { return SVE_ACLE_FUNC(svcreate3,_mf8,,)(x0, x1, x2); } + +// CHECK-LABEL: @test_svcreate3_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[X0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[X1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[X2:%.*]], 2 +// CHECK-NEXT: ret { , , } [[TMP2]] +// +// CPP-CHECK-LABEL: @_Z19test_svcreate3_bf16u14__SVBfloat16_tS_S_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = 
insertvalue { , , } poison, [[X0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[X1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[X2:%.*]], 2 +// CPP-CHECK-NEXT: ret { , , } [[TMP2]] +// +svbfloat16x3_t test_svcreate3_bf16(svbfloat16_t x0, svbfloat16_t x1, svbfloat16_t x2) ATTR +{ + return SVE_ACLE_FUNC(svcreate3,_bf16,,)(x0, x1, x2); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_create4-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_create4-bfloat.c deleted file mode 100644 index e821c7501d7a9..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_create4-bfloat.c +++ /dev/null @@ -1,44 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 
-fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s - -#include - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. -#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 -#endif - -#ifdef __ARM_FEATURE_SME -#define ATTR __arm_streaming -#else -#define ATTR -#endif - -// CHECK-LABEL: @test_svcreate4_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[X0:%.*]], 0 -// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[X1:%.*]], 1 -// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[X2:%.*]], 2 -// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[X4:%.*]], 3 -// CHECK-NEXT: ret { , , , } [[TMP3]] -// -// CPP-CHECK-LABEL: @_Z19test_svcreate4_bf16u14__SVBfloat16_tS_S_S_( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[X0:%.*]], 0 -// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[X1:%.*]], 1 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[X2:%.*]], 2 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[X4:%.*]], 3 -// CPP-CHECK-NEXT: ret { , , , } [[TMP3]] -// -svbfloat16x4_t test_svcreate4_bf16(svbfloat16_t x0, svbfloat16_t x1, svbfloat16_t x2, svbfloat16_t x4) ATTR -{ - return SVE_ACLE_FUNC(svcreate4,_bf16,,)(x0, x1, x2, x4); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_create4.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_create4.c index 79a88d271f340..a0b6fcb217a52 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_create4.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_create4.c @@ -273,3 +273,24 @@ svmfloat8x4_t test_svcreate4_mf8(svmfloat8_t x0, svmfloat8_t x1, svmfloat8_t x2, { return SVE_ACLE_FUNC(svcreate4,_mf8,,)(x0, x1, x2, x4); } + 
+// CHECK-LABEL: @test_svcreate4_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[X0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[X1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[X2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[X4:%.*]], 3 +// CHECK-NEXT: ret { , , , } [[TMP3]] +// +// CPP-CHECK-LABEL: @_Z19test_svcreate4_bf16u14__SVBfloat16_tS_S_S_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[X0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[X1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[X2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[X4:%.*]], 3 +// CPP-CHECK-NEXT: ret { , , , } [[TMP3]] +// +svbfloat16x4_t test_svcreate4_bf16(svbfloat16_t x0, svbfloat16_t x1, svbfloat16_t x2, svbfloat16_t x4) ATTR +{ + return SVE_ACLE_FUNC(svcreate4,_bf16,,)(x0, x1, x2, x4); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_dup-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_dup-bfloat.c deleted file mode 100644 index 2e58b503dd81d..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_dup-bfloat.c +++ /dev/null @@ -1,112 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest 
-DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK - -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s - -#include - -#if defined __ARM_FEATURE_SME -#define MODE_ATTR __arm_streaming -#else -#define MODE_ATTR -#endif - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. 
-#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4 -#endif - -// CHECK-LABEL: @test_svdup_n_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, bfloat [[OP:%.*]], i64 0 -// CHECK-NEXT: [[TMP0:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -// CHECK-NEXT: ret [[TMP0]] -// -// CPP-CHECK-LABEL: @_Z17test_svdup_n_bf16u6__bf16( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, bfloat [[OP:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP0:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -// CPP-CHECK-NEXT: ret [[TMP0]] -// -svbfloat16_t test_svdup_n_bf16(bfloat16_t op) MODE_ATTR { - // expected-warning@+1 {{implicit declaration of function 'svdup_n_bf16'}} - return SVE_ACLE_FUNC(svdup, _n, _bf16, )(op); -} - -// CHECK-LABEL: @test_svdup_n_bf16_z( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.dup.nxv8bf16( zeroinitializer, [[TMP0]], bfloat [[OP:%.*]]) -// CHECK-NEXT: ret [[TMP1]] -// -// CPP-CHECK-LABEL: @_Z19test_svdup_n_bf16_zu10__SVBool_tu6__bf16( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.dup.nxv8bf16( zeroinitializer, [[TMP0]], bfloat [[OP:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] -// -svbfloat16_t test_svdup_n_bf16_z(svbool_t pg, bfloat16_t op) MODE_ATTR { - // expected-warning@+1 {{implicit declaration of function 'svdup_n_bf16_z'}} - return SVE_ACLE_FUNC(svdup, _n, _bf16_z, )(pg, op); -} - -// CHECK-LABEL: @test_svdup_n_bf16_m( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.aarch64.sve.dup.nxv8bf16( [[INACTIVE:%.*]], [[TMP0]], bfloat [[OP:%.*]]) -// CHECK-NEXT: ret [[TMP1]] -// -// CPP-CHECK-LABEL: @_Z19test_svdup_n_bf16_mu14__SVBfloat16_tu10__SVBool_tu6__bf16( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.dup.nxv8bf16( [[INACTIVE:%.*]], [[TMP0]], bfloat [[OP:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] -// -svbfloat16_t test_svdup_n_bf16_m(svbfloat16_t inactive, svbool_t pg, bfloat16_t op) MODE_ATTR { - // expected-warning@+1 {{implicit declaration of function 'svdup_n_bf16_m'}} - return SVE_ACLE_FUNC(svdup, _n, _bf16_m, )(inactive, pg, op); -} - -// CHECK-LABEL: @test_svdup_n_bf16_x( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.dup.nxv8bf16( undef, [[TMP0]], bfloat [[OP:%.*]]) -// CHECK-NEXT: ret [[TMP1]] -// -// CPP-CHECK-LABEL: @_Z19test_svdup_n_bf16_xu10__SVBool_tu6__bf16( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.dup.nxv8bf16( undef, [[TMP0]], bfloat [[OP:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] -// -svbfloat16_t test_svdup_n_bf16_x(svbool_t pg, bfloat16_t op) MODE_ATTR { - // expected-warning@+1 {{implicit declaration of function 'svdup_n_bf16_x'}} - return SVE_ACLE_FUNC(svdup, _n, _bf16_x, )(pg, op); -} - -// CHECK-LABEL: @test_svdup_lane_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i16 [[INDEX:%.*]], i64 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.tbl.nxv8bf16( [[DATA:%.*]], [[DOTSPLAT]]) -// CHECK-NEXT: ret [[TMP0]] -// -// CPP-CHECK-LABEL: 
@_Z20test_svdup_lane_bf16u14__SVBfloat16_tt( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i16 [[INDEX:%.*]], i64 0 -// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.tbl.nxv8bf16( [[DATA:%.*]], [[DOTSPLAT]]) -// CPP-CHECK-NEXT: ret [[TMP0]] -// -svbfloat16_t test_svdup_lane_bf16(svbfloat16_t data, uint16_t index) MODE_ATTR -{ - // expected-warning@+1 {{implicit declaration of function 'svdup_lane_bf16'}} - return SVE_ACLE_FUNC(svdup_lane,_bf16,,)(data, index); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_dup.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_dup.c index fec77ceb463ff..e76a5df69fc5f 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_dup.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_dup.c @@ -1040,3 +1040,91 @@ svbool_t test_svdup_n_b64(bool op) MODE_ATTR { return SVE_ACLE_FUNC(svdup,_n,_b64,)(op); } + +// CHECK-LABEL: @test_svdup_n_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, bfloat [[OP:%.*]], i64 0 +// CHECK-NEXT: [[TMP0:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z17test_svdup_n_bf16u6__bf16( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, bfloat [[OP:%.*]], i64 0 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svbfloat16_t test_svdup_n_bf16(bfloat16_t op) MODE_ATTR { + // expected-warning@+1 {{implicit declaration of function 'svdup_n_bf16'}} + return SVE_ACLE_FUNC(svdup, _n, _bf16, )(op); +} + +// CHECK-LABEL: @test_svdup_n_bf16_z( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail 
call @llvm.aarch64.sve.dup.nxv8bf16( zeroinitializer, [[TMP0]], bfloat [[OP:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z19test_svdup_n_bf16_zu10__SVBool_tu6__bf16( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.dup.nxv8bf16( zeroinitializer, [[TMP0]], bfloat [[OP:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svbfloat16_t test_svdup_n_bf16_z(svbool_t pg, bfloat16_t op) MODE_ATTR { + // expected-warning@+1 {{implicit declaration of function 'svdup_n_bf16_z'}} + return SVE_ACLE_FUNC(svdup, _n, _bf16_z, )(pg, op); +} + +// CHECK-LABEL: @test_svdup_n_bf16_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.dup.nxv8bf16( [[INACTIVE:%.*]], [[TMP0]], bfloat [[OP:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z19test_svdup_n_bf16_mu14__SVBfloat16_tu10__SVBool_tu6__bf16( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.dup.nxv8bf16( [[INACTIVE:%.*]], [[TMP0]], bfloat [[OP:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svbfloat16_t test_svdup_n_bf16_m(svbfloat16_t inactive, svbool_t pg, bfloat16_t op) MODE_ATTR { + // expected-warning@+1 {{implicit declaration of function 'svdup_n_bf16_m'}} + return SVE_ACLE_FUNC(svdup, _n, _bf16_m, )(inactive, pg, op); +} + +// CHECK-LABEL: @test_svdup_n_bf16_x( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.dup.nxv8bf16( undef, [[TMP0]], bfloat [[OP:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z19test_svdup_n_bf16_xu10__SVBool_tu6__bf16( +// 
CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.dup.nxv8bf16( undef, [[TMP0]], bfloat [[OP:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svbfloat16_t test_svdup_n_bf16_x(svbool_t pg, bfloat16_t op) MODE_ATTR { + // expected-warning@+1 {{implicit declaration of function 'svdup_n_bf16_x'}} + return SVE_ACLE_FUNC(svdup, _n, _bf16_x, )(pg, op); +} + +// CHECK-LABEL: @test_svdup_lane_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i16 [[INDEX:%.*]], i64 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.tbl.nxv8bf16( [[DATA:%.*]], [[DOTSPLAT]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z20test_svdup_lane_bf16u14__SVBfloat16_tt( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i16 [[INDEX:%.*]], i64 0 +// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.tbl.nxv8bf16( [[DATA:%.*]], [[DOTSPLAT]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svbfloat16_t test_svdup_lane_bf16(svbfloat16_t data, uint16_t index) MODE_ATTR +{ + // expected-warning@+1 {{implicit declaration of function 'svdup_lane_bf16'}} + return SVE_ACLE_FUNC(svdup_lane,_bf16,,)(data, index); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_dupq-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_dupq-bfloat.c deleted file mode 100644 index 45e30aa20f29a..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_dupq-bfloat.c +++ /dev/null @@ -1,73 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple 
aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK - -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s - -#include - -#if defined __ARM_FEATURE_SME -#define MODE_ATTR __arm_streaming -#else -#define MODE_ATTR -#endif - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. 
-#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4 -#endif - -// CHECK-LABEL: @test_svdupq_lane_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.dupq.lane.nxv8bf16( [[DATA:%.*]], i64 [[INDEX:%.*]]) -// CHECK-NEXT: ret [[TMP0]] -// -// CPP-CHECK-LABEL: @_Z21test_svdupq_lane_bf16u14__SVBfloat16_tm( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.dupq.lane.nxv8bf16( [[DATA:%.*]], i64 [[INDEX:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP0]] -// -svbfloat16_t test_svdupq_lane_bf16(svbfloat16_t data, uint64_t index) MODE_ATTR { - // expected-warning@+1 {{implicit declaration of function 'svdupq_lane_bf16'}} - return SVE_ACLE_FUNC(svdupq_lane, _bf16, , )(data, index); -} -// CHECK-LABEL: @test_svdupq_n_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x bfloat> poison, bfloat [[X0:%.*]], i64 0 -// CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x bfloat> [[TMP0]], bfloat [[X1:%.*]], i64 1 -// CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x bfloat> [[TMP1]], bfloat [[X2:%.*]], i64 2 -// CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x bfloat> [[TMP2]], bfloat [[X3:%.*]], i64 3 -// CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x bfloat> [[TMP3]], bfloat [[X4:%.*]], i64 4 -// CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x bfloat> [[TMP4]], bfloat [[X5:%.*]], i64 5 -// CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x bfloat> [[TMP5]], bfloat [[X6:%.*]], i64 6 -// CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x bfloat> [[TMP6]], bfloat [[X7:%.*]], i64 7 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8bf16.v8bf16( poison, <8 x bfloat> [[TMP7]], i64 0) -// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.aarch64.sve.dupq.lane.nxv8bf16( [[TMP8]], i64 0) -// CHECK-NEXT: ret [[TMP9]] -// -// CPP-CHECK-LABEL: @_Z18test_svdupq_n_bf16u6__bf16u6__bf16u6__bf16u6__bf16u6__bf16u6__bf16u6__bf16u6__bf16( -// CPP-CHECK-NEXT: entry: 
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x bfloat> poison, bfloat [[X0:%.*]], i64 0 -// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x bfloat> [[TMP0]], bfloat [[X1:%.*]], i64 1 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x bfloat> [[TMP1]], bfloat [[X2:%.*]], i64 2 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x bfloat> [[TMP2]], bfloat [[X3:%.*]], i64 3 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x bfloat> [[TMP3]], bfloat [[X4:%.*]], i64 4 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x bfloat> [[TMP4]], bfloat [[X5:%.*]], i64 5 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x bfloat> [[TMP5]], bfloat [[X6:%.*]], i64 6 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x bfloat> [[TMP6]], bfloat [[X7:%.*]], i64 7 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8bf16.v8bf16( poison, <8 x bfloat> [[TMP7]], i64 0) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.aarch64.sve.dupq.lane.nxv8bf16( [[TMP8]], i64 0) -// CPP-CHECK-NEXT: ret [[TMP9]] -// -svbfloat16_t test_svdupq_n_bf16(bfloat16_t x0, bfloat16_t x1, bfloat16_t x2, bfloat16_t x3, - bfloat16_t x4, bfloat16_t x5, bfloat16_t x6, bfloat16_t x7) MODE_ATTR { - // - // expected-warning@+1 {{implicit declaration of function 'svdupq_n_bf16'}} - return SVE_ACLE_FUNC(svdupq, _n, _bf16, )(x0, x1, x2, x3, x4, x5, x6, x7); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_dupq.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_dupq.c index 5671383dc7339..5928de5fea1ff 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_dupq.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_dupq.c @@ -875,3 +875,52 @@ svbool_t test_svdupq_n_b64(bool x0, bool x1) MODE_ATTR { return SVE_ACLE_FUNC(svdupq,_n,_b64,)(x0, x1); } + +// CHECK-LABEL: @test_svdupq_lane_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.dupq.lane.nxv8bf16( [[DATA:%.*]], i64 [[INDEX:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// 
+// CPP-CHECK-LABEL: @_Z21test_svdupq_lane_bf16u14__SVBfloat16_tm( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.dupq.lane.nxv8bf16( [[DATA:%.*]], i64 [[INDEX:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svbfloat16_t test_svdupq_lane_bf16(svbfloat16_t data, uint64_t index) MODE_ATTR { + // expected-warning@+1 {{implicit declaration of function 'svdupq_lane_bf16'}} + return SVE_ACLE_FUNC(svdupq_lane, _bf16, , )(data, index); +} +// CHECK-LABEL: @test_svdupq_n_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x bfloat> poison, bfloat [[X0:%.*]], i64 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x bfloat> [[TMP0]], bfloat [[X1:%.*]], i64 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x bfloat> [[TMP1]], bfloat [[X2:%.*]], i64 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x bfloat> [[TMP2]], bfloat [[X3:%.*]], i64 3 +// CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x bfloat> [[TMP3]], bfloat [[X4:%.*]], i64 4 +// CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x bfloat> [[TMP4]], bfloat [[X5:%.*]], i64 5 +// CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x bfloat> [[TMP5]], bfloat [[X6:%.*]], i64 6 +// CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x bfloat> [[TMP6]], bfloat [[X7:%.*]], i64 7 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8bf16.v8bf16( poison, <8 x bfloat> [[TMP7]], i64 0) +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.aarch64.sve.dupq.lane.nxv8bf16( [[TMP8]], i64 0) +// CHECK-NEXT: ret [[TMP9]] +// +// CPP-CHECK-LABEL: @_Z18test_svdupq_n_bf16u6__bf16u6__bf16u6__bf16u6__bf16u6__bf16u6__bf16u6__bf16u6__bf16( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x bfloat> poison, bfloat [[X0:%.*]], i64 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x bfloat> [[TMP0]], bfloat [[X1:%.*]], i64 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x bfloat> [[TMP1]], bfloat [[X2:%.*]], i64 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertelement 
<8 x bfloat> [[TMP2]], bfloat [[X3:%.*]], i64 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x bfloat> [[TMP3]], bfloat [[X4:%.*]], i64 4 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x bfloat> [[TMP4]], bfloat [[X5:%.*]], i64 5 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x bfloat> [[TMP5]], bfloat [[X6:%.*]], i64 6 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x bfloat> [[TMP6]], bfloat [[X7:%.*]], i64 7 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8bf16.v8bf16( poison, <8 x bfloat> [[TMP7]], i64 0) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.aarch64.sve.dupq.lane.nxv8bf16( [[TMP8]], i64 0) +// CPP-CHECK-NEXT: ret [[TMP9]] +// +svbfloat16_t test_svdupq_n_bf16(bfloat16_t x0, bfloat16_t x1, bfloat16_t x2, bfloat16_t x3, + bfloat16_t x4, bfloat16_t x5, bfloat16_t x6, bfloat16_t x7) MODE_ATTR { + // + // expected-warning@+1 {{implicit declaration of function 'svdupq_n_bf16'}} + return SVE_ACLE_FUNC(svdupq, _n, _bf16, )(x0, x1, x2, x3, x4, x5, x6, x7); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ext-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ext-bfloat.c deleted file mode 100644 index 69f7c059527d4..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ext-bfloat.c +++ /dev/null @@ -1,39 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature 
+sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s - -#include - -#if defined __ARM_FEATURE_SME -#define MODE_ATTR __arm_streaming -#else -#define MODE_ATTR -#endif - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. -#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 -#endif - -// CHECK-LABEL: @test_svext_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ext.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]], i32 127) -// CHECK-NEXT: ret [[TMP0]] -// -// CPP-CHECK-LABEL: @_Z15test_svext_bf16u14__SVBfloat16_tS_( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ext.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]], i32 127) -// CPP-CHECK-NEXT: ret [[TMP0]] -// -svbfloat16_t test_svext_bf16(svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR -{ - // expected-warning@+1 {{implicit declaration of function 'svext_bf16'}} - return SVE_ACLE_FUNC(svext,_bf16,,)(op1, op2, 127); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ext.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ext.c index e5b8e7e8a270c..9cdc95788bb7c 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ext.c +++ 
b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ext.c @@ -246,3 +246,19 @@ svfloat64_t test_svext_f64(svfloat64_t op1, svfloat64_t op2) MODE_ATTR { return SVE_ACLE_FUNC(svext,_f64,,)(op1, op2, 31); } + +// CHECK-LABEL: @test_svext_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ext.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]], i32 127) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z15test_svext_bf16u14__SVBfloat16_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.ext.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]], i32 127) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svbfloat16_t test_svext_bf16(svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR +{ + // expected-warning@+1 {{implicit declaration of function 'svext_bf16'}} + return SVE_ACLE_FUNC(svext,_bf16,,)(op1, op2, 127); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_get2-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_get2-bfloat.c deleted file mode 100644 index 05d9ec6684794..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_get2-bfloat.c +++ /dev/null @@ -1,61 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s - -#include - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. -#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 -#endif - -#ifdef __ARM_FEATURE_SME -#define ATTR __arm_streaming -#else -#define ATTR -#endif - -// CHECK-LABEL: @test_svget2_bf16_0( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 -// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 -// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CHECK-NEXT: ret [[TMP2]] -// -// CPP-CHECK-LABEL: @_Z18test_svget2_bf16_014svbfloat16x2_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 -// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CPP-CHECK-NEXT: ret [[TMP2]] -// -svbfloat16_t test_svget2_bf16_0(svbfloat16x2_t tuple) ATTR -{ - return SVE_ACLE_FUNC(svget2,_bf16,,)(tuple, 0); -} - -// CHECK-LABEL: @test_svget2_bf16_1( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 -// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 -// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 1 -// CHECK-NEXT: ret [[TMP2]] -// -// CPP-CHECK-LABEL: @_Z18test_svget2_bf16_114svbfloat16x2_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: 
[[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 -// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 1 -// CPP-CHECK-NEXT: ret [[TMP2]] -// -svbfloat16_t test_svget2_bf16_1(svbfloat16x2_t tuple) ATTR -{ - return SVE_ACLE_FUNC(svget2,_bf16,,)(tuple, 1); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_get2.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_get2.c index 66fd925db9359..ac93ac0073404 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_get2.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_get2.c @@ -250,3 +250,41 @@ svmfloat8_t test_svget2_mf8(svmfloat8x2_t tuple) ATTR { return SVE_ACLE_FUNC(svget2,_mf8,,)(tuple, 0); } + +// CHECK-LABEL: @test_svget2_bf16_0( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: ret [[TMP2]] +// +// CPP-CHECK-LABEL: @_Z18test_svget2_bf16_014svbfloat16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: ret [[TMP2]] +// +svbfloat16_t test_svget2_bf16_0(svbfloat16x2_t tuple) ATTR +{ + return SVE_ACLE_FUNC(svget2,_bf16,,)(tuple, 0); +} + +// CHECK-LABEL: @test_svget2_bf16_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: ret [[TMP2]] +// +// CPP-CHECK-LABEL: @_Z18test_svget2_bf16_114svbfloat16x2_t( +// 
CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: ret [[TMP2]] +// +svbfloat16_t test_svget2_bf16_1(svbfloat16x2_t tuple) ATTR +{ + return SVE_ACLE_FUNC(svget2,_bf16,,)(tuple, 1); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_get3-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_get3-bfloat.c deleted file mode 100644 index 950c4dad9749a..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_get3-bfloat.c +++ /dev/null @@ -1,86 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s - -#include - -#ifdef 
SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. -#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 -#endif - -#ifdef __ARM_FEATURE_SME -#define ATTR __arm_streaming -#else -#define ATTR -#endif - -// CHECK-LABEL: @test_svget3_bf16_0( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 -// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 -// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// CHECK-NEXT: ret [[TMP3]] -// -// CPP-CHECK-LABEL: @_Z18test_svget3_bf16_014svbfloat16x3_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 -// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// CPP-CHECK-NEXT: ret [[TMP3]] -// -svbfloat16_t test_svget3_bf16_0(svbfloat16x3_t tuple) ATTR -{ - return SVE_ACLE_FUNC(svget3,_bf16,,)(tuple, 0); -} - -// CHECK-LABEL: @test_svget3_bf16_1( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 -// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 -// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 1 -// CHECK-NEXT: ret [[TMP3]] -// -// CPP-CHECK-LABEL: @_Z18test_svget3_bf16_114svbfloat16x3_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 -// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 -// 
CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 1 -// CPP-CHECK-NEXT: ret [[TMP3]] -// -svbfloat16_t test_svget3_bf16_1(svbfloat16x3_t tuple) ATTR -{ - return SVE_ACLE_FUNC(svget3,_bf16,,)(tuple, 1); -} - -// CHECK-LABEL: @test_svget3_bf16_2( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 -// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 -// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 2 -// CHECK-NEXT: ret [[TMP3]] -// -// CPP-CHECK-LABEL: @_Z18test_svget3_bf16_214svbfloat16x3_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 -// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 2 -// CPP-CHECK-NEXT: ret [[TMP3]] -// -svbfloat16_t test_svget3_bf16_2(svbfloat16x3_t tuple) ATTR -{ - return SVE_ACLE_FUNC(svget3,_bf16,,)(tuple, 2); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_get3.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_get3.c index db11a42eded6e..c81d0866adb5e 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_get3.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_get3.c @@ -275,3 +275,66 @@ svmfloat8_t test_svget3_mf8(svmfloat8x3_t tuple) ATTR { return SVE_ACLE_FUNC(svget3,_mf8,,)(tuple, 0); } + +// CHECK-LABEL: @test_svget3_bf16_0( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = 
insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: ret [[TMP3]] +// +// CPP-CHECK-LABEL: @_Z18test_svget3_bf16_014svbfloat16x3_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: ret [[TMP3]] +// +svbfloat16_t test_svget3_bf16_0(svbfloat16x3_t tuple) ATTR +{ + return SVE_ACLE_FUNC(svget3,_bf16,,)(tuple, 0); +} + +// CHECK-LABEL: @test_svget3_bf16_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CHECK-NEXT: ret [[TMP3]] +// +// CPP-CHECK-LABEL: @_Z18test_svget3_bf16_114svbfloat16x3_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: ret [[TMP3]] +// +svbfloat16_t test_svget3_bf16_1(svbfloat16x3_t tuple) ATTR +{ + return SVE_ACLE_FUNC(svget3,_bf16,,)(tuple, 1); +} + +// CHECK-LABEL: @test_svget3_bf16_2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue 
{ , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: ret [[TMP3]] +// +// CPP-CHECK-LABEL: @_Z18test_svget3_bf16_214svbfloat16x3_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: ret [[TMP3]] +// +svbfloat16_t test_svget3_bf16_2(svbfloat16x3_t tuple) ATTR +{ + return SVE_ACLE_FUNC(svget3,_bf16,,)(tuple, 2); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_get4-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_get4-bfloat.c deleted file mode 100644 index 55b379140ca3d..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_get4-bfloat.c +++ /dev/null @@ -1,115 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// 
RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s - -#include - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. -#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 -#endif - -#ifdef __ARM_FEATURE_SME -#define ATTR __arm_streaming -#else -#define ATTR -#endif - -// CHECK-LABEL: @test_svget4_bf16_0( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 -// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 -// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 -// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CHECK-NEXT: ret [[TMP4]] -// -// CPP-CHECK-LABEL: @_Z18test_svget4_bf16_014svbfloat16x4_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 -// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CPP-CHECK-NEXT: ret [[TMP4]] -// -svbfloat16_t test_svget4_bf16_0(svbfloat16x4_t tuple) ATTR -{ - return SVE_ACLE_FUNC(svget4,_bf16,,)(tuple, 0); -} - -// CHECK-LABEL: @test_svget4_bf16_1( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 -// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , 
, } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 -// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 -// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CHECK-NEXT: ret [[TMP4]] -// -// CPP-CHECK-LABEL: @_Z18test_svget4_bf16_114svbfloat16x4_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 -// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CPP-CHECK-NEXT: ret [[TMP4]] -// -svbfloat16_t test_svget4_bf16_1(svbfloat16x4_t tuple) ATTR -{ - return SVE_ACLE_FUNC(svget4,_bf16,,)(tuple, 1); -} - -// CHECK-LABEL: @test_svget4_bf16_2( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 -// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 -// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 -// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CHECK-NEXT: ret [[TMP4]] -// -// CPP-CHECK-LABEL: @_Z18test_svget4_bf16_214svbfloat16x4_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 -// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = 
extractvalue { , , , } [[TMP3]], 2 -// CPP-CHECK-NEXT: ret [[TMP4]] -// -svbfloat16_t test_svget4_bf16_2(svbfloat16x4_t tuple) ATTR -{ - return SVE_ACLE_FUNC(svget4,_bf16,,)(tuple, 2); -} - -// CHECK-LABEL: @test_svget4_bf16_3( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 -// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 -// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 -// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CHECK-NEXT: ret [[TMP4]] -// -// CPP-CHECK-LABEL: @_Z18test_svget4_bf16_314svbfloat16x4_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 -// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CPP-CHECK-NEXT: ret [[TMP4]] -// -svbfloat16_t test_svget4_bf16_3(svbfloat16x4_t tuple) ATTR -{ - return SVE_ACLE_FUNC(svget4,_bf16,,)(tuple, 3); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_get4.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_get4.c index d2661d3a1d54e..3968d49799181 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_get4.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_get4.c @@ -300,3 +300,95 @@ svmfloat8_t test_svget4_mf8(svmfloat8x4_t tuple) ATTR { return SVE_ACLE_FUNC(svget4,_mf8,,)(tuple, 0); } + +// CHECK-LABEL: @test_svget4_bf16_0( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { 
, , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z18test_svget4_bf16_014svbfloat16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svbfloat16_t test_svget4_bf16_0(svbfloat16x4_t tuple) ATTR +{ + return SVE_ACLE_FUNC(svget4,_bf16,,)(tuple, 0); +} + +// CHECK-LABEL: @test_svget4_bf16_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z18test_svget4_bf16_114svbfloat16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = 
extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svbfloat16_t test_svget4_bf16_1(svbfloat16x4_t tuple) ATTR +{ + return SVE_ACLE_FUNC(svget4,_bf16,,)(tuple, 1); +} + +// CHECK-LABEL: @test_svget4_bf16_2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z18test_svget4_bf16_214svbfloat16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svbfloat16_t test_svget4_bf16_2(svbfloat16x4_t tuple) ATTR +{ + return SVE_ACLE_FUNC(svget4,_bf16,,)(tuple, 2); +} + +// CHECK-LABEL: @test_svget4_bf16_3( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z18test_svget4_bf16_314svbfloat16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: 
[[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svbfloat16_t test_svget4_bf16_3(svbfloat16x4_t tuple) ATTR +{ + return SVE_ACLE_FUNC(svget4,_bf16,,)(tuple, 3); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_insr-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_insr-bfloat.c deleted file mode 100644 index f65da9b6140de..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_insr-bfloat.c +++ /dev/null @@ -1,39 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK - -// RUN: %clang_cc1 -fclang-abi-compat=latest 
-triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s - -#include - -#if defined __ARM_FEATURE_SME -#define MODE_ATTR __arm_streaming -#else -#define MODE_ATTR -#endif - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. -#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4 -#endif - -// CHECK-LABEL: @test_svinsr_n_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.insr.nxv8bf16( [[OP1:%.*]], bfloat [[OP2:%.*]]) -// CHECK-NEXT: ret [[TMP0]] -// -// CPP-CHECK-LABEL: @_Z18test_svinsr_n_bf16u14__SVBfloat16_tu6__bf16( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.insr.nxv8bf16( [[OP1:%.*]], bfloat [[OP2:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP0]] -// -svbfloat16_t test_svinsr_n_bf16(svbfloat16_t op1, bfloat16_t op2) MODE_ATTR { - // expected-warning@+1 {{implicit declaration of function 'svinsr_n_bf16'}} - return SVE_ACLE_FUNC(svinsr, _n_bf16, , )(op1, op2); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_insr.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_insr.c index fae3220a02f95..cb2dd26722c6e 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_insr.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_insr.c @@ -186,3 +186,18 @@ svfloat64_t test_svinsr_n_f64(svfloat64_t op1, float64_t op2) MODE_ATTR { return SVE_ACLE_FUNC(svinsr,_n_f64,,)(op1, op2); } + +// CHECK-LABEL: @test_svinsr_n_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.insr.nxv8bf16( [[OP1:%.*]], bfloat [[OP2:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: 
@_Z18test_svinsr_n_bf16u14__SVBfloat16_tu6__bf16( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.insr.nxv8bf16( [[OP1:%.*]], bfloat [[OP2:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svbfloat16_t test_svinsr_n_bf16(svbfloat16_t op1, bfloat16_t op2) MODE_ATTR { + // expected-warning@+1 {{implicit declaration of function 'svinsr_n_bf16'}} + return SVE_ACLE_FUNC(svinsr, _n_bf16, , )(op1, op2); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_lasta-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_lasta-bfloat.c deleted file mode 100644 index 3ef2a3fc9397c..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_lasta-bfloat.c +++ /dev/null @@ -1,41 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK - -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: 
%clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s - -#include - -#if defined __ARM_FEATURE_SME -#define MODE_ATTR __arm_streaming -#else -#define MODE_ATTR -#endif - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. -#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4 -#endif - -// CHECK-LABEL: @test_svlasta_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call bfloat @llvm.aarch64.sve.lasta.nxv8bf16( [[TMP0]], [[OP:%.*]]) -// CHECK-NEXT: ret bfloat [[TMP1]] -// -// CPP-CHECK-LABEL: @_Z17test_svlasta_bf16u10__SVBool_tu14__SVBfloat16_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call bfloat @llvm.aarch64.sve.lasta.nxv8bf16( [[TMP0]], [[OP:%.*]]) -// CPP-CHECK-NEXT: ret bfloat [[TMP1]] -// -bfloat16_t test_svlasta_bf16(svbool_t pg, svbfloat16_t op) MODE_ATTR { - // expected-warning@+1 {{implicit declaration of function 'svlasta_bf16'}} - return SVE_ACLE_FUNC(svlasta, _bf16, , )(pg, op); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_lasta.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_lasta.c index 5db5138981dce..afd49f558c406 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_lasta.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_lasta.c @@ -204,3 +204,20 @@ float64_t test_svlasta_f64(svbool_t pg, svfloat64_t op) MODE_ATTR { return SVE_ACLE_FUNC(svlasta,_f64,,)(pg, op); } + +// CHECK-LABEL: @test_svlasta_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = 
tail call bfloat @llvm.aarch64.sve.lasta.nxv8bf16( [[TMP0]], [[OP:%.*]]) +// CHECK-NEXT: ret bfloat [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svlasta_bf16u10__SVBool_tu14__SVBfloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call bfloat @llvm.aarch64.sve.lasta.nxv8bf16( [[TMP0]], [[OP:%.*]]) +// CPP-CHECK-NEXT: ret bfloat [[TMP1]] +// +bfloat16_t test_svlasta_bf16(svbool_t pg, svbfloat16_t op) MODE_ATTR { + // expected-warning@+1 {{implicit declaration of function 'svlasta_bf16'}} + return SVE_ACLE_FUNC(svlasta, _bf16, , )(pg, op); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_lastb-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_lastb-bfloat.c deleted file mode 100644 index d2caab2bd5dfe..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_lastb-bfloat.c +++ /dev/null @@ -1,41 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S 
-passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK - -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s - -#include - -#if defined __ARM_FEATURE_SME -#define MODE_ATTR __arm_streaming -#else -#define MODE_ATTR -#endif - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. -#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4 -#endif - -// CHECK-LABEL: @test_svlastb_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call bfloat @llvm.aarch64.sve.lastb.nxv8bf16( [[TMP0]], [[OP:%.*]]) -// CHECK-NEXT: ret bfloat [[TMP1]] -// -// CPP-CHECK-LABEL: @_Z17test_svlastb_bf16u10__SVBool_tu14__SVBfloat16_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call bfloat @llvm.aarch64.sve.lastb.nxv8bf16( [[TMP0]], [[OP:%.*]]) -// CPP-CHECK-NEXT: ret bfloat [[TMP1]] -// -bfloat16_t test_svlastb_bf16(svbool_t pg, svbfloat16_t op) MODE_ATTR { - // expected-warning@+1 {{implicit declaration of function 'svlastb_bf16'}} - return SVE_ACLE_FUNC(svlastb, _bf16, , )(pg, op); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_lastb.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_lastb.c index ce87ff77c2c83..7d2908af84675 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_lastb.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_lastb.c @@ -204,3 +204,20 @@ float64_t test_svlastb_f64(svbool_t pg, svfloat64_t op) MODE_ATTR { 
return SVE_ACLE_FUNC(svlastb,_f64,,)(pg, op); } + +// CHECK-LABEL: @test_svlastb_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call bfloat @llvm.aarch64.sve.lastb.nxv8bf16( [[TMP0]], [[OP:%.*]]) +// CHECK-NEXT: ret bfloat [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svlastb_bf16u10__SVBool_tu14__SVBfloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call bfloat @llvm.aarch64.sve.lastb.nxv8bf16( [[TMP0]], [[OP:%.*]]) +// CPP-CHECK-NEXT: ret bfloat [[TMP1]] +// +bfloat16_t test_svlastb_bf16(svbool_t pg, svbfloat16_t op) MODE_ATTR { + // expected-warning@+1 {{implicit declaration of function 'svlastb_bf16'}} + return SVE_ACLE_FUNC(svlastb, _bf16, , )(pg, op); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1-bfloat.c deleted file mode 100644 index aaf4e652cd145..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1-bfloat.c +++ /dev/null @@ -1,65 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: 
%clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s - -#include - -#if defined __ARM_FEATURE_SME -#define MODE_ATTR __arm_streaming -#else -#define MODE_ATTR -#endif - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. -#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 -#endif - -// CHECK-LABEL: @test_svld1_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.masked.load.nxv8bf16.p0(ptr [[BASE:%.*]], i32 1, [[TMP0]], zeroinitializer) -// CHECK-NEXT: ret [[TMP1]] -// -// CPP-CHECK-LABEL: @_Z15test_svld1_bf16u10__SVBool_tPKu6__bf16( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.masked.load.nxv8bf16.p0(ptr [[BASE:%.*]], i32 1, [[TMP0]], zeroinitializer) -// CPP-CHECK-NEXT: ret [[TMP1]] -// -svbfloat16_t test_svld1_bf16(svbool_t pg, const bfloat16_t *base) MODE_ATTR -{ - return SVE_ACLE_FUNC(svld1,_bf16,,)(pg, base); -} - -// CHECK-LABEL: @test_svld1_vnum_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4 
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.masked.load.nxv8bf16.p0(ptr [[TMP3]], i32 1, [[TMP0]], zeroinitializer) -// CHECK-NEXT: ret [[TMP4]] -// -// CPP-CHECK-LABEL: @_Z20test_svld1_vnum_bf16u10__SVBool_tPKu6__bf16l( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] -// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.masked.load.nxv8bf16.p0(ptr [[TMP3]], i32 1, [[TMP0]], zeroinitializer) -// CPP-CHECK-NEXT: ret [[TMP4]] -// -svbfloat16_t test_svld1_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum) MODE_ATTR -{ - return SVE_ACLE_FUNC(svld1_vnum,_bf16,,)(pg, base, vnum); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1.c index 40dcd65f6c609..19e5243c8a625 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1.c @@ -1201,3 +1201,45 @@ svfloat64_t test_svld1_gather_u64base_index_f64(svbool_t pg, svuint64_t bases, i } #endif + +// CHECK-LABEL: @test_svld1_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.masked.load.nxv8bf16.p0(ptr [[BASE:%.*]], i32 1, [[TMP0]], zeroinitializer) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z15test_svld1_bf16u10__SVBool_tPKu6__bf16( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.masked.load.nxv8bf16.p0(ptr [[BASE:%.*]], i32 1, [[TMP0]], zeroinitializer) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svbfloat16_t test_svld1_bf16(svbool_t pg, const bfloat16_t *base) MODE_ATTR +{ + return SVE_ACLE_FUNC(svld1,_bf16,,)(pg, base); +} + +// CHECK-LABEL: @test_svld1_vnum_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.masked.load.nxv8bf16.p0(ptr [[TMP3]], i32 1, [[TMP0]], zeroinitializer) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z20test_svld1_vnum_bf16u10__SVBool_tPKu6__bf16l( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.masked.load.nxv8bf16.p0(ptr [[TMP3]], i32 1, [[TMP0]], zeroinitializer) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svbfloat16_t test_svld1_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum) MODE_ATTR +{ + return SVE_ACLE_FUNC(svld1_vnum,_bf16,,)(pg, base, vnum); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1ro-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1ro-bfloat.c deleted file mode 100644 index 5107877ae361c..0000000000000 --- 
a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1ro-bfloat.c +++ /dev/null @@ -1,32 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -target-feature +f64mm -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -target-feature +f64mm -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -target-feature +f64mm -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -target-feature +f64mm -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK - -// REQUIRES: aarch64-registered-target - -#include - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. 
-#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4 -#endif - -// CHECK-LABEL: @test_svld1ro_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1ro.nxv8bf16( [[TMP0]], ptr [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] -// -// CPP-CHECK-LABEL: @_Z17test_svld1ro_bf16u10__SVBool_tPKu6__bf16( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1ro.nxv8bf16( [[TMP0]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] -// -svbfloat16_t test_svld1ro_bf16(svbool_t pg, const bfloat16_t *base) { - return SVE_ACLE_FUNC(svld1ro, _bf16, , )(pg, base); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1ro.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1ro.c index e7520a504b121..723135bd1a124 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1ro.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1ro.c @@ -201,3 +201,19 @@ svfloat32_t test_svld1ro_f32(svbool_t pg, const float32_t *base) { svfloat64_t test_svld1ro_f64(svbool_t pg, const float64_t *base) { return SVE_ACLE_FUNC(svld1ro, _f64, , )(pg, base); } + +// CHECK-LABEL: @test_svld1ro_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1ro.nxv8bf16( [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svld1ro_bf16u10__SVBool_tPKu6__bf16( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1ro.nxv8bf16( [[TMP0]], ptr 
[[BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svbfloat16_t test_svld1ro_bf16(svbool_t pg, const bfloat16_t *base) { + return SVE_ACLE_FUNC(svld1ro, _bf16, , )(pg, base); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1rq-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1rq-bfloat.c deleted file mode 100644 index d50b0269e5297..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1rq-bfloat.c +++ /dev/null @@ -1,40 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s - -#include - -#if defined __ARM_FEATURE_SME -#define MODE_ATTR __arm_streaming -#else -#define MODE_ATTR -#endif - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. 
-#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 -#endif - -// CHECK-LABEL: @test_svld1rq_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1rq.nxv8bf16( [[TMP0]], ptr [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] -// -// CPP-CHECK-LABEL: @_Z17test_svld1rq_bf16u10__SVBool_tPKu6__bf16( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1rq.nxv8bf16( [[TMP0]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] -// -svbfloat16_t test_svld1rq_bf16(svbool_t pg, const bfloat16_t *base) MODE_ATTR -{ - return SVE_ACLE_FUNC(svld1rq,_bf16,,)(pg, base); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1rq.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1rq.c index 0e7455d413274..44dbb9696f154 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1rq.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1rq.c @@ -220,3 +220,20 @@ svfloat64_t test_svld1rq_f64(svbool_t pg, const float64_t *base) MODE_ATTR { return SVE_ACLE_FUNC(svld1rq,_f64,,)(pg, base); } + +// CHECK-LABEL: @test_svld1rq_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1rq.nxv8bf16( [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svld1rq_bf16u10__SVBool_tPKu6__bf16( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ld1rq.nxv8bf16( [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// 
+svbfloat16_t test_svld1rq_bf16(svbool_t pg, const bfloat16_t *base) MODE_ATTR +{ + return SVE_ACLE_FUNC(svld1rq,_bf16,,)(pg, base); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld2-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld2-bfloat.c deleted file mode 100644 index 5535b3d090d32..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld2-bfloat.c +++ /dev/null @@ -1,60 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s - -#include - -#if defined __ARM_FEATURE_SME -#define MODE_ATTR __arm_streaming -#else -#define MODE_ATTR -#endif - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. 
-#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 -#endif - -// CHECK-LABEL: @test_svld2_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8bf16( [[TMP0]], ptr [[BASE:%.*]]) -// CHECK-NEXT: ret { , } [[TMP1]] -// -// CPP-CHECK-LABEL: @_Z15test_svld2_bf16u10__SVBool_tPKu6__bf16( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8bf16( [[TMP0]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret { , } [[TMP1]] -// -svbfloat16x2_t test_svld2_bf16(svbool_t pg, const bfloat16_t *base) MODE_ATTR -{ - return SVE_ACLE_FUNC(svld2,_bf16,,)(pg, base); -} - - -// CHECK-LABEL: @test_svld2_vnum_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8bf16( [[TMP0]], ptr [[TMP1]]) -// CHECK-NEXT: ret { , } [[TMP2]] -// -// CPP-CHECK-LABEL: @_Z20test_svld2_vnum_bf16u10__SVBool_tPKu6__bf16l( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8bf16( [[TMP0]], ptr [[TMP1]]) -// CPP-CHECK-NEXT: ret { , } [[TMP2]] -// -svbfloat16x2_t test_svld2_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum) MODE_ATTR -{ - return SVE_ACLE_FUNC(svld2_vnum,_bf16,,)(pg, base, vnum); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld2.c 
b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld2.c index abe1c87b6f2c3..f785622af056c 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld2.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld2.c @@ -4,8 +4,8 @@ // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include @@ -442,3 +442,40 @@ svmfloat8x2_t test_svld2_vnum_mf8(svbool_t pg, const mfloat8_t *base, int64_t vn { return SVE_ACLE_FUNC(svld2_vnum,_mf8,,)(pg, base, vnum); } + +// CHECK-LABEL: @test_svld2_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8bf16( [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret { , } [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z15test_svld2_bf16u10__SVBool_tPKu6__bf16( +// CPP-CHECK-NEXT: entry: 
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8bf16( [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret { , } [[TMP1]] +// +svbfloat16x2_t test_svld2_bf16(svbool_t pg, const bfloat16_t *base) MODE_ATTR +{ + return SVE_ACLE_FUNC(svld2,_bf16,,)(pg, base); +} + + +// CHECK-LABEL: @test_svld2_vnum_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8bf16( [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: ret { , } [[TMP2]] +// +// CPP-CHECK-LABEL: @_Z20test_svld2_vnum_bf16u10__SVBool_tPKu6__bf16l( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8bf16( [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: ret { , } [[TMP2]] +// +svbfloat16x2_t test_svld2_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum) MODE_ATTR +{ + return SVE_ACLE_FUNC(svld2_vnum,_bf16,,)(pg, base, vnum); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld3-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld3-bfloat.c deleted file mode 100644 index 41a367b737b0f..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld3-bfloat.c +++ /dev/null @@ -1,60 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | 
FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s - -#include - -#if defined __ARM_FEATURE_SME -#define MODE_ATTR __arm_streaming -#else -#define MODE_ATTR -#endif - - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. 
-#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 -#endif - -// CHECK-LABEL: @test_svld3_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8bf16( [[TMP0]], ptr [[BASE:%.*]]) -// CHECK-NEXT: ret { , , } [[TMP1]] -// -// CPP-CHECK-LABEL: @_Z15test_svld3_bf16u10__SVBool_tPKu6__bf16( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8bf16( [[TMP0]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret { , , } [[TMP1]] -// -svbfloat16x3_t test_svld3_bf16(svbool_t pg, const bfloat16_t *base) MODE_ATTR -{ - return SVE_ACLE_FUNC(svld3,_bf16,,)(pg, base); -} - -// CHECK-LABEL: @test_svld3_vnum_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8bf16( [[TMP0]], ptr [[TMP1]]) -// CHECK-NEXT: ret { , , } [[TMP2]] -// -// CPP-CHECK-LABEL: @_Z20test_svld3_vnum_bf16u10__SVBool_tPKu6__bf16l( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8bf16( [[TMP0]], ptr [[TMP1]]) -// CPP-CHECK-NEXT: ret { , , } [[TMP2]] -// -svbfloat16x3_t test_svld3_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum) MODE_ATTR -{ - return SVE_ACLE_FUNC(svld3_vnum,_bf16,,)(pg, base, vnum); -} diff --git 
a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld3.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld3.c index 5ff7ad9de483b..2197775030766 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld3.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld3.c @@ -4,8 +4,8 @@ // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include @@ -441,3 +441,39 @@ svmfloat8x3_t test_svld3_vnum_mf8(svbool_t pg, const mfloat8_t *base, int64_t vn { return SVE_ACLE_FUNC(svld3_vnum,_mf8,,)(pg, base, vnum); } + +// CHECK-LABEL: @test_svld3_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8bf16( [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret { , , } [[TMP1]] +// +// CPP-CHECK-LABEL: 
@_Z15test_svld3_bf16u10__SVBool_tPKu6__bf16( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8bf16( [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret { , , } [[TMP1]] +// +svbfloat16x3_t test_svld3_bf16(svbool_t pg, const bfloat16_t *base) MODE_ATTR +{ + return SVE_ACLE_FUNC(svld3,_bf16,,)(pg, base); +} + +// CHECK-LABEL: @test_svld3_vnum_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8bf16( [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: ret { , , } [[TMP2]] +// +// CPP-CHECK-LABEL: @_Z20test_svld3_vnum_bf16u10__SVBool_tPKu6__bf16l( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8bf16( [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: ret { , , } [[TMP2]] +// +svbfloat16x3_t test_svld3_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum) MODE_ATTR +{ + return SVE_ACLE_FUNC(svld3_vnum,_bf16,,)(pg, base, vnum); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld4-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld4-bfloat.c deleted file mode 100644 index a88e6f11a0510..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld4-bfloat.c +++ /dev/null @@ -1,59 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s - -#include - -#if defined __ARM_FEATURE_SME -#define MODE_ATTR __arm_streaming -#else -#define MODE_ATTR -#endif - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. 
-#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 -#endif - -// CHECK-LABEL: @test_svld4_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8bf16( [[TMP0]], ptr [[BASE:%.*]]) -// CHECK-NEXT: ret { , , , } [[TMP1]] -// -// CPP-CHECK-LABEL: @_Z15test_svld4_bf16u10__SVBool_tPKu6__bf16( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8bf16( [[TMP0]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret { , , , } [[TMP1]] -// -svbfloat16x4_t test_svld4_bf16(svbool_t pg, const bfloat16_t *base) MODE_ATTR -{ - return SVE_ACLE_FUNC(svld4,_bf16,,)(pg, base); -} - -// CHECK-LABEL: @test_svld4_vnum_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8bf16( [[TMP0]], ptr [[TMP1]]) -// CHECK-NEXT: ret { , , , } [[TMP2]] -// -// CPP-CHECK-LABEL: @_Z20test_svld4_vnum_bf16u10__SVBool_tPKu6__bf16l( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8bf16( [[TMP0]], ptr [[TMP1]]) -// CPP-CHECK-NEXT: ret { , , , } [[TMP2]] -// -svbfloat16x4_t test_svld4_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum) MODE_ATTR -{ - return SVE_ACLE_FUNC(svld4_vnum,_bf16,,)(pg, base, vnum); -} diff --git 
a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld4.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld4.c index 650fd5986be27..cd79dcee42bdb 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld4.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld4.c @@ -4,8 +4,8 @@ // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include @@ -441,3 +441,39 @@ svmfloat8x4_t test_svld4_vnum_mf8(svbool_t pg, const mfloat8_t *base, int64_t vn { return SVE_ACLE_FUNC(svld4_vnum,_mf8,,)(pg, base, vnum); } + +// CHECK-LABEL: @test_svld4_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8bf16( [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret { , , , } [[TMP1]] +// +// CPP-CHECK-LABEL: 
@_Z15test_svld4_bf16u10__SVBool_tPKu6__bf16( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8bf16( [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP1]] +// +svbfloat16x4_t test_svld4_bf16(svbool_t pg, const bfloat16_t *base) MODE_ATTR +{ + return SVE_ACLE_FUNC(svld4,_bf16,,)(pg, base); +} + +// CHECK-LABEL: @test_svld4_vnum_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8bf16( [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: ret { , , , } [[TMP2]] +// +// CPP-CHECK-LABEL: @_Z20test_svld4_vnum_bf16u10__SVBool_tPKu6__bf16l( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8bf16( [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP2]] +// +svbfloat16x4_t test_svld4_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum) MODE_ATTR +{ + return SVE_ACLE_FUNC(svld4_vnum,_bf16,,)(pg, base, vnum); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldff1-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldff1-bfloat.c deleted file mode 100644 index dce5839ebd759..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldff1-bfloat.c +++ /dev/null @@ -1,52 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK - -// REQUIRES: aarch64-registered-target - -#include - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. -#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 -#endif - -// CHECK-LABEL: @test_svldff1_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ldff1.nxv8bf16( [[TMP0]], ptr [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] -// -// CPP-CHECK-LABEL: @_Z17test_svldff1_bf16u10__SVBool_tPKu6__bf16( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ldff1.nxv8bf16( [[TMP0]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] -// -svbfloat16_t test_svldff1_bf16(svbool_t pg, const bfloat16_t *base) -{ - return SVE_ACLE_FUNC(svldff1,_bf16,,)(pg, base); -} - -// CHECK-LABEL: @test_svldff1_vnum_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , 
ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.ldff1.nxv8bf16( [[TMP0]], ptr [[TMP1]]) -// CHECK-NEXT: ret [[TMP2]] -// -// CPP-CHECK-LABEL: @_Z22test_svldff1_vnum_bf16u10__SVBool_tPKu6__bf16l( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.ldff1.nxv8bf16( [[TMP0]], ptr [[TMP1]]) -// CPP-CHECK-NEXT: ret [[TMP2]] -// -svbfloat16_t test_svldff1_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum) -{ - return SVE_ACLE_FUNC(svldff1_vnum,_bf16,,)(pg, base, vnum); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldff1.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldff1.c index ba4091660bfae..461c620b21690 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldff1.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldff1.c @@ -1117,3 +1117,39 @@ svfloat32_t test_svldff1_gather_u32base_index_f32(svbool_t pg, svuint32_t bases, svfloat64_t test_svldff1_gather_u64base_index_f64(svbool_t pg, svuint64_t bases, int64_t index) { return SVE_ACLE_FUNC(svldff1_gather, _u64base, _index_f64, )(pg, bases, index); } + +// CHECK-LABEL: @test_svldff1_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ldff1.nxv8bf16( [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svldff1_bf16u10__SVBool_tPKu6__bf16( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ldff1.nxv8bf16( [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svbfloat16_t 
test_svldff1_bf16(svbool_t pg, const bfloat16_t *base) +{ + return SVE_ACLE_FUNC(svldff1,_bf16,,)(pg, base); +} + +// CHECK-LABEL: @test_svldff1_vnum_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.ldff1.nxv8bf16( [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: ret [[TMP2]] +// +// CPP-CHECK-LABEL: @_Z22test_svldff1_vnum_bf16u10__SVBool_tPKu6__bf16l( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.ldff1.nxv8bf16( [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: ret [[TMP2]] +// +svbfloat16_t test_svldff1_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svldff1_vnum,_bf16,,)(pg, base, vnum); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldnf1-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldnf1-bfloat.c deleted file mode 100644 index a8ebc5d63d894..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldnf1-bfloat.c +++ /dev/null @@ -1,51 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror 
-Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// REQUIRES: aarch64-registered-target - -#include - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. -#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 -#endif - -// CHECK-LABEL: @test_svldnf1_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ldnf1.nxv8bf16( [[TMP0]], ptr [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] -// -// CPP-CHECK-LABEL: @_Z17test_svldnf1_bf16u10__SVBool_tPKu6__bf16( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ldnf1.nxv8bf16( [[TMP0]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] -// -svbfloat16_t test_svldnf1_bf16(svbool_t pg, const bfloat16_t *base) -{ - return SVE_ACLE_FUNC(svldnf1,_bf16,,)(pg, base); -} - -// CHECK-LABEL: @test_svldnf1_vnum_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.ldnf1.nxv8bf16( [[TMP0]], ptr [[TMP1]]) -// CHECK-NEXT: ret [[TMP2]] -// -// CPP-CHECK-LABEL: @_Z22test_svldnf1_vnum_bf16u10__SVBool_tPKu6__bf16l( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , 
ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.ldnf1.nxv8bf16( [[TMP0]], ptr [[TMP1]]) -// CPP-CHECK-NEXT: ret [[TMP2]] -// -svbfloat16_t test_svldnf1_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum) -{ - return SVE_ACLE_FUNC(svldnf1_vnum,_bf16,,)(pg, base, vnum); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldnf1.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldnf1.c index 8e738d839cd85..efdbe356f7c66 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldnf1.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldnf1.c @@ -433,3 +433,39 @@ svfloat64_t test_svldnf1_vnum_f64(svbool_t pg, const float64_t *base, int64_t vn { return SVE_ACLE_FUNC(svldnf1_vnum,_f64,,)(pg, base, vnum); } + +// CHECK-LABEL: @test_svldnf1_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ldnf1.nxv8bf16( [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svldnf1_bf16u10__SVBool_tPKu6__bf16( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ldnf1.nxv8bf16( [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svbfloat16_t test_svldnf1_bf16(svbool_t pg, const bfloat16_t *base) +{ + return SVE_ACLE_FUNC(svldnf1,_bf16,,)(pg, base); +} + +// CHECK-LABEL: @test_svldnf1_vnum_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.ldnf1.nxv8bf16( [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: ret [[TMP2]] +// +// CPP-CHECK-LABEL: 
@_Z22test_svldnf1_vnum_bf16u10__SVBool_tPKu6__bf16l( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.ldnf1.nxv8bf16( [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: ret [[TMP2]] +// +svbfloat16_t test_svldnf1_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svldnf1_vnum,_bf16,,)(pg, base, vnum); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldnt1-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldnt1-bfloat.c deleted file mode 100644 index 82d5bff2516d8..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldnt1-bfloat.c +++ /dev/null @@ -1,60 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s 
-// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s - -#include - -#if defined __ARM_FEATURE_SME -#define MODE_ATTR __arm_streaming -#else -#define MODE_ATTR -#endif - - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. -#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 -#endif - -// CHECK-LABEL: @test_svldnt1_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ldnt1.nxv8bf16( [[TMP0]], ptr [[BASE:%.*]]) -// CHECK-NEXT: ret [[TMP1]] -// -// CPP-CHECK-LABEL: @_Z17test_svldnt1_bf16u10__SVBool_tPKu6__bf16( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ldnt1.nxv8bf16( [[TMP0]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] -// -svbfloat16_t test_svldnt1_bf16(svbool_t pg, const bfloat16_t *base) MODE_ATTR -{ - return SVE_ACLE_FUNC(svldnt1,_bf16,,)(pg, base); -} - -// CHECK-LABEL: @test_svldnt1_vnum_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.ldnt1.nxv8bf16( [[TMP0]], ptr [[TMP1]]) -// CHECK-NEXT: ret [[TMP2]] -// -// CPP-CHECK-LABEL: @_Z22test_svldnt1_vnum_bf16u10__SVBool_tPKu6__bf16l( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.aarch64.sve.ldnt1.nxv8bf16( [[TMP0]], ptr [[TMP1]]) -// CPP-CHECK-NEXT: ret [[TMP2]] -// -svbfloat16_t test_svldnt1_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum) MODE_ATTR -{ - return SVE_ACLE_FUNC(svldnt1_vnum,_bf16,,)(pg, base, vnum); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldnt1.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldnt1.c index b96bf0cb23d12..19919bf1d4af3 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldnt1.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldnt1.c @@ -442,3 +442,39 @@ svmfloat8_t test_svldnt1_vnum_mf8(svbool_t pg, const mfloat8_t *base, int64_t vn { return SVE_ACLE_FUNC(svldnt1_vnum,_mf8,,)(pg, base, vnum); } + +// CHECK-LABEL: @test_svldnt1_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ldnt1.nxv8bf16( [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svldnt1_bf16u10__SVBool_tPKu6__bf16( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ldnt1.nxv8bf16( [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svbfloat16_t test_svldnt1_bf16(svbool_t pg, const bfloat16_t *base) MODE_ATTR +{ + return SVE_ACLE_FUNC(svldnt1,_bf16,,)(pg, base); +} + +// CHECK-LABEL: @test_svldnt1_vnum_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.ldnt1.nxv8bf16( [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: ret [[TMP2]] +// +// CPP-CHECK-LABEL: @_Z22test_svldnt1_vnum_bf16u10__SVBool_tPKu6__bf16l( +// CPP-CHECK-NEXT: entry: +// 
CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.ldnt1.nxv8bf16( [[TMP0]], ptr [[TMP1]]) +// CPP-CHECK-NEXT: ret [[TMP2]] +// +svbfloat16_t test_svldnt1_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum) MODE_ATTR +{ + return SVE_ACLE_FUNC(svldnt1_vnum,_bf16,,)(pg, base, vnum); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_len-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_len-bfloat.c deleted file mode 100644 index 049207514bc1d..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_len-bfloat.c +++ /dev/null @@ -1,41 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve 
-target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s - -#include - -#if defined __ARM_FEATURE_SME -#define MODE_ATTR __arm_streaming -#else -#define MODE_ATTR -#endif - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. -#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 -#endif - -// CHECK-LABEL: @test_svlen_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3 -// CHECK-NEXT: ret i64 [[TMP1]] -// -// CPP-CHECK-LABEL: @_Z15test_svlen_bf16u14__SVBfloat16_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3 -// CPP-CHECK-NEXT: ret i64 [[TMP1]] -// -uint64_t test_svlen_bf16(svbfloat16_t op) MODE_ATTR -{ - // expected-warning@+1 {{implicit declaration of function 'svlen_bf16'}} - return SVE_ACLE_FUNC(svlen,_bf16,,)(op); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_len.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_len.c index cca939296455e..5954e730864c4 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_len.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_len.c @@ -208,3 +208,21 @@ uint64_t test_svlen_f64(svfloat64_t op) MODE_ATTR { return SVE_ACLE_FUNC(svlen,_f64,,)(op); } + +// CHECK-LABEL: @test_svlen_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3 +// CHECK-NEXT: ret i64 [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z15test_svlen_bf16u14__SVBfloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call 
i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3 +// CPP-CHECK-NEXT: ret i64 [[TMP1]] +// +uint64_t test_svlen_bf16(svbfloat16_t op) MODE_ATTR +{ + // expected-warning@+1 {{implicit declaration of function 'svlen_bf16'}} + return SVE_ACLE_FUNC(svlen,_bf16,,)(op); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_reinterpret-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_reinterpret-bfloat.c deleted file mode 100644 index 02704229292b2..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_reinterpret-bfloat.c +++ /dev/null @@ -1,2562 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -DTUPLE=x2 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=TUPLE2 -// RUN: %clang_cc1 -fclang-abi-compat=latest -DTUPLE=x3 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=TUPLE3 -// RUN: %clang_cc1 -fclang-abi-compat=latest -DTUPLE=x4 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=TUPLE4 -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -DTUPLE=x2 -triple aarch64 -target-feature +sme -disable-O0-optnone 
-Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=TUPLE2 -// RUN: %clang_cc1 -fclang-abi-compat=latest -DTUPLE=x3 -triple aarch64 -target-feature +sme -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=TUPLE3 -// RUN: %clang_cc1 -fclang-abi-compat=latest -DTUPLE=x4 -triple aarch64 -target-feature +sme -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=TUPLE4 -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -DTUPLE=x2 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-TUPLE2 -// RUN: %clang_cc1 -fclang-abi-compat=latest -DTUPLE=x3 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-TUPLE3 -// RUN: %clang_cc1 -fclang-abi-compat=latest -DTUPLE=x4 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-TUPLE4 -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -DTUPLE=x2 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S 
-passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=TUPLE2 -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -DTUPLE=x3 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=TUPLE3 -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -DTUPLE=x4 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=TUPLE4 -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -DTUPLE=x2 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-TUPLE2 -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -DTUPLE=x3 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-TUPLE3 -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -DTUPLE=x4 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-TUPLE4 - -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s - 
-#include - -#if defined __ARM_FEATURE_SME -#define MODE_ATTR __arm_streaming -#else -#define MODE_ATTR -#endif - -#ifdef TUPLE -#define TYPE_1(base,tuple) base ## tuple ## _t -#define TYPE_0(base,tuple) TYPE_1(base,tuple) -#define TYPE(base) TYPE_0(base,TUPLE) -#else -#define TYPE(base) base ## _t -#endif - -#ifdef SVE_OVERLOADED_FORMS -#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1 -#else -#ifdef TUPLE -#define SVE_ACLE_FUNC_1(A1,A2,T) A1##A2##_##T -#define SVE_ACLE_FUNC_0(A1,A2,T) SVE_ACLE_FUNC_1(A1,A2,T) -#define SVE_ACLE_FUNC(A1,A2) SVE_ACLE_FUNC_0(A1,A2,TUPLE) -#else -#define SVE_ACLE_FUNC(A1,A2) A1##A2 -#endif -#endif - -// CHECK-LABEL: @test_svreinterpret_s8_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CHECK-NEXT: ret [[TMP0]] -// -// TUPLE2-LABEL: @test_svreinterpret_s8_bf16( -// TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to -// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 -// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 -// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to -// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 -// TUPLE2-NEXT: ret { , } [[TMP7]] -// -// TUPLE3-LABEL: @test_svreinterpret_s8_bf16( -// TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to -// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 -// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 
1 -// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to -// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 -// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 -// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to -// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 -// TUPLE3-NEXT: ret { , , } [[TMP11]] -// -// TUPLE4-LABEL: @test_svreinterpret_s8_bf16( -// TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 -// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to -// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 -// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to -// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 -// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to -// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 -// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to -// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 -// TUPLE4-NEXT: ret { , , , } [[TMP15]] -// -// CPP-CHECK-LABEL: @_Z26test_svreinterpret_s8_bf16u14__SVBfloat16_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-CHECK-NEXT: ret [[TMP0]] -// -// CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_s8_bf1614svbfloat16x2_t( -// CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 -// 
CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to -// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 -// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 -// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to -// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 -// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] -// -// CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_s8_bf1614svbfloat16x3_t( -// CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to -// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 -// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 -// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to -// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 -// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 -// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to -// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 -// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] -// -// CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_s8_bf1614svbfloat16x4_t( -// CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 
-// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to -// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 -// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to -// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 -// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to -// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 -// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to -// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 -// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] -// -TYPE(svint8) test_svreinterpret_s8_bf16(TYPE(svbfloat16) op) MODE_ATTR { - return SVE_ACLE_FUNC(svreinterpret_s8, _bf16)(op); -} - -// CHECK-LABEL: @test_svreinterpret_s16_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CHECK-NEXT: ret [[TMP0]] -// -// TUPLE2-LABEL: @test_svreinterpret_s16_bf16( -// TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to -// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 -// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 -// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to -// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 -// TUPLE2-NEXT: ret { , } [[TMP7]] -// -// TUPLE3-LABEL: @test_svreinterpret_s16_bf16( -// TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE3-NEXT: 
[[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to -// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 -// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 -// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to -// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 -// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 -// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to -// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 -// TUPLE3-NEXT: ret { , , } [[TMP11]] -// -// TUPLE4-LABEL: @test_svreinterpret_s16_bf16( -// TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 -// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to -// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 -// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to -// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 -// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to -// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 -// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to -// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 -// TUPLE4-NEXT: ret { , , , 
} [[TMP15]] -// -// CPP-CHECK-LABEL: @_Z27test_svreinterpret_s16_bf16u14__SVBfloat16_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-CHECK-NEXT: ret [[TMP0]] -// -// CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_s16_bf1614svbfloat16x2_t( -// CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to -// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 -// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 -// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to -// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 -// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] -// -// CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_s16_bf1614svbfloat16x3_t( -// CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to -// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 -// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 -// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to -// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 -// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 -// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to -// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 -// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] -// -// CPP-TUPLE4-LABEL: 
@_Z27test_svreinterpret_s16_bf1614svbfloat16x4_t( -// CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 -// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to -// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 -// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to -// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 -// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to -// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 -// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to -// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 -// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] -// -TYPE(svint16) test_svreinterpret_s16_bf16(TYPE(svbfloat16) op) MODE_ATTR { - return SVE_ACLE_FUNC(svreinterpret_s16, _bf16)(op); -} - -// CHECK-LABEL: @test_svreinterpret_s32_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CHECK-NEXT: ret [[TMP0]] -// -// TUPLE2-LABEL: @test_svreinterpret_s32_bf16( -// TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to -// TUPLE2-NEXT: [[TMP4:%.*]] = 
insertvalue { , } poison, [[TMP3]], 0 -// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 -// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to -// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 -// TUPLE2-NEXT: ret { , } [[TMP7]] -// -// TUPLE3-LABEL: @test_svreinterpret_s32_bf16( -// TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to -// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 -// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 -// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to -// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 -// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 -// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to -// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 -// TUPLE3-NEXT: ret { , , } [[TMP11]] -// -// TUPLE4-LABEL: @test_svreinterpret_s32_bf16( -// TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 -// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to -// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 -// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to -// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], 
[[TMP8]], 1 -// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to -// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 -// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to -// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 -// TUPLE4-NEXT: ret { , , , } [[TMP15]] -// -// CPP-CHECK-LABEL: @_Z27test_svreinterpret_s32_bf16u14__SVBfloat16_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-CHECK-NEXT: ret [[TMP0]] -// -// CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_s32_bf1614svbfloat16x2_t( -// CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to -// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 -// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 -// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to -// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 -// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] -// -// CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_s32_bf1614svbfloat16x3_t( -// CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to -// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 -// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 -// 
CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to -// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 -// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 -// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to -// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 -// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] -// -// CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_s32_bf1614svbfloat16x4_t( -// CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 -// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to -// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 -// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to -// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 -// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to -// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 -// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to -// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 -// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] -// -TYPE(svint32) test_svreinterpret_s32_bf16(TYPE(svbfloat16) op) MODE_ATTR { - return SVE_ACLE_FUNC(svreinterpret_s32, _bf16)(op); -} -// CHECK-LABEL: @test_svreinterpret_s64_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CHECK-NEXT: 
ret [[TMP0]] -// -// TUPLE2-LABEL: @test_svreinterpret_s64_bf16( -// TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to -// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 -// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 -// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to -// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 -// TUPLE2-NEXT: ret { , } [[TMP7]] -// -// TUPLE3-LABEL: @test_svreinterpret_s64_bf16( -// TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to -// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 -// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 -// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to -// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 -// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 -// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to -// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 -// TUPLE3-NEXT: ret { , , } [[TMP11]] -// -// TUPLE4-LABEL: @test_svreinterpret_s64_bf16( -// TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], 
[[OP_COERCE3:%.*]], 3 -// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to -// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 -// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to -// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 -// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to -// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 -// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to -// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 -// TUPLE4-NEXT: ret { , , , } [[TMP15]] -// -// CPP-CHECK-LABEL: @_Z27test_svreinterpret_s64_bf16u14__SVBfloat16_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-CHECK-NEXT: ret [[TMP0]] -// -// CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_s64_bf1614svbfloat16x2_t( -// CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to -// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 -// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 -// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to -// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 -// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] -// -// CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_s64_bf1614svbfloat16x3_t( -// CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , 
} [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to -// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 -// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 -// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to -// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 -// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 -// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to -// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 -// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] -// -// CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_s64_bf1614svbfloat16x4_t( -// CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 -// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to -// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 -// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to -// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 -// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to -// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 -// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to -// 
CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 -// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] -// -TYPE(svint64) test_svreinterpret_s64_bf16(TYPE(svbfloat16) op) MODE_ATTR { - return SVE_ACLE_FUNC(svreinterpret_s64, _bf16)(op); -} - -// CHECK-LABEL: @test_svreinterpret_u8_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CHECK-NEXT: ret [[TMP0]] -// -// TUPLE2-LABEL: @test_svreinterpret_u8_bf16( -// TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to -// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 -// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 -// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to -// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 -// TUPLE2-NEXT: ret { , } [[TMP7]] -// -// TUPLE3-LABEL: @test_svreinterpret_u8_bf16( -// TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to -// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 -// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 -// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to -// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 -// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 -// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to -// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 -// TUPLE3-NEXT: ret { , , } [[TMP11]] 
-// -// TUPLE4-LABEL: @test_svreinterpret_u8_bf16( -// TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 -// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to -// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 -// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to -// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 -// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to -// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 -// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to -// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 -// TUPLE4-NEXT: ret { , , , } [[TMP15]] -// -// CPP-CHECK-LABEL: @_Z26test_svreinterpret_u8_bf16u14__SVBfloat16_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-CHECK-NEXT: ret [[TMP0]] -// -// CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_u8_bf1614svbfloat16x2_t( -// CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to -// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 -// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 -// CPP-TUPLE2-NEXT: 
[[TMP6:%.*]] = bitcast [[TMP5]] to -// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 -// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] -// -// CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_u8_bf1614svbfloat16x3_t( -// CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to -// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 -// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 -// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to -// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 -// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 -// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to -// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 -// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] -// -// CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_u8_bf1614svbfloat16x4_t( -// CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 -// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to -// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 -// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to -// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = 
insertvalue { , , , } [[TMP6]], [[TMP8]], 1 -// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to -// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 -// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to -// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 -// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] -// -TYPE(svuint8) test_svreinterpret_u8_bf16(TYPE(svbfloat16) op) MODE_ATTR { - return SVE_ACLE_FUNC(svreinterpret_u8, _bf16)(op); -} - -// CHECK-LABEL: @test_svreinterpret_u16_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CHECK-NEXT: ret [[TMP0]] -// -// TUPLE2-LABEL: @test_svreinterpret_u16_bf16( -// TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to -// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 -// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 -// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to -// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 -// TUPLE2-NEXT: ret { , } [[TMP7]] -// -// TUPLE3-LABEL: @test_svreinterpret_u16_bf16( -// TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to -// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 -// TUPLE3-NEXT: [[TMP6:%.*]] = 
extractvalue { , , } [[TMP2]], 1 -// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to -// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 -// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 -// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to -// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 -// TUPLE3-NEXT: ret { , , } [[TMP11]] -// -// TUPLE4-LABEL: @test_svreinterpret_u16_bf16( -// TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 -// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to -// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 -// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to -// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 -// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to -// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 -// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to -// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 -// TUPLE4-NEXT: ret { , , , } [[TMP15]] -// -// CPP-CHECK-LABEL: @_Z27test_svreinterpret_u16_bf16u14__SVBfloat16_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-CHECK-NEXT: ret [[TMP0]] -// -// CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_u16_bf1614svbfloat16x2_t( -// CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, 
[[OP_COERCE0:%.*]], 0 -// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to -// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 -// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 -// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to -// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 -// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] -// -// CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_u16_bf1614svbfloat16x3_t( -// CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to -// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 -// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 -// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to -// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 -// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 -// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to -// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 -// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] -// -// CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_u16_bf1614svbfloat16x4_t( -// CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } 
[[TMP2]], [[OP_COERCE3:%.*]], 3 -// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to -// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 -// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to -// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 -// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to -// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 -// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to -// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 -// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] -// -TYPE(svuint16) test_svreinterpret_u16_bf16(TYPE(svbfloat16) op) MODE_ATTR { - return SVE_ACLE_FUNC(svreinterpret_u16, _bf16)(op); -} - -// CHECK-LABEL: @test_svreinterpret_u32_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CHECK-NEXT: ret [[TMP0]] -// -// TUPLE2-LABEL: @test_svreinterpret_u32_bf16( -// TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to -// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 -// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 -// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to -// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 -// TUPLE2-NEXT: ret { , } [[TMP7]] -// -// TUPLE3-LABEL: @test_svreinterpret_u32_bf16( -// TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, 
[[OP_COERCE0:%.*]], 0 -// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to -// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 -// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 -// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to -// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 -// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 -// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to -// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 -// TUPLE3-NEXT: ret { , , } [[TMP11]] -// -// TUPLE4-LABEL: @test_svreinterpret_u32_bf16( -// TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 -// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to -// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 -// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to -// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 -// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to -// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 -// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to -// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], 
[[TMP14]], 3 -// TUPLE4-NEXT: ret { , , , } [[TMP15]] -// -// CPP-CHECK-LABEL: @_Z27test_svreinterpret_u32_bf16u14__SVBfloat16_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-CHECK-NEXT: ret [[TMP0]] -// -// CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_u32_bf1614svbfloat16x2_t( -// CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to -// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 -// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 -// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to -// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 -// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] -// -// CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_u32_bf1614svbfloat16x3_t( -// CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to -// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 -// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 -// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to -// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 -// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 -// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to -// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 -// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] -// -// 
CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_u32_bf1614svbfloat16x4_t( -// CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 -// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to -// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 -// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to -// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 -// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to -// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 -// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to -// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 -// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] -// -TYPE(svuint32) test_svreinterpret_u32_bf16(TYPE(svbfloat16) op) MODE_ATTR { - return SVE_ACLE_FUNC(svreinterpret_u32, _bf16)(op); -} - -// CHECK-LABEL: @test_svreinterpret_u64_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CHECK-NEXT: ret [[TMP0]] -// -// TUPLE2-LABEL: @test_svreinterpret_u64_bf16( -// TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to -// TUPLE2-NEXT: 
[[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 -// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 -// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to -// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 -// TUPLE2-NEXT: ret { , } [[TMP7]] -// -// TUPLE3-LABEL: @test_svreinterpret_u64_bf16( -// TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to -// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 -// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 -// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to -// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 -// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 -// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to -// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 -// TUPLE3-NEXT: ret { , , } [[TMP11]] -// -// TUPLE4-LABEL: @test_svreinterpret_u64_bf16( -// TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 -// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to -// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 -// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to -// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } 
[[TMP6]], [[TMP8]], 1 -// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to -// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 -// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to -// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 -// TUPLE4-NEXT: ret { , , , } [[TMP15]] -// -// CPP-CHECK-LABEL: @_Z27test_svreinterpret_u64_bf16u14__SVBfloat16_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-CHECK-NEXT: ret [[TMP0]] -// -// CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_u64_bf1614svbfloat16x2_t( -// CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to -// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 -// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 -// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to -// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 -// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] -// -// CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_u64_bf1614svbfloat16x3_t( -// CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to -// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 -// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } 
[[TMP2]], 1 -// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to -// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 -// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 -// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to -// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 -// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] -// -// CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_u64_bf1614svbfloat16x4_t( -// CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 -// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to -// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 -// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to -// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 -// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to -// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 -// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to -// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 -// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] -// -TYPE(svuint64) test_svreinterpret_u64_bf16(TYPE(svbfloat16) op) MODE_ATTR { - return SVE_ACLE_FUNC(svreinterpret_u64, _bf16)(op); -} - -// CHECK-LABEL: @test_svreinterpret_bf16_s8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] 
to -// CHECK-NEXT: ret [[TMP0]] -// -// TUPLE2-LABEL: @test_svreinterpret_bf16_s8( -// TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to -// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 -// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 -// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to -// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 -// TUPLE2-NEXT: ret { , } [[TMP7]] -// -// TUPLE3-LABEL: @test_svreinterpret_bf16_s8( -// TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to -// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 -// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 -// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to -// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 -// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 -// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to -// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 -// TUPLE3-NEXT: ret { , , } [[TMP11]] -// -// TUPLE4-LABEL: @test_svreinterpret_bf16_s8( -// TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } 
[[TMP2]], [[OP_COERCE3:%.*]], 3 -// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to -// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 -// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to -// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 -// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to -// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 -// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to -// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 -// TUPLE4-NEXT: ret { , , , } [[TMP15]] -// -// CPP-CHECK-LABEL: @_Z26test_svreinterpret_bf16_s8u10__SVInt8_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-CHECK-NEXT: ret [[TMP0]] -// -// CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_bf16_s810svint8x2_t( -// CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to -// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 -// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 -// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to -// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 -// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] -// -// CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_bf16_s810svint8x3_t( -// CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } 
[[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to -// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 -// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 -// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to -// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 -// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 -// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to -// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 -// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] -// -// CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_bf16_s810svint8x4_t( -// CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 -// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to -// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 -// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to -// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 -// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to -// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 -// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to -// 
CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 -// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] -// -TYPE(svbfloat16) test_svreinterpret_bf16_s8(TYPE(svint8) op) MODE_ATTR { - return SVE_ACLE_FUNC(svreinterpret_bf16, _s8)(op); -} - -// CHECK-LABEL: @test_svreinterpret_bf16_s16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CHECK-NEXT: ret [[TMP0]] -// -// TUPLE2-LABEL: @test_svreinterpret_bf16_s16( -// TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to -// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 -// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 -// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to -// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 -// TUPLE2-NEXT: ret { , } [[TMP7]] -// -// TUPLE3-LABEL: @test_svreinterpret_bf16_s16( -// TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to -// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 -// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 -// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to -// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 -// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 -// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to -// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 -// TUPLE3-NEXT: ret { , , } [[TMP11]] 
-// -// TUPLE4-LABEL: @test_svreinterpret_bf16_s16( -// TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 -// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to -// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 -// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to -// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 -// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to -// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 -// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to -// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 -// TUPLE4-NEXT: ret { , , , } [[TMP15]] -// -// CPP-CHECK-LABEL: @_Z27test_svreinterpret_bf16_s16u11__SVInt16_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-CHECK-NEXT: ret [[TMP0]] -// -// CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_bf16_s1611svint16x2_t( -// CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to -// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 -// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 -// CPP-TUPLE2-NEXT: 
[[TMP6:%.*]] = bitcast [[TMP5]] to -// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 -// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] -// -// CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_bf16_s1611svint16x3_t( -// CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to -// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 -// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 -// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to -// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 -// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 -// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to -// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 -// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] -// -// CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_bf16_s1611svint16x4_t( -// CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 -// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to -// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 -// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to -// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = 
insertvalue { , , , } [[TMP6]], [[TMP8]], 1 -// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to -// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 -// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to -// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 -// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] -// -TYPE(svbfloat16) test_svreinterpret_bf16_s16(TYPE(svint16) op) MODE_ATTR { - return SVE_ACLE_FUNC(svreinterpret_bf16, _s16)(op); -} - -// CHECK-LABEL: @test_svreinterpret_bf16_s32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CHECK-NEXT: ret [[TMP0]] -// -// TUPLE2-LABEL: @test_svreinterpret_bf16_s32( -// TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to -// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 -// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 -// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to -// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 -// TUPLE2-NEXT: ret { , } [[TMP7]] -// -// TUPLE3-LABEL: @test_svreinterpret_bf16_s32( -// TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to -// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 -// TUPLE3-NEXT: [[TMP6:%.*]] = 
extractvalue { , , } [[TMP2]], 1 -// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to -// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 -// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 -// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to -// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 -// TUPLE3-NEXT: ret { , , } [[TMP11]] -// -// TUPLE4-LABEL: @test_svreinterpret_bf16_s32( -// TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 -// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to -// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 -// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to -// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 -// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to -// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 -// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to -// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 -// TUPLE4-NEXT: ret { , , , } [[TMP15]] -// -// CPP-CHECK-LABEL: @_Z27test_svreinterpret_bf16_s32u11__SVInt32_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-CHECK-NEXT: ret [[TMP0]] -// -// CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_bf16_s3211svint32x2_t( -// CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, 
[[OP_COERCE0:%.*]], 0 -// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to -// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 -// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 -// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to -// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 -// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] -// -// CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_bf16_s3211svint32x3_t( -// CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to -// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 -// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 -// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to -// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 -// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 -// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to -// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 -// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] -// -// CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_bf16_s3211svint32x4_t( -// CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], 
[[OP_COERCE3:%.*]], 3 -// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to -// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 -// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to -// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 -// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to -// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 -// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to -// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 -// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] -// -TYPE(svbfloat16) test_svreinterpret_bf16_s32(TYPE(svint32) op) MODE_ATTR { - return SVE_ACLE_FUNC(svreinterpret_bf16, _s32)(op); -} - -// CHECK-LABEL: @test_svreinterpret_bf16_s64( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CHECK-NEXT: ret [[TMP0]] -// -// TUPLE2-LABEL: @test_svreinterpret_bf16_s64( -// TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to -// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 -// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 -// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to -// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 -// TUPLE2-NEXT: ret { , } [[TMP7]] -// -// TUPLE3-LABEL: @test_svreinterpret_bf16_s64( -// TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 
-// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to -// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 -// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 -// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to -// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 -// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 -// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to -// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 -// TUPLE3-NEXT: ret { , , } [[TMP11]] -// -// TUPLE4-LABEL: @test_svreinterpret_bf16_s64( -// TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 -// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to -// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 -// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to -// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 -// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to -// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 -// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to -// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 -// 
TUPLE4-NEXT: ret { , , , } [[TMP15]] -// -// CPP-CHECK-LABEL: @_Z27test_svreinterpret_bf16_s64u11__SVInt64_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-CHECK-NEXT: ret [[TMP0]] -// -// CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_bf16_s6411svint64x2_t( -// CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to -// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 -// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 -// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to -// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 -// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] -// -// CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_bf16_s6411svint64x3_t( -// CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to -// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 -// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 -// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to -// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 -// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 -// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to -// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 -// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] -// -// CPP-TUPLE4-LABEL: 
@_Z27test_svreinterpret_bf16_s6411svint64x4_t( -// CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 -// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to -// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 -// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to -// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 -// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to -// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 -// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to -// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 -// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] -// -TYPE(svbfloat16) test_svreinterpret_bf16_s64(TYPE(svint64) op) MODE_ATTR { - return SVE_ACLE_FUNC(svreinterpret_bf16, _s64)(op); -} - -// CHECK-LABEL: @test_svreinterpret_bf16_u8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CHECK-NEXT: ret [[TMP0]] -// -// TUPLE2-LABEL: @test_svreinterpret_bf16_u8( -// TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to -// TUPLE2-NEXT: [[TMP4:%.*]] = 
insertvalue { , } poison, [[TMP3]], 0 -// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 -// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to -// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 -// TUPLE2-NEXT: ret { , } [[TMP7]] -// -// TUPLE3-LABEL: @test_svreinterpret_bf16_u8( -// TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to -// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 -// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 -// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to -// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 -// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 -// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to -// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 -// TUPLE3-NEXT: ret { , , } [[TMP11]] -// -// TUPLE4-LABEL: @test_svreinterpret_bf16_u8( -// TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 -// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to -// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 -// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to -// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], 
[[TMP8]], 1 -// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to -// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 -// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to -// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 -// TUPLE4-NEXT: ret { , , , } [[TMP15]] -// -// CPP-CHECK-LABEL: @_Z26test_svreinterpret_bf16_u8u11__SVUint8_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-CHECK-NEXT: ret [[TMP0]] -// -// CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_bf16_u811svuint8x2_t( -// CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to -// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 -// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 -// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to -// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 -// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] -// -// CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_bf16_u811svuint8x3_t( -// CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to -// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 -// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 -// 
CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to -// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 -// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 -// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to -// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 -// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] -// -// CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_bf16_u811svuint8x4_t( -// CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 -// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to -// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 -// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to -// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 -// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to -// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 -// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to -// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 -// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] -// -TYPE(svbfloat16) test_svreinterpret_bf16_u8(TYPE(svuint8) op) MODE_ATTR { - return SVE_ACLE_FUNC(svreinterpret_bf16, _u8)(op); -} - -// CHECK-LABEL: @test_svreinterpret_bf16_u16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CHECK-NEXT: ret 
[[TMP0]] -// -// TUPLE2-LABEL: @test_svreinterpret_bf16_u16( -// TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to -// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 -// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 -// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to -// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 -// TUPLE2-NEXT: ret { , } [[TMP7]] -// -// TUPLE3-LABEL: @test_svreinterpret_bf16_u16( -// TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to -// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 -// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 -// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to -// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 -// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 -// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to -// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 -// TUPLE3-NEXT: ret { , , } [[TMP11]] -// -// TUPLE4-LABEL: @test_svreinterpret_bf16_u16( -// TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], 
[[OP_COERCE3:%.*]], 3 -// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to -// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 -// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to -// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 -// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to -// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 -// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to -// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 -// TUPLE4-NEXT: ret { , , , } [[TMP15]] -// -// CPP-CHECK-LABEL: @_Z27test_svreinterpret_bf16_u16u12__SVUint16_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-CHECK-NEXT: ret [[TMP0]] -// -// CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_bf16_u1612svuint16x2_t( -// CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to -// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 -// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 -// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to -// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 -// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] -// -// CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_bf16_u1612svuint16x3_t( -// CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } 
[[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to -// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 -// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 -// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to -// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 -// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 -// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to -// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 -// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] -// -// CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_bf16_u1612svuint16x4_t( -// CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 -// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to -// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 -// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to -// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 -// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to -// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 -// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to -// 
CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 -// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] -// -TYPE(svbfloat16) test_svreinterpret_bf16_u16(TYPE(svuint16) op) MODE_ATTR { - return SVE_ACLE_FUNC(svreinterpret_bf16, _u16)(op); -} - -// CHECK-LABEL: @test_svreinterpret_bf16_u32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CHECK-NEXT: ret [[TMP0]] -// -// TUPLE2-LABEL: @test_svreinterpret_bf16_u32( -// TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to -// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 -// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 -// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to -// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 -// TUPLE2-NEXT: ret { , } [[TMP7]] -// -// TUPLE3-LABEL: @test_svreinterpret_bf16_u32( -// TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to -// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 -// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 -// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to -// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 -// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 -// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to -// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 -// TUPLE3-NEXT: ret { , , } 
[[TMP11]] -// -// TUPLE4-LABEL: @test_svreinterpret_bf16_u32( -// TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 -// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to -// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 -// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to -// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 -// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to -// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 -// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to -// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 -// TUPLE4-NEXT: ret { , , , } [[TMP15]] -// -// CPP-CHECK-LABEL: @_Z27test_svreinterpret_bf16_u32u12__SVUint32_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-CHECK-NEXT: ret [[TMP0]] -// -// CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_bf16_u3212svuint32x2_t( -// CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to -// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 -// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 -// 
CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to -// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 -// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] -// -// CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_bf16_u3212svuint32x3_t( -// CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to -// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 -// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 -// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to -// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 -// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 -// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to -// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 -// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] -// -// CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_bf16_u3212svuint32x4_t( -// CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 -// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to -// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 -// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to -// CPP-TUPLE4-NEXT: 
[[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 -// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to -// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 -// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to -// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 -// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] -// -TYPE(svbfloat16) test_svreinterpret_bf16_u32(TYPE(svuint32) op) MODE_ATTR { - return SVE_ACLE_FUNC(svreinterpret_bf16, _u32)(op); -} - -// CHECK-LABEL: @test_svreinterpret_bf16_u64( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CHECK-NEXT: ret [[TMP0]] -// -// TUPLE2-LABEL: @test_svreinterpret_bf16_u64( -// TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to -// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 -// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 -// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to -// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 -// TUPLE2-NEXT: ret { , } [[TMP7]] -// -// TUPLE3-LABEL: @test_svreinterpret_bf16_u64( -// TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to -// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 -// TUPLE3-NEXT: 
[[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 -// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to -// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 -// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 -// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to -// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 -// TUPLE3-NEXT: ret { , , } [[TMP11]] -// -// TUPLE4-LABEL: @test_svreinterpret_bf16_u64( -// TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 -// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to -// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 -// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to -// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 -// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to -// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 -// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to -// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 -// TUPLE4-NEXT: ret { , , , } [[TMP15]] -// -// CPP-CHECK-LABEL: @_Z27test_svreinterpret_bf16_u64u12__SVUint64_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-CHECK-NEXT: ret [[TMP0]] -// -// CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_bf16_u6412svuint64x2_t( -// CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , 
} poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to -// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 -// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 -// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to -// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 -// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] -// -// CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_bf16_u6412svuint64x3_t( -// CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to -// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 -// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 -// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to -// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 -// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 -// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to -// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 -// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] -// -// CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_bf16_u6412svuint64x4_t( -// CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } 
[[TMP2]], [[OP_COERCE3:%.*]], 3 -// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to -// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 -// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to -// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 -// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to -// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 -// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to -// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 -// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] -// -TYPE(svbfloat16) test_svreinterpret_bf16_u64(TYPE(svuint64) op) MODE_ATTR { - return SVE_ACLE_FUNC(svreinterpret_bf16, _u64)(op); -} - -// CHECK-LABEL: @test_svreinterpret_bf16_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: ret [[OP:%.*]] -// -// TUPLE2-LABEL: @test_svreinterpret_bf16_bf16( -// TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// TUPLE2-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 -// TUPLE2-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 -// TUPLE2-NEXT: [[TMP5:%.*]] = insertvalue { , } [[TMP3]], [[TMP4]], 1 -// TUPLE2-NEXT: ret { , } [[TMP5]] -// -// TUPLE3-LABEL: @test_svreinterpret_bf16_bf16( -// TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } 
[[TMP1]], [[OP_COERCE2:%.*]], 2 -// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// TUPLE3-NEXT: [[TMP4:%.*]] = insertvalue { , , } poison, [[TMP3]], 0 -// TUPLE3-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 -// TUPLE3-NEXT: [[TMP6:%.*]] = insertvalue { , , } [[TMP4]], [[TMP5]], 1 -// TUPLE3-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 -// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP6]], [[TMP7]], 2 -// TUPLE3-NEXT: ret { , , } [[TMP8]] -// -// TUPLE4-LABEL: @test_svreinterpret_bf16_bf16( -// TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 -// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// TUPLE4-NEXT: [[TMP5:%.*]] = insertvalue { , , , } poison, [[TMP4]], 0 -// TUPLE4-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// TUPLE4-NEXT: [[TMP7:%.*]] = insertvalue { , , , } [[TMP5]], [[TMP6]], 1 -// TUPLE4-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP7]], [[TMP8]], 2 -// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// TUPLE4-NEXT: [[TMP11:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP10]], 3 -// TUPLE4-NEXT: ret { , , , } [[TMP11]] -// -// CPP-CHECK-LABEL: @_Z28test_svreinterpret_bf16_bf16u14__SVBfloat16_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: ret [[OP:%.*]] -// -// CPP-TUPLE2-LABEL: @_Z28test_svreinterpret_bf16_bf1614svbfloat16x2_t( -// CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 
-// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 -// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 -// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = insertvalue { , } [[TMP3]], [[TMP4]], 1 -// CPP-TUPLE2-NEXT: ret { , } [[TMP5]] -// -// CPP-TUPLE3-LABEL: @_Z28test_svreinterpret_bf16_bf1614svbfloat16x3_t( -// CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = insertvalue { , , } poison, [[TMP3]], 0 -// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 -// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = insertvalue { , , } [[TMP4]], [[TMP5]], 1 -// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 -// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP6]], [[TMP7]], 2 -// CPP-TUPLE3-NEXT: ret { , , } [[TMP8]] -// -// CPP-TUPLE4-LABEL: @_Z28test_svreinterpret_bf16_bf1614svbfloat16x4_t( -// CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 -// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = insertvalue { , , , } poison, [[TMP4]], 0 -// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = insertvalue { , , , } [[TMP5]], [[TMP6]], 1 -// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } 
[[TMP7]], [[TMP8]], 2 -// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP10]], 3 -// CPP-TUPLE4-NEXT: ret { , , , } [[TMP11]] -// -TYPE(svbfloat16) test_svreinterpret_bf16_bf16(TYPE(svbfloat16) op) MODE_ATTR { - return SVE_ACLE_FUNC(svreinterpret_bf16, _bf16)(op); -} - -// CHECK-LABEL: @test_svreinterpret_bf16_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CHECK-NEXT: ret [[TMP0]] -// -// TUPLE2-LABEL: @test_svreinterpret_bf16_f16( -// TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to -// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 -// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 -// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to -// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 -// TUPLE2-NEXT: ret { , } [[TMP7]] -// -// TUPLE3-LABEL: @test_svreinterpret_bf16_f16( -// TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to -// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 -// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 -// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to -// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 -// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 -// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to -// 
TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 -// TUPLE3-NEXT: ret { , , } [[TMP11]] -// -// TUPLE4-LABEL: @test_svreinterpret_bf16_f16( -// TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 -// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to -// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 -// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to -// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 -// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to -// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 -// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to -// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 -// TUPLE4-NEXT: ret { , , , } [[TMP15]] -// -// CPP-CHECK-LABEL: @_Z27test_svreinterpret_bf16_f16u13__SVFloat16_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-CHECK-NEXT: ret [[TMP0]] -// -// CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_bf16_f1613svfloat16x2_t( -// CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to -// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , 
} poison, [[TMP3]], 0 -// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 -// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to -// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 -// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] -// -// CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_bf16_f1613svfloat16x3_t( -// CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to -// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 -// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 -// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to -// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 -// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 -// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to -// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 -// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] -// -// CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_bf16_f1613svfloat16x4_t( -// CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 -// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to -// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 -// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , 
, } [[TMP3]], 1 -// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to -// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 -// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to -// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 -// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to -// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 -// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] -// -TYPE(svbfloat16) test_svreinterpret_bf16_f16(TYPE(svfloat16) op) MODE_ATTR { - return SVE_ACLE_FUNC(svreinterpret_bf16, _f16)(op); -} - -// CHECK-LABEL: @test_svreinterpret_bf16_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CHECK-NEXT: ret [[TMP0]] -// -// TUPLE2-LABEL: @test_svreinterpret_bf16_f32( -// TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to -// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 -// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 -// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to -// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 -// TUPLE2-NEXT: ret { , } [[TMP7]] -// -// TUPLE3-LABEL: @test_svreinterpret_bf16_f32( -// TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast 
[[TMP3]] to -// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 -// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 -// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to -// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 -// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 -// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to -// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 -// TUPLE3-NEXT: ret { , , } [[TMP11]] -// -// TUPLE4-LABEL: @test_svreinterpret_bf16_f32( -// TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 -// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to -// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 -// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to -// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 -// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to -// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 -// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to -// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 -// TUPLE4-NEXT: ret { , , , } [[TMP15]] -// -// CPP-CHECK-LABEL: @_Z27test_svreinterpret_bf16_f32u13__SVFloat32_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-CHECK-NEXT: ret [[TMP0]] -// -// CPP-TUPLE2-LABEL: 
@_Z27test_svreinterpret_bf16_f3213svfloat32x2_t( -// CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to -// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 -// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 -// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to -// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 -// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] -// -// CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_bf16_f3213svfloat32x3_t( -// CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to -// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 -// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 -// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to -// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 -// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 -// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to -// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 -// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] -// -// CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_bf16_f3213svfloat32x4_t( -// CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// 
CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 -// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to -// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 -// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to -// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 -// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to -// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 -// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to -// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 -// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] -// -TYPE(svbfloat16) test_svreinterpret_bf16_f32(TYPE(svfloat32) op) MODE_ATTR { - return SVE_ACLE_FUNC(svreinterpret_bf16, _f32)(op); -} - -// CHECK-LABEL: @test_svreinterpret_bf16_f64( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CHECK-NEXT: ret [[TMP0]] -// -// TUPLE2-LABEL: @test_svreinterpret_bf16_f64( -// TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to -// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 -// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 -// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to -// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 -// TUPLE2-NEXT: ret { , } [[TMP7]] 
-// -// TUPLE3-LABEL: @test_svreinterpret_bf16_f64( -// TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to -// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 -// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 -// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to -// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 -// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 -// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to -// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 -// TUPLE3-NEXT: ret { , , } [[TMP11]] -// -// TUPLE4-LABEL: @test_svreinterpret_bf16_f64( -// TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 -// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to -// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 -// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to -// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 -// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to -// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 -// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , 
} [[TMP3]], 3 -// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to -// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 -// TUPLE4-NEXT: ret { , , , } [[TMP15]] -// -// CPP-CHECK-LABEL: @_Z27test_svreinterpret_bf16_f64u13__SVFloat64_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-CHECK-NEXT: ret [[TMP0]] -// -// CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_bf16_f6413svfloat64x2_t( -// CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to -// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 -// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 -// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to -// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 -// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] -// -// CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_bf16_f6413svfloat64x3_t( -// CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to -// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 -// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 -// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to -// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 -// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 -// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to -// 
CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 -// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] -// -// CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_bf16_f6413svfloat64x4_t( -// CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 -// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to -// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 -// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to -// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 -// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to -// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 -// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to -// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 -// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] -// -TYPE(svbfloat16) test_svreinterpret_bf16_f64(TYPE(svfloat64) op) MODE_ATTR { - return SVE_ACLE_FUNC(svreinterpret_bf16, _f64)(op); -} - -// CHECK-LABEL: @test_svreinterpret_f32_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CHECK-NEXT: ret [[TMP0]] -// -// TUPLE2-LABEL: @test_svreinterpret_f32_bf16( -// TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// 
TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to -// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 -// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 -// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to -// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 -// TUPLE2-NEXT: ret { , } [[TMP7]] -// -// TUPLE3-LABEL: @test_svreinterpret_f32_bf16( -// TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to -// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 -// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 -// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to -// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 -// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 -// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to -// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 -// TUPLE3-NEXT: ret { , , } [[TMP11]] -// -// TUPLE4-LABEL: @test_svreinterpret_f32_bf16( -// TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 -// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to -// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 -// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue 
{ , , , } [[TMP3]], 1 -// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to -// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 -// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to -// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 -// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to -// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 -// TUPLE4-NEXT: ret { , , , } [[TMP15]] -// -// CPP-CHECK-LABEL: @_Z27test_svreinterpret_f32_bf16u14__SVBfloat16_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-CHECK-NEXT: ret [[TMP0]] -// -// CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_f32_bf1614svbfloat16x2_t( -// CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to -// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 -// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 -// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to -// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 -// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] -// -// CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_f32_bf1614svbfloat16x3_t( -// CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to -// 
CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 -// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 -// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to -// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 -// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 -// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to -// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 -// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] -// -// CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_f32_bf1614svbfloat16x4_t( -// CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 -// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to -// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 -// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to -// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 -// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to -// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 -// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to -// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 -// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] -// -TYPE(svfloat32) test_svreinterpret_f32_bf16(TYPE(svbfloat16) op) MODE_ATTR { - return SVE_ACLE_FUNC(svreinterpret_f32, 
_bf16)(op); -} - -// CHECK-LABEL: @test_svreinterpret_f16_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CHECK-NEXT: ret [[TMP0]] -// -// TUPLE2-LABEL: @test_svreinterpret_f16_bf16( -// TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to -// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 -// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 -// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to -// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 -// TUPLE2-NEXT: ret { , } [[TMP7]] -// -// TUPLE3-LABEL: @test_svreinterpret_f16_bf16( -// TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to -// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 -// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 -// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to -// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 -// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 -// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to -// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 -// TUPLE3-NEXT: ret { , , } [[TMP11]] -// -// TUPLE4-LABEL: @test_svreinterpret_f16_bf16( -// TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 
-// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 -// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to -// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 -// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to -// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 -// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to -// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 -// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to -// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 -// TUPLE4-NEXT: ret { , , , } [[TMP15]] -// -// CPP-CHECK-LABEL: @_Z27test_svreinterpret_f16_bf16u14__SVBfloat16_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-CHECK-NEXT: ret [[TMP0]] -// -// CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_f16_bf1614svbfloat16x2_t( -// CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to -// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 -// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 -// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to -// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 -// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] -// -// CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_f16_bf1614svbfloat16x3_t( -// 
CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to -// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 -// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 -// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to -// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 -// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 -// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to -// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 -// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] -// -// CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_f16_bf1614svbfloat16x4_t( -// CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 -// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to -// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 -// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to -// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 -// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to -// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , 
} [[TMP9]], [[TMP11]], 2 -// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to -// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 -// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] -// -TYPE(svfloat16) test_svreinterpret_f16_bf16(TYPE(svbfloat16) op) MODE_ATTR { - return SVE_ACLE_FUNC(svreinterpret_f16, _bf16)(op); -} - -// CHECK-LABEL: @test_svreinterpret_f64_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CHECK-NEXT: ret [[TMP0]] -// -// TUPLE2-LABEL: @test_svreinterpret_f64_bf16( -// TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to -// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 -// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 -// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to -// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 -// TUPLE2-NEXT: ret { , } [[TMP7]] -// -// TUPLE3-LABEL: @test_svreinterpret_f64_bf16( -// TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to -// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 -// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 -// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to -// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 -// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 -// 
TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to -// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 -// TUPLE3-NEXT: ret { , , } [[TMP11]] -// -// TUPLE4-LABEL: @test_svreinterpret_f64_bf16( -// TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 -// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 -// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to -// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 -// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to -// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 -// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to -// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 -// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to -// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 -// TUPLE4-NEXT: ret { , , , } [[TMP15]] -// -// CPP-CHECK-LABEL: @_Z27test_svreinterpret_f64_bf16u14__SVBfloat16_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-CHECK-NEXT: ret [[TMP0]] -// -// CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_f64_bf1614svbfloat16x2_t( -// CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] 
to -// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 -// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 -// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to -// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 -// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] -// -// CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_f64_bf1614svbfloat16x3_t( -// CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to -// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 -// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 -// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to -// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 -// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 -// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to -// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 -// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] -// -// CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_f64_bf1614svbfloat16x4_t( -// CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 -// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 -// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 -// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 -// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to -// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 
0 -// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to -// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 -// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to -// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 -// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to -// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 -// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] -// -TYPE(svfloat64) test_svreinterpret_f64_bf16(TYPE(svbfloat16) op) MODE_ATTR { - return SVE_ACLE_FUNC(svreinterpret_f64, _bf16)(op); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_reinterpret.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_reinterpret.c index 7c21c297b6a3d..c8d2d03588e1a 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_reinterpret.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_reinterpret.c @@ -13101,3 +13101,2512 @@ TYPE(svfloat64) test_svreinterpret_f64_f64(TYPE(svfloat64) op) MODE_ATTR { return SVE_ACLE_FUNC(svreinterpret_f64,_f64)(op); } + +// CHECK-LABEL: @test_svreinterpret_s8_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to +// CHECK-NEXT: ret [[TMP0]] +// +// TUPLE2-LABEL: @test_svreinterpret_s8_bf16( +// TUPLE2-NEXT: entry: +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// 
TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] +// +// TUPLE3-LABEL: @test_svreinterpret_s8_bf16( +// TUPLE3-NEXT: entry: +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] +// +// TUPLE4-LABEL: @test_svreinterpret_s8_bf16( +// TUPLE4-NEXT: entry: +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] 
= insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] +// +// CPP-CHECK-LABEL: @_Z26test_svreinterpret_s8_bf16u14__SVBfloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to +// CPP-CHECK-NEXT: ret [[TMP0]] +// +// CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_s8_bf1614svbfloat16x2_t( +// CPP-TUPLE2-NEXT: entry: +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] +// +// CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_s8_bf1614svbfloat16x3_t( +// CPP-TUPLE3-NEXT: entry: +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = 
extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] +// +// CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_s8_bf1614svbfloat16x4_t( +// CPP-TUPLE4-NEXT: entry: +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] +// +TYPE(svint8) test_svreinterpret_s8_bf16(TYPE(svbfloat16) op) MODE_ATTR { + return SVE_ACLE_FUNC(svreinterpret_s8, _bf16)(op); +} + +// CHECK-LABEL: @test_svreinterpret_s16_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to +// CHECK-NEXT: ret [[TMP0]] +// +// TUPLE2-LABEL: @test_svreinterpret_s16_bf16( +// TUPLE2-NEXT: entry: +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 
+// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] +// +// TUPLE3-LABEL: @test_svreinterpret_s16_bf16( +// TUPLE3-NEXT: entry: +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] +// +// TUPLE4-LABEL: @test_svreinterpret_s16_bf16( +// TUPLE4-NEXT: entry: +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = 
insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] +// +// CPP-CHECK-LABEL: @_Z27test_svreinterpret_s16_bf16u14__SVBfloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to +// CPP-CHECK-NEXT: ret [[TMP0]] +// +// CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_s16_bf1614svbfloat16x2_t( +// CPP-TUPLE2-NEXT: entry: +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] +// +// CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_s16_bf1614svbfloat16x3_t( +// CPP-TUPLE3-NEXT: entry: +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { 
, , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] +// +// CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_s16_bf1614svbfloat16x4_t( +// CPP-TUPLE4-NEXT: entry: +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] +// +TYPE(svint16) 
test_svreinterpret_s16_bf16(TYPE(svbfloat16) op) MODE_ATTR { + return SVE_ACLE_FUNC(svreinterpret_s16, _bf16)(op); +} + +// CHECK-LABEL: @test_svreinterpret_s32_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to +// CHECK-NEXT: ret [[TMP0]] +// +// TUPLE2-LABEL: @test_svreinterpret_s32_bf16( +// TUPLE2-NEXT: entry: +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] +// +// TUPLE3-LABEL: @test_svreinterpret_s32_bf16( +// TUPLE3-NEXT: entry: +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] +// +// TUPLE4-LABEL: @test_svreinterpret_s32_bf16( +// TUPLE4-NEXT: entry: +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, 
[[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] +// +// CPP-CHECK-LABEL: @_Z27test_svreinterpret_s32_bf16u14__SVBfloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to +// CPP-CHECK-NEXT: ret [[TMP0]] +// +// CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_s32_bf1614svbfloat16x2_t( +// CPP-TUPLE2-NEXT: entry: +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { 
, } [[TMP7]] +// +// CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_s32_bf1614svbfloat16x3_t( +// CPP-TUPLE3-NEXT: entry: +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] +// +// CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_s32_bf1614svbfloat16x4_t( +// CPP-TUPLE4-NEXT: entry: +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: 
[[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] +// +TYPE(svint32) test_svreinterpret_s32_bf16(TYPE(svbfloat16) op) MODE_ATTR { + return SVE_ACLE_FUNC(svreinterpret_s32, _bf16)(op); +} +// CHECK-LABEL: @test_svreinterpret_s64_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to +// CHECK-NEXT: ret [[TMP0]] +// +// TUPLE2-LABEL: @test_svreinterpret_s64_bf16( +// TUPLE2-NEXT: entry: +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] +// +// TUPLE3-LABEL: @test_svreinterpret_s64_bf16( +// TUPLE3-NEXT: entry: +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } 
[[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] +// +// TUPLE4-LABEL: @test_svreinterpret_s64_bf16( +// TUPLE4-NEXT: entry: +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] +// +// CPP-CHECK-LABEL: @_Z27test_svreinterpret_s64_bf16u14__SVBfloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to +// CPP-CHECK-NEXT: ret [[TMP0]] +// +// CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_s64_bf1614svbfloat16x2_t( +// CPP-TUPLE2-NEXT: entry: +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: 
[[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] +// +// CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_s64_bf1614svbfloat16x3_t( +// CPP-TUPLE3-NEXT: entry: +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] +// +// CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_s64_bf1614svbfloat16x4_t( +// CPP-TUPLE4-NEXT: entry: +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: 
[[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] +// +TYPE(svint64) test_svreinterpret_s64_bf16(TYPE(svbfloat16) op) MODE_ATTR { + return SVE_ACLE_FUNC(svreinterpret_s64, _bf16)(op); +} + +// CHECK-LABEL: @test_svreinterpret_u8_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to +// CHECK-NEXT: ret [[TMP0]] +// +// TUPLE2-LABEL: @test_svreinterpret_u8_bf16( +// TUPLE2-NEXT: entry: +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] +// +// TUPLE3-LABEL: @test_svreinterpret_u8_bf16( +// TUPLE3-NEXT: entry: +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = 
insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] +// +// TUPLE4-LABEL: @test_svreinterpret_u8_bf16( +// TUPLE4-NEXT: entry: +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] +// +// CPP-CHECK-LABEL: @_Z26test_svreinterpret_u8_bf16u14__SVBfloat16_t( +// 
CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to +// CPP-CHECK-NEXT: ret [[TMP0]] +// +// CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_u8_bf1614svbfloat16x2_t( +// CPP-TUPLE2-NEXT: entry: +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] +// +// CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_u8_bf1614svbfloat16x3_t( +// CPP-TUPLE3-NEXT: entry: +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] +// +// CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_u8_bf1614svbfloat16x4_t( +// CPP-TUPLE4-NEXT: entry: +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , 
} poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] +// +TYPE(svuint8) test_svreinterpret_u8_bf16(TYPE(svbfloat16) op) MODE_ATTR { + return SVE_ACLE_FUNC(svreinterpret_u8, _bf16)(op); +} + +// CHECK-LABEL: @test_svreinterpret_u16_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to +// CHECK-NEXT: ret [[TMP0]] +// +// TUPLE2-LABEL: @test_svreinterpret_u16_bf16( +// TUPLE2-NEXT: entry: +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast 
[[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] +// +// TUPLE3-LABEL: @test_svreinterpret_u16_bf16( +// TUPLE3-NEXT: entry: +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] +// +// TUPLE4-LABEL: @test_svreinterpret_u16_bf16( +// TUPLE4-NEXT: entry: +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// 
TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] +// +// CPP-CHECK-LABEL: @_Z27test_svreinterpret_u16_bf16u14__SVBfloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to +// CPP-CHECK-NEXT: ret [[TMP0]] +// +// CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_u16_bf1614svbfloat16x2_t( +// CPP-TUPLE2-NEXT: entry: +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] +// +// CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_u16_bf1614svbfloat16x3_t( +// CPP-TUPLE3-NEXT: entry: +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// 
CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] +// +// CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_u16_bf1614svbfloat16x4_t( +// CPP-TUPLE4-NEXT: entry: +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] +// +TYPE(svuint16) test_svreinterpret_u16_bf16(TYPE(svbfloat16) op) MODE_ATTR { + return SVE_ACLE_FUNC(svreinterpret_u16, _bf16)(op); +} + +// CHECK-LABEL: @test_svreinterpret_u32_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to +// CHECK-NEXT: ret [[TMP0]] +// +// TUPLE2-LABEL: @test_svreinterpret_u32_bf16( +// TUPLE2-NEXT: entry: +// TUPLE2-NEXT: [[TMP0:%.*]] = 
insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] +// +// TUPLE3-LABEL: @test_svreinterpret_u32_bf16( +// TUPLE3-NEXT: entry: +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] +// +// TUPLE4-LABEL: @test_svreinterpret_u32_bf16( +// TUPLE4-NEXT: entry: +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = 
bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] +// +// CPP-CHECK-LABEL: @_Z27test_svreinterpret_u32_bf16u14__SVBfloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to +// CPP-CHECK-NEXT: ret [[TMP0]] +// +// CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_u32_bf1614svbfloat16x2_t( +// CPP-TUPLE2-NEXT: entry: +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] +// +// CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_u32_bf1614svbfloat16x3_t( +// CPP-TUPLE3-NEXT: entry: +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 
+// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] +// +// CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_u32_bf1614svbfloat16x4_t( +// CPP-TUPLE4-NEXT: entry: +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] 
+// +TYPE(svuint32) test_svreinterpret_u32_bf16(TYPE(svbfloat16) op) MODE_ATTR { + return SVE_ACLE_FUNC(svreinterpret_u32, _bf16)(op); +} + +// CHECK-LABEL: @test_svreinterpret_u64_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to +// CHECK-NEXT: ret [[TMP0]] +// +// TUPLE2-LABEL: @test_svreinterpret_u64_bf16( +// TUPLE2-NEXT: entry: +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] +// +// TUPLE3-LABEL: @test_svreinterpret_u64_bf16( +// TUPLE3-NEXT: entry: +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] +// +// TUPLE4-LABEL: @test_svreinterpret_u64_bf16( +// TUPLE4-NEXT: entry: +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { 
, , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] +// +// CPP-CHECK-LABEL: @_Z27test_svreinterpret_u64_bf16u14__SVBfloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to +// CPP-CHECK-NEXT: ret [[TMP0]] +// +// CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_u64_bf1614svbfloat16x2_t( +// CPP-TUPLE2-NEXT: entry: +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// 
CPP-TUPLE2-NEXT: ret { , } [[TMP7]] +// +// CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_u64_bf1614svbfloat16x3_t( +// CPP-TUPLE3-NEXT: entry: +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] +// +// CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_u64_bf1614svbfloat16x4_t( +// CPP-TUPLE4-NEXT: entry: +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 
2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] +// +TYPE(svuint64) test_svreinterpret_u64_bf16(TYPE(svbfloat16) op) MODE_ATTR { + return SVE_ACLE_FUNC(svreinterpret_u64, _bf16)(op); +} + +// CHECK-LABEL: @test_svreinterpret_bf16_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to +// CHECK-NEXT: ret [[TMP0]] +// +// TUPLE2-LABEL: @test_svreinterpret_bf16_s8( +// TUPLE2-NEXT: entry: +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] +// +// TUPLE3-LABEL: @test_svreinterpret_bf16_s8( +// TUPLE3-NEXT: entry: +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = 
insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] +// +// TUPLE4-LABEL: @test_svreinterpret_bf16_s8( +// TUPLE4-NEXT: entry: +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] +// +// CPP-CHECK-LABEL: @_Z26test_svreinterpret_bf16_s8u10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to +// CPP-CHECK-NEXT: ret [[TMP0]] +// +// CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_bf16_s810svint8x2_t( +// CPP-TUPLE2-NEXT: entry: +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: 
[[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] +// +// CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_bf16_s810svint8x3_t( +// CPP-TUPLE3-NEXT: entry: +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] +// +// CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_bf16_s810svint8x4_t( +// CPP-TUPLE4-NEXT: entry: +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = 
bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] +// +TYPE(svbfloat16) test_svreinterpret_bf16_s8(TYPE(svint8) op) MODE_ATTR { + return SVE_ACLE_FUNC(svreinterpret_bf16, _s8)(op); +} + +// CHECK-LABEL: @test_svreinterpret_bf16_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to +// CHECK-NEXT: ret [[TMP0]] +// +// TUPLE2-LABEL: @test_svreinterpret_bf16_s16( +// TUPLE2-NEXT: entry: +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] +// +// TUPLE3-LABEL: @test_svreinterpret_bf16_s16( +// TUPLE3-NEXT: entry: +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , 
, } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] +// +// TUPLE4-LABEL: @test_svreinterpret_bf16_s16( +// TUPLE4-NEXT: entry: +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] +// +// CPP-CHECK-LABEL: @_Z27test_svreinterpret_bf16_s16u11__SVInt16_t( +// CPP-CHECK-NEXT: entry: 
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to +// CPP-CHECK-NEXT: ret [[TMP0]] +// +// CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_bf16_s1611svint16x2_t( +// CPP-TUPLE2-NEXT: entry: +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] +// +// CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_bf16_s1611svint16x3_t( +// CPP-TUPLE3-NEXT: entry: +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] +// +// CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_bf16_s1611svint16x4_t( +// CPP-TUPLE4-NEXT: entry: +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 
0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] +// +TYPE(svbfloat16) test_svreinterpret_bf16_s16(TYPE(svint16) op) MODE_ATTR { + return SVE_ACLE_FUNC(svreinterpret_bf16, _s16)(op); +} + +// CHECK-LABEL: @test_svreinterpret_bf16_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to +// CHECK-NEXT: ret [[TMP0]] +// +// TUPLE2-LABEL: @test_svreinterpret_bf16_s32( +// TUPLE2-NEXT: entry: +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: 
[[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] +// +// TUPLE3-LABEL: @test_svreinterpret_bf16_s32( +// TUPLE3-NEXT: entry: +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] +// +// TUPLE4-LABEL: @test_svreinterpret_bf16_s32( +// TUPLE4-NEXT: entry: +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = 
insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] +// +// CPP-CHECK-LABEL: @_Z27test_svreinterpret_bf16_s32u11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to +// CPP-CHECK-NEXT: ret [[TMP0]] +// +// CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_bf16_s3211svint32x2_t( +// CPP-TUPLE2-NEXT: entry: +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] +// +// CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_bf16_s3211svint32x3_t( +// CPP-TUPLE3-NEXT: entry: +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue 
{ , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] +// +// CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_bf16_s3211svint32x4_t( +// CPP-TUPLE4-NEXT: entry: +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] +// +TYPE(svbfloat16) test_svreinterpret_bf16_s32(TYPE(svint32) op) MODE_ATTR { + return SVE_ACLE_FUNC(svreinterpret_bf16, _s32)(op); +} + +// CHECK-LABEL: @test_svreinterpret_bf16_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to +// CHECK-NEXT: ret [[TMP0]] +// +// TUPLE2-LABEL: @test_svreinterpret_bf16_s64( +// TUPLE2-NEXT: entry: +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// 
TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] +// +// TUPLE3-LABEL: @test_svreinterpret_bf16_s64( +// TUPLE3-NEXT: entry: +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] +// +// TUPLE4-LABEL: @test_svreinterpret_bf16_s64( +// TUPLE4-NEXT: entry: +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = 
insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] +// +// CPP-CHECK-LABEL: @_Z27test_svreinterpret_bf16_s64u11__SVInt64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to +// CPP-CHECK-NEXT: ret [[TMP0]] +// +// CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_bf16_s6411svint64x2_t( +// CPP-TUPLE2-NEXT: entry: +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] +// +// CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_bf16_s6411svint64x3_t( +// CPP-TUPLE3-NEXT: entry: +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } 
[[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] +// +// CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_bf16_s6411svint64x4_t( +// CPP-TUPLE4-NEXT: entry: +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] +// +TYPE(svbfloat16) 
test_svreinterpret_bf16_s64(TYPE(svint64) op) MODE_ATTR { + return SVE_ACLE_FUNC(svreinterpret_bf16, _s64)(op); +} + +// CHECK-LABEL: @test_svreinterpret_bf16_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to +// CHECK-NEXT: ret [[TMP0]] +// +// TUPLE2-LABEL: @test_svreinterpret_bf16_u8( +// TUPLE2-NEXT: entry: +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] +// +// TUPLE3-LABEL: @test_svreinterpret_bf16_u8( +// TUPLE3-NEXT: entry: +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] +// +// TUPLE4-LABEL: @test_svreinterpret_bf16_u8( +// TUPLE4-NEXT: entry: +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, 
[[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] +// +// CPP-CHECK-LABEL: @_Z26test_svreinterpret_bf16_u8u11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to +// CPP-CHECK-NEXT: ret [[TMP0]] +// +// CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_bf16_u811svuint8x2_t( +// CPP-TUPLE2-NEXT: entry: +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } 
[[TMP7]] +// +// CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_bf16_u811svuint8x3_t( +// CPP-TUPLE3-NEXT: entry: +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] +// +// CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_bf16_u811svuint8x4_t( +// CPP-TUPLE4-NEXT: entry: +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: 
[[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] +// +TYPE(svbfloat16) test_svreinterpret_bf16_u8(TYPE(svuint8) op) MODE_ATTR { + return SVE_ACLE_FUNC(svreinterpret_bf16, _u8)(op); +} + +// CHECK-LABEL: @test_svreinterpret_bf16_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to +// CHECK-NEXT: ret [[TMP0]] +// +// TUPLE2-LABEL: @test_svreinterpret_bf16_u16( +// TUPLE2-NEXT: entry: +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] +// +// TUPLE3-LABEL: @test_svreinterpret_bf16_u16( +// TUPLE3-NEXT: entry: +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } 
[[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] +// +// TUPLE4-LABEL: @test_svreinterpret_bf16_u16( +// TUPLE4-NEXT: entry: +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] +// +// CPP-CHECK-LABEL: @_Z27test_svreinterpret_bf16_u16u12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to +// CPP-CHECK-NEXT: ret [[TMP0]] +// +// CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_bf16_u1612svuint16x2_t( +// CPP-TUPLE2-NEXT: entry: +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] 
= extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] +// +// CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_bf16_u1612svuint16x3_t( +// CPP-TUPLE3-NEXT: entry: +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] +// +// CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_bf16_u1612svuint16x4_t( +// CPP-TUPLE4-NEXT: entry: +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast 
[[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] +// +TYPE(svbfloat16) test_svreinterpret_bf16_u16(TYPE(svuint16) op) MODE_ATTR { + return SVE_ACLE_FUNC(svreinterpret_bf16, _u16)(op); +} + +// CHECK-LABEL: @test_svreinterpret_bf16_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to +// CHECK-NEXT: ret [[TMP0]] +// +// TUPLE2-LABEL: @test_svreinterpret_bf16_u32( +// TUPLE2-NEXT: entry: +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] +// +// TUPLE3-LABEL: @test_svreinterpret_bf16_u32( +// TUPLE3-NEXT: entry: +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } 
[[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] +// +// TUPLE4-LABEL: @test_svreinterpret_bf16_u32( +// TUPLE4-NEXT: entry: +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] +// +// CPP-CHECK-LABEL: @_Z27test_svreinterpret_bf16_u32u12__SVUint32_t( +// CPP-CHECK-NEXT: entry: +// 
CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to +// CPP-CHECK-NEXT: ret [[TMP0]] +// +// CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_bf16_u3212svuint32x2_t( +// CPP-TUPLE2-NEXT: entry: +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] +// +// CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_bf16_u3212svuint32x3_t( +// CPP-TUPLE3-NEXT: entry: +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] +// +// CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_bf16_u3212svuint32x4_t( +// CPP-TUPLE4-NEXT: entry: +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 
0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] +// +TYPE(svbfloat16) test_svreinterpret_bf16_u32(TYPE(svuint32) op) MODE_ATTR { + return SVE_ACLE_FUNC(svreinterpret_bf16, _u32)(op); +} + +// CHECK-LABEL: @test_svreinterpret_bf16_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to +// CHECK-NEXT: ret [[TMP0]] +// +// TUPLE2-LABEL: @test_svreinterpret_bf16_u64( +// TUPLE2-NEXT: entry: +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// 
TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] +// +// TUPLE3-LABEL: @test_svreinterpret_bf16_u64( +// TUPLE3-NEXT: entry: +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] +// +// TUPLE4-LABEL: @test_svreinterpret_bf16_u64( +// TUPLE4-NEXT: entry: +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: 
[[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] +// +// CPP-CHECK-LABEL: @_Z27test_svreinterpret_bf16_u64u12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to +// CPP-CHECK-NEXT: ret [[TMP0]] +// +// CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_bf16_u6412svuint64x2_t( +// CPP-TUPLE2-NEXT: entry: +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] +// +// CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_bf16_u6412svuint64x3_t( +// CPP-TUPLE3-NEXT: entry: +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: 
[[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] +// +// CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_bf16_u6412svuint64x4_t( +// CPP-TUPLE4-NEXT: entry: +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] +// +TYPE(svbfloat16) test_svreinterpret_bf16_u64(TYPE(svuint64) op) MODE_ATTR { + return SVE_ACLE_FUNC(svreinterpret_bf16, _u64)(op); +} + +// CHECK-LABEL: @test_svreinterpret_bf16_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret [[OP:%.*]] +// +// TUPLE2-LABEL: @test_svreinterpret_bf16_bf16( +// TUPLE2-NEXT: entry: +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] 
= insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 +// TUPLE2-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP5:%.*]] = insertvalue { , } [[TMP3]], [[TMP4]], 1 +// TUPLE2-NEXT: ret { , } [[TMP5]] +// +// TUPLE3-LABEL: @test_svreinterpret_bf16_bf16( +// TUPLE3-NEXT: entry: +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = insertvalue { , , } poison, [[TMP3]], 0 +// TUPLE3-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP6:%.*]] = insertvalue { , , } [[TMP4]], [[TMP5]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP6]], [[TMP7]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP8]] +// +// TUPLE4-LABEL: @test_svreinterpret_bf16_bf16( +// TUPLE4-NEXT: entry: +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = insertvalue { , , , } poison, [[TMP4]], 0 +// TUPLE4-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP7:%.*]] = insertvalue { , , , } [[TMP5]], [[TMP6]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP7]], [[TMP8]], 2 +// TUPLE4-NEXT: 
[[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP11:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP10]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP11]] +// +// CPP-CHECK-LABEL: @_Z28test_svreinterpret_bf16_bf16u14__SVBfloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: ret [[OP:%.*]] +// +// CPP-TUPLE2-LABEL: @_Z28test_svreinterpret_bf16_bf1614svbfloat16x2_t( +// CPP-TUPLE2-NEXT: entry: +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = insertvalue { , } [[TMP3]], [[TMP4]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP5]] +// +// CPP-TUPLE3-LABEL: @_Z28test_svreinterpret_bf16_bf1614svbfloat16x3_t( +// CPP-TUPLE3-NEXT: entry: +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = insertvalue { , , } poison, [[TMP3]], 0 +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = insertvalue { , , } [[TMP4]], [[TMP5]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP6]], [[TMP7]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP8]] +// +// CPP-TUPLE4-LABEL: @_Z28test_svreinterpret_bf16_bf1614svbfloat16x4_t( +// CPP-TUPLE4-NEXT: entry: +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { 
, , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = insertvalue { , , , } poison, [[TMP4]], 0 +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = insertvalue { , , , } [[TMP5]], [[TMP6]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP7]], [[TMP8]], 2 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP10]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP11]] +// +TYPE(svbfloat16) test_svreinterpret_bf16_bf16(TYPE(svbfloat16) op) MODE_ATTR { + return SVE_ACLE_FUNC(svreinterpret_bf16, _bf16)(op); +} + +// CHECK-LABEL: @test_svreinterpret_bf16_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to +// CHECK-NEXT: ret [[TMP0]] +// +// TUPLE2-LABEL: @test_svreinterpret_bf16_f16( +// TUPLE2-NEXT: entry: +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] +// +// TUPLE3-LABEL: @test_svreinterpret_bf16_f16( +// TUPLE3-NEXT: entry: +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] 
= insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] +// +// TUPLE4-LABEL: @test_svreinterpret_bf16_f16( +// TUPLE4-NEXT: entry: +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] 
+// +// CPP-CHECK-LABEL: @_Z27test_svreinterpret_bf16_f16u13__SVFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to +// CPP-CHECK-NEXT: ret [[TMP0]] +// +// CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_bf16_f1613svfloat16x2_t( +// CPP-TUPLE2-NEXT: entry: +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] +// +// CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_bf16_f1613svfloat16x3_t( +// CPP-TUPLE3-NEXT: entry: +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] +// +// CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_bf16_f1613svfloat16x4_t( +// 
CPP-TUPLE4-NEXT: entry: +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] +// +TYPE(svbfloat16) test_svreinterpret_bf16_f16(TYPE(svfloat16) op) MODE_ATTR { + return SVE_ACLE_FUNC(svreinterpret_bf16, _f16)(op); +} + +// CHECK-LABEL: @test_svreinterpret_bf16_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to +// CHECK-NEXT: ret [[TMP0]] +// +// TUPLE2-LABEL: @test_svreinterpret_bf16_f32( +// TUPLE2-NEXT: entry: +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: 
[[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] +// +// TUPLE3-LABEL: @test_svreinterpret_bf16_f32( +// TUPLE3-NEXT: entry: +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] +// +// TUPLE4-LABEL: @test_svreinterpret_bf16_f32( +// TUPLE4-NEXT: entry: +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { 
, , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] +// +// CPP-CHECK-LABEL: @_Z27test_svreinterpret_bf16_f32u13__SVFloat32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to +// CPP-CHECK-NEXT: ret [[TMP0]] +// +// CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_bf16_f3213svfloat32x2_t( +// CPP-TUPLE2-NEXT: entry: +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] +// +// CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_bf16_f3213svfloat32x3_t( +// CPP-TUPLE3-NEXT: entry: +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// 
CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] +// +// CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_bf16_f3213svfloat32x4_t( +// CPP-TUPLE4-NEXT: entry: +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] +// +TYPE(svbfloat16) test_svreinterpret_bf16_f32(TYPE(svfloat32) op) MODE_ATTR { + return SVE_ACLE_FUNC(svreinterpret_bf16, _f32)(op); +} + +// CHECK-LABEL: @test_svreinterpret_bf16_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to +// CHECK-NEXT: ret [[TMP0]] +// +// TUPLE2-LABEL: 
@test_svreinterpret_bf16_f64( +// TUPLE2-NEXT: entry: +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] +// +// TUPLE3-LABEL: @test_svreinterpret_bf16_f64( +// TUPLE3-NEXT: entry: +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] +// +// TUPLE4-LABEL: @test_svreinterpret_bf16_f64( +// TUPLE4-NEXT: entry: +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: 
[[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] +// +// CPP-CHECK-LABEL: @_Z27test_svreinterpret_bf16_f64u13__SVFloat64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to +// CPP-CHECK-NEXT: ret [[TMP0]] +// +// CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_bf16_f6413svfloat64x2_t( +// CPP-TUPLE2-NEXT: entry: +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] +// +// CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_bf16_f6413svfloat64x3_t( +// CPP-TUPLE3-NEXT: entry: +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// 
CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] +// +// CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_bf16_f6413svfloat64x4_t( +// CPP-TUPLE4-NEXT: entry: +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = 
insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] +// +TYPE(svbfloat16) test_svreinterpret_bf16_f64(TYPE(svfloat64) op) MODE_ATTR { + return SVE_ACLE_FUNC(svreinterpret_bf16, _f64)(op); +} + +// CHECK-LABEL: @test_svreinterpret_f32_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to +// CHECK-NEXT: ret [[TMP0]] +// +// TUPLE2-LABEL: @test_svreinterpret_f32_bf16( +// TUPLE2-NEXT: entry: +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] +// +// TUPLE3-LABEL: @test_svreinterpret_f32_bf16( +// TUPLE3-NEXT: entry: +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] +// +// TUPLE4-LABEL: 
@test_svreinterpret_f32_bf16( +// TUPLE4-NEXT: entry: +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] +// +// CPP-CHECK-LABEL: @_Z27test_svreinterpret_f32_bf16u14__SVBfloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to +// CPP-CHECK-NEXT: ret [[TMP0]] +// +// CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_f32_bf1614svbfloat16x2_t( +// CPP-TUPLE2-NEXT: entry: +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = 
bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] +// +// CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_f32_bf1614svbfloat16x3_t( +// CPP-TUPLE3-NEXT: entry: +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] +// +// CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_f32_bf1614svbfloat16x4_t( +// CPP-TUPLE4-NEXT: entry: +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { 
, , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] +// +TYPE(svfloat32) test_svreinterpret_f32_bf16(TYPE(svbfloat16) op) MODE_ATTR { + return SVE_ACLE_FUNC(svreinterpret_f32, _bf16)(op); +} + +// CHECK-LABEL: @test_svreinterpret_f16_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to +// CHECK-NEXT: ret [[TMP0]] +// +// TUPLE2-LABEL: @test_svreinterpret_f16_bf16( +// TUPLE2-NEXT: entry: +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] +// +// TUPLE3-LABEL: @test_svreinterpret_f16_bf16( +// TUPLE3-NEXT: entry: +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue 
{ , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] +// +// TUPLE4-LABEL: @test_svreinterpret_f16_bf16( +// TUPLE4-NEXT: entry: +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] +// +// CPP-CHECK-LABEL: @_Z27test_svreinterpret_f16_bf16u14__SVBfloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to +// CPP-CHECK-NEXT: ret [[TMP0]] +// +// CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_f16_bf1614svbfloat16x2_t( +// CPP-TUPLE2-NEXT: entry: +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, 
[[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] +// +// CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_f16_bf1614svbfloat16x3_t( +// CPP-TUPLE3-NEXT: entry: +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] +// +// CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_f16_bf1614svbfloat16x4_t( +// CPP-TUPLE4-NEXT: entry: +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } 
[[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] +// +TYPE(svfloat16) test_svreinterpret_f16_bf16(TYPE(svbfloat16) op) MODE_ATTR { + return SVE_ACLE_FUNC(svreinterpret_f16, _bf16)(op); +} + +// CHECK-LABEL: @test_svreinterpret_f64_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to +// CHECK-NEXT: ret [[TMP0]] +// +// TUPLE2-LABEL: @test_svreinterpret_f64_bf16( +// TUPLE2-NEXT: entry: +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] +// +// TUPLE3-LABEL: @test_svreinterpret_f64_bf16( +// TUPLE3-NEXT: entry: +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, 
[[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] +// +// TUPLE4-LABEL: @test_svreinterpret_f64_bf16( +// TUPLE4-NEXT: entry: +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], 
[[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] +// +// CPP-CHECK-LABEL: @_Z27test_svreinterpret_f64_bf16u14__SVBfloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to +// CPP-CHECK-NEXT: ret [[TMP0]] +// +// CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_f64_bf1614svbfloat16x2_t( +// CPP-TUPLE2-NEXT: entry: +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] +// +// CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_f64_bf1614svbfloat16x3_t( +// CPP-TUPLE3-NEXT: entry: +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] +// +// 
CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_f64_bf1614svbfloat16x4_t( +// CPP-TUPLE4-NEXT: entry: +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] +// +TYPE(svfloat64) test_svreinterpret_f64_bf16(TYPE(svbfloat16) op) MODE_ATTR { + return SVE_ACLE_FUNC(svreinterpret_f64, _bf16)(op); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rev-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rev-bfloat.c deleted file mode 100644 index 9b3e813fa9694..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rev-bfloat.c +++ /dev/null @@ -1,39 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature 
+bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s - -#include - -#if defined __ARM_FEATURE_SME -#define MODE_ATTR __arm_streaming -#else -#define MODE_ATTR -#endif - - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. 
-#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 -#endif - -// CHECK-LABEL: @test_svrev_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.rev.nxv8bf16( [[OP:%.*]]) -// CHECK-NEXT: ret [[TMP0]] -// -// CPP-CHECK-LABEL: @_Z15test_svrev_bf16u14__SVBfloat16_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.rev.nxv8bf16( [[OP:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP0]] -// -svbfloat16_t test_svrev_bf16(svbfloat16_t op) MODE_ATTR -{ - return SVE_ACLE_FUNC(svrev,_bf16,,)(op); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rev.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rev.c index 3c0ae7df79644..839eee402d4b8 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rev.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rev.c @@ -246,3 +246,18 @@ svbool_t test_svrev_b64(svbool_t op) MODE_ATTR { return svrev_b64(op); } + +// CHECK-LABEL: @test_svrev_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.rev.nxv8bf16( [[OP:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z15test_svrev_bf16u14__SVBfloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.rev.nxv8bf16( [[OP:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svbfloat16_t test_svrev_bf16(svbfloat16_t op) MODE_ATTR +{ + return SVE_ACLE_FUNC(svrev,_bf16,,)(op); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_sel-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_sel-bfloat.c deleted file mode 100644 index 82be1904f6770..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_sel-bfloat.c +++ /dev/null @@ -1,41 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 
-target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s - -#include - -#if defined __ARM_FEATURE_SME -#define MODE_ATTR __arm_streaming -#else -#define MODE_ATTR -#endif - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. 
-#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 -#endif - -// CHECK-LABEL: @test_svsel_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = select [[TMP0]], [[OP1:%.*]], [[OP2:%.*]] -// CHECK-NEXT: ret [[TMP1]] -// -// CPP-CHECK-LABEL: @_Z15test_svsel_bf16u10__SVBool_tu14__SVBfloat16_tS0_( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = select [[TMP0]], [[OP1:%.*]], [[OP2:%.*]] -// CPP-CHECK-NEXT: ret [[TMP1]] -// -svbfloat16_t test_svsel_bf16(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR -{ - // expected-warning@+1 {{implicit declaration of function 'svsel_bf16'}} - return SVE_ACLE_FUNC(svsel,_bf16,,)(pg, op1, op2); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_sel.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_sel.c index 9cf7f4d7f45cc..62c63d0c18b8a 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_sel.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_sel.c @@ -219,3 +219,21 @@ svbool_t test_svsel_b(svbool_t pg, svbool_t op1, svbool_t op2) MODE_ATTR { return SVE_ACLE_FUNC(svsel,_b,,)(pg, op1, op2); } + +// CHECK-LABEL: @test_svsel_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = select [[TMP0]], [[OP1:%.*]], [[OP2:%.*]] +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z15test_svsel_bf16u10__SVBool_tu14__SVBfloat16_tS0_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = select [[TMP0]], [[OP1:%.*]], [[OP2:%.*]] +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svbfloat16_t test_svsel_bf16(svbool_t 
pg, svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR +{ + // expected-warning@+1 {{implicit declaration of function 'svsel_bf16'}} + return SVE_ACLE_FUNC(svsel,_bf16,,)(pg, op1, op2); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_set2-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_set2-bfloat.c deleted file mode 100644 index edd30278a9714..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_set2-bfloat.c +++ /dev/null @@ -1,61 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s - -#include - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... 
macro, long enough to represent any SVE builtin. -#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 -#endif - -#ifdef __ARM_FEATURE_SME -#define ATTR __arm_streaming -#else -#define ATTR -#endif - -// CHECK-LABEL: @test_svset2_bf16_0( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 -// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 -// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , } [[TMP1]], [[X:%.*]], 0 -// CHECK-NEXT: ret { , } [[TMP2]] -// -// CPP-CHECK-LABEL: @_Z18test_svset2_bf16_014svbfloat16x2_tu14__SVBfloat16_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 -// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , } [[TMP1]], [[X:%.*]], 0 -// CPP-CHECK-NEXT: ret { , } [[TMP2]] -// -svbfloat16x2_t test_svset2_bf16_0(svbfloat16x2_t tuple, svbfloat16_t x) ATTR -{ - return SVE_ACLE_FUNC(svset2,_bf16,,)(tuple, 0, x); -} - -// CHECK-LABEL: @test_svset2_bf16_1( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 -// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 -// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , } [[TMP1]], [[X:%.*]], 1 -// CHECK-NEXT: ret { , } [[TMP2]] -// -// CPP-CHECK-LABEL: @_Z18test_svset2_bf16_114svbfloat16x2_tu14__SVBfloat16_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 -// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , } [[TMP1]], [[X:%.*]], 1 -// CPP-CHECK-NEXT: ret { , } [[TMP2]] -// -svbfloat16x2_t test_svset2_bf16_1(svbfloat16x2_t tuple, svbfloat16_t x) ATTR -{ - return 
SVE_ACLE_FUNC(svset2,_bf16,,)(tuple, 1, x); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_set2.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_set2.c index bdf57b42c8d8b..661591aded044 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_set2.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_set2.c @@ -249,3 +249,41 @@ svmfloat8x2_t test_svset2_mf8(svmfloat8x2_t tuple, svmfloat8_t x) ATTR { return SVE_ACLE_FUNC(svset2,_mf8,,)(tuple, 1, x); } + +// CHECK-LABEL: @test_svset2_bf16_0( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , } [[TMP1]], [[X:%.*]], 0 +// CHECK-NEXT: ret { , } [[TMP2]] +// +// CPP-CHECK-LABEL: @_Z18test_svset2_bf16_014svbfloat16x2_tu14__SVBfloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , } [[TMP1]], [[X:%.*]], 0 +// CPP-CHECK-NEXT: ret { , } [[TMP2]] +// +svbfloat16x2_t test_svset2_bf16_0(svbfloat16x2_t tuple, svbfloat16_t x) ATTR +{ + return SVE_ACLE_FUNC(svset2,_bf16,,)(tuple, 0, x); +} + +// CHECK-LABEL: @test_svset2_bf16_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , } [[TMP1]], [[X:%.*]], 1 +// CHECK-NEXT: ret { , } [[TMP2]] +// +// CPP-CHECK-LABEL: @_Z18test_svset2_bf16_114svbfloat16x2_tu14__SVBfloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// 
CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , } [[TMP1]], [[X:%.*]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP2]] +// +svbfloat16x2_t test_svset2_bf16_1(svbfloat16x2_t tuple, svbfloat16_t x) ATTR +{ + return SVE_ACLE_FUNC(svset2,_bf16,,)(tuple, 1, x); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_set3-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_set3-bfloat.c deleted file mode 100644 index 35cf63c44b6aa..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_set3-bfloat.c +++ /dev/null @@ -1,86 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s - -#include - -#ifdef 
SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. -#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 -#endif - -#ifdef __ARM_FEATURE_SME -#define ATTR __arm_streaming -#else -#define ATTR -#endif - -// CHECK-LABEL: @test_svset3_bf16_0( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 -// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 -// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 -// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , } [[TMP2]], [[X:%.*]], 0 -// CHECK-NEXT: ret { , , } [[TMP3]] -// -// CPP-CHECK-LABEL: @_Z18test_svset3_bf16_014svbfloat16x3_tu14__SVBfloat16_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 -// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , } [[TMP2]], [[X:%.*]], 0 -// CPP-CHECK-NEXT: ret { , , } [[TMP3]] -// -svbfloat16x3_t test_svset3_bf16_0(svbfloat16x3_t tuple, svbfloat16_t x) ATTR -{ - return SVE_ACLE_FUNC(svset3,_bf16,,)(tuple, 0, x); -} - -// CHECK-LABEL: @test_svset3_bf16_1( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 -// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 -// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 -// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , } [[TMP2]], [[X:%.*]], 1 -// CHECK-NEXT: ret { , , } [[TMP3]] -// -// CPP-CHECK-LABEL: @_Z18test_svset3_bf16_114svbfloat16x3_tu14__SVBfloat16_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, 
[[TUPLE_COERCE0:%.*]], 0 -// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , } [[TMP2]], [[X:%.*]], 1 -// CPP-CHECK-NEXT: ret { , , } [[TMP3]] -// -svbfloat16x3_t test_svset3_bf16_1(svbfloat16x3_t tuple, svbfloat16_t x) ATTR -{ - return SVE_ACLE_FUNC(svset3,_bf16,,)(tuple, 1, x); -} - -// CHECK-LABEL: @test_svset3_bf16_2( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 -// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 -// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 -// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , } [[TMP2]], [[X:%.*]], 2 -// CHECK-NEXT: ret { , , } [[TMP3]] -// -// CPP-CHECK-LABEL: @_Z18test_svset3_bf16_214svbfloat16x3_tu14__SVBfloat16_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 -// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , } [[TMP2]], [[X:%.*]], 2 -// CPP-CHECK-NEXT: ret { , , } [[TMP3]] -// -svbfloat16x3_t test_svset3_bf16_2(svbfloat16x3_t tuple, svbfloat16_t x) ATTR -{ - return SVE_ACLE_FUNC(svset3,_bf16,,)(tuple, 2, x); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_set3.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_set3.c index 34f7a9ae6a38c..5eebd63231b1a 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_set3.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_set3.c @@ -275,3 +275,66 @@ svmfloat8x3_t test_svset3_mf8(svmfloat8x3_t tuple, svmfloat8_t x) ATTR { return SVE_ACLE_FUNC(svset3,_mf8,,)(tuple, 1, x); } + +// CHECK-LABEL: 
@test_svset3_bf16_0( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , } [[TMP2]], [[X:%.*]], 0 +// CHECK-NEXT: ret { , , } [[TMP3]] +// +// CPP-CHECK-LABEL: @_Z18test_svset3_bf16_014svbfloat16x3_tu14__SVBfloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , } [[TMP2]], [[X:%.*]], 0 +// CPP-CHECK-NEXT: ret { , , } [[TMP3]] +// +svbfloat16x3_t test_svset3_bf16_0(svbfloat16x3_t tuple, svbfloat16_t x) ATTR +{ + return SVE_ACLE_FUNC(svset3,_bf16,,)(tuple, 0, x); +} + +// CHECK-LABEL: @test_svset3_bf16_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , } [[TMP2]], [[X:%.*]], 1 +// CHECK-NEXT: ret { , , } [[TMP3]] +// +// CPP-CHECK-LABEL: @_Z18test_svset3_bf16_114svbfloat16x3_tu14__SVBfloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , } [[TMP2]], [[X:%.*]], 1 +// CPP-CHECK-NEXT: ret { , , } [[TMP3]] +// +svbfloat16x3_t 
test_svset3_bf16_1(svbfloat16x3_t tuple, svbfloat16_t x) ATTR +{ + return SVE_ACLE_FUNC(svset3,_bf16,,)(tuple, 1, x); +} + +// CHECK-LABEL: @test_svset3_bf16_2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , } [[TMP2]], [[X:%.*]], 2 +// CHECK-NEXT: ret { , , } [[TMP3]] +// +// CPP-CHECK-LABEL: @_Z18test_svset3_bf16_214svbfloat16x3_tu14__SVBfloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , } [[TMP2]], [[X:%.*]], 2 +// CPP-CHECK-NEXT: ret { , , } [[TMP3]] +// +svbfloat16x3_t test_svset3_bf16_2(svbfloat16x3_t tuple, svbfloat16_t x) ATTR +{ + return SVE_ACLE_FUNC(svset3,_bf16,,)(tuple, 2, x); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_set4-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_set4-bfloat.c deleted file mode 100644 index 2f6035e6a88cc..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_set4-bfloat.c +++ /dev/null @@ -1,115 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt 
-S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s - -#include - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. 
-#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 -#endif - -#ifdef __ARM_FEATURE_SME -#define ATTR __arm_streaming -#else -#define ATTR -#endif - -// CHECK-LABEL: @test_svset4_bf16_0( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 -// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 -// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 -// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 -// CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 0 -// CHECK-NEXT: ret { , , , } [[TMP4]] -// -// CPP-CHECK-LABEL: @_Z18test_svset4_bf16_014svbfloat16x4_tu14__SVBfloat16_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 -// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 0 -// CPP-CHECK-NEXT: ret { , , , } [[TMP4]] -// -svbfloat16x4_t test_svset4_bf16_0(svbfloat16x4_t tuple, svbfloat16_t x) ATTR -{ - return SVE_ACLE_FUNC(svset4,_bf16,,)(tuple, 0, x); -} - -// CHECK-LABEL: @test_svset4_bf16_1( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 -// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 -// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 -// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 -// CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 1 -// CHECK-NEXT: ret { , , , } 
[[TMP4]] -// -// CPP-CHECK-LABEL: @_Z18test_svset4_bf16_114svbfloat16x4_tu14__SVBfloat16_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 -// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 1 -// CPP-CHECK-NEXT: ret { , , , } [[TMP4]] -// -svbfloat16x4_t test_svset4_bf16_1(svbfloat16x4_t tuple, svbfloat16_t x) ATTR -{ - return SVE_ACLE_FUNC(svset4,_bf16,,)(tuple, 1, x); -} - -// CHECK-LABEL: @test_svset4_bf16_2( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 -// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 -// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 -// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 -// CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 2 -// CHECK-NEXT: ret { , , , } [[TMP4]] -// -// CPP-CHECK-LABEL: @_Z18test_svset4_bf16_214svbfloat16x4_tu14__SVBfloat16_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 -// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 2 -// CPP-CHECK-NEXT: ret { , , , } [[TMP4]] -// -svbfloat16x4_t test_svset4_bf16_2(svbfloat16x4_t tuple, svbfloat16_t x) ATTR -{ - return 
SVE_ACLE_FUNC(svset4,_bf16,,)(tuple, 2, x); -} - -// CHECK-LABEL: @test_svset4_bf16_3( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 -// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 -// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 -// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 -// CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 3 -// CHECK-NEXT: ret { , , , } [[TMP4]] -// -// CPP-CHECK-LABEL: @_Z18test_svset4_bf16_314svbfloat16x4_tu14__SVBfloat16_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 -// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 3 -// CPP-CHECK-NEXT: ret { , , , } [[TMP4]] -// -svbfloat16x4_t test_svset4_bf16_3(svbfloat16x4_t tuple, svbfloat16_t x) ATTR -{ - return SVE_ACLE_FUNC(svset4,_bf16,,)(tuple, 3, x); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_set4.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_set4.c index 06df3c1daee67..fe54445010743 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_set4.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_set4.c @@ -297,3 +297,95 @@ svmfloat8x4_t test_svset4_mf8(svmfloat8x4_t tuple, svmfloat8_t x) ATTR { return SVE_ACLE_FUNC(svset4,_mf8,,)(tuple, 1, x); } + +// CHECK-LABEL: @test_svset4_bf16_0( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 
+// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 0 +// CHECK-NEXT: ret { , , , } [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z18test_svset4_bf16_014svbfloat16x4_tu14__SVBfloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 0 +// CPP-CHECK-NEXT: ret { , , , } [[TMP4]] +// +svbfloat16x4_t test_svset4_bf16_0(svbfloat16x4_t tuple, svbfloat16_t x) ATTR +{ + return SVE_ACLE_FUNC(svset4,_bf16,,)(tuple, 0, x); +} + +// CHECK-LABEL: @test_svset4_bf16_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 1 +// CHECK-NEXT: ret { , , , } [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z18test_svset4_bf16_114svbfloat16x4_tu14__SVBfloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } 
[[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 1 +// CPP-CHECK-NEXT: ret { , , , } [[TMP4]] +// +svbfloat16x4_t test_svset4_bf16_1(svbfloat16x4_t tuple, svbfloat16_t x) ATTR +{ + return SVE_ACLE_FUNC(svset4,_bf16,,)(tuple, 1, x); +} + +// CHECK-LABEL: @test_svset4_bf16_2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 2 +// CHECK-NEXT: ret { , , , } [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z18test_svset4_bf16_214svbfloat16x4_tu14__SVBfloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 2 +// CPP-CHECK-NEXT: ret { , , , } [[TMP4]] +// +svbfloat16x4_t test_svset4_bf16_2(svbfloat16x4_t tuple, svbfloat16_t x) ATTR +{ + return SVE_ACLE_FUNC(svset4,_bf16,,)(tuple, 2, x); +} + +// CHECK-LABEL: @test_svset4_bf16_3( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CHECK-NEXT: 
[[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 3 +// CHECK-NEXT: ret { , , , } [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z18test_svset4_bf16_314svbfloat16x4_tu14__SVBfloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 3 +// CPP-CHECK-NEXT: ret { , , , } [[TMP4]] +// +svbfloat16x4_t test_svset4_bf16_3(svbfloat16x4_t tuple, svbfloat16_t x) ATTR +{ + return SVE_ACLE_FUNC(svset4,_bf16,,)(tuple, 3, x); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_splice-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_splice-bfloat.c deleted file mode 100644 index 266e32fb59dd3..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_splice-bfloat.c +++ /dev/null @@ -1,41 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 
-fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s - -#include - -#if defined __ARM_FEATURE_SME -#define MODE_ATTR __arm_streaming -#else -#define MODE_ATTR -#endif - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. -#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 -#endif - -// CHECK-LABEL: @test_svsplice_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.splice.nxv8bf16( [[TMP0]], [[OP1:%.*]], [[OP2:%.*]]) -// CHECK-NEXT: ret [[TMP1]] -// -// CPP-CHECK-LABEL: @_Z18test_svsplice_bf16u10__SVBool_tu14__SVBfloat16_tS0_( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.splice.nxv8bf16( [[TMP0]], [[OP1:%.*]], [[OP2:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP1]] -// -svbfloat16_t test_svsplice_bf16(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR -{ - // expected-warning@+1 {{implicit declaration of function 'svsplice_bf16'}} - return SVE_ACLE_FUNC(svsplice,_bf16,,)(pg, op1, op2); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_splice.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_splice.c index fe4e910e37aae..58d194507a175 100644 --- 
a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_splice.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_splice.c @@ -204,3 +204,21 @@ svfloat64_t test_svsplice_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MOD { return SVE_ACLE_FUNC(svsplice,_f64,,)(pg, op1, op2); } + +// CHECK-LABEL: @test_svsplice_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.splice.nxv8bf16( [[TMP0]], [[OP1:%.*]], [[OP2:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z18test_svsplice_bf16u10__SVBool_tu14__SVBfloat16_tS0_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.splice.nxv8bf16( [[TMP0]], [[OP1:%.*]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svbfloat16_t test_svsplice_bf16(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR +{ + // expected-warning@+1 {{implicit declaration of function 'svsplice_bf16'}} + return SVE_ACLE_FUNC(svsplice,_bf16,,)(pg, op1, op2); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st1-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st1-bfloat.c deleted file mode 100644 index 1d194626418a2..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st1-bfloat.c +++ /dev/null @@ -1,66 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s 
| opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s - -#include - -#if defined __ARM_FEATURE_SME -#define MODE_ATTR __arm_streaming -#else -#define MODE_ATTR -#endif - - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. 
-#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 -#endif - -// CHECK-LABEL: @test_svst1_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: tail call void @llvm.masked.store.nxv8bf16.p0( [[DATA:%.*]], ptr [[BASE:%.*]], i32 1, [[TMP0]]) -// CHECK-NEXT: ret void -// -// CPP-CHECK-LABEL: @_Z15test_svst1_bf16u10__SVBool_tPu6__bf16u14__SVBfloat16_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.masked.store.nxv8bf16.p0( [[DATA:%.*]], ptr [[BASE:%.*]], i32 1, [[TMP0]]) -// CPP-CHECK-NEXT: ret void -// -void test_svst1_bf16(svbool_t pg, bfloat16_t *base, svbfloat16_t data) MODE_ATTR -{ - return SVE_ACLE_FUNC(svst1,_bf16,,)(pg, base, data); -} - -// CHECK-LABEL: @test_svst1_vnum_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.masked.store.nxv8bf16.p0( [[DATA:%.*]], ptr [[TMP3]], i32 1, [[TMP0]]) -// CHECK-NEXT: ret void -// -// CPP-CHECK-LABEL: @_Z20test_svst1_vnum_bf16u10__SVBool_tPu6__bf16lu14__SVBfloat16_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] -// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] 
-// CPP-CHECK-NEXT: tail call void @llvm.masked.store.nxv8bf16.p0( [[DATA:%.*]], ptr [[TMP3]], i32 1, [[TMP0]]) -// CPP-CHECK-NEXT: ret void -// -void test_svst1_vnum_bf16(svbool_t pg, bfloat16_t *base, int64_t vnum, svbfloat16_t data) MODE_ATTR -{ - return SVE_ACLE_FUNC(svst1_vnum,_bf16,,)(pg, base, vnum, data); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st1.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st1.c index 21350007da86f..56f8c32c23099 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st1.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st1.c @@ -1243,3 +1243,45 @@ void test_svst1_scatter_u64base_index_f64(svbool_t pg, svuint64_t bases, int64_t } #endif + +// CHECK-LABEL: @test_svst1_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.masked.store.nxv8bf16.p0( [[DATA:%.*]], ptr [[BASE:%.*]], i32 1, [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z15test_svst1_bf16u10__SVBool_tPu6__bf16u14__SVBfloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.masked.store.nxv8bf16.p0( [[DATA:%.*]], ptr [[BASE:%.*]], i32 1, [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_bf16(svbool_t pg, bfloat16_t *base, svbfloat16_t data) MODE_ATTR +{ + return SVE_ACLE_FUNC(svst1,_bf16,,)(pg, base, data); +} + +// CHECK-LABEL: @test_svst1_vnum_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void 
@llvm.masked.store.nxv8bf16.p0( [[DATA:%.*]], ptr [[TMP3]], i32 1, [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svst1_vnum_bf16u10__SVBool_tPu6__bf16lu14__SVBfloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.masked.store.nxv8bf16.p0( [[DATA:%.*]], ptr [[TMP3]], i32 1, [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_vnum_bf16(svbool_t pg, bfloat16_t *base, int64_t vnum, svbfloat16_t data) MODE_ATTR +{ + return SVE_ACLE_FUNC(svst1_vnum,_bf16,,)(pg, base, vnum, data); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st2-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st2-bfloat.c deleted file mode 100644 index 726aae2fa78a1..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st2-bfloat.c +++ /dev/null @@ -1,75 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 
-DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s - -#include - -#if defined __ARM_FEATURE_SME -#define MODE_ATTR __arm_streaming -#else -#define MODE_ATTR -#endif - - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. -#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 -#endif -// CHECK-LABEL: @test_svst2_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 -// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 -// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8bf16( [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) -// CHECK-NEXT: ret void -// -// CPP-CHECK-LABEL: @_Z15test_svst2_bf16u10__SVBool_tPu6__bf1614svbfloat16x2_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 -// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// 
CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8bf16( [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret void -// -void test_svst2_bf16(svbool_t pg, bfloat16_t *base, svbfloat16x2_t data) MODE_ATTR -{ - return SVE_ACLE_FUNC(svst2,_bf16,,)(pg, base, data); -} - -// CHECK-LABEL: @test_svst2_vnum_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 -// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 -// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8bf16( [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) -// CHECK-NEXT: ret void -// -// CPP-CHECK-LABEL: @_Z20test_svst2_vnum_bf16u10__SVBool_tPu6__bf16l14svbfloat16x2_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 -// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8bf16( [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) -// CPP-CHECK-NEXT: ret void -// -void test_svst2_vnum_bf16(svbool_t pg, bfloat16_t *base, int64_t vnum, svbfloat16x2_t data) MODE_ATTR -{ - return SVE_ACLE_FUNC(svst2_vnum,_bf16,,)(pg, base, vnum, data); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st2.c 
b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st2.c index 9e73e4464c6f9..cf6d1be579472 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st2.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st2.c @@ -633,3 +633,55 @@ void test_svst2_vnum_mf8(svbool_t pg, mfloat8_t *base, int64_t vnum, svmfloat8x2 { return SVE_ACLE_FUNC(svst2_vnum,_mf8,,)(pg, base, vnum, data); } + +// CHECK-LABEL: @test_svst2_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8bf16( [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z15test_svst2_bf16u10__SVBool_tPu6__bf1614svbfloat16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8bf16( [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst2_bf16(svbool_t pg, bfloat16_t *base, svbfloat16x2_t data) MODE_ATTR +{ + return SVE_ACLE_FUNC(svst2,_bf16,,)(pg, base, data); +} + +// CHECK-LABEL: @test_svst2_vnum_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], 
[[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8bf16( [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svst2_vnum_bf16u10__SVBool_tPu6__bf16l14svbfloat16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8bf16( [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst2_vnum_bf16(svbool_t pg, bfloat16_t *base, int64_t vnum, svbfloat16x2_t data) MODE_ATTR +{ + return SVE_ACLE_FUNC(svst2_vnum,_bf16,,)(pg, base, vnum, data); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st3-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st3-bfloat.c deleted file mode 100644 index 2a71029a8f573..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st3-bfloat.c +++ /dev/null @@ -1,84 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s 
-// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s - -#include - -#if defined __ARM_FEATURE_SME -#define MODE_ATTR __arm_streaming -#else -#define MODE_ATTR -#endif - - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. 
-#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 -#endif - -// CHECK-LABEL: @test_svst3_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 -// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 -// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8bf16( [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], ptr [[BASE:%.*]]) -// CHECK-NEXT: ret void -// -// CPP-CHECK-LABEL: @_Z15test_svst3_bf16u10__SVBool_tPu6__bf1614svbfloat16x3_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 -// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8bf16( [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret void -// -void test_svst3_bf16(svbool_t pg, bfloat16_t *base, svbfloat16x3_t data) MODE_ATTR -{ - return SVE_ACLE_FUNC(svst3,_bf16,,)(pg, base, data); -} - -// CHECK-LABEL: @test_svst3_vnum_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, 
[[DATA_COERCE0:%.*]], 0 -// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 -// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP7:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8bf16( [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], ptr [[TMP7]]) -// CHECK-NEXT: ret void -// -// CPP-CHECK-LABEL: @_Z20test_svst3_vnum_bf16u10__SVBool_tPu6__bf16l14svbfloat16x3_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 -// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8bf16( [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], ptr [[TMP7]]) -// CPP-CHECK-NEXT: ret void -// -void test_svst3_vnum_bf16(svbool_t pg, bfloat16_t *base, int64_t vnum, svbfloat16x3_t data) MODE_ATTR -{ - return SVE_ACLE_FUNC(svst3_vnum,_bf16,,)(pg, base, vnum, data); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st3.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st3.c index b693b693b1ebb..6241e709854c9 100644 --- 
a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st3.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st3.c @@ -729,3 +729,63 @@ void test_svst3_vnum_mf8(svbool_t pg, mfloat8_t *base, int64_t vnum, svmfloat8x3 { return SVE_ACLE_FUNC(svst3_vnum,_mf8,,)(pg, base, vnum, data); } + +// CHECK-LABEL: @test_svst3_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8bf16( [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z15test_svst3_bf16u10__SVBool_tPu6__bf1614svbfloat16x3_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8bf16( [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst3_bf16(svbool_t pg, bfloat16_t *base, svbfloat16x3_t data) MODE_ATTR +{ + return 
SVE_ACLE_FUNC(svst3,_bf16,,)(pg, base, data); +} + +// CHECK-LABEL: @test_svst3_vnum_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8bf16( [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], ptr [[TMP7]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svst3_vnum_bf16u10__SVBool_tPu6__bf16l14svbfloat16x3_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8bf16( [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], ptr [[TMP7]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst3_vnum_bf16(svbool_t pg, bfloat16_t *base, int64_t vnum, svbfloat16x3_t data) MODE_ATTR +{ + return SVE_ACLE_FUNC(svst3_vnum,_bf16,,)(pg, base, vnum, data); +} diff --git 
a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st4-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st4-bfloat.c deleted file mode 100644 index 1f4c4fde8ad1b..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st4-bfloat.c +++ /dev/null @@ -1,92 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s - -#include - -#if defined __ARM_FEATURE_SME -#define MODE_ATTR __arm_streaming -#else -#define MODE_ATTR -#endif - - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. 
-#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 -#endif - -// CHECK-LABEL: @test_svst4_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 -// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 -// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 -// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8bf16( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[TMP8]], ptr [[BASE:%.*]]) -// CHECK-NEXT: ret void -// -// CPP-CHECK-LABEL: @_Z15test_svst4_bf16u10__SVBool_tPu6__bf1614svbfloat16x4_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 -// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8bf16( [[TMP4]], [[TMP5]], 
[[TMP6]], [[TMP7]], [[TMP8]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret void -// -void test_svst4_bf16(svbool_t pg, bfloat16_t *base, svbfloat16x4_t data) MODE_ATTR -{ - return SVE_ACLE_FUNC(svst4,_bf16,,)(pg, base, data); -} - -// CHECK-LABEL: @test_svst4_vnum_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 -// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 -// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 -// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP9:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8bf16( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[TMP8]], ptr [[TMP9]]) -// CHECK-NEXT: ret void -// -// CPP-CHECK-LABEL: @_Z20test_svst4_vnum_bf16u10__SVBool_tPu6__bf16l14svbfloat16x4_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 -// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// 
CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8bf16( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[TMP8]], ptr [[TMP9]]) -// CPP-CHECK-NEXT: ret void -// -void test_svst4_vnum_bf16(svbool_t pg, bfloat16_t *base, int64_t vnum, svbfloat16x4_t data) MODE_ATTR -{ - return SVE_ACLE_FUNC(svst4_vnum,_bf16,,)(pg, base, vnum, data); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st4.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st4.c index f8c3b60682573..0536a8d265b4c 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st4.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st4.c @@ -825,3 +825,71 @@ void test_svst4_vnum_mf8(svbool_t pg, mfloat8_t *base, int64_t vnum, svmfloat8x4 { return SVE_ACLE_FUNC(svst4_vnum,_mf8,,)(pg, base, vnum, data); } + +// CHECK-LABEL: @test_svst4_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8bf16( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[TMP8]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z15test_svst4_bf16u10__SVBool_tPu6__bf1614svbfloat16x4_t( 
+// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8bf16( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[TMP8]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst4_bf16(svbool_t pg, bfloat16_t *base, svbfloat16x4_t data) MODE_ATTR +{ + return SVE_ACLE_FUNC(svst4,_bf16,,)(pg, base, data); +} + +// CHECK-LABEL: @test_svst4_vnum_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void 
@llvm.aarch64.sve.st4.nxv8bf16( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[TMP8]], ptr [[TMP9]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svst4_vnum_bf16u10__SVBool_tPu6__bf16l14svbfloat16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8bf16( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[TMP8]], ptr [[TMP9]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst4_vnum_bf16(svbool_t pg, bfloat16_t *base, int64_t vnum, svbfloat16x4_t data) MODE_ATTR +{ + return SVE_ACLE_FUNC(svst4_vnum,_bf16,,)(pg, base, vnum, data); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_stnt1-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_stnt1-bfloat.c deleted file mode 100644 index c13f7d82caac0..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_stnt1-bfloat.c +++ /dev/null @@ -1,60 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | 
opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s - -#include - -#if defined __ARM_FEATURE_SME -#define MODE_ATTR __arm_streaming -#else -#define MODE_ATTR -#endif - - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. 
-#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 -#endif - -// CHECK-LABEL: @test_svstnt1_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.nxv8bf16( [[DATA:%.*]], [[TMP0]], ptr [[BASE:%.*]]) -// CHECK-NEXT: ret void -// -// CPP-CHECK-LABEL: @_Z17test_svstnt1_bf16u10__SVBool_tPu6__bf16u14__SVBfloat16_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.nxv8bf16( [[DATA:%.*]], [[TMP0]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: ret void -// -void test_svstnt1_bf16(svbool_t pg, bfloat16_t *base, svbfloat16_t data) MODE_ATTR -{ - return SVE_ACLE_FUNC(svstnt1,_bf16,,)(pg, base, data); -} - -// CHECK-LABEL: @test_svstnt1_vnum_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.nxv8bf16( [[DATA:%.*]], [[TMP0]], ptr [[TMP1]]) -// CHECK-NEXT: ret void -// -// CPP-CHECK-LABEL: @_Z22test_svstnt1_vnum_bf16u10__SVBool_tPu6__bf16lu14__SVBfloat16_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.nxv8bf16( [[DATA:%.*]], [[TMP0]], ptr [[TMP1]]) -// CPP-CHECK-NEXT: ret void -// -void test_svstnt1_vnum_bf16(svbool_t pg, bfloat16_t *base, int64_t vnum, svbfloat16_t data) MODE_ATTR -{ - return SVE_ACLE_FUNC(svstnt1_vnum,_bf16,,)(pg, base, vnum, data); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_stnt1.c 
b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_stnt1.c index f739ea5dca641..4f6b5000e82f1 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_stnt1.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_stnt1.c @@ -442,3 +442,39 @@ void test_svstnt1_vnum_mf8(svbool_t pg, mfloat8_t *base, int64_t vnum, svmfloat8 { return SVE_ACLE_FUNC(svstnt1_vnum,_mf8,,)(pg, base, vnum, data); } + +// CHECK-LABEL: @test_svstnt1_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.nxv8bf16( [[DATA:%.*]], [[TMP0]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z17test_svstnt1_bf16u10__SVBool_tPu6__bf16u14__SVBfloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.nxv8bf16( [[DATA:%.*]], [[TMP0]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_bf16(svbool_t pg, bfloat16_t *base, svbfloat16_t data) MODE_ATTR +{ + return SVE_ACLE_FUNC(svstnt1,_bf16,,)(pg, base, data); +} + +// CHECK-LABEL: @test_svstnt1_vnum_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.nxv8bf16( [[DATA:%.*]], [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svstnt1_vnum_bf16u10__SVBool_tPu6__bf16lu14__SVBfloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.nxv8bf16( [[DATA:%.*]], [[TMP0]], ptr [[TMP1]]) 
+// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_vnum_bf16(svbool_t pg, bfloat16_t *base, int64_t vnum, svbfloat16_t data) MODE_ATTR +{ + return SVE_ACLE_FUNC(svstnt1_vnum,_bf16,,)(pg, base, vnum, data); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_tbl-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_tbl-bfloat.c deleted file mode 100644 index d4b6b6842fb9a..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_tbl-bfloat.c +++ /dev/null @@ -1,39 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK - -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s - -#include - -#if defined __ARM_FEATURE_SME -#define MODE_ATTR __arm_streaming -#else -#define 
MODE_ATTR -#endif - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. -#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4 -#endif - -// CHECK-LABEL: @test_svtbl_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.tbl.nxv8bf16( [[DATA:%.*]], [[INDICES:%.*]]) -// CHECK-NEXT: ret [[TMP0]] -// -// CPP-CHECK-LABEL: @_Z15test_svtbl_bf16u14__SVBfloat16_tu12__SVUint16_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.tbl.nxv8bf16( [[DATA:%.*]], [[INDICES:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP0]] -// -svbfloat16_t test_svtbl_bf16(svbfloat16_t data, svuint16_t indices) MODE_ATTR { - // expected-warning@+1 {{implicit declaration of function 'svtbl_bf16'}} - return SVE_ACLE_FUNC(svtbl, _bf16, , )(data, indices); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_tbl.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_tbl.c index 89fa47b5f7974..607a38933623a 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_tbl.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_tbl.c @@ -186,3 +186,18 @@ svfloat64_t test_svtbl_f64(svfloat64_t data, svuint64_t indices) MODE_ATTR { return SVE_ACLE_FUNC(svtbl,_f64,,)(data, indices); } + +// CHECK-LABEL: @test_svtbl_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.tbl.nxv8bf16( [[DATA:%.*]], [[INDICES:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z15test_svtbl_bf16u14__SVBfloat16_tu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.tbl.nxv8bf16( [[DATA:%.*]], [[INDICES:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svbfloat16_t test_svtbl_bf16(svbfloat16_t data, svuint16_t indices) MODE_ATTR { + // expected-warning@+1 {{implicit declaration of function 'svtbl_bf16'}} + return SVE_ACLE_FUNC(svtbl, _bf16, 
, )(data, indices); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_trn1-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_trn1-bfloat.c deleted file mode 100644 index a6c6dcc571e28..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_trn1-bfloat.c +++ /dev/null @@ -1,39 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s - -#include - -#if defined __ARM_FEATURE_SME -#define MODE_ATTR __arm_streaming -#else -#define MODE_ATTR -#endif - - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. 
-#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 -#endif - -// CHECK-LABEL: @test_svtrn1_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.trn1.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]]) -// CHECK-NEXT: ret [[TMP0]] -// -// CPP-CHECK-LABEL: @_Z16test_svtrn1_bf16u14__SVBfloat16_tS_( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.trn1.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP0]] -// -svbfloat16_t test_svtrn1_bf16(svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR -{ - return SVE_ACLE_FUNC(svtrn1,_bf16,,)(op1, op2); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_trn1-fp64-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_trn1-fp64-bfloat.c deleted file mode 100644 index ea1d515a9b4b2..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_trn1-fp64-bfloat.c +++ /dev/null @@ -1,30 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -fclang-abi-compat=latest -target-feature +f64mm -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -target-feature +f64mm -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -target-feature +f64mm -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -target-feature +f64mm -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK - -// REQUIRES: aarch64-registered-target - -#include - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. -#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4 -#endif - -// CHECK-LABEL: @test_svtrn1_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.trn1q.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]]) -// CHECK-NEXT: ret [[TMP0]] -// -// CPP-CHECK-LABEL: @_Z16test_svtrn1_bf16u14__SVBfloat16_tS_( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.trn1q.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP0]] -// -svbfloat16_t test_svtrn1_bf16(svbfloat16_t op1, svbfloat16_t op2) { - return SVE_ACLE_FUNC(svtrn1q, _bf16, , )(op1, op2); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_trn1-fp64.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_trn1-fp64.c index 04028c7850ce3..ab1d2e6c52a77 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_trn1-fp64.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_trn1-fp64.c @@ -168,3 +168,17 @@ svfloat32_t test_svtrn1_f32(svfloat32_t op1, svfloat32_t op2) { svfloat64_t test_svtrn1_f64(svfloat64_t op1, svfloat64_t op2) { return SVE_ACLE_FUNC(svtrn1q, _f64, , )(op1, op2); } + +// CHECK-LABEL: @test_svtrn1_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.trn1q.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z16test_svtrn1_bf16u14__SVBfloat16_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.trn1q.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svbfloat16_t test_svtrn1_bf16(svbfloat16_t op1, svbfloat16_t op2) { 
+ return SVE_ACLE_FUNC(svtrn1q, _bf16, , )(op1, op2); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_trn1.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_trn1.c index f6d8ff770c600..bde209e94d230 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_trn1.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_trn1.c @@ -246,3 +246,18 @@ svbool_t test_svtrn1_b64(svbool_t op1, svbool_t op2) MODE_ATTR { return svtrn1_b64(op1, op2); } + +// CHECK-LABEL: @test_svtrn1_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.trn1.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z16test_svtrn1_bf16u14__SVBfloat16_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.trn1.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svbfloat16_t test_svtrn1_bf16(svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR +{ + return SVE_ACLE_FUNC(svtrn1,_bf16,,)(op1, op2); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_trn2-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_trn2-bfloat.c deleted file mode 100644 index 87063ac69dfac..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_trn2-bfloat.c +++ /dev/null @@ -1,39 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 
-target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s - -#include - -#if defined __ARM_FEATURE_SME -#define MODE_ATTR __arm_streaming -#else -#define MODE_ATTR -#endif - - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. -#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 -#endif - -// CHECK-LABEL: @test_svtrn2_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.trn2.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]]) -// CHECK-NEXT: ret [[TMP0]] -// -// CPP-CHECK-LABEL: @_Z16test_svtrn2_bf16u14__SVBfloat16_tS_( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.trn2.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP0]] -// -svbfloat16_t test_svtrn2_bf16(svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR -{ - return SVE_ACLE_FUNC(svtrn2,_bf16,,)(op1, op2); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_trn2-fp64-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_trn2-fp64-bfloat.c deleted file mode 100644 index d8171cc240550..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_trn2-fp64-bfloat.c +++ /dev/null @@ -1,30 +0,0 @@ -// NOTE: Assertions have been 
autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -fclang-abi-compat=latest -target-feature +f64mm -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -target-feature +f64mm -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -target-feature +f64mm -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -target-feature +f64mm -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK - -// REQUIRES: aarch64-registered-target - -#include - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. 
-#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4 -#endif - -// CHECK-LABEL: @test_svtrn2_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.trn2q.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]]) -// CHECK-NEXT: ret [[TMP0]] -// -// CPP-CHECK-LABEL: @_Z16test_svtrn2_bf16u14__SVBfloat16_tS_( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.trn2q.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP0]] -// -svbfloat16_t test_svtrn2_bf16(svbfloat16_t op1, svbfloat16_t op2) { - return SVE_ACLE_FUNC(svtrn2q, _bf16, , )(op1, op2); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_trn2-fp64.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_trn2-fp64.c index e9859192333f8..00878e423d92b 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_trn2-fp64.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_trn2-fp64.c @@ -168,3 +168,17 @@ svfloat32_t test_svtrn2_f32(svfloat32_t op1, svfloat32_t op2) { svfloat64_t test_svtrn2_f64(svfloat64_t op1, svfloat64_t op2) { return SVE_ACLE_FUNC(svtrn2q, _f64, , )(op1, op2); } + +// CHECK-LABEL: @test_svtrn2_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.trn2q.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z16test_svtrn2_bf16u14__SVBfloat16_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.trn2q.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svbfloat16_t test_svtrn2_bf16(svbfloat16_t op1, svbfloat16_t op2) { + return SVE_ACLE_FUNC(svtrn2q, _bf16, , )(op1, op2); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_trn2.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_trn2.c index 9442142bc097f..838fc9bf53e2a 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_trn2.c +++ 
b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_trn2.c @@ -246,3 +246,18 @@ svbool_t test_svtrn2_b64(svbool_t op1, svbool_t op2) MODE_ATTR { return svtrn2_b64(op1, op2); } + +// CHECK-LABEL: @test_svtrn2_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.trn2.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z16test_svtrn2_bf16u14__SVBfloat16_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.trn2.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svbfloat16_t test_svtrn2_bf16(svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR +{ + return SVE_ACLE_FUNC(svtrn2,_bf16,,)(op1, op2); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_undef-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_undef-bfloat.c deleted file mode 100644 index b15028c4b2629..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_undef-bfloat.c +++ /dev/null @@ -1,28 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s - -#include - -#if defined __ARM_FEATURE_SME -#define MODE_ATTR __arm_streaming -#else -#define MODE_ATTR -#endif - - 
-// CHECK-LABEL: @test_svundef_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: ret undef -// -// CPP-CHECK-LABEL: @_Z17test_svundef_bf16v( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: ret undef -// -svbfloat16_t test_svundef_bf16(void) MODE_ATTR -{ - return svundef_bf16(); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_undef.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_undef.c index 9b4caa7648926..0109c5f348602 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_undef.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_undef.c @@ -168,3 +168,16 @@ svmfloat8_t test_svundef_mf8(void) MODE_ATTR { return svundef_mf8(); } + +// CHECK-LABEL: @test_svundef_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret undef +// +// CPP-CHECK-LABEL: @_Z17test_svundef_bf16v( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: ret undef +// +svbfloat16_t test_svundef_bf16(void) MODE_ATTR +{ + return svundef_bf16(); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_undef2-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_undef2-bfloat.c deleted file mode 100644 index e7325a25cd33b..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_undef2-bfloat.c +++ /dev/null @@ -1,28 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple 
aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s - -#include - -#if defined __ARM_FEATURE_SME -#define MODE_ATTR __arm_streaming -#else -#define MODE_ATTR -#endif - - -// CHECK-LABEL: @test_svundef2_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: ret { , } undef -// -// CPP-CHECK-LABEL: @_Z18test_svundef2_bf16v( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: ret { , } undef -// -svbfloat16x2_t test_svundef2_bf16(void) MODE_ATTR -{ - return svundef2_bf16(); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_undef2.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_undef2.c index 27e4caffa348d..24b0f32100970 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_undef2.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_undef2.c @@ -2,8 +2,8 @@ // REQUIRES: aarch64-registered-target // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -O2 -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -O2 -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include @@ -169,3 +169,16 @@ svmfloat8x2_t test_svundef2_mf8(void) MODE_ATTR { return svundef2_mf8(); } + +// CHECK-LABEL: @test_svundef2_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret { , } undef +// +// 
CPP-CHECK-LABEL: @_Z18test_svundef2_bf16v( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: ret { , } undef +// +svbfloat16x2_t test_svundef2_bf16(void) MODE_ATTR +{ + return svundef2_bf16(); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_undef3-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_undef3-bfloat.c deleted file mode 100644 index 7a35431daead1..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_undef3-bfloat.c +++ /dev/null @@ -1,28 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s - -#include - -#if defined __ARM_FEATURE_SME -#define MODE_ATTR __arm_streaming -#else -#define MODE_ATTR -#endif - - -// CHECK-LABEL: @test_svundef3_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: ret { , , } undef -// -// CPP-CHECK-LABEL: @_Z18test_svundef3_bf16v( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: ret { , , } undef -// -svbfloat16x3_t test_svundef3_bf16(void) MODE_ATTR -{ - return svundef3_bf16(); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_undef3.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_undef3.c index 767114fb7984d..26dc671043ac0 100644 --- 
a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_undef3.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_undef3.c @@ -2,8 +2,8 @@ // REQUIRES: aarch64-registered-target // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -O2 -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -O2 -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include @@ -169,3 +169,16 @@ svmfloat8x3_t test_svundef3_mf8(void) MODE_ATTR { return svundef3_mf8(); } + +// CHECK-LABEL: @test_svundef3_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret { , , } undef +// +// CPP-CHECK-LABEL: @_Z18test_svundef3_bf16v( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: ret { , , } undef +// +svbfloat16x3_t test_svundef3_bf16(void) MODE_ATTR +{ + return svundef3_bf16(); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_undef4-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_undef4-bfloat.c deleted file mode 100644 index 431b82bb5f106..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_undef4-bfloat.c +++ /dev/null @@ -1,28 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve 
-target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s - -#include - -#if defined __ARM_FEATURE_SME -#define MODE_ATTR __arm_streaming -#else -#define MODE_ATTR -#endif - - -// CHECK-LABEL: @test_svundef4_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: ret { , , , } undef -// -// CPP-CHECK-LABEL: @_Z18test_svundef4_bf16v( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: ret { , , , } undef -// -svbfloat16x4_t test_svundef4_bf16(void) MODE_ATTR -{ - return svundef4_bf16(); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_undef4.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_undef4.c index fe0b8965ed0e7..fb25f20b0f5fd 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_undef4.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_undef4.c @@ -2,8 +2,8 @@ // REQUIRES: aarch64-registered-target // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -O2 -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -O2 -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 
-target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include @@ -169,3 +169,16 @@ svmfloat8x4_t test_svundef4_mf8(void) MODE_ATTR { return svundef4_mf8(); } + +// CHECK-LABEL: @test_svundef4_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret { , , , } undef +// +// CPP-CHECK-LABEL: @_Z18test_svundef4_bf16v( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: ret { , , , } undef +// +svbfloat16x4_t test_svundef4_bf16(void) MODE_ATTR +{ + return svundef4_bf16(); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_uzp1-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_uzp1-bfloat.c deleted file mode 100644 index 91863a400480a..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_uzp1-bfloat.c +++ /dev/null @@ -1,39 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve 
-target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s - -#include - -#if defined __ARM_FEATURE_SME -#define MODE_ATTR __arm_streaming -#else -#define MODE_ATTR -#endif - - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. -#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 -#endif - -// CHECK-LABEL: @test_svuzp1_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.uzp1.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]]) -// CHECK-NEXT: ret [[TMP0]] -// -// CPP-CHECK-LABEL: @_Z16test_svuzp1_bf16u14__SVBfloat16_tS_( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.uzp1.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP0]] -// -svbfloat16_t test_svuzp1_bf16(svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR -{ - return SVE_ACLE_FUNC(svuzp1,_bf16,,)(op1, op2); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_uzp1-fp64-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_uzp1-fp64-bfloat.c deleted file mode 100644 index 0ce3685db5071..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_uzp1-fp64-bfloat.c +++ /dev/null @@ -1,30 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -fclang-abi-compat=latest -target-feature +f64mm -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim 
| FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -target-feature +f64mm -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -target-feature +f64mm -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -target-feature +f64mm -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK - -// REQUIRES: aarch64-registered-target - -#include - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. -#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4 -#endif - -// CHECK-LABEL: @test_svuzp1_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.uzp1q.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]]) -// CHECK-NEXT: ret [[TMP0]] -// -// CPP-CHECK-LABEL: @_Z16test_svuzp1_bf16u14__SVBfloat16_tS_( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.uzp1q.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP0]] -// -svbfloat16_t test_svuzp1_bf16(svbfloat16_t op1, svbfloat16_t op2) { - return SVE_ACLE_FUNC(svuzp1q, _bf16, , )(op1, op2); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_uzp1-fp64.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_uzp1-fp64.c index c8e36d8ca2bb0..11b410da0dc8b 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_uzp1-fp64.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_uzp1-fp64.c @@ -168,3 
+168,17 @@ svfloat32_t test_svuzp1_f32(svfloat32_t op1, svfloat32_t op2) { svfloat64_t test_svuzp1_f64(svfloat64_t op1, svfloat64_t op2) { return SVE_ACLE_FUNC(svuzp1q, _f64, , )(op1, op2); } + +// CHECK-LABEL: @test_svuzp1_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.uzp1q.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z16test_svuzp1_bf16u14__SVBfloat16_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.uzp1q.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svbfloat16_t test_svuzp1_bf16(svbfloat16_t op1, svbfloat16_t op2) { + return SVE_ACLE_FUNC(svuzp1q, _bf16, , )(op1, op2); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_uzp1.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_uzp1.c index 3581c04db0858..90865a07bd307 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_uzp1.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_uzp1.c @@ -246,3 +246,18 @@ svbool_t test_svuzp1_b64(svbool_t op1, svbool_t op2) MODE_ATTR { return svuzp1_b64(op1, op2); } + +// CHECK-LABEL: @test_svuzp1_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.uzp1.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z16test_svuzp1_bf16u14__SVBfloat16_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.uzp1.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svbfloat16_t test_svuzp1_bf16(svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR +{ + return SVE_ACLE_FUNC(svuzp1,_bf16,,)(op1, op2); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_uzp2-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_uzp2-bfloat.c deleted file mode 100644 index 83ba97e40d527..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_uzp2-bfloat.c 
+++ /dev/null @@ -1,39 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s - -#include - -#if defined __ARM_FEATURE_SME -#define MODE_ATTR __arm_streaming -#else -#define MODE_ATTR -#endif - - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. 
-#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 -#endif - -// CHECK-LABEL: @test_svuzp2_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.uzp2.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]]) -// CHECK-NEXT: ret [[TMP0]] -// -// CPP-CHECK-LABEL: @_Z16test_svuzp2_bf16u14__SVBfloat16_tS_( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.uzp2.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP0]] -// -svbfloat16_t test_svuzp2_bf16(svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR -{ - return SVE_ACLE_FUNC(svuzp2,_bf16,,)(op1, op2); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_uzp2-fp64-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_uzp2-fp64-bfloat.c deleted file mode 100644 index 2cd16655fbb15..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_uzp2-fp64-bfloat.c +++ /dev/null @@ -1,30 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -fclang-abi-compat=latest -target-feature +f64mm -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -target-feature +f64mm -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -target-feature +f64mm -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -target-feature +f64mm -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK - -// REQUIRES: aarch64-registered-target - -#include - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. -#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4 -#endif - -// CHECK-LABEL: @test_svuzp2_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.uzp2q.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]]) -// CHECK-NEXT: ret [[TMP0]] -// -// CPP-CHECK-LABEL: @_Z16test_svuzp2_bf16u14__SVBfloat16_tS_( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.uzp2q.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP0]] -// -svbfloat16_t test_svuzp2_bf16(svbfloat16_t op1, svbfloat16_t op2) { - return SVE_ACLE_FUNC(svuzp2q, _bf16, , )(op1, op2); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_uzp2-fp64.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_uzp2-fp64.c index c5ced8c80c982..087bb5e7c8173 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_uzp2-fp64.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_uzp2-fp64.c @@ -168,3 +168,17 @@ svfloat32_t test_svuzp2_f32(svfloat32_t op1, svfloat32_t op2) { svfloat64_t test_svuzp2_f64(svfloat64_t op1, svfloat64_t op2) { return SVE_ACLE_FUNC(svuzp2q, _f64, , )(op1, op2); } + +// CHECK-LABEL: @test_svuzp2_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.uzp2q.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z16test_svuzp2_bf16u14__SVBfloat16_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.uzp2q.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svbfloat16_t test_svuzp2_bf16(svbfloat16_t op1, svbfloat16_t op2) { 
+ return SVE_ACLE_FUNC(svuzp2q, _bf16, , )(op1, op2); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_uzp2.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_uzp2.c index 959afbd72e090..8e00703720557 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_uzp2.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_uzp2.c @@ -246,3 +246,18 @@ svbool_t test_svuzp2_b64(svbool_t op1, svbool_t op2) MODE_ATTR { return svuzp2_b64(op1, op2); } + +// CHECK-LABEL: @test_svuzp2_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.uzp2.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z16test_svuzp2_bf16u14__SVBfloat16_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.uzp2.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svbfloat16_t test_svuzp2_bf16(svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR +{ + return SVE_ACLE_FUNC(svuzp2,_bf16,,)(op1, op2); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_zip1-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_zip1-bfloat.c deleted file mode 100644 index 31d5e34e3cd84..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_zip1-bfloat.c +++ /dev/null @@ -1,39 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 
-target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s - -#include - -#if defined __ARM_FEATURE_SME -#define MODE_ATTR __arm_streaming -#else -#define MODE_ATTR -#endif - - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. -#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 -#endif - -// CHECK-LABEL: @test_svzip1_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.zip1.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]]) -// CHECK-NEXT: ret [[TMP0]] -// -// CPP-CHECK-LABEL: @_Z16test_svzip1_bf16u14__SVBfloat16_tS_( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.zip1.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP0]] -// -svbfloat16_t test_svzip1_bf16(svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR -{ - return SVE_ACLE_FUNC(svzip1,_bf16,,)(op1, op2); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_zip1-fp64-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_zip1-fp64-bfloat.c deleted file mode 100644 index f7ea7e177d69a..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_zip1-fp64-bfloat.c +++ /dev/null @@ -1,30 +0,0 @@ -// NOTE: Assertions have been 
autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -fclang-abi-compat=latest -target-feature +f64mm -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -target-feature +f64mm -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -target-feature +f64mm -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -target-feature +f64mm -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK - -// REQUIRES: aarch64-registered-target - -#include - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. 
-#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4 -#endif - -// CHECK-LABEL: @test_svzip1_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.zip1q.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]]) -// CHECK-NEXT: ret [[TMP0]] -// -// CPP-CHECK-LABEL: @_Z16test_svzip1_bf16u14__SVBfloat16_tS_( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.zip1q.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP0]] -// -svbfloat16_t test_svzip1_bf16(svbfloat16_t op1, svbfloat16_t op2) { - return SVE_ACLE_FUNC(svzip1q, _bf16, , )(op1, op2); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_zip1-fp64.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_zip1-fp64.c index b5aef2270c3cc..2dd0bb6b0f1a5 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_zip1-fp64.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_zip1-fp64.c @@ -168,3 +168,17 @@ svfloat32_t test_svzip1_f32(svfloat32_t op1, svfloat32_t op2) { svfloat64_t test_svzip1_f64(svfloat64_t op1, svfloat64_t op2) { return SVE_ACLE_FUNC(svzip1q, _f64, , )(op1, op2); } + +// CHECK-LABEL: @test_svzip1_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.zip1q.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z16test_svzip1_bf16u14__SVBfloat16_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.zip1q.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svbfloat16_t test_svzip1_bf16(svbfloat16_t op1, svbfloat16_t op2) { + return SVE_ACLE_FUNC(svzip1q, _bf16, , )(op1, op2); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_zip1.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_zip1.c index 95a0f499248f1..5984b4be3a93e 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_zip1.c +++ 
b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_zip1.c @@ -246,3 +246,18 @@ svbool_t test_svzip1_b64(svbool_t op1, svbool_t op2) MODE_ATTR { return svzip1_b64(op1, op2); } + +// CHECK-LABEL: @test_svzip1_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.zip1.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z16test_svzip1_bf16u14__SVBfloat16_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.zip1.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svbfloat16_t test_svzip1_bf16(svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR +{ + return SVE_ACLE_FUNC(svzip1,_bf16,,)(op1, op2); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_zip2-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_zip2-bfloat.c deleted file mode 100644 index d750fae041840..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_zip2-bfloat.c +++ /dev/null @@ -1,39 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone 
-Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s - -#include - -#if defined __ARM_FEATURE_SME -#define MODE_ATTR __arm_streaming -#else -#define MODE_ATTR -#endif - - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. -#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 -#endif - -// CHECK-LABEL: @test_svzip2_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.zip2.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]]) -// CHECK-NEXT: ret [[TMP0]] -// -// CPP-CHECK-LABEL: @_Z16test_svzip2_bf16u14__SVBfloat16_tS_( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.zip2.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP0]] -// -svbfloat16_t test_svzip2_bf16(svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR -{ - return SVE_ACLE_FUNC(svzip2,_bf16,,)(op1, op2); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_zip2-fp64-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_zip2-fp64-bfloat.c deleted file mode 100644 index b3ffe987830ed..0000000000000 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_zip2-fp64-bfloat.c +++ /dev/null @@ -1,30 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -fclang-abi-compat=latest -target-feature +f64mm -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 
-fclang-abi-compat=latest -target-feature +f64mm -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -target-feature +f64mm -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -target-feature +f64mm -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK - -// REQUIRES: aarch64-registered-target - -#include - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. -#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4 -#endif - -// CHECK-LABEL: @test_svzip2_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.zip2q.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]]) -// CHECK-NEXT: ret [[TMP0]] -// -// CPP-CHECK-LABEL: @_Z16test_svzip2_bf16u14__SVBfloat16_tS_( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.zip2q.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP0]] -// -svbfloat16_t test_svzip2_bf16(svbfloat16_t op1, svbfloat16_t op2) { - return SVE_ACLE_FUNC(svzip2q, _bf16, , )(op1, op2); -} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_zip2-fp64.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_zip2-fp64.c index a890d8c77e9e6..4161e3f7ca958 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_zip2-fp64.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_zip2-fp64.c @@ -168,3 +168,17 @@ svfloat32_t 
test_svzip2_f32(svfloat32_t op1, svfloat32_t op2) { svfloat64_t test_svzip2_f64(svfloat64_t op1, svfloat64_t op2) { return SVE_ACLE_FUNC(svzip2q, _f64, , )(op1, op2); } + +// CHECK-LABEL: @test_svzip2_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.zip2q.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z16test_svzip2_bf16u14__SVBfloat16_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.zip2q.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svbfloat16_t test_svzip2_bf16(svbfloat16_t op1, svbfloat16_t op2) { + return SVE_ACLE_FUNC(svzip2q, _bf16, , )(op1, op2); +} diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_zip2.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_zip2.c index 5b19cfd673f3d..b86871bc7a8e0 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_zip2.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_zip2.c @@ -246,3 +246,18 @@ svbool_t test_svzip2_b64(svbool_t op1, svbool_t op2) MODE_ATTR { return svzip2_b64(op1, op2); } + +// CHECK-LABEL: @test_svzip2_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.zip2.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z16test_svzip2_bf16u14__SVBfloat16_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.zip2.nxv8bf16( [[OP1:%.*]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svbfloat16_t test_svzip2_bf16(svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR +{ + return SVE_ACLE_FUNC(svzip2,_bf16,,)(op1, op2); +} diff --git a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_luti.c b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_luti.c index 82e318a7460c2..22a9c6fbf7b03 100644 --- a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_luti.c +++ 
b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_luti.c @@ -1,14 +1,14 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu \ -// RUN: -target-feature +sme -target-feature +sme2 -target-feature +lut -target-feature +bf16 -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: -target-feature +sme -target-feature +sme2 -target-feature +lut -O1 -Werror -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu \ -// RUN: -target-feature +sve -target-feature +sve2 -target-feature +lut -target-feature +bf16 -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: -target-feature +sve -target-feature +sve2 -target-feature +lut -O1 -Werror -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu \ -// RUN: -target-feature +sve -target-feature +sve2 -target-feature +lut -target-feature +bf16 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: -target-feature +sve -target-feature +sve2 -target-feature +lut -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu \ -// RUN: -target-feature +sve -target-feature +sve2 -target-feature +lut -target-feature +bf16 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +lut -target-feature +bf16 -O1 -Werror -Wall -o /dev/null %s +// RUN: -target-feature +sve -target-feature +sve2 -target-feature +lut -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +lut -O1 -Werror -Wall -o 
/dev/null %s #include #if defined __ARM_FEATURE_SME diff --git a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_revd.c b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_revd.c index 2143f27f95e45..a454511576241 100644 --- a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_revd.c +++ b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_revd.c @@ -1,17 +1,17 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu \ -// RUN: -target-feature +bf16 -target-feature +sme -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: -target-feature +sme -O1 -Werror -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu \ -// RUN: -target-feature +bf16 -target-feature +sve -target-feature +sme -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: -target-feature +sve -target-feature +sme -O1 -Werror -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu \ // RUN: -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -O1 -Werror -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu \ // RUN: -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu \ // RUN: -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror -o /dev/null %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu 
-target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -S -disable-O0-optnone -Werror -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -S -disable-O0-optnone -Werror -o /dev/null %s #include #if defined(__ARM_FEATURE_SME) && defined(__ARM_FEATURE_SVE) diff --git a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_tbl2-bfloat.c b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_tbl2-bfloat.c deleted file mode 100644 index 96af8c0bfa97d..0000000000000 --- a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_tbl2-bfloat.c +++ /dev/null @@ -1,29 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +bf16 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +bf16 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +bf16 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +bf16 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK - -#include - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. 
-#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4 -#endif - -// CHECK-LABEL: @test_svtbl2_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv8bf16( [[DATA_COERCE0:%.*]], [[DATA_COERCE1:%.*]], [[INDICES:%.*]]) -// CHECK-NEXT: ret [[TMP0]] -// -// CPP-CHECK-LABEL: @_Z16test_svtbl2_bf1614svbfloat16x2_tu12__SVUint16_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv8bf16( [[DATA_COERCE0:%.*]], [[DATA_COERCE1:%.*]], [[INDICES:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP0]] -// -svbfloat16_t test_svtbl2_bf16(svbfloat16x2_t data, svuint16_t indices) { - return SVE_ACLE_FUNC(svtbl2, _bf16, , )(data, indices); -} diff --git a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_tbl2.c b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_tbl2.c index 999a87fcfbbc8..a1ae50203b853 100644 --- a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_tbl2.c +++ b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_tbl2.c @@ -179,3 +179,17 @@ svfloat64_t test_svtbl2_f64(svfloat64x2_t data, svuint64_t indices) { return SVE_ACLE_FUNC(svtbl2,_f64,,)(data, indices); } + +// CHECK-LABEL: @test_svtbl2_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv8bf16( [[DATA_COERCE0:%.*]], [[DATA_COERCE1:%.*]], [[INDICES:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z16test_svtbl2_bf1614svbfloat16x2_tu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv8bf16( [[DATA_COERCE0:%.*]], [[DATA_COERCE1:%.*]], [[INDICES:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svbfloat16_t test_svtbl2_bf16(svbfloat16x2_t data, svuint16_t indices) { + return SVE_ACLE_FUNC(svtbl2, _bf16, , )(data, indices); +} diff --git a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_tbx-bfloat.c 
b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_tbx-bfloat.c deleted file mode 100644 index c39d644e1b84f..0000000000000 --- a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_tbx-bfloat.c +++ /dev/null @@ -1,29 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +bf16 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +bf16 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +bf16 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +bf16 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK - -#include - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. 
-#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4 -#endif - -// CHECK-LABEL: @test_svtbx_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.tbx.nxv8bf16( [[FALLBACK:%.*]], [[DATA:%.*]], [[INDICES:%.*]]) -// CHECK-NEXT: ret [[TMP0]] -// -// CPP-CHECK-LABEL: @_Z15test_svtbx_bf16u14__SVBfloat16_tS_u12__SVUint16_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.tbx.nxv8bf16( [[FALLBACK:%.*]], [[DATA:%.*]], [[INDICES:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP0]] -// -svbfloat16_t test_svtbx_bf16(svbfloat16_t fallback, svbfloat16_t data, svuint16_t indices) { - return SVE_ACLE_FUNC(svtbx, _bf16, , )(fallback, data, indices); -} diff --git a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_tbx.c b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_tbx.c index d1e5893e52d3e..3cef7d66b6a2e 100644 --- a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_tbx.c +++ b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_tbx.c @@ -179,3 +179,17 @@ svfloat64_t test_svtbx_f64(svfloat64_t fallback, svfloat64_t data, svuint64_t in { return SVE_ACLE_FUNC(svtbx,_f64,,)(fallback, data, indices); } + +// CHECK-LABEL: @test_svtbx_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.tbx.nxv8bf16( [[FALLBACK:%.*]], [[DATA:%.*]], [[INDICES:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z15test_svtbx_bf16u14__SVBfloat16_tS_u12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.tbx.nxv8bf16( [[FALLBACK:%.*]], [[DATA:%.*]], [[INDICES:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svbfloat16_t test_svtbx_bf16(svbfloat16_t fallback, svbfloat16_t data, svuint16_t indices) { + return SVE_ACLE_FUNC(svtbx, _bf16, , )(fallback, data, indices); +} diff --git a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_whilerw-bfloat.c 
b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_whilerw-bfloat.c deleted file mode 100644 index 95b0f53abdce0..0000000000000 --- a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_whilerw-bfloat.c +++ /dev/null @@ -1,33 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +bf16 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +bf16 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +bf16 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +bf16 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK - -// REQUIRES: aarch64-registered-target - -#include - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. 
-#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 -#endif - -// CHECK-LABEL: @test_svwhilerw_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.whilerw.h.nxv8i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP0]]) -// CHECK-NEXT: ret [[TMP1]] -// -// CPP-CHECK-LABEL: @_Z19test_svwhilerw_bf16PKu6__bf16S0_( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.whilerw.h.nxv8i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP0]]) -// CPP-CHECK-NEXT: ret [[TMP1]] -// -svbool_t test_svwhilerw_bf16(const bfloat16_t *op1, const bfloat16_t *op2) -{ - return SVE_ACLE_FUNC(svwhilerw,_bf16,,)(op1, op2); -} diff --git a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_whilerw.c b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_whilerw.c index 13f1984db94cc..59462657693bf 100644 --- a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_whilerw.c +++ b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_whilerw.c @@ -197,3 +197,20 @@ svbool_t test_svwhilerw_f64(const float64_t *op1, const float64_t *op2) { return SVE_ACLE_FUNC(svwhilerw,_f64,,)(op1, op2); } + +// CHECK-LABEL: @test_svwhilerw_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.whilerw.h.nxv8i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP0]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z19test_svwhilerw_bf16PKu6__bf16S0_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.whilerw.h.nxv8i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP0]]) +// CPP-CHECK-NEXT: 
ret [[TMP1]] +// +svbool_t test_svwhilerw_bf16(const bfloat16_t *op1, const bfloat16_t *op2) +{ + return SVE_ACLE_FUNC(svwhilerw,_bf16,,)(op1, op2); +} diff --git a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_whilewr-bfloat.c b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_whilewr-bfloat.c deleted file mode 100644 index 647f2aef98d81..0000000000000 --- a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_whilewr-bfloat.c +++ /dev/null @@ -1,33 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +bf16 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +bf16 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +bf16 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +bf16 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK - -// REQUIRES: aarch64-registered-target - -#include - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. 
-#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 -#endif - -// CHECK-LABEL: @test_svwhilewr_bf16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.whilewr.h.nxv8i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP0]]) -// CHECK-NEXT: ret [[TMP1]] -// -// CPP-CHECK-LABEL: @_Z19test_svwhilewr_bf16PKu6__bf16S0_( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.whilewr.h.nxv8i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP0]]) -// CPP-CHECK-NEXT: ret [[TMP1]] -// -svbool_t test_svwhilewr_bf16(const bfloat16_t *op1, const bfloat16_t *op2) -{ - return SVE_ACLE_FUNC(svwhilewr,_bf16,,)(op1, op2); -} diff --git a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_whilewr.c b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_whilewr.c index fddede6a4dc09..469e299f6dd47 100644 --- a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_whilewr.c +++ b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_whilewr.c @@ -197,3 +197,20 @@ svbool_t test_svwhilewr_f64(const float64_t *op1, const float64_t *op2) { return SVE_ACLE_FUNC(svwhilewr,_f64,,)(op1, op2); } + +// CHECK-LABEL: @test_svwhilewr_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.whilewr.h.nxv8i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP0]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z19test_svwhilewr_bf16PKu6__bf16S0_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.whilewr.h.nxv8i1.p0(ptr [[OP1:%.*]], ptr [[OP2:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP0]]) +// CPP-CHECK-NEXT: 
ret [[TMP1]] +// +svbool_t test_svwhilewr_bf16(const bfloat16_t *op1, const bfloat16_t *op2) +{ + return SVE_ACLE_FUNC(svwhilewr,_bf16,,)(op1, op2); +} diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfadd.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfadd.c index 0f3b92f81cdee..9d45b829c39bd 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfadd.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfadd.c @@ -1,11 +1,11 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null 
%s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include #if defined __ARM_FEATURE_SME diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfmax.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfmax.c index cc3207aba01e4..a00ca93f9a6af 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfmax.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfmax.c @@ -1,11 +1,11 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S 
-p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include #if defined __ARM_FEATURE_SME diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfmaxnm.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfmaxnm.c index 7983943af3d89..0ad3b448ee2cf 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfmaxnm.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfmaxnm.c @@ -1,11 +1,11 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include #if defined __ARM_FEATURE_SME diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfmin.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfmin.c index 97159f119375a..fcd71acb38327 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfmin.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfmin.c @@ -1,11 +1,11 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | 
FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include #if defined __ARM_FEATURE_SME diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfminnm.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfminnm.c index 4cadbdc2b4d88..e261eb4b8113c 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfminnm.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfminnm.c @@ -1,11 +1,11 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include #if defined __ARM_FEATURE_SME diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfmla.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfmla.c index 720853fccb650..fe6f19e9370a0 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfmla.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfmla.c @@ -1,11 +1,11 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s 
| opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -o /dev/null %s #include #if defined __ARM_FEATURE_SME diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfmls.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfmls.c index 98c4dc95fecdf..4c1f8d2676625 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfmls.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfmls.c @@ -1,11 +1,11 @@ // NOTE: Assertions have been autogenerated by 
utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -o /dev/null %s #include #if defined __ARM_FEATURE_SME diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfmlsl.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfmlsl.c index 
ad1128fd2b896..83afd04880bbc 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfmlsl.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfmlsl.c @@ -2,15 +2,15 @@ // REQUIRES: aarch64-registered-target // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sve -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s // RUN: 
%clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfmul.c 
b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfmul.c index e9443e35cee57..2939b684be744 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfmul.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfmul.c @@ -1,11 +1,11 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 
-target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include #if defined __ARM_FEATURE_SME diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfsub.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfsub.c index a58f0046e61f3..a27c316b53bd1 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfsub.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfsub.c @@ -1,11 +1,11 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-b16b16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve-b16b16 -S -disable-O0-optnone -Werror -Wall -o 
/dev/null %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sve-b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include #if defined __ARM_FEATURE_SME diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_cntp.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_cntp.c index 1de597fef1f45..ae450d7bbaf1d 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_cntp.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_cntp.c @@ -3,10 +3,10 @@ // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -O1 -Werror -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -O1 -Werror -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone 
-Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_create2_bool.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_create2_bool.c index f416fe2b1b6ef..1574e4c12c753 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_create2_bool.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_create2_bool.c @@ -5,18 +5,18 @@ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sve -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sve 
-target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s\ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s \ +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // REQUIRES: aarch64-registered-target diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_create4_bool.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_create4_bool.c index 0026124deaae8..a145c2df97660 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_create4_bool.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_create4_bool.c @@ -5,18 +5,18 @@ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 
-target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sve -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s\ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s \ +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | 
FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // REQUIRES: aarch64-registered-target diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_dot.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_dot.c index 8543385dd263d..704a245ed1460 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_dot.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_dot.c @@ -2,14 +2,14 @@ // REQUIRES: aarch64-registered-target // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature 
+bf16 -target-feature +sve -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // 
RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include #if defined(__ARM_FEATURE_SME) && defined(__ARM_FEATURE_SVE) diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_dupq.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_dupq.c index c9ddb324d843b..f172d71b4f42a 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_dupq.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_dupq.c @@ -1,20 +1,20 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sve2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -triple 
aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sve2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sme2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sme2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature 
+bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_extq.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_extq.c index 38769aeee8b2b..3e71ba068b8b9 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_extq.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_extq.c @@ -1,20 +1,20 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sve2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sve2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sme2p1 -target-feature +bf16\ +// RUN: 
%clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sme2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_fclamp.c 
b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_fclamp.c index 8fbcc43678c7d..ef2bfd02b077d 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_fclamp.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_fclamp.c @@ -16,7 +16,7 @@ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 \ // RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve \ +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sve \ // RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_get2_bool.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_get2_bool.c index cb30296cb618a..844e8c5791a0c 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_get2_bool.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_get2_bool.c @@ -5,18 +5,18 @@ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ // RUN: | opt -S 
-passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sve -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s\ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s \ +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // REQUIRES: 
aarch64-registered-target #include diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_get4_bool.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_get4_bool.c index 611fc061f810e..5553c8669a015 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_get4_bool.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_get4_bool.c @@ -5,18 +5,18 @@ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sve -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve 
-target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s\ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s \ +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // REQUIRES: aarch64-registered-target diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ld1.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ld1.c index ee5c2c592c61d..af39be3c8c06e 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ld1.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ld1.c @@ -1,13 +1,13 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -triple aarch64 
-target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -O1 -Werror -Wno-unknown-attributes -Wall -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ldnt1.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ldnt1.c index 692af131e69de..02c7586f15122 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ldnt1.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ldnt1.c @@ -1,11 +1,11 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 
-target-feature +sve2p1 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_load_struct.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_load_struct.c index 5119bc085bc78..3e39fac2ce3a1 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_load_struct.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_load_struct.c @@ -1,13 +1,13 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sme -target-feature 
+sve2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sme -target-feature +sme2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sme -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sme -target-feature +sme2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_loads.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_loads.c index 0a87469887df9..b3cf65a576fab 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_loads.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_loads.c @@ -1,10 +1,10 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 
-target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -S -disable-O0-optnone 
-Werror -Wall -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_pext.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_pext.c index deb126236ad57..15c01b9db8afa 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_pext.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_pext.c @@ -1,11 +1,11 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sve -target-feature +sme2 -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sve -target-feature +sme2 -O1 -Werror -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -O1 -Werror -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -O1 -Werror -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature 
+sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_pfalse.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_pfalse.c index 5c83789a19505..fdf9c498341fa 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_pfalse.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_pfalse.c @@ -1,11 +1,11 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: 
%clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_psel.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_psel.c index d2fff9f3002b7..8b7761e981c96 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_psel.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_psel.c @@ -9,8 +9,8 @@ // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu \ // RUN: -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu \ -// RUN: -target-feature +bf16 -target-feature +sme -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: -target-feature +sme -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_psel_svcount.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_psel_svcount.c index 618836b044c77..589c540dcafb6 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_psel_svcount.c +++ 
b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_psel_svcount.c @@ -11,8 +11,8 @@ // RUN: -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu \ -// RUN: -target-feature +bf16 -target-feature +sme -target-feature +sme2 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: -target-feature +sme -target-feature +sme2 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ptrue.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ptrue.c index aeaf4d7ae0e4c..73025d5df5c69 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ptrue.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ptrue.c @@ -1,9 +1,9 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -O1 -Werror -emit-llvm -o - %s | FileCheck %s -// RUN: 
%clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -O1 -Werror -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK #include diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_qcvtn.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_qcvtn.c index f80f7455c539a..091a17ec1bc76 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_qcvtn.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_qcvtn.c @@ -2,17 +2,17 @@ // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -D__SVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -D__SVE_OVERLOADED_FORMS -triple 
aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -D__SVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -D__SVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sme -target-feature +sme2 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s 
-// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sve -target-feature +sme2 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sve -target-feature +sme2 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sve -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_qrshr.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_qrshr.c index baa4acf6ec6c9..b3a33190fc4fa 100644 --- 
a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_qrshr.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_qrshr.c @@ -1,13 +1,13 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sve -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sve -target-feature +sme2 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sve -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sve -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sve -target-feature +sme2 -target-feature +sme-f64f64 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sve -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sve -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sve -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sve -target-feature +sme2 -target-feature +sme-f64f64 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o 
/dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_sclamp.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_sclamp.c index 07b77dbb1378c..51e522d6f2625 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_sclamp.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_sclamp.c @@ -10,13 +10,13 @@ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 \ // RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme \ +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sve \ +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme \ +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme \ +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple 
aarch64-none-linux-gnu -target-feature +sme \ // RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_set2_bool.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_set2_bool.c index 08b9094a0082d..3d8490188b130 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_set2_bool.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_set2_bool.c @@ -5,18 +5,18 @@ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sve -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: 
%clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s\ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s \ +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // REQUIRES: aarch64-registered-target diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_set4_bool.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_set4_bool.c index 9b20d23d0fe84..8bc8db0717b57 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_set4_bool.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_set4_bool.c @@ -5,18 +5,18 @@ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ +// RUN: %clang_cc1 
-triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sve -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s\ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s \ +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 
-triple aarch64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // REQUIRES: aarch64-registered-target #include diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_st1.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_st1.c index e71e68114a5af..092f31ba8491a 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_st1.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_st1.c @@ -1,11 +1,11 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include #ifdef SVE_OVERLOADED_FORMS diff --git 
a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_stnt1.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_stnt1.c index 1544260377a20..99dff2c0a5ab2 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_stnt1.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_stnt1.c @@ -1,11 +1,11 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_store.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_store.c index c747fc025c74c..058cc3afd4560 100644 --- 
a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_store.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_store.c @@ -1,10 +1,10 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 
-DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include #ifdef SVE_OVERLOADED_FORMS diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_store_struct.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_store_struct.c index 863189c5051eb..60c5701dec6f0 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_store_struct.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_store_struct.c @@ -1,13 +1,13 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sme -target-feature +sve2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sme -target-feature +sme2p1 
-target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sme -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sme -target-feature +sme2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s 
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include #ifdef SVE_OVERLOADED_FORMS diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_tblq.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_tblq.c index 5e8007f952538..280768db52b71 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_tblq.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_tblq.c @@ -1,20 +1,20 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu 
-target-feature +sme -target-feature +sme2 -target-feature +sme2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sve2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sve2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sme2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sme2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sve -target-feature +sve2 -target-feature +sve2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 \ // RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_tbxq.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_tbxq.c index ee44ff57cee03..8ac4ebf6a6057 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_tbxq.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_tbxq.c @@ -1,20 +1,20 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sve2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature 
+sme -target-feature +sve2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sme2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sme2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu 
-target-feature +sve -target-feature +sve2 -target-feature +sve2p1 \ // RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_uclamp.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_uclamp.c index 4c4ffeef38c18..4a23e9d3708dc 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_uclamp.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_uclamp.c @@ -10,13 +10,13 @@ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 \ // RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme \ +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sve \ +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme \ +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu 
-target-feature +bf16 -target-feature +sme \ +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme \ // RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_undef_bool.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_undef_bool.c index c0e429307ba32..2c988382e2bef 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_undef_bool.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_undef_bool.c @@ -1,11 +1,11 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -O2 -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -O2 -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -O2 -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sve -target-feature +sme -target-feature +sme2 -O2 -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -O2 -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -O2 -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 
-target-feature +bf16 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -O2 -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sme -target-feature +sme2 -O2 -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -O2 -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -O2 -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // REQUIRES: aarch64-registered-target diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_uzpq1.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_uzpq1.c index c059ee00fb8eb..979105d5be91e 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_uzpq1.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_uzpq1.c @@ -1,20 +1,20 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme 
-target-feature +sme2 -target-feature +sme2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sve2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sve2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sme2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sme2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 
-target-feature +sve2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 \ // RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_uzpq2.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_uzpq2.c index c7d2a0967d809..cfe295ea64d7f 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_uzpq2.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_uzpq2.c @@ -1,20 +1,20 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme 
-target-feature +sve2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sve2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sme2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sme2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu 
-target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 \ // RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_while_pn.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_while_pn.c index d02b8069d2a29..38a95a2ca7835 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_while_pn.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_while_pn.c @@ -1,10 +1,10 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o 
/dev/null %s // REQUIRES: aarch64-registered-target diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_while_x2.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_while_x2.c index bd485f15e490e..99bfdd1de5790 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_while_x2.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_while_x2.c @@ -1,13 +1,13 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sve -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -S -disable-O0-optnone -Werror -o /dev/null %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -o /dev/null %s #include #ifdef SVE_OVERLOADED_FORMS diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_zipq1.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_zipq1.c index 1d6f90fb9e514..1fc35a25c6e06 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_zipq1.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_zipq1.c @@ -1,20 +1,20 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme 
-target-feature +sme2 -target-feature +sme2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sve2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sve2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sme2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sme2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 
-target-feature +sve2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 \ // RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_zipq2.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_zipq2.c index b900507d43259..2efac16a43add 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_zipq2.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_zipq2.c @@ -1,20 +1,20 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme 
-target-feature +sve2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sve2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sme2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sme2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 \ // RUN: -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu 
-target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16\ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 \ // RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git a/clang/test/CodeGen/PowerPC/check-zero-vector.c b/clang/test/CodeGen/PowerPC/check-zero-vector.c new file mode 100644 index 0000000000000..cb6c826641366 --- /dev/null +++ b/clang/test/CodeGen/PowerPC/check-zero-vector.c @@ -0,0 +1,143 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -triple powerpc64-ibm-aix -emit-llvm %s -o - | FileCheck %s --check-prefix=POWERPC_64 +// RUN: %clang_cc1 -triple powerpc64le-unknown-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=POWERPC_64LE +// RUN: %clang_cc1 -triple powerpc-ibm-aix -emit-llvm %s -o - | FileCheck %s --check-prefix=POWERPC_32 + +// POWERPC_64-LABEL: define signext i32 @test_Greater_than( +// POWERPC_64-SAME: ptr noundef [[COLAUTHS:%.*]]) #[[ATTR0:[0-9]+]] { +// POWERPC_64-NEXT: [[ENTRY:.*:]] +// POWERPC_64-NEXT: [[COLAUTHS_ADDR:%.*]] = alloca ptr, align 8 +// POWERPC_64-NEXT: [[RESULT:%.*]] = alloca i16, align 2 +// POWERPC_64-NEXT: [[I:%.*]] = alloca i32, align 4 +// POWERPC_64-NEXT: store ptr [[COLAUTHS]], ptr [[COLAUTHS_ADDR]], align 8 +// POWERPC_64-NEXT: store i16 0, ptr [[RESULT]], align 2 +// POWERPC_64-NEXT: store i32 0, ptr [[I]], align 4 +// POWERPC_64-NEXT: br label %[[FOR_COND:.*]] +// POWERPC_64: [[FOR_COND]]: +// POWERPC_64-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4 +// POWERPC_64-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 4 +// POWERPC_64-NEXT: br i1 [[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// POWERPC_64: [[FOR_BODY]]: +// POWERPC_64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[COLAUTHS_ADDR]], align 8 +// POWERPC_64-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4 +// POWERPC_64-NEXT: [[IDXPROM:%.*]] = sext i32 
[[TMP2]] to i64 +// POWERPC_64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i64 [[IDXPROM]] +// POWERPC_64-NEXT: [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 +// POWERPC_64-NEXT: [[CONV:%.*]] = zext i16 [[TMP3]] to i32 +// POWERPC_64-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[CONV]], 0 +// POWERPC_64-NEXT: br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +// POWERPC_64: [[IF_THEN]]: +// POWERPC_64-NEXT: [[TMP4:%.*]] = load i16, ptr [[RESULT]], align 2 +// POWERPC_64-NEXT: [[INC:%.*]] = add i16 [[TMP4]], 1 +// POWERPC_64-NEXT: store i16 [[INC]], ptr [[RESULT]], align 2 +// POWERPC_64-NEXT: br label %[[IF_END]] +// POWERPC_64: [[IF_END]]: +// POWERPC_64-NEXT: br label %[[FOR_INC:.*]] +// POWERPC_64: [[FOR_INC]]: +// POWERPC_64-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4 +// POWERPC_64-NEXT: [[INC3:%.*]] = add nsw i32 [[TMP5]], 1 +// POWERPC_64-NEXT: store i32 [[INC3]], ptr [[I]], align 4 +// POWERPC_64-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]] +// POWERPC_64: [[FOR_END]]: +// POWERPC_64-NEXT: [[TMP6:%.*]] = load i16, ptr [[RESULT]], align 2 +// POWERPC_64-NEXT: [[CONV4:%.*]] = zext i16 [[TMP6]] to i32 +// POWERPC_64-NEXT: ret i32 [[CONV4]] +// +// POWERPC_64LE-LABEL: define dso_local signext i32 @test_Greater_than( +// POWERPC_64LE-SAME: ptr noundef [[COLAUTHS:%.*]]) #[[ATTR0:[0-9]+]] { +// POWERPC_64LE-NEXT: [[ENTRY:.*:]] +// POWERPC_64LE-NEXT: [[COLAUTHS_ADDR:%.*]] = alloca ptr, align 8 +// POWERPC_64LE-NEXT: [[RESULT:%.*]] = alloca i16, align 2 +// POWERPC_64LE-NEXT: [[I:%.*]] = alloca i32, align 4 +// POWERPC_64LE-NEXT: store ptr [[COLAUTHS]], ptr [[COLAUTHS_ADDR]], align 8 +// POWERPC_64LE-NEXT: store i16 0, ptr [[RESULT]], align 2 +// POWERPC_64LE-NEXT: store i32 0, ptr [[I]], align 4 +// POWERPC_64LE-NEXT: br label %[[FOR_COND:.*]] +// POWERPC_64LE: [[FOR_COND]]: +// POWERPC_64LE-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4 +// POWERPC_64LE-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 4 +// 
POWERPC_64LE-NEXT: br i1 [[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// POWERPC_64LE: [[FOR_BODY]]: +// POWERPC_64LE-NEXT: [[TMP1:%.*]] = load ptr, ptr [[COLAUTHS_ADDR]], align 8 +// POWERPC_64LE-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4 +// POWERPC_64LE-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP2]] to i64 +// POWERPC_64LE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i64 [[IDXPROM]] +// POWERPC_64LE-NEXT: [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 +// POWERPC_64LE-NEXT: [[CONV:%.*]] = zext i16 [[TMP3]] to i32 +// POWERPC_64LE-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[CONV]], 0 +// POWERPC_64LE-NEXT: br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +// POWERPC_64LE: [[IF_THEN]]: +// POWERPC_64LE-NEXT: [[TMP4:%.*]] = load i16, ptr [[RESULT]], align 2 +// POWERPC_64LE-NEXT: [[INC:%.*]] = add i16 [[TMP4]], 1 +// POWERPC_64LE-NEXT: store i16 [[INC]], ptr [[RESULT]], align 2 +// POWERPC_64LE-NEXT: br label %[[IF_END]] +// POWERPC_64LE: [[IF_END]]: +// POWERPC_64LE-NEXT: br label %[[FOR_INC:.*]] +// POWERPC_64LE: [[FOR_INC]]: +// POWERPC_64LE-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4 +// POWERPC_64LE-NEXT: [[INC3:%.*]] = add nsw i32 [[TMP5]], 1 +// POWERPC_64LE-NEXT: store i32 [[INC3]], ptr [[I]], align 4 +// POWERPC_64LE-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]] +// POWERPC_64LE: [[FOR_END]]: +// POWERPC_64LE-NEXT: [[TMP6:%.*]] = load i16, ptr [[RESULT]], align 2 +// POWERPC_64LE-NEXT: [[CONV4:%.*]] = zext i16 [[TMP6]] to i32 +// POWERPC_64LE-NEXT: ret i32 [[CONV4]] +// +// POWERPC_32-LABEL: define i32 @test_Greater_than( +// POWERPC_32-SAME: ptr noundef [[COLAUTHS:%.*]]) #[[ATTR0:[0-9]+]] { +// POWERPC_32-NEXT: [[ENTRY:.*:]] +// POWERPC_32-NEXT: [[COLAUTHS_ADDR:%.*]] = alloca ptr, align 4 +// POWERPC_32-NEXT: [[RESULT:%.*]] = alloca i16, align 2 +// POWERPC_32-NEXT: [[I:%.*]] = alloca i32, align 4 +// POWERPC_32-NEXT: store ptr [[COLAUTHS]], ptr [[COLAUTHS_ADDR]], align 4 +// POWERPC_32-NEXT: 
store i16 0, ptr [[RESULT]], align 2 +// POWERPC_32-NEXT: store i32 0, ptr [[I]], align 4 +// POWERPC_32-NEXT: br label %[[FOR_COND:.*]] +// POWERPC_32: [[FOR_COND]]: +// POWERPC_32-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4 +// POWERPC_32-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 4 +// POWERPC_32-NEXT: br i1 [[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// POWERPC_32: [[FOR_BODY]]: +// POWERPC_32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[COLAUTHS_ADDR]], align 4 +// POWERPC_32-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4 +// POWERPC_32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 [[TMP2]] +// POWERPC_32-NEXT: [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 +// POWERPC_32-NEXT: [[CONV:%.*]] = zext i16 [[TMP3]] to i32 +// POWERPC_32-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[CONV]], 0 +// POWERPC_32-NEXT: br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +// POWERPC_32: [[IF_THEN]]: +// POWERPC_32-NEXT: [[TMP4:%.*]] = load i16, ptr [[RESULT]], align 2 +// POWERPC_32-NEXT: [[INC:%.*]] = add i16 [[TMP4]], 1 +// POWERPC_32-NEXT: store i16 [[INC]], ptr [[RESULT]], align 2 +// POWERPC_32-NEXT: br label %[[IF_END]] +// POWERPC_32: [[IF_END]]: +// POWERPC_32-NEXT: br label %[[FOR_INC:.*]] +// POWERPC_32: [[FOR_INC]]: +// POWERPC_32-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4 +// POWERPC_32-NEXT: [[INC3:%.*]] = add nsw i32 [[TMP5]], 1 +// POWERPC_32-NEXT: store i32 [[INC3]], ptr [[I]], align 4 +// POWERPC_32-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]] +// POWERPC_32: [[FOR_END]]: +// POWERPC_32-NEXT: [[TMP6:%.*]] = load i16, ptr [[RESULT]], align 2 +// POWERPC_32-NEXT: [[CONV4:%.*]] = zext i16 [[TMP6]] to i32 +// POWERPC_32-NEXT: ret i32 [[CONV4]] +// +int test_Greater_than(unsigned short *colauths) { + unsigned short result = 0; + for (int i = 0; i < 4; i++) { + if (colauths[i] > 0) { + result++; + } + } + return result; +} +//. 
+// POWERPC_64: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]]} +// POWERPC_64: [[META3]] = !{!"llvm.loop.mustprogress"} +//. +// POWERPC_64LE: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]]} +// POWERPC_64LE: [[META3]] = !{!"llvm.loop.mustprogress"} +//. +// POWERPC_32: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]]} +// POWERPC_32: [[META3]] = !{!"llvm.loop.mustprogress"} +//. diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vwcvt.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vwcvt.c index f4c2557855caf..f613ca1a16fc8 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vwcvt.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vwcvt.c @@ -9,7 +9,7 @@ // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16mf4 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv1i16.nxv1i8.i16.i64( poison, [[SRC]], i16 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv1i16.nxv1i8.i8.i64( poison, [[SRC]], i8 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16mf4_t test_vwcvt_x_x_v_i16mf4(vint8mf8_t src, size_t vl) { @@ -19,7 +19,7 @@ vint16mf4_t test_vwcvt_x_x_v_i16mf4(vint8mf8_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16mf2 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv2i16.nxv2i8.i16.i64( poison, [[SRC]], i16 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv2i16.nxv2i8.i8.i64( poison, [[SRC]], i8 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16mf2_t test_vwcvt_x_x_v_i16mf2(vint8mf4_t src, size_t vl) { @@ -29,7 +29,7 @@ vint16mf2_t test_vwcvt_x_x_v_i16mf2(vint8mf4_t src, size_t 
vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16m1 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv4i16.nxv4i8.i16.i64( poison, [[SRC]], i16 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv4i16.nxv4i8.i8.i64( poison, [[SRC]], i8 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m1_t test_vwcvt_x_x_v_i16m1(vint8mf2_t src, size_t vl) { @@ -39,7 +39,7 @@ vint16m1_t test_vwcvt_x_x_v_i16m1(vint8mf2_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16m2 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv8i16.nxv8i8.i16.i64( poison, [[SRC]], i16 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv8i16.nxv8i8.i8.i64( poison, [[SRC]], i8 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m2_t test_vwcvt_x_x_v_i16m2(vint8m1_t src, size_t vl) { @@ -49,7 +49,7 @@ vint16m2_t test_vwcvt_x_x_v_i16m2(vint8m1_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16m4 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv16i16.nxv16i8.i16.i64( poison, [[SRC]], i16 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv16i16.nxv16i8.i8.i64( poison, [[SRC]], i8 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m4_t test_vwcvt_x_x_v_i16m4(vint8m2_t src, size_t vl) { @@ -59,7 +59,7 @@ vint16m4_t test_vwcvt_x_x_v_i16m4(vint8m2_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16m8 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv32i16.nxv32i8.i16.i64( poison, [[SRC]], 
i16 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv32i16.nxv32i8.i8.i64( poison, [[SRC]], i8 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m8_t test_vwcvt_x_x_v_i16m8(vint8m4_t src, size_t vl) { @@ -69,7 +69,7 @@ vint16m8_t test_vwcvt_x_x_v_i16m8(vint8m4_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32mf2 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv1i32.nxv1i16.i32.i64( poison, [[SRC]], i32 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv1i32.nxv1i16.i16.i64( poison, [[SRC]], i16 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32mf2_t test_vwcvt_x_x_v_i32mf2(vint16mf4_t src, size_t vl) { @@ -79,7 +79,7 @@ vint32mf2_t test_vwcvt_x_x_v_i32mf2(vint16mf4_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32m1 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv2i32.nxv2i16.i32.i64( poison, [[SRC]], i32 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv2i32.nxv2i16.i16.i64( poison, [[SRC]], i16 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m1_t test_vwcvt_x_x_v_i32m1(vint16mf2_t src, size_t vl) { @@ -89,7 +89,7 @@ vint32m1_t test_vwcvt_x_x_v_i32m1(vint16mf2_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32m2 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv4i32.nxv4i16.i32.i64( poison, [[SRC]], i32 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv4i32.nxv4i16.i16.i64( poison, [[SRC]], i16 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m2_t test_vwcvt_x_x_v_i32m2(vint16m1_t src, size_t vl) { @@ -99,7 
+99,7 @@ vint32m2_t test_vwcvt_x_x_v_i32m2(vint16m1_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32m4 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv8i32.nxv8i16.i32.i64( poison, [[SRC]], i32 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv8i32.nxv8i16.i16.i64( poison, [[SRC]], i16 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m4_t test_vwcvt_x_x_v_i32m4(vint16m2_t src, size_t vl) { @@ -109,7 +109,7 @@ vint32m4_t test_vwcvt_x_x_v_i32m4(vint16m2_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32m8 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv16i32.nxv16i16.i32.i64( poison, [[SRC]], i32 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv16i32.nxv16i16.i16.i64( poison, [[SRC]], i16 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m8_t test_vwcvt_x_x_v_i32m8(vint16m4_t src, size_t vl) { @@ -119,7 +119,7 @@ vint32m8_t test_vwcvt_x_x_v_i32m8(vint16m4_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i64m1 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv1i64.nxv1i32.i64.i64( poison, [[SRC]], i64 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv1i64.nxv1i32.i32.i64( poison, [[SRC]], i32 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m1_t test_vwcvt_x_x_v_i64m1(vint32mf2_t src, size_t vl) { @@ -129,7 +129,7 @@ vint64m1_t test_vwcvt_x_x_v_i64m1(vint32mf2_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i64m2 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// 
CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv2i64.nxv2i32.i64.i64( poison, [[SRC]], i64 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv2i64.nxv2i32.i32.i64( poison, [[SRC]], i32 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m2_t test_vwcvt_x_x_v_i64m2(vint32m1_t src, size_t vl) { @@ -139,7 +139,7 @@ vint64m2_t test_vwcvt_x_x_v_i64m2(vint32m1_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i64m4 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv4i64.nxv4i32.i64.i64( poison, [[SRC]], i64 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv4i64.nxv4i32.i32.i64( poison, [[SRC]], i32 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m4_t test_vwcvt_x_x_v_i64m4(vint32m2_t src, size_t vl) { @@ -149,7 +149,7 @@ vint64m4_t test_vwcvt_x_x_v_i64m4(vint32m2_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i64m8 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv8i64.nxv8i32.i64.i64( poison, [[SRC]], i64 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv8i64.nxv8i32.i32.i64( poison, [[SRC]], i32 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m8_t test_vwcvt_x_x_v_i64m8(vint32m4_t src, size_t vl) { @@ -159,7 +159,7 @@ vint64m8_t test_vwcvt_x_x_v_i64m8(vint32m4_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16mf4_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv1i16.nxv1i8.i16.i64( poison, [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv1i16.nxv1i8.i8.i64( poison, 
[[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16mf4_t test_vwcvt_x_x_v_i16mf4_m(vbool64_t mask, vint8mf8_t src, size_t vl) { @@ -169,7 +169,7 @@ vint16mf4_t test_vwcvt_x_x_v_i16mf4_m(vbool64_t mask, vint8mf8_t src, size_t vl) // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16mf2_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv2i16.nxv2i8.i16.i64( poison, [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv2i16.nxv2i8.i8.i64( poison, [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16mf2_t test_vwcvt_x_x_v_i16mf2_m(vbool32_t mask, vint8mf4_t src, size_t vl) { @@ -179,7 +179,7 @@ vint16mf2_t test_vwcvt_x_x_v_i16mf2_m(vbool32_t mask, vint8mf4_t src, size_t vl) // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16m1_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv4i16.nxv4i8.i16.i64( poison, [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv4i16.nxv4i8.i8.i64( poison, [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m1_t test_vwcvt_x_x_v_i16m1_m(vbool16_t mask, vint8mf2_t src, size_t vl) { @@ -189,7 +189,7 @@ vint16m1_t test_vwcvt_x_x_v_i16m1_m(vbool16_t mask, vint8mf2_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16m2_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv8i16.nxv8i8.i16.i64( poison, [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
@llvm.riscv.vwadd.mask.nxv8i16.nxv8i8.i8.i64( poison, [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m2_t test_vwcvt_x_x_v_i16m2_m(vbool8_t mask, vint8m1_t src, size_t vl) { @@ -199,7 +199,7 @@ vint16m2_t test_vwcvt_x_x_v_i16m2_m(vbool8_t mask, vint8m1_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16m4_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv16i16.nxv16i8.i16.i64( poison, [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv16i16.nxv16i8.i8.i64( poison, [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m4_t test_vwcvt_x_x_v_i16m4_m(vbool4_t mask, vint8m2_t src, size_t vl) { @@ -209,7 +209,7 @@ vint16m4_t test_vwcvt_x_x_v_i16m4_m(vbool4_t mask, vint8m2_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16m8_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv32i16.nxv32i8.i16.i64( poison, [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv32i16.nxv32i8.i8.i64( poison, [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m8_t test_vwcvt_x_x_v_i16m8_m(vbool2_t mask, vint8m4_t src, size_t vl) { @@ -219,7 +219,7 @@ vint16m8_t test_vwcvt_x_x_v_i16m8_m(vbool2_t mask, vint8m4_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32mf2_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv1i32.nxv1i16.i32.i64( poison, [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 3) +// 
CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv1i32.nxv1i16.i16.i64( poison, [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32mf2_t test_vwcvt_x_x_v_i32mf2_m(vbool64_t mask, vint16mf4_t src, size_t vl) { @@ -229,7 +229,7 @@ vint32mf2_t test_vwcvt_x_x_v_i32mf2_m(vbool64_t mask, vint16mf4_t src, size_t vl // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32m1_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv2i32.nxv2i16.i32.i64( poison, [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv2i32.nxv2i16.i16.i64( poison, [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m1_t test_vwcvt_x_x_v_i32m1_m(vbool32_t mask, vint16mf2_t src, size_t vl) { @@ -239,7 +239,7 @@ vint32m1_t test_vwcvt_x_x_v_i32m1_m(vbool32_t mask, vint16mf2_t src, size_t vl) // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32m2_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv4i32.nxv4i16.i32.i64( poison, [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv4i32.nxv4i16.i16.i64( poison, [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m2_t test_vwcvt_x_x_v_i32m2_m(vbool16_t mask, vint16m1_t src, size_t vl) { @@ -249,7 +249,7 @@ vint32m2_t test_vwcvt_x_x_v_i32m2_m(vbool16_t mask, vint16m1_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32m4_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv8i32.nxv8i16.i32.i64( poison, [[SRC]], 
i32 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv8i32.nxv8i16.i16.i64( poison, [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m4_t test_vwcvt_x_x_v_i32m4_m(vbool8_t mask, vint16m2_t src, size_t vl) { @@ -259,7 +259,7 @@ vint32m4_t test_vwcvt_x_x_v_i32m4_m(vbool8_t mask, vint16m2_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32m8_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv16i32.nxv16i16.i32.i64( poison, [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv16i32.nxv16i16.i16.i64( poison, [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m8_t test_vwcvt_x_x_v_i32m8_m(vbool4_t mask, vint16m4_t src, size_t vl) { @@ -269,7 +269,7 @@ vint32m8_t test_vwcvt_x_x_v_i32m8_m(vbool4_t mask, vint16m4_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i64m1_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv1i64.nxv1i32.i64.i64( poison, [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv1i64.nxv1i32.i32.i64( poison, [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m1_t test_vwcvt_x_x_v_i64m1_m(vbool64_t mask, vint32mf2_t src, size_t vl) { @@ -279,7 +279,7 @@ vint64m1_t test_vwcvt_x_x_v_i64m1_m(vbool64_t mask, vint32mf2_t src, size_t vl) // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i64m2_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
@llvm.riscv.vwadd.mask.nxv2i64.nxv2i32.i64.i64( poison, [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv2i64.nxv2i32.i32.i64( poison, [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m2_t test_vwcvt_x_x_v_i64m2_m(vbool32_t mask, vint32m1_t src, size_t vl) { @@ -289,7 +289,7 @@ vint64m2_t test_vwcvt_x_x_v_i64m2_m(vbool32_t mask, vint32m1_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i64m4_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv4i64.nxv4i32.i64.i64( poison, [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv4i64.nxv4i32.i32.i64( poison, [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m4_t test_vwcvt_x_x_v_i64m4_m(vbool16_t mask, vint32m2_t src, size_t vl) { @@ -299,7 +299,7 @@ vint64m4_t test_vwcvt_x_x_v_i64m4_m(vbool16_t mask, vint32m2_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i64m8_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv8i64.nxv8i32.i64.i64( poison, [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv8i64.nxv8i32.i32.i64( poison, [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m8_t test_vwcvt_x_x_v_i64m8_m(vbool8_t mask, vint32m4_t src, size_t vl) { diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vwcvtu.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vwcvtu.c index 6026e80b5f600..ac1a7dd8b8ec0 100644 --- 
a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vwcvtu.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vwcvtu.c @@ -9,7 +9,7 @@ // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16mf4 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv1i16.nxv1i8.i16.i64( poison, [[SRC]], i16 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv1i16.nxv1i8.i8.i64( poison, [[SRC]], i8 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16mf4_t test_vwcvtu_x_x_v_u16mf4(vuint8mf8_t src, size_t vl) { @@ -19,7 +19,7 @@ vuint16mf4_t test_vwcvtu_x_x_v_u16mf4(vuint8mf8_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16mf2 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv2i16.nxv2i8.i16.i64( poison, [[SRC]], i16 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv2i16.nxv2i8.i8.i64( poison, [[SRC]], i8 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16mf2_t test_vwcvtu_x_x_v_u16mf2(vuint8mf4_t src, size_t vl) { @@ -29,7 +29,7 @@ vuint16mf2_t test_vwcvtu_x_x_v_u16mf2(vuint8mf4_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16m1 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv4i16.nxv4i8.i16.i64( poison, [[SRC]], i16 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv4i16.nxv4i8.i8.i64( poison, [[SRC]], i8 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m1_t test_vwcvtu_x_x_v_u16m1(vuint8mf2_t src, size_t vl) { @@ -39,7 +39,7 @@ vuint16m1_t test_vwcvtu_x_x_v_u16m1(vuint8mf2_t src, size_t vl) { // CHECK-RV64-LABEL: 
define dso_local @test_vwcvtu_x_x_v_u16m2 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv8i16.nxv8i8.i16.i64( poison, [[SRC]], i16 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv8i16.nxv8i8.i8.i64( poison, [[SRC]], i8 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m2_t test_vwcvtu_x_x_v_u16m2(vuint8m1_t src, size_t vl) { @@ -49,7 +49,7 @@ vuint16m2_t test_vwcvtu_x_x_v_u16m2(vuint8m1_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16m4 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv16i16.nxv16i8.i16.i64( poison, [[SRC]], i16 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv16i16.nxv16i8.i8.i64( poison, [[SRC]], i8 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m4_t test_vwcvtu_x_x_v_u16m4(vuint8m2_t src, size_t vl) { @@ -59,7 +59,7 @@ vuint16m4_t test_vwcvtu_x_x_v_u16m4(vuint8m2_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16m8 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv32i16.nxv32i8.i16.i64( poison, [[SRC]], i16 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv32i16.nxv32i8.i8.i64( poison, [[SRC]], i8 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m8_t test_vwcvtu_x_x_v_u16m8(vuint8m4_t src, size_t vl) { @@ -69,7 +69,7 @@ vuint16m8_t test_vwcvtu_x_x_v_u16m8(vuint8m4_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32mf2 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv1i32.nxv1i16.i32.i64( poison, 
[[SRC]], i32 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv1i32.nxv1i16.i16.i64( poison, [[SRC]], i16 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32mf2_t test_vwcvtu_x_x_v_u32mf2(vuint16mf4_t src, size_t vl) { @@ -79,7 +79,7 @@ vuint32mf2_t test_vwcvtu_x_x_v_u32mf2(vuint16mf4_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32m1 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv2i32.nxv2i16.i32.i64( poison, [[SRC]], i32 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv2i32.nxv2i16.i16.i64( poison, [[SRC]], i16 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m1_t test_vwcvtu_x_x_v_u32m1(vuint16mf2_t src, size_t vl) { @@ -89,7 +89,7 @@ vuint32m1_t test_vwcvtu_x_x_v_u32m1(vuint16mf2_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32m2 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv4i32.nxv4i16.i32.i64( poison, [[SRC]], i32 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv4i32.nxv4i16.i16.i64( poison, [[SRC]], i16 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m2_t test_vwcvtu_x_x_v_u32m2(vuint16m1_t src, size_t vl) { @@ -99,7 +99,7 @@ vuint32m2_t test_vwcvtu_x_x_v_u32m2(vuint16m1_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32m4 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv8i32.nxv8i16.i32.i64( poison, [[SRC]], i32 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv8i32.nxv8i16.i16.i64( poison, [[SRC]], i16 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m4_t 
test_vwcvtu_x_x_v_u32m4(vuint16m2_t src, size_t vl) { @@ -109,7 +109,7 @@ vuint32m4_t test_vwcvtu_x_x_v_u32m4(vuint16m2_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32m8 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv16i32.nxv16i16.i32.i64( poison, [[SRC]], i32 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv16i32.nxv16i16.i16.i64( poison, [[SRC]], i16 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m8_t test_vwcvtu_x_x_v_u32m8(vuint16m4_t src, size_t vl) { @@ -119,7 +119,7 @@ vuint32m8_t test_vwcvtu_x_x_v_u32m8(vuint16m4_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u64m1 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv1i64.nxv1i32.i64.i64( poison, [[SRC]], i64 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv1i64.nxv1i32.i32.i64( poison, [[SRC]], i32 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m1_t test_vwcvtu_x_x_v_u64m1(vuint32mf2_t src, size_t vl) { @@ -129,7 +129,7 @@ vuint64m1_t test_vwcvtu_x_x_v_u64m1(vuint32mf2_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u64m2 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv2i64.nxv2i32.i64.i64( poison, [[SRC]], i64 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv2i64.nxv2i32.i32.i64( poison, [[SRC]], i32 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m2_t test_vwcvtu_x_x_v_u64m2(vuint32m1_t src, size_t vl) { @@ -139,7 +139,7 @@ vuint64m2_t test_vwcvtu_x_x_v_u64m2(vuint32m1_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u64m4 // 
CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv4i64.nxv4i32.i64.i64( poison, [[SRC]], i64 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv4i64.nxv4i32.i32.i64( poison, [[SRC]], i32 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m4_t test_vwcvtu_x_x_v_u64m4(vuint32m2_t src, size_t vl) { @@ -149,7 +149,7 @@ vuint64m4_t test_vwcvtu_x_x_v_u64m4(vuint32m2_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u64m8 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv8i64.nxv8i32.i64.i64( poison, [[SRC]], i64 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv8i64.nxv8i32.i32.i64( poison, [[SRC]], i32 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m8_t test_vwcvtu_x_x_v_u64m8(vuint32m4_t src, size_t vl) { @@ -159,7 +159,7 @@ vuint64m8_t test_vwcvtu_x_x_v_u64m8(vuint32m4_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16mf4_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv1i16.nxv1i8.i16.i64( poison, [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv1i16.nxv1i8.i8.i64( poison, [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16mf4_t test_vwcvtu_x_x_v_u16mf4_m(vbool64_t mask, vuint8mf8_t src, size_t vl) { @@ -169,7 +169,7 @@ vuint16mf4_t test_vwcvtu_x_x_v_u16mf4_m(vbool64_t mask, vuint8mf8_t src, size_t // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16mf2_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// 
CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv2i16.nxv2i8.i16.i64( poison, [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv2i16.nxv2i8.i8.i64( poison, [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16mf2_t test_vwcvtu_x_x_v_u16mf2_m(vbool32_t mask, vuint8mf4_t src, size_t vl) { @@ -179,7 +179,7 @@ vuint16mf2_t test_vwcvtu_x_x_v_u16mf2_m(vbool32_t mask, vuint8mf4_t src, size_t // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16m1_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv4i16.nxv4i8.i16.i64( poison, [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv4i16.nxv4i8.i8.i64( poison, [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m1_t test_vwcvtu_x_x_v_u16m1_m(vbool16_t mask, vuint8mf2_t src, size_t vl) { @@ -189,7 +189,7 @@ vuint16m1_t test_vwcvtu_x_x_v_u16m1_m(vbool16_t mask, vuint8mf2_t src, size_t vl // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16m2_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv8i16.nxv8i8.i16.i64( poison, [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv8i16.nxv8i8.i8.i64( poison, [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m2_t test_vwcvtu_x_x_v_u16m2_m(vbool8_t mask, vuint8m1_t src, size_t vl) { @@ -199,7 +199,7 @@ vuint16m2_t test_vwcvtu_x_x_v_u16m2_m(vbool8_t mask, vuint8m1_t src, size_t vl) // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16m4_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) 
#[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv16i16.nxv16i8.i16.i64( poison, [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv16i16.nxv16i8.i8.i64( poison, [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m4_t test_vwcvtu_x_x_v_u16m4_m(vbool4_t mask, vuint8m2_t src, size_t vl) { @@ -209,7 +209,7 @@ vuint16m4_t test_vwcvtu_x_x_v_u16m4_m(vbool4_t mask, vuint8m2_t src, size_t vl) // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16m8_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv32i16.nxv32i8.i16.i64( poison, [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv32i16.nxv32i8.i8.i64( poison, [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m8_t test_vwcvtu_x_x_v_u16m8_m(vbool2_t mask, vuint8m4_t src, size_t vl) { @@ -219,7 +219,7 @@ vuint16m8_t test_vwcvtu_x_x_v_u16m8_m(vbool2_t mask, vuint8m4_t src, size_t vl) // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32mf2_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv1i32.nxv1i16.i32.i64( poison, [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv1i32.nxv1i16.i16.i64( poison, [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32mf2_t test_vwcvtu_x_x_v_u32mf2_m(vbool64_t mask, vuint16mf4_t src, size_t vl) { @@ -229,7 +229,7 @@ vuint32mf2_t test_vwcvtu_x_x_v_u32mf2_m(vbool64_t mask, vuint16mf4_t src, size_t // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32m1_m // CHECK-RV64-SAME: 
( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv2i32.nxv2i16.i32.i64( poison, [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv2i32.nxv2i16.i16.i64( poison, [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m1_t test_vwcvtu_x_x_v_u32m1_m(vbool32_t mask, vuint16mf2_t src, size_t vl) { @@ -239,7 +239,7 @@ vuint32m1_t test_vwcvtu_x_x_v_u32m1_m(vbool32_t mask, vuint16mf2_t src, size_t v // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32m2_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv4i32.nxv4i16.i32.i64( poison, [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv4i32.nxv4i16.i16.i64( poison, [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m2_t test_vwcvtu_x_x_v_u32m2_m(vbool16_t mask, vuint16m1_t src, size_t vl) { @@ -249,7 +249,7 @@ vuint32m2_t test_vwcvtu_x_x_v_u32m2_m(vbool16_t mask, vuint16m1_t src, size_t vl // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32m4_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv8i32.nxv8i16.i32.i64( poison, [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv8i32.nxv8i16.i16.i64( poison, [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m4_t test_vwcvtu_x_x_v_u32m4_m(vbool8_t mask, vuint16m2_t src, size_t vl) { @@ -259,7 +259,7 @@ vuint32m4_t test_vwcvtu_x_x_v_u32m4_m(vbool8_t mask, vuint16m2_t src, size_t vl) // CHECK-RV64-LABEL: define 
dso_local @test_vwcvtu_x_x_v_u32m8_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv16i32.nxv16i16.i32.i64( poison, [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv16i32.nxv16i16.i16.i64( poison, [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m8_t test_vwcvtu_x_x_v_u32m8_m(vbool4_t mask, vuint16m4_t src, size_t vl) { @@ -269,7 +269,7 @@ vuint32m8_t test_vwcvtu_x_x_v_u32m8_m(vbool4_t mask, vuint16m4_t src, size_t vl) // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u64m1_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv1i64.nxv1i32.i64.i64( poison, [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv1i64.nxv1i32.i32.i64( poison, [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m1_t test_vwcvtu_x_x_v_u64m1_m(vbool64_t mask, vuint32mf2_t src, size_t vl) { @@ -279,7 +279,7 @@ vuint64m1_t test_vwcvtu_x_x_v_u64m1_m(vbool64_t mask, vuint32mf2_t src, size_t v // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u64m2_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv2i64.nxv2i32.i64.i64( poison, [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv2i64.nxv2i32.i32.i64( poison, [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m2_t test_vwcvtu_x_x_v_u64m2_m(vbool32_t mask, vuint32m1_t src, size_t vl) { @@ -289,7 +289,7 @@ vuint64m2_t test_vwcvtu_x_x_v_u64m2_m(vbool32_t 
mask, vuint32m1_t src, size_t vl // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u64m4_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv4i64.nxv4i32.i64.i64( poison, [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv4i64.nxv4i32.i32.i64( poison, [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m4_t test_vwcvtu_x_x_v_u64m4_m(vbool16_t mask, vuint32m2_t src, size_t vl) { @@ -299,7 +299,7 @@ vuint64m4_t test_vwcvtu_x_x_v_u64m4_m(vbool16_t mask, vuint32m2_t src, size_t vl // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u64m8_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv8i64.nxv8i32.i64.i64( poison, [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv8i64.nxv8i32.i32.i64( poison, [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m8_t test_vwcvtu_x_x_v_u64m8_m(vbool8_t mask, vuint32m4_t src, size_t vl) { diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vwcvt.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vwcvt.c index b60cf83a4a780..63d8ccfa7d966 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vwcvt.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vwcvt.c @@ -9,7 +9,7 @@ // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16mf4 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv1i16.nxv1i8.i16.i64( poison, [[SRC]], i16 0, 
i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv1i16.nxv1i8.i8.i64( poison, [[SRC]], i8 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16mf4_t test_vwcvt_x_x_v_i16mf4(vint8mf8_t src, size_t vl) { @@ -19,7 +19,7 @@ vint16mf4_t test_vwcvt_x_x_v_i16mf4(vint8mf8_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16mf2 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv2i16.nxv2i8.i16.i64( poison, [[SRC]], i16 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv2i16.nxv2i8.i8.i64( poison, [[SRC]], i8 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16mf2_t test_vwcvt_x_x_v_i16mf2(vint8mf4_t src, size_t vl) { @@ -29,7 +29,7 @@ vint16mf2_t test_vwcvt_x_x_v_i16mf2(vint8mf4_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16m1 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv4i16.nxv4i8.i16.i64( poison, [[SRC]], i16 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv4i16.nxv4i8.i8.i64( poison, [[SRC]], i8 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m1_t test_vwcvt_x_x_v_i16m1(vint8mf2_t src, size_t vl) { @@ -39,7 +39,7 @@ vint16m1_t test_vwcvt_x_x_v_i16m1(vint8mf2_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16m2 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv8i16.nxv8i8.i16.i64( poison, [[SRC]], i16 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv8i16.nxv8i8.i8.i64( poison, [[SRC]], i8 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m2_t test_vwcvt_x_x_v_i16m2(vint8m1_t src, size_t vl) { @@ -49,7 +49,7 @@ vint16m2_t 
test_vwcvt_x_x_v_i16m2(vint8m1_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16m4 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv16i16.nxv16i8.i16.i64( poison, [[SRC]], i16 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv16i16.nxv16i8.i8.i64( poison, [[SRC]], i8 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m4_t test_vwcvt_x_x_v_i16m4(vint8m2_t src, size_t vl) { @@ -59,7 +59,7 @@ vint16m4_t test_vwcvt_x_x_v_i16m4(vint8m2_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16m8 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv32i16.nxv32i8.i16.i64( poison, [[SRC]], i16 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv32i16.nxv32i8.i8.i64( poison, [[SRC]], i8 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m8_t test_vwcvt_x_x_v_i16m8(vint8m4_t src, size_t vl) { @@ -69,7 +69,7 @@ vint16m8_t test_vwcvt_x_x_v_i16m8(vint8m4_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32mf2 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv1i32.nxv1i16.i32.i64( poison, [[SRC]], i32 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv1i32.nxv1i16.i16.i64( poison, [[SRC]], i16 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32mf2_t test_vwcvt_x_x_v_i32mf2(vint16mf4_t src, size_t vl) { @@ -79,7 +79,7 @@ vint32mf2_t test_vwcvt_x_x_v_i32mf2(vint16mf4_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32m1 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
@llvm.riscv.vwadd.nxv2i32.nxv2i16.i32.i64( poison, [[SRC]], i32 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv2i32.nxv2i16.i16.i64( poison, [[SRC]], i16 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m1_t test_vwcvt_x_x_v_i32m1(vint16mf2_t src, size_t vl) { @@ -89,7 +89,7 @@ vint32m1_t test_vwcvt_x_x_v_i32m1(vint16mf2_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32m2 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv4i32.nxv4i16.i32.i64( poison, [[SRC]], i32 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv4i32.nxv4i16.i16.i64( poison, [[SRC]], i16 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m2_t test_vwcvt_x_x_v_i32m2(vint16m1_t src, size_t vl) { @@ -99,7 +99,7 @@ vint32m2_t test_vwcvt_x_x_v_i32m2(vint16m1_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32m4 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv8i32.nxv8i16.i32.i64( poison, [[SRC]], i32 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv8i32.nxv8i16.i16.i64( poison, [[SRC]], i16 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m4_t test_vwcvt_x_x_v_i32m4(vint16m2_t src, size_t vl) { @@ -109,7 +109,7 @@ vint32m4_t test_vwcvt_x_x_v_i32m4(vint16m2_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32m8 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv16i32.nxv16i16.i32.i64( poison, [[SRC]], i32 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv16i32.nxv16i16.i16.i64( poison, [[SRC]], i16 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m8_t 
test_vwcvt_x_x_v_i32m8(vint16m4_t src, size_t vl) { @@ -119,7 +119,7 @@ vint32m8_t test_vwcvt_x_x_v_i32m8(vint16m4_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i64m1 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv1i64.nxv1i32.i64.i64( poison, [[SRC]], i64 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv1i64.nxv1i32.i32.i64( poison, [[SRC]], i32 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m1_t test_vwcvt_x_x_v_i64m1(vint32mf2_t src, size_t vl) { @@ -129,7 +129,7 @@ vint64m1_t test_vwcvt_x_x_v_i64m1(vint32mf2_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i64m2 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv2i64.nxv2i32.i64.i64( poison, [[SRC]], i64 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv2i64.nxv2i32.i32.i64( poison, [[SRC]], i32 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m2_t test_vwcvt_x_x_v_i64m2(vint32m1_t src, size_t vl) { @@ -139,7 +139,7 @@ vint64m2_t test_vwcvt_x_x_v_i64m2(vint32m1_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i64m4 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv4i64.nxv4i32.i64.i64( poison, [[SRC]], i64 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv4i64.nxv4i32.i32.i64( poison, [[SRC]], i32 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m4_t test_vwcvt_x_x_v_i64m4(vint32m2_t src, size_t vl) { @@ -149,7 +149,7 @@ vint64m4_t test_vwcvt_x_x_v_i64m4(vint32m2_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i64m8 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef 
[[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv8i64.nxv8i32.i64.i64( poison, [[SRC]], i64 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv8i64.nxv8i32.i32.i64( poison, [[SRC]], i32 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m8_t test_vwcvt_x_x_v_i64m8(vint32m4_t src, size_t vl) { @@ -159,7 +159,7 @@ vint64m8_t test_vwcvt_x_x_v_i64m8(vint32m4_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16mf4_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv1i16.nxv1i8.i16.i64( poison, [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv1i16.nxv1i8.i8.i64( poison, [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16mf4_t test_vwcvt_x_x_v_i16mf4_m(vbool64_t mask, vint8mf8_t src, size_t vl) { @@ -169,7 +169,7 @@ vint16mf4_t test_vwcvt_x_x_v_i16mf4_m(vbool64_t mask, vint8mf8_t src, size_t vl) // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16mf2_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv2i16.nxv2i8.i16.i64( poison, [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv2i16.nxv2i8.i8.i64( poison, [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16mf2_t test_vwcvt_x_x_v_i16mf2_m(vbool32_t mask, vint8mf4_t src, size_t vl) { @@ -179,7 +179,7 @@ vint16mf2_t test_vwcvt_x_x_v_i16mf2_m(vbool32_t mask, vint8mf4_t src, size_t vl) // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16m1_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: 
entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv4i16.nxv4i8.i16.i64( poison, [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv4i16.nxv4i8.i8.i64( poison, [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m1_t test_vwcvt_x_x_v_i16m1_m(vbool16_t mask, vint8mf2_t src, size_t vl) { @@ -189,7 +189,7 @@ vint16m1_t test_vwcvt_x_x_v_i16m1_m(vbool16_t mask, vint8mf2_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16m2_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv8i16.nxv8i8.i16.i64( poison, [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv8i16.nxv8i8.i8.i64( poison, [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m2_t test_vwcvt_x_x_v_i16m2_m(vbool8_t mask, vint8m1_t src, size_t vl) { @@ -199,7 +199,7 @@ vint16m2_t test_vwcvt_x_x_v_i16m2_m(vbool8_t mask, vint8m1_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16m4_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv16i16.nxv16i8.i16.i64( poison, [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv16i16.nxv16i8.i8.i64( poison, [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m4_t test_vwcvt_x_x_v_i16m4_m(vbool4_t mask, vint8m2_t src, size_t vl) { @@ -209,7 +209,7 @@ vint16m4_t test_vwcvt_x_x_v_i16m4_m(vbool4_t mask, vint8m2_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16m8_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] 
{ // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv32i16.nxv32i8.i16.i64( poison, [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv32i16.nxv32i8.i8.i64( poison, [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m8_t test_vwcvt_x_x_v_i16m8_m(vbool2_t mask, vint8m4_t src, size_t vl) { @@ -219,7 +219,7 @@ vint16m8_t test_vwcvt_x_x_v_i16m8_m(vbool2_t mask, vint8m4_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32mf2_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv1i32.nxv1i16.i32.i64( poison, [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv1i32.nxv1i16.i16.i64( poison, [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32mf2_t test_vwcvt_x_x_v_i32mf2_m(vbool64_t mask, vint16mf4_t src, size_t vl) { @@ -229,7 +229,7 @@ vint32mf2_t test_vwcvt_x_x_v_i32mf2_m(vbool64_t mask, vint16mf4_t src, size_t vl // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32m1_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv2i32.nxv2i16.i32.i64( poison, [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv2i32.nxv2i16.i16.i64( poison, [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m1_t test_vwcvt_x_x_v_i32m1_m(vbool32_t mask, vint16mf2_t src, size_t vl) { @@ -239,7 +239,7 @@ vint32m1_t test_vwcvt_x_x_v_i32m1_m(vbool32_t mask, vint16mf2_t src, size_t vl) // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32m2_m // CHECK-RV64-SAME: ( [[MASK:%.*]], 
[[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv4i32.nxv4i16.i32.i64( poison, [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv4i32.nxv4i16.i16.i64( poison, [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m2_t test_vwcvt_x_x_v_i32m2_m(vbool16_t mask, vint16m1_t src, size_t vl) { @@ -249,7 +249,7 @@ vint32m2_t test_vwcvt_x_x_v_i32m2_m(vbool16_t mask, vint16m1_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32m4_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv8i32.nxv8i16.i32.i64( poison, [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv8i32.nxv8i16.i16.i64( poison, [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m4_t test_vwcvt_x_x_v_i32m4_m(vbool8_t mask, vint16m2_t src, size_t vl) { @@ -259,7 +259,7 @@ vint32m4_t test_vwcvt_x_x_v_i32m4_m(vbool8_t mask, vint16m2_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32m8_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv16i32.nxv16i16.i32.i64( poison, [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv16i32.nxv16i16.i16.i64( poison, [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m8_t test_vwcvt_x_x_v_i32m8_m(vbool4_t mask, vint16m4_t src, size_t vl) { @@ -269,7 +269,7 @@ vint32m8_t test_vwcvt_x_x_v_i32m8_m(vbool4_t mask, vint16m4_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local 
@test_vwcvt_x_x_v_i64m1_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv1i64.nxv1i32.i64.i64( poison, [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv1i64.nxv1i32.i32.i64( poison, [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m1_t test_vwcvt_x_x_v_i64m1_m(vbool64_t mask, vint32mf2_t src, size_t vl) { @@ -279,7 +279,7 @@ vint64m1_t test_vwcvt_x_x_v_i64m1_m(vbool64_t mask, vint32mf2_t src, size_t vl) // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i64m2_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv2i64.nxv2i32.i64.i64( poison, [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv2i64.nxv2i32.i32.i64( poison, [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m2_t test_vwcvt_x_x_v_i64m2_m(vbool32_t mask, vint32m1_t src, size_t vl) { @@ -289,7 +289,7 @@ vint64m2_t test_vwcvt_x_x_v_i64m2_m(vbool32_t mask, vint32m1_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i64m4_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv4i64.nxv4i32.i64.i64( poison, [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv4i64.nxv4i32.i32.i64( poison, [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m4_t test_vwcvt_x_x_v_i64m4_m(vbool16_t mask, vint32m2_t src, size_t vl) { @@ -299,7 +299,7 @@ vint64m4_t test_vwcvt_x_x_v_i64m4_m(vbool16_t mask, vint32m2_t src, size_t vl) { 
// CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i64m8_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv8i64.nxv8i32.i64.i64( poison, [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv8i64.nxv8i32.i32.i64( poison, [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m8_t test_vwcvt_x_x_v_i64m8_m(vbool8_t mask, vint32m4_t src, size_t vl) { diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vwcvtu.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vwcvtu.c index 555888d94980c..96e47f105be87 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vwcvtu.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vwcvtu.c @@ -9,7 +9,7 @@ // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16mf4 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv1i16.nxv1i8.i16.i64( poison, [[SRC]], i16 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv1i16.nxv1i8.i8.i64( poison, [[SRC]], i8 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16mf4_t test_vwcvtu_x_x_v_u16mf4(vuint8mf8_t src, size_t vl) { @@ -19,7 +19,7 @@ vuint16mf4_t test_vwcvtu_x_x_v_u16mf4(vuint8mf8_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16mf2 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv2i16.nxv2i8.i16.i64( poison, [[SRC]], i16 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv2i16.nxv2i8.i8.i64( poison, [[SRC]], i8 0, i64 
[[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16mf2_t test_vwcvtu_x_x_v_u16mf2(vuint8mf4_t src, size_t vl) { @@ -29,7 +29,7 @@ vuint16mf2_t test_vwcvtu_x_x_v_u16mf2(vuint8mf4_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16m1 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv4i16.nxv4i8.i16.i64( poison, [[SRC]], i16 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv4i16.nxv4i8.i8.i64( poison, [[SRC]], i8 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m1_t test_vwcvtu_x_x_v_u16m1(vuint8mf2_t src, size_t vl) { @@ -39,7 +39,7 @@ vuint16m1_t test_vwcvtu_x_x_v_u16m1(vuint8mf2_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16m2 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv8i16.nxv8i8.i16.i64( poison, [[SRC]], i16 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv8i16.nxv8i8.i8.i64( poison, [[SRC]], i8 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m2_t test_vwcvtu_x_x_v_u16m2(vuint8m1_t src, size_t vl) { @@ -49,7 +49,7 @@ vuint16m2_t test_vwcvtu_x_x_v_u16m2(vuint8m1_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16m4 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv16i16.nxv16i8.i16.i64( poison, [[SRC]], i16 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv16i16.nxv16i8.i8.i64( poison, [[SRC]], i8 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m4_t test_vwcvtu_x_x_v_u16m4(vuint8m2_t src, size_t vl) { @@ -59,7 +59,7 @@ vuint16m4_t test_vwcvtu_x_x_v_u16m4(vuint8m2_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local 
@test_vwcvtu_x_x_v_u16m8 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv32i16.nxv32i8.i16.i64( poison, [[SRC]], i16 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv32i16.nxv32i8.i8.i64( poison, [[SRC]], i8 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m8_t test_vwcvtu_x_x_v_u16m8(vuint8m4_t src, size_t vl) { @@ -69,7 +69,7 @@ vuint16m8_t test_vwcvtu_x_x_v_u16m8(vuint8m4_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32mf2 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv1i32.nxv1i16.i32.i64( poison, [[SRC]], i32 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv1i32.nxv1i16.i16.i64( poison, [[SRC]], i16 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32mf2_t test_vwcvtu_x_x_v_u32mf2(vuint16mf4_t src, size_t vl) { @@ -79,7 +79,7 @@ vuint32mf2_t test_vwcvtu_x_x_v_u32mf2(vuint16mf4_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32m1 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv2i32.nxv2i16.i32.i64( poison, [[SRC]], i32 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv2i32.nxv2i16.i16.i64( poison, [[SRC]], i16 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m1_t test_vwcvtu_x_x_v_u32m1(vuint16mf2_t src, size_t vl) { @@ -89,7 +89,7 @@ vuint32m1_t test_vwcvtu_x_x_v_u32m1(vuint16mf2_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32m2 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv4i32.nxv4i16.i32.i64( poison, 
[[SRC]], i32 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv4i32.nxv4i16.i16.i64( poison, [[SRC]], i16 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m2_t test_vwcvtu_x_x_v_u32m2(vuint16m1_t src, size_t vl) { @@ -99,7 +99,7 @@ vuint32m2_t test_vwcvtu_x_x_v_u32m2(vuint16m1_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32m4 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv8i32.nxv8i16.i32.i64( poison, [[SRC]], i32 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv8i32.nxv8i16.i16.i64( poison, [[SRC]], i16 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m4_t test_vwcvtu_x_x_v_u32m4(vuint16m2_t src, size_t vl) { @@ -109,7 +109,7 @@ vuint32m4_t test_vwcvtu_x_x_v_u32m4(vuint16m2_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32m8 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv16i32.nxv16i16.i32.i64( poison, [[SRC]], i32 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv16i32.nxv16i16.i16.i64( poison, [[SRC]], i16 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m8_t test_vwcvtu_x_x_v_u32m8(vuint16m4_t src, size_t vl) { @@ -119,7 +119,7 @@ vuint32m8_t test_vwcvtu_x_x_v_u32m8(vuint16m4_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u64m1 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv1i64.nxv1i32.i64.i64( poison, [[SRC]], i64 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv1i64.nxv1i32.i32.i64( poison, [[SRC]], i32 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m1_t 
test_vwcvtu_x_x_v_u64m1(vuint32mf2_t src, size_t vl) { @@ -129,7 +129,7 @@ vuint64m1_t test_vwcvtu_x_x_v_u64m1(vuint32mf2_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u64m2 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv2i64.nxv2i32.i64.i64( poison, [[SRC]], i64 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv2i64.nxv2i32.i32.i64( poison, [[SRC]], i32 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m2_t test_vwcvtu_x_x_v_u64m2(vuint32m1_t src, size_t vl) { @@ -139,7 +139,7 @@ vuint64m2_t test_vwcvtu_x_x_v_u64m2(vuint32m1_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u64m4 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv4i64.nxv4i32.i64.i64( poison, [[SRC]], i64 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv4i64.nxv4i32.i32.i64( poison, [[SRC]], i32 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m4_t test_vwcvtu_x_x_v_u64m4(vuint32m2_t src, size_t vl) { @@ -149,7 +149,7 @@ vuint64m4_t test_vwcvtu_x_x_v_u64m4(vuint32m2_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u64m8 // CHECK-RV64-SAME: ( [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv8i64.nxv8i32.i64.i64( poison, [[SRC]], i64 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv8i64.nxv8i32.i32.i64( poison, [[SRC]], i32 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m8_t test_vwcvtu_x_x_v_u64m8(vuint32m4_t src, size_t vl) { @@ -159,7 +159,7 @@ vuint64m8_t test_vwcvtu_x_x_v_u64m8(vuint32m4_t src, size_t vl) { // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16mf4_m // 
CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv1i16.nxv1i8.i16.i64( poison, [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv1i16.nxv1i8.i8.i64( poison, [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16mf4_t test_vwcvtu_x_x_v_u16mf4_m(vbool64_t mask, vuint8mf8_t src, size_t vl) { @@ -169,7 +169,7 @@ vuint16mf4_t test_vwcvtu_x_x_v_u16mf4_m(vbool64_t mask, vuint8mf8_t src, size_t // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16mf2_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv2i16.nxv2i8.i16.i64( poison, [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv2i16.nxv2i8.i8.i64( poison, [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16mf2_t test_vwcvtu_x_x_v_u16mf2_m(vbool32_t mask, vuint8mf4_t src, size_t vl) { @@ -179,7 +179,7 @@ vuint16mf2_t test_vwcvtu_x_x_v_u16mf2_m(vbool32_t mask, vuint8mf4_t src, size_t // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16m1_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv4i16.nxv4i8.i16.i64( poison, [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv4i16.nxv4i8.i8.i64( poison, [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m1_t test_vwcvtu_x_x_v_u16m1_m(vbool16_t mask, vuint8mf2_t src, size_t vl) { @@ -189,7 +189,7 @@ vuint16m1_t test_vwcvtu_x_x_v_u16m1_m(vbool16_t mask, vuint8mf2_t src, size_t vl // CHECK-RV64-LABEL: 
define dso_local @test_vwcvtu_x_x_v_u16m2_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv8i16.nxv8i8.i16.i64( poison, [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv8i16.nxv8i8.i8.i64( poison, [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m2_t test_vwcvtu_x_x_v_u16m2_m(vbool8_t mask, vuint8m1_t src, size_t vl) { @@ -199,7 +199,7 @@ vuint16m2_t test_vwcvtu_x_x_v_u16m2_m(vbool8_t mask, vuint8m1_t src, size_t vl) // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16m4_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv16i16.nxv16i8.i16.i64( poison, [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv16i16.nxv16i8.i8.i64( poison, [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m4_t test_vwcvtu_x_x_v_u16m4_m(vbool4_t mask, vuint8m2_t src, size_t vl) { @@ -209,7 +209,7 @@ vuint16m4_t test_vwcvtu_x_x_v_u16m4_m(vbool4_t mask, vuint8m2_t src, size_t vl) // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16m8_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv32i16.nxv32i8.i16.i64( poison, [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv32i16.nxv32i8.i8.i64( poison, [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m8_t test_vwcvtu_x_x_v_u16m8_m(vbool2_t mask, vuint8m4_t src, size_t vl) { @@ -219,7 +219,7 @@ vuint16m8_t test_vwcvtu_x_x_v_u16m8_m(vbool2_t mask, 
vuint8m4_t src, size_t vl) // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32mf2_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv1i32.nxv1i16.i32.i64( poison, [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv1i32.nxv1i16.i16.i64( poison, [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32mf2_t test_vwcvtu_x_x_v_u32mf2_m(vbool64_t mask, vuint16mf4_t src, size_t vl) { @@ -229,7 +229,7 @@ vuint32mf2_t test_vwcvtu_x_x_v_u32mf2_m(vbool64_t mask, vuint16mf4_t src, size_t // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32m1_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv2i32.nxv2i16.i32.i64( poison, [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv2i32.nxv2i16.i16.i64( poison, [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m1_t test_vwcvtu_x_x_v_u32m1_m(vbool32_t mask, vuint16mf2_t src, size_t vl) { @@ -239,7 +239,7 @@ vuint32m1_t test_vwcvtu_x_x_v_u32m1_m(vbool32_t mask, vuint16mf2_t src, size_t v // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32m2_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv4i32.nxv4i16.i32.i64( poison, [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv4i32.nxv4i16.i16.i64( poison, [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m2_t test_vwcvtu_x_x_v_u32m2_m(vbool16_t mask, vuint16m1_t src, size_t vl) { @@ -249,7 
+249,7 @@ vuint32m2_t test_vwcvtu_x_x_v_u32m2_m(vbool16_t mask, vuint16m1_t src, size_t vl // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32m4_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv8i32.nxv8i16.i32.i64( poison, [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv8i32.nxv8i16.i16.i64( poison, [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m4_t test_vwcvtu_x_x_v_u32m4_m(vbool8_t mask, vuint16m2_t src, size_t vl) { @@ -259,7 +259,7 @@ vuint32m4_t test_vwcvtu_x_x_v_u32m4_m(vbool8_t mask, vuint16m2_t src, size_t vl) // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32m8_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv16i32.nxv16i16.i32.i64( poison, [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv16i32.nxv16i16.i16.i64( poison, [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m8_t test_vwcvtu_x_x_v_u32m8_m(vbool4_t mask, vuint16m4_t src, size_t vl) { @@ -269,7 +269,7 @@ vuint32m8_t test_vwcvtu_x_x_v_u32m8_m(vbool4_t mask, vuint16m4_t src, size_t vl) // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u64m1_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv1i64.nxv1i32.i64.i64( poison, [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv1i64.nxv1i32.i32.i64( poison, [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m1_t 
test_vwcvtu_x_x_v_u64m1_m(vbool64_t mask, vuint32mf2_t src, size_t vl) { @@ -279,7 +279,7 @@ vuint64m1_t test_vwcvtu_x_x_v_u64m1_m(vbool64_t mask, vuint32mf2_t src, size_t v // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u64m2_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv2i64.nxv2i32.i64.i64( poison, [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv2i64.nxv2i32.i32.i64( poison, [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m2_t test_vwcvtu_x_x_v_u64m2_m(vbool32_t mask, vuint32m1_t src, size_t vl) { @@ -289,7 +289,7 @@ vuint64m2_t test_vwcvtu_x_x_v_u64m2_m(vbool32_t mask, vuint32m1_t src, size_t vl // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u64m4_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv4i64.nxv4i32.i64.i64( poison, [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv4i64.nxv4i32.i32.i64( poison, [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m4_t test_vwcvtu_x_x_v_u64m4_m(vbool16_t mask, vuint32m2_t src, size_t vl) { @@ -299,7 +299,7 @@ vuint64m4_t test_vwcvtu_x_x_v_u64m4_m(vbool16_t mask, vuint32m2_t src, size_t vl // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u64m8_m // CHECK-RV64-SAME: ( [[MASK:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv8i64.nxv8i32.i64.i64( poison, [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv8i64.nxv8i32.i32.i64( poison, [[SRC]], i32 0, [[MASK]], i64 
[[VL]], i64 3) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m8_t test_vwcvtu_x_x_v_u64m8_m(vbool8_t mask, vuint32m4_t src, size_t vl) { diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/vwcvt.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/vwcvt.c index 783c449e0289d..50066814e8606 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/vwcvt.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/vwcvt.c @@ -9,7 +9,7 @@ // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16mf4_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv1i16.nxv1i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv1i16.nxv1i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16mf4_t test_vwcvt_x_x_v_i16mf4_tu(vint16mf4_t maskedoff, vint8mf8_t src, size_t vl) { @@ -19,7 +19,7 @@ vint16mf4_t test_vwcvt_x_x_v_i16mf4_tu(vint16mf4_t maskedoff, vint8mf8_t src, si // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16mf2_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv2i16.nxv2i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv2i16.nxv2i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16mf2_t test_vwcvt_x_x_v_i16mf2_tu(vint16mf2_t maskedoff, vint8mf4_t src, size_t vl) { @@ -29,7 +29,7 @@ vint16mf2_t test_vwcvt_x_x_v_i16mf2_tu(vint16mf2_t maskedoff, vint8mf4_t src, si // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16m1_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], 
[[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv4i16.nxv4i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv4i16.nxv4i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m1_t test_vwcvt_x_x_v_i16m1_tu(vint16m1_t maskedoff, vint8mf2_t src, size_t vl) { @@ -39,7 +39,7 @@ vint16m1_t test_vwcvt_x_x_v_i16m1_tu(vint16m1_t maskedoff, vint8mf2_t src, size_ // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16m2_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv8i16.nxv8i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv8i16.nxv8i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m2_t test_vwcvt_x_x_v_i16m2_tu(vint16m2_t maskedoff, vint8m1_t src, size_t vl) { @@ -49,7 +49,7 @@ vint16m2_t test_vwcvt_x_x_v_i16m2_tu(vint16m2_t maskedoff, vint8m1_t src, size_t // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16m4_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv16i16.nxv16i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv16i16.nxv16i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m4_t test_vwcvt_x_x_v_i16m4_tu(vint16m4_t maskedoff, vint8m2_t src, size_t vl) { @@ -59,7 +59,7 @@ vint16m4_t test_vwcvt_x_x_v_i16m4_tu(vint16m4_t maskedoff, vint8m2_t src, size_t // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16m8_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef 
[[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv32i16.nxv32i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv32i16.nxv32i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m8_t test_vwcvt_x_x_v_i16m8_tu(vint16m8_t maskedoff, vint8m4_t src, size_t vl) { @@ -69,7 +69,7 @@ vint16m8_t test_vwcvt_x_x_v_i16m8_tu(vint16m8_t maskedoff, vint8m4_t src, size_t // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32mf2_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv1i32.nxv1i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv1i32.nxv1i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32mf2_t test_vwcvt_x_x_v_i32mf2_tu(vint32mf2_t maskedoff, vint16mf4_t src, size_t vl) { @@ -79,7 +79,7 @@ vint32mf2_t test_vwcvt_x_x_v_i32mf2_tu(vint32mf2_t maskedoff, vint16mf4_t src, s // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32m1_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv2i32.nxv2i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv2i32.nxv2i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m1_t test_vwcvt_x_x_v_i32m1_tu(vint32m1_t maskedoff, vint16mf2_t src, size_t vl) { @@ -89,7 +89,7 @@ vint32m1_t test_vwcvt_x_x_v_i32m1_tu(vint32m1_t maskedoff, vint16mf2_t src, size // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32m2_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) 
#[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv4i32.nxv4i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv4i32.nxv4i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m2_t test_vwcvt_x_x_v_i32m2_tu(vint32m2_t maskedoff, vint16m1_t src, size_t vl) { @@ -99,7 +99,7 @@ vint32m2_t test_vwcvt_x_x_v_i32m2_tu(vint32m2_t maskedoff, vint16m1_t src, size_ // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32m4_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv8i32.nxv8i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv8i32.nxv8i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m4_t test_vwcvt_x_x_v_i32m4_tu(vint32m4_t maskedoff, vint16m2_t src, size_t vl) { @@ -109,7 +109,7 @@ vint32m4_t test_vwcvt_x_x_v_i32m4_tu(vint32m4_t maskedoff, vint16m2_t src, size_ // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32m8_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv16i32.nxv16i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv16i32.nxv16i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m8_t test_vwcvt_x_x_v_i32m8_tu(vint32m8_t maskedoff, vint16m4_t src, size_t vl) { @@ -119,7 +119,7 @@ vint32m8_t test_vwcvt_x_x_v_i32m8_tu(vint32m8_t maskedoff, vint16m4_t src, size_ // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i64m1_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] 
{ // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv1i64.nxv1i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv1i64.nxv1i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m1_t test_vwcvt_x_x_v_i64m1_tu(vint64m1_t maskedoff, vint32mf2_t src, size_t vl) { @@ -129,7 +129,7 @@ vint64m1_t test_vwcvt_x_x_v_i64m1_tu(vint64m1_t maskedoff, vint32mf2_t src, size // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i64m2_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv2i64.nxv2i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv2i64.nxv2i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m2_t test_vwcvt_x_x_v_i64m2_tu(vint64m2_t maskedoff, vint32m1_t src, size_t vl) { @@ -139,7 +139,7 @@ vint64m2_t test_vwcvt_x_x_v_i64m2_tu(vint64m2_t maskedoff, vint32m1_t src, size_ // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i64m4_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv4i64.nxv4i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv4i64.nxv4i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m4_t test_vwcvt_x_x_v_i64m4_tu(vint64m4_t maskedoff, vint32m2_t src, size_t vl) { @@ -149,7 +149,7 @@ vint64m4_t test_vwcvt_x_x_v_i64m4_tu(vint64m4_t maskedoff, vint32m2_t src, size_ // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i64m8_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // 
CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv8i64.nxv8i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv8i64.nxv8i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m8_t test_vwcvt_x_x_v_i64m8_tu(vint64m8_t maskedoff, vint32m4_t src, size_t vl) { @@ -159,7 +159,7 @@ vint64m8_t test_vwcvt_x_x_v_i64m8_tu(vint64m8_t maskedoff, vint32m4_t src, size_ // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16mf4_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv1i16.nxv1i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv1i16.nxv1i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16mf4_t test_vwcvt_x_x_v_i16mf4_tum(vbool64_t mask, vint16mf4_t maskedoff, vint8mf8_t src, size_t vl) { @@ -169,7 +169,7 @@ vint16mf4_t test_vwcvt_x_x_v_i16mf4_tum(vbool64_t mask, vint16mf4_t maskedoff, v // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16mf2_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv2i16.nxv2i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv2i16.nxv2i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16mf2_t test_vwcvt_x_x_v_i16mf2_tum(vbool32_t mask, vint16mf2_t maskedoff, vint8mf4_t src, size_t vl) { @@ -179,7 +179,7 @@ vint16mf2_t test_vwcvt_x_x_v_i16mf2_tum(vbool32_t mask, vint16mf2_t maskedoff, v // CHECK-RV64-LABEL: 
define dso_local @test_vwcvt_x_x_v_i16m1_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv4i16.nxv4i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv4i16.nxv4i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m1_t test_vwcvt_x_x_v_i16m1_tum(vbool16_t mask, vint16m1_t maskedoff, vint8mf2_t src, size_t vl) { @@ -189,7 +189,7 @@ vint16m1_t test_vwcvt_x_x_v_i16m1_tum(vbool16_t mask, vint16m1_t maskedoff, vint // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16m2_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv8i16.nxv8i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv8i16.nxv8i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m2_t test_vwcvt_x_x_v_i16m2_tum(vbool8_t mask, vint16m2_t maskedoff, vint8m1_t src, size_t vl) { @@ -199,7 +199,7 @@ vint16m2_t test_vwcvt_x_x_v_i16m2_tum(vbool8_t mask, vint16m2_t maskedoff, vint8 // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16m4_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv16i16.nxv16i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv16i16.nxv16i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m4_t 
test_vwcvt_x_x_v_i16m4_tum(vbool4_t mask, vint16m4_t maskedoff, vint8m2_t src, size_t vl) { @@ -209,7 +209,7 @@ vint16m4_t test_vwcvt_x_x_v_i16m4_tum(vbool4_t mask, vint16m4_t maskedoff, vint8 // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16m8_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv32i16.nxv32i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv32i16.nxv32i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m8_t test_vwcvt_x_x_v_i16m8_tum(vbool2_t mask, vint16m8_t maskedoff, vint8m4_t src, size_t vl) { @@ -219,7 +219,7 @@ vint16m8_t test_vwcvt_x_x_v_i16m8_tum(vbool2_t mask, vint16m8_t maskedoff, vint8 // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32mf2_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv1i32.nxv1i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv1i32.nxv1i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32mf2_t test_vwcvt_x_x_v_i32mf2_tum(vbool64_t mask, vint32mf2_t maskedoff, vint16mf4_t src, size_t vl) { @@ -229,7 +229,7 @@ vint32mf2_t test_vwcvt_x_x_v_i32mf2_tum(vbool64_t mask, vint32mf2_t maskedoff, v // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32m1_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv2i32.nxv2i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], 
i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv2i32.nxv2i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m1_t test_vwcvt_x_x_v_i32m1_tum(vbool32_t mask, vint32m1_t maskedoff, vint16mf2_t src, size_t vl) { @@ -239,7 +239,7 @@ vint32m1_t test_vwcvt_x_x_v_i32m1_tum(vbool32_t mask, vint32m1_t maskedoff, vint // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32m2_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv4i32.nxv4i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv4i32.nxv4i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m2_t test_vwcvt_x_x_v_i32m2_tum(vbool16_t mask, vint32m2_t maskedoff, vint16m1_t src, size_t vl) { @@ -249,7 +249,7 @@ vint32m2_t test_vwcvt_x_x_v_i32m2_tum(vbool16_t mask, vint32m2_t maskedoff, vint // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32m4_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv8i32.nxv8i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv8i32.nxv8i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m4_t test_vwcvt_x_x_v_i32m4_tum(vbool8_t mask, vint32m4_t maskedoff, vint16m2_t src, size_t vl) { @@ -259,7 +259,7 @@ vint32m4_t test_vwcvt_x_x_v_i32m4_tum(vbool8_t mask, vint32m4_t maskedoff, vint1 // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32m8_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], 
[[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv16i32.nxv16i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv16i32.nxv16i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m8_t test_vwcvt_x_x_v_i32m8_tum(vbool4_t mask, vint32m8_t maskedoff, vint16m4_t src, size_t vl) { @@ -269,7 +269,7 @@ vint32m8_t test_vwcvt_x_x_v_i32m8_tum(vbool4_t mask, vint32m8_t maskedoff, vint1 // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i64m1_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv1i64.nxv1i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv1i64.nxv1i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m1_t test_vwcvt_x_x_v_i64m1_tum(vbool64_t mask, vint64m1_t maskedoff, vint32mf2_t src, size_t vl) { @@ -279,7 +279,7 @@ vint64m1_t test_vwcvt_x_x_v_i64m1_tum(vbool64_t mask, vint64m1_t maskedoff, vint // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i64m2_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv2i64.nxv2i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv2i64.nxv2i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m2_t test_vwcvt_x_x_v_i64m2_tum(vbool32_t mask, vint64m2_t maskedoff, vint32m1_t 
src, size_t vl) { @@ -289,7 +289,7 @@ vint64m2_t test_vwcvt_x_x_v_i64m2_tum(vbool32_t mask, vint64m2_t maskedoff, vint // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i64m4_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv4i64.nxv4i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv4i64.nxv4i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m4_t test_vwcvt_x_x_v_i64m4_tum(vbool16_t mask, vint64m4_t maskedoff, vint32m2_t src, size_t vl) { @@ -299,7 +299,7 @@ vint64m4_t test_vwcvt_x_x_v_i64m4_tum(vbool16_t mask, vint64m4_t maskedoff, vint // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i64m8_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv8i64.nxv8i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv8i64.nxv8i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m8_t test_vwcvt_x_x_v_i64m8_tum(vbool8_t mask, vint64m8_t maskedoff, vint32m4_t src, size_t vl) { @@ -309,7 +309,7 @@ vint64m8_t test_vwcvt_x_x_v_i64m8_tum(vbool8_t mask, vint64m8_t maskedoff, vint3 // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16mf4_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv1i16.nxv1i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
@llvm.riscv.vwadd.mask.nxv1i16.nxv1i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16mf4_t test_vwcvt_x_x_v_i16mf4_tumu(vbool64_t mask, vint16mf4_t maskedoff, vint8mf8_t src, size_t vl) { @@ -319,7 +319,7 @@ vint16mf4_t test_vwcvt_x_x_v_i16mf4_tumu(vbool64_t mask, vint16mf4_t maskedoff, // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16mf2_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv2i16.nxv2i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv2i16.nxv2i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16mf2_t test_vwcvt_x_x_v_i16mf2_tumu(vbool32_t mask, vint16mf2_t maskedoff, vint8mf4_t src, size_t vl) { @@ -329,7 +329,7 @@ vint16mf2_t test_vwcvt_x_x_v_i16mf2_tumu(vbool32_t mask, vint16mf2_t maskedoff, // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16m1_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv4i16.nxv4i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv4i16.nxv4i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m1_t test_vwcvt_x_x_v_i16m1_tumu(vbool16_t mask, vint16m1_t maskedoff, vint8mf2_t src, size_t vl) { @@ -339,7 +339,7 @@ vint16m1_t test_vwcvt_x_x_v_i16m1_tumu(vbool16_t mask, vint16m1_t maskedoff, vin // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16m2_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // 
CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv8i16.nxv8i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv8i16.nxv8i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m2_t test_vwcvt_x_x_v_i16m2_tumu(vbool8_t mask, vint16m2_t maskedoff, vint8m1_t src, size_t vl) { @@ -349,7 +349,7 @@ vint16m2_t test_vwcvt_x_x_v_i16m2_tumu(vbool8_t mask, vint16m2_t maskedoff, vint // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16m4_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv16i16.nxv16i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv16i16.nxv16i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m4_t test_vwcvt_x_x_v_i16m4_tumu(vbool4_t mask, vint16m4_t maskedoff, vint8m2_t src, size_t vl) { @@ -359,7 +359,7 @@ vint16m4_t test_vwcvt_x_x_v_i16m4_tumu(vbool4_t mask, vint16m4_t maskedoff, vint // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16m8_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv32i16.nxv32i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv32i16.nxv32i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m8_t test_vwcvt_x_x_v_i16m8_tumu(vbool2_t mask, vint16m8_t maskedoff, vint8m4_t src, size_t vl) { @@ -369,7 +369,7 @@ vint16m8_t 
test_vwcvt_x_x_v_i16m8_tumu(vbool2_t mask, vint16m8_t maskedoff, vint // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32mf2_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv1i32.nxv1i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv1i32.nxv1i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32mf2_t test_vwcvt_x_x_v_i32mf2_tumu(vbool64_t mask, vint32mf2_t maskedoff, vint16mf4_t src, size_t vl) { @@ -379,7 +379,7 @@ vint32mf2_t test_vwcvt_x_x_v_i32mf2_tumu(vbool64_t mask, vint32mf2_t maskedoff, // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32m1_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv2i32.nxv2i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv2i32.nxv2i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m1_t test_vwcvt_x_x_v_i32m1_tumu(vbool32_t mask, vint32m1_t maskedoff, vint16mf2_t src, size_t vl) { @@ -389,7 +389,7 @@ vint32m1_t test_vwcvt_x_x_v_i32m1_tumu(vbool32_t mask, vint32m1_t maskedoff, vin // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32m2_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv4i32.nxv4i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv4i32.nxv4i16.i16.i64( 
[[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m2_t test_vwcvt_x_x_v_i32m2_tumu(vbool16_t mask, vint32m2_t maskedoff, vint16m1_t src, size_t vl) { @@ -399,7 +399,7 @@ vint32m2_t test_vwcvt_x_x_v_i32m2_tumu(vbool16_t mask, vint32m2_t maskedoff, vin // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32m4_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv8i32.nxv8i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv8i32.nxv8i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m4_t test_vwcvt_x_x_v_i32m4_tumu(vbool8_t mask, vint32m4_t maskedoff, vint16m2_t src, size_t vl) { @@ -409,7 +409,7 @@ vint32m4_t test_vwcvt_x_x_v_i32m4_tumu(vbool8_t mask, vint32m4_t maskedoff, vint // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32m8_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv16i32.nxv16i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv16i32.nxv16i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m8_t test_vwcvt_x_x_v_i32m8_tumu(vbool4_t mask, vint32m8_t maskedoff, vint16m4_t src, size_t vl) { @@ -419,7 +419,7 @@ vint32m8_t test_vwcvt_x_x_v_i32m8_tumu(vbool4_t mask, vint32m8_t maskedoff, vint // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i64m1_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// 
CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv1i64.nxv1i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv1i64.nxv1i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m1_t test_vwcvt_x_x_v_i64m1_tumu(vbool64_t mask, vint64m1_t maskedoff, vint32mf2_t src, size_t vl) { @@ -429,7 +429,7 @@ vint64m1_t test_vwcvt_x_x_v_i64m1_tumu(vbool64_t mask, vint64m1_t maskedoff, vin // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i64m2_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv2i64.nxv2i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv2i64.nxv2i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m2_t test_vwcvt_x_x_v_i64m2_tumu(vbool32_t mask, vint64m2_t maskedoff, vint32m1_t src, size_t vl) { @@ -439,7 +439,7 @@ vint64m2_t test_vwcvt_x_x_v_i64m2_tumu(vbool32_t mask, vint64m2_t maskedoff, vin // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i64m4_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv4i64.nxv4i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv4i64.nxv4i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m4_t test_vwcvt_x_x_v_i64m4_tumu(vbool16_t mask, vint64m4_t maskedoff, vint32m2_t src, size_t vl) { @@ -449,7 +449,7 @@ vint64m4_t test_vwcvt_x_x_v_i64m4_tumu(vbool16_t mask, 
vint64m4_t maskedoff, vin // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i64m8_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv8i64.nxv8i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv8i64.nxv8i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m8_t test_vwcvt_x_x_v_i64m8_tumu(vbool8_t mask, vint64m8_t maskedoff, vint32m4_t src, size_t vl) { @@ -459,7 +459,7 @@ vint64m8_t test_vwcvt_x_x_v_i64m8_tumu(vbool8_t mask, vint64m8_t maskedoff, vint // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16mf4_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv1i16.nxv1i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv1i16.nxv1i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16mf4_t test_vwcvt_x_x_v_i16mf4_mu(vbool64_t mask, vint16mf4_t maskedoff, vint8mf8_t src, size_t vl) { @@ -469,7 +469,7 @@ vint16mf4_t test_vwcvt_x_x_v_i16mf4_mu(vbool64_t mask, vint16mf4_t maskedoff, vi // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16mf2_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv2i16.nxv2i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv2i16.nxv2i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 1) // 
CHECK-RV64-NEXT: ret [[TMP0]] // vint16mf2_t test_vwcvt_x_x_v_i16mf2_mu(vbool32_t mask, vint16mf2_t maskedoff, vint8mf4_t src, size_t vl) { @@ -479,7 +479,7 @@ vint16mf2_t test_vwcvt_x_x_v_i16mf2_mu(vbool32_t mask, vint16mf2_t maskedoff, vi // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16m1_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv4i16.nxv4i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv4i16.nxv4i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m1_t test_vwcvt_x_x_v_i16m1_mu(vbool16_t mask, vint16m1_t maskedoff, vint8mf2_t src, size_t vl) { @@ -489,7 +489,7 @@ vint16m1_t test_vwcvt_x_x_v_i16m1_mu(vbool16_t mask, vint16m1_t maskedoff, vint8 // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16m2_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv8i16.nxv8i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv8i16.nxv8i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m2_t test_vwcvt_x_x_v_i16m2_mu(vbool8_t mask, vint16m2_t maskedoff, vint8m1_t src, size_t vl) { @@ -499,7 +499,7 @@ vint16m2_t test_vwcvt_x_x_v_i16m2_mu(vbool8_t mask, vint16m2_t maskedoff, vint8m // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16m4_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv16i16.nxv16i8.i16.i64( 
[[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv16i16.nxv16i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m4_t test_vwcvt_x_x_v_i16m4_mu(vbool4_t mask, vint16m4_t maskedoff, vint8m2_t src, size_t vl) { @@ -509,7 +509,7 @@ vint16m4_t test_vwcvt_x_x_v_i16m4_mu(vbool4_t mask, vint16m4_t maskedoff, vint8m // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16m8_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv32i16.nxv32i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv32i16.nxv32i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m8_t test_vwcvt_x_x_v_i16m8_mu(vbool2_t mask, vint16m8_t maskedoff, vint8m4_t src, size_t vl) { @@ -519,7 +519,7 @@ vint16m8_t test_vwcvt_x_x_v_i16m8_mu(vbool2_t mask, vint16m8_t maskedoff, vint8m // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32mf2_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv1i32.nxv1i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv1i32.nxv1i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32mf2_t test_vwcvt_x_x_v_i32mf2_mu(vbool64_t mask, vint32mf2_t maskedoff, vint16mf4_t src, size_t vl) { @@ -529,7 +529,7 @@ vint32mf2_t test_vwcvt_x_x_v_i32mf2_mu(vbool64_t mask, vint32mf2_t maskedoff, vi // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32m1_mu // 
CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv2i32.nxv2i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv2i32.nxv2i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m1_t test_vwcvt_x_x_v_i32m1_mu(vbool32_t mask, vint32m1_t maskedoff, vint16mf2_t src, size_t vl) { @@ -539,7 +539,7 @@ vint32m1_t test_vwcvt_x_x_v_i32m1_mu(vbool32_t mask, vint32m1_t maskedoff, vint1 // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32m2_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv4i32.nxv4i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv4i32.nxv4i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m2_t test_vwcvt_x_x_v_i32m2_mu(vbool16_t mask, vint32m2_t maskedoff, vint16m1_t src, size_t vl) { @@ -549,7 +549,7 @@ vint32m2_t test_vwcvt_x_x_v_i32m2_mu(vbool16_t mask, vint32m2_t maskedoff, vint1 // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32m4_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv8i32.nxv8i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv8i32.nxv8i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m4_t test_vwcvt_x_x_v_i32m4_mu(vbool8_t mask, vint32m4_t 
maskedoff, vint16m2_t src, size_t vl) { @@ -559,7 +559,7 @@ vint32m4_t test_vwcvt_x_x_v_i32m4_mu(vbool8_t mask, vint32m4_t maskedoff, vint16 // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32m8_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv16i32.nxv16i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv16i32.nxv16i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m8_t test_vwcvt_x_x_v_i32m8_mu(vbool4_t mask, vint32m8_t maskedoff, vint16m4_t src, size_t vl) { @@ -569,7 +569,7 @@ vint32m8_t test_vwcvt_x_x_v_i32m8_mu(vbool4_t mask, vint32m8_t maskedoff, vint16 // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i64m1_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv1i64.nxv1i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv1i64.nxv1i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m1_t test_vwcvt_x_x_v_i64m1_mu(vbool64_t mask, vint64m1_t maskedoff, vint32mf2_t src, size_t vl) { @@ -579,7 +579,7 @@ vint64m1_t test_vwcvt_x_x_v_i64m1_mu(vbool64_t mask, vint64m1_t maskedoff, vint3 // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i64m2_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv2i64.nxv2i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = 
call @llvm.riscv.vwadd.mask.nxv2i64.nxv2i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m2_t test_vwcvt_x_x_v_i64m2_mu(vbool32_t mask, vint64m2_t maskedoff, vint32m1_t src, size_t vl) { @@ -589,7 +589,7 @@ vint64m2_t test_vwcvt_x_x_v_i64m2_mu(vbool32_t mask, vint64m2_t maskedoff, vint3 // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i64m4_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv4i64.nxv4i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv4i64.nxv4i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m4_t test_vwcvt_x_x_v_i64m4_mu(vbool16_t mask, vint64m4_t maskedoff, vint32m2_t src, size_t vl) { @@ -599,7 +599,7 @@ vint64m4_t test_vwcvt_x_x_v_i64m4_mu(vbool16_t mask, vint64m4_t maskedoff, vint3 // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i64m8_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv8i64.nxv8i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv8i64.nxv8i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m8_t test_vwcvt_x_x_v_i64m8_mu(vbool8_t mask, vint64m8_t maskedoff, vint32m4_t src, size_t vl) { diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/vwcvtu.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/vwcvtu.c index 3858d5b3e2287..45dfce31bb90a 100644 --- 
a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/vwcvtu.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/vwcvtu.c @@ -9,7 +9,7 @@ // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16mf4_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv1i16.nxv1i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv1i16.nxv1i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16mf4_t test_vwcvtu_x_x_v_u16mf4_tu(vuint16mf4_t maskedoff, vuint8mf8_t src, size_t vl) { @@ -19,7 +19,7 @@ vuint16mf4_t test_vwcvtu_x_x_v_u16mf4_tu(vuint16mf4_t maskedoff, vuint8mf8_t src // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16mf2_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv2i16.nxv2i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv2i16.nxv2i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16mf2_t test_vwcvtu_x_x_v_u16mf2_tu(vuint16mf2_t maskedoff, vuint8mf4_t src, size_t vl) { @@ -29,7 +29,7 @@ vuint16mf2_t test_vwcvtu_x_x_v_u16mf2_tu(vuint16mf2_t maskedoff, vuint8mf4_t src // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16m1_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv4i16.nxv4i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv4i16.nxv4i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret 
[[TMP0]] // vuint16m1_t test_vwcvtu_x_x_v_u16m1_tu(vuint16m1_t maskedoff, vuint8mf2_t src, size_t vl) { @@ -39,7 +39,7 @@ vuint16m1_t test_vwcvtu_x_x_v_u16m1_tu(vuint16m1_t maskedoff, vuint8mf2_t src, s // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16m2_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv8i16.nxv8i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv8i16.nxv8i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m2_t test_vwcvtu_x_x_v_u16m2_tu(vuint16m2_t maskedoff, vuint8m1_t src, size_t vl) { @@ -49,7 +49,7 @@ vuint16m2_t test_vwcvtu_x_x_v_u16m2_tu(vuint16m2_t maskedoff, vuint8m1_t src, si // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16m4_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv16i16.nxv16i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv16i16.nxv16i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m4_t test_vwcvtu_x_x_v_u16m4_tu(vuint16m4_t maskedoff, vuint8m2_t src, size_t vl) { @@ -59,7 +59,7 @@ vuint16m4_t test_vwcvtu_x_x_v_u16m4_tu(vuint16m4_t maskedoff, vuint8m2_t src, si // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16m8_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv32i16.nxv32i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv32i16.nxv32i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret 
[[TMP0]] // vuint16m8_t test_vwcvtu_x_x_v_u16m8_tu(vuint16m8_t maskedoff, vuint8m4_t src, size_t vl) { @@ -69,7 +69,7 @@ vuint16m8_t test_vwcvtu_x_x_v_u16m8_tu(vuint16m8_t maskedoff, vuint8m4_t src, si // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32mf2_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv1i32.nxv1i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv1i32.nxv1i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32mf2_t test_vwcvtu_x_x_v_u32mf2_tu(vuint32mf2_t maskedoff, vuint16mf4_t src, size_t vl) { @@ -79,7 +79,7 @@ vuint32mf2_t test_vwcvtu_x_x_v_u32mf2_tu(vuint32mf2_t maskedoff, vuint16mf4_t sr // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32m1_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv2i32.nxv2i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv2i32.nxv2i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m1_t test_vwcvtu_x_x_v_u32m1_tu(vuint32m1_t maskedoff, vuint16mf2_t src, size_t vl) { @@ -89,7 +89,7 @@ vuint32m1_t test_vwcvtu_x_x_v_u32m1_tu(vuint32m1_t maskedoff, vuint16mf2_t src, // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32m2_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv4i32.nxv4i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv4i32.nxv4i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, i64 [[VL]]) // 
CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m2_t test_vwcvtu_x_x_v_u32m2_tu(vuint32m2_t maskedoff, vuint16m1_t src, size_t vl) { @@ -99,7 +99,7 @@ vuint32m2_t test_vwcvtu_x_x_v_u32m2_tu(vuint32m2_t maskedoff, vuint16m1_t src, s // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32m4_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv8i32.nxv8i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv8i32.nxv8i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m4_t test_vwcvtu_x_x_v_u32m4_tu(vuint32m4_t maskedoff, vuint16m2_t src, size_t vl) { @@ -109,7 +109,7 @@ vuint32m4_t test_vwcvtu_x_x_v_u32m4_tu(vuint32m4_t maskedoff, vuint16m2_t src, s // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32m8_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv16i32.nxv16i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv16i32.nxv16i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m8_t test_vwcvtu_x_x_v_u32m8_tu(vuint32m8_t maskedoff, vuint16m4_t src, size_t vl) { @@ -119,7 +119,7 @@ vuint32m8_t test_vwcvtu_x_x_v_u32m8_tu(vuint32m8_t maskedoff, vuint16m4_t src, s // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u64m1_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv1i64.nxv1i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv1i64.nxv1i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, i64 
[[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m1_t test_vwcvtu_x_x_v_u64m1_tu(vuint64m1_t maskedoff, vuint32mf2_t src, size_t vl) { @@ -129,7 +129,7 @@ vuint64m1_t test_vwcvtu_x_x_v_u64m1_tu(vuint64m1_t maskedoff, vuint32mf2_t src, // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u64m2_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv2i64.nxv2i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv2i64.nxv2i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m2_t test_vwcvtu_x_x_v_u64m2_tu(vuint64m2_t maskedoff, vuint32m1_t src, size_t vl) { @@ -139,7 +139,7 @@ vuint64m2_t test_vwcvtu_x_x_v_u64m2_tu(vuint64m2_t maskedoff, vuint32m1_t src, s // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u64m4_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv4i64.nxv4i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv4i64.nxv4i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m4_t test_vwcvtu_x_x_v_u64m4_tu(vuint64m4_t maskedoff, vuint32m2_t src, size_t vl) { @@ -149,7 +149,7 @@ vuint64m4_t test_vwcvtu_x_x_v_u64m4_tu(vuint64m4_t maskedoff, vuint32m2_t src, s // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u64m8_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv8i64.nxv8i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv8i64.nxv8i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 
0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m8_t test_vwcvtu_x_x_v_u64m8_tu(vuint64m8_t maskedoff, vuint32m4_t src, size_t vl) { @@ -159,7 +159,7 @@ vuint64m8_t test_vwcvtu_x_x_v_u64m8_tu(vuint64m8_t maskedoff, vuint32m4_t src, s // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16mf4_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv1i16.nxv1i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv1i16.nxv1i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16mf4_t test_vwcvtu_x_x_v_u16mf4_tum(vbool64_t mask, vuint16mf4_t maskedoff, vuint8mf8_t src, size_t vl) { @@ -169,7 +169,7 @@ vuint16mf4_t test_vwcvtu_x_x_v_u16mf4_tum(vbool64_t mask, vuint16mf4_t maskedoff // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16mf2_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv2i16.nxv2i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv2i16.nxv2i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16mf2_t test_vwcvtu_x_x_v_u16mf2_tum(vbool32_t mask, vuint16mf2_t maskedoff, vuint8mf4_t src, size_t vl) { @@ -179,7 +179,7 @@ vuint16mf2_t test_vwcvtu_x_x_v_u16mf2_tum(vbool32_t mask, vuint16mf2_t maskedoff // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16m1_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
@llvm.riscv.vwaddu.mask.nxv4i16.nxv4i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv4i16.nxv4i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m1_t test_vwcvtu_x_x_v_u16m1_tum(vbool16_t mask, vuint16m1_t maskedoff, vuint8mf2_t src, size_t vl) { @@ -189,7 +189,7 @@ vuint16m1_t test_vwcvtu_x_x_v_u16m1_tum(vbool16_t mask, vuint16m1_t maskedoff, v // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16m2_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv8i16.nxv8i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv8i16.nxv8i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m2_t test_vwcvtu_x_x_v_u16m2_tum(vbool8_t mask, vuint16m2_t maskedoff, vuint8m1_t src, size_t vl) { @@ -199,7 +199,7 @@ vuint16m2_t test_vwcvtu_x_x_v_u16m2_tum(vbool8_t mask, vuint16m2_t maskedoff, vu // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16m4_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv16i16.nxv16i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv16i16.nxv16i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m4_t test_vwcvtu_x_x_v_u16m4_tum(vbool4_t mask, vuint16m4_t maskedoff, vuint8m2_t src, size_t vl) { @@ -209,7 +209,7 @@ vuint16m4_t test_vwcvtu_x_x_v_u16m4_tum(vbool4_t mask, vuint16m4_t maskedoff, vu // 
CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16m8_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv32i16.nxv32i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv32i16.nxv32i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m8_t test_vwcvtu_x_x_v_u16m8_tum(vbool2_t mask, vuint16m8_t maskedoff, vuint8m4_t src, size_t vl) { @@ -219,7 +219,7 @@ vuint16m8_t test_vwcvtu_x_x_v_u16m8_tum(vbool2_t mask, vuint16m8_t maskedoff, vu // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32mf2_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv1i32.nxv1i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv1i32.nxv1i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32mf2_t test_vwcvtu_x_x_v_u32mf2_tum(vbool64_t mask, vuint32mf2_t maskedoff, vuint16mf4_t src, size_t vl) { @@ -229,7 +229,7 @@ vuint32mf2_t test_vwcvtu_x_x_v_u32mf2_tum(vbool64_t mask, vuint32mf2_t maskedoff // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32m1_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv2i32.nxv2i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv2i32.nxv2i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 2) // 
CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m1_t test_vwcvtu_x_x_v_u32m1_tum(vbool32_t mask, vuint32m1_t maskedoff, vuint16mf2_t src, size_t vl) { @@ -239,7 +239,7 @@ vuint32m1_t test_vwcvtu_x_x_v_u32m1_tum(vbool32_t mask, vuint32m1_t maskedoff, v // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32m2_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv4i32.nxv4i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv4i32.nxv4i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m2_t test_vwcvtu_x_x_v_u32m2_tum(vbool16_t mask, vuint32m2_t maskedoff, vuint16m1_t src, size_t vl) { @@ -249,7 +249,7 @@ vuint32m2_t test_vwcvtu_x_x_v_u32m2_tum(vbool16_t mask, vuint32m2_t maskedoff, v // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32m4_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv8i32.nxv8i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv8i32.nxv8i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m4_t test_vwcvtu_x_x_v_u32m4_tum(vbool8_t mask, vuint32m4_t maskedoff, vuint16m2_t src, size_t vl) { @@ -259,7 +259,7 @@ vuint32m4_t test_vwcvtu_x_x_v_u32m4_tum(vbool8_t mask, vuint32m4_t maskedoff, vu // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32m8_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
@llvm.riscv.vwaddu.mask.nxv16i32.nxv16i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv16i32.nxv16i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m8_t test_vwcvtu_x_x_v_u32m8_tum(vbool4_t mask, vuint32m8_t maskedoff, vuint16m4_t src, size_t vl) { @@ -269,7 +269,7 @@ vuint32m8_t test_vwcvtu_x_x_v_u32m8_tum(vbool4_t mask, vuint32m8_t maskedoff, vu // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u64m1_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv1i64.nxv1i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv1i64.nxv1i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m1_t test_vwcvtu_x_x_v_u64m1_tum(vbool64_t mask, vuint64m1_t maskedoff, vuint32mf2_t src, size_t vl) { @@ -279,7 +279,7 @@ vuint64m1_t test_vwcvtu_x_x_v_u64m1_tum(vbool64_t mask, vuint64m1_t maskedoff, v // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u64m2_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv2i64.nxv2i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv2i64.nxv2i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m2_t test_vwcvtu_x_x_v_u64m2_tum(vbool32_t mask, vuint64m2_t maskedoff, vuint32m1_t src, size_t vl) { @@ -289,7 +289,7 @@ vuint64m2_t test_vwcvtu_x_x_v_u64m2_tum(vbool32_t mask, vuint64m2_t maskedoff, v 
// CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u64m4_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv4i64.nxv4i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv4i64.nxv4i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m4_t test_vwcvtu_x_x_v_u64m4_tum(vbool16_t mask, vuint64m4_t maskedoff, vuint32m2_t src, size_t vl) { @@ -299,7 +299,7 @@ vuint64m4_t test_vwcvtu_x_x_v_u64m4_tum(vbool16_t mask, vuint64m4_t maskedoff, v // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u64m8_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv8i64.nxv8i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv8i64.nxv8i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m8_t test_vwcvtu_x_x_v_u64m8_tum(vbool8_t mask, vuint64m8_t maskedoff, vuint32m4_t src, size_t vl) { @@ -309,7 +309,7 @@ vuint64m8_t test_vwcvtu_x_x_v_u64m8_tum(vbool8_t mask, vuint64m8_t maskedoff, vu // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16mf4_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv1i16.nxv1i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv1i16.nxv1i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 0) // 
CHECK-RV64-NEXT: ret [[TMP0]] // vuint16mf4_t test_vwcvtu_x_x_v_u16mf4_tumu(vbool64_t mask, vuint16mf4_t maskedoff, vuint8mf8_t src, size_t vl) { @@ -319,7 +319,7 @@ vuint16mf4_t test_vwcvtu_x_x_v_u16mf4_tumu(vbool64_t mask, vuint16mf4_t maskedof // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16mf2_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv2i16.nxv2i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv2i16.nxv2i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16mf2_t test_vwcvtu_x_x_v_u16mf2_tumu(vbool32_t mask, vuint16mf2_t maskedoff, vuint8mf4_t src, size_t vl) { @@ -329,7 +329,7 @@ vuint16mf2_t test_vwcvtu_x_x_v_u16mf2_tumu(vbool32_t mask, vuint16mf2_t maskedof // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16m1_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv4i16.nxv4i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv4i16.nxv4i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m1_t test_vwcvtu_x_x_v_u16m1_tumu(vbool16_t mask, vuint16m1_t maskedoff, vuint8mf2_t src, size_t vl) { @@ -339,7 +339,7 @@ vuint16m1_t test_vwcvtu_x_x_v_u16m1_tumu(vbool16_t mask, vuint16m1_t maskedoff, // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16m2_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
@llvm.riscv.vwaddu.mask.nxv8i16.nxv8i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv8i16.nxv8i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m2_t test_vwcvtu_x_x_v_u16m2_tumu(vbool8_t mask, vuint16m2_t maskedoff, vuint8m1_t src, size_t vl) { @@ -349,7 +349,7 @@ vuint16m2_t test_vwcvtu_x_x_v_u16m2_tumu(vbool8_t mask, vuint16m2_t maskedoff, v // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16m4_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv16i16.nxv16i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv16i16.nxv16i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m4_t test_vwcvtu_x_x_v_u16m4_tumu(vbool4_t mask, vuint16m4_t maskedoff, vuint8m2_t src, size_t vl) { @@ -359,7 +359,7 @@ vuint16m4_t test_vwcvtu_x_x_v_u16m4_tumu(vbool4_t mask, vuint16m4_t maskedoff, v // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16m8_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv32i16.nxv32i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv32i16.nxv32i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m8_t test_vwcvtu_x_x_v_u16m8_tumu(vbool2_t mask, vuint16m8_t maskedoff, vuint8m4_t src, size_t vl) { @@ -369,7 +369,7 @@ vuint16m8_t test_vwcvtu_x_x_v_u16m8_tumu(vbool2_t mask, vuint16m8_t maskedoff, v // 
CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32mf2_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv1i32.nxv1i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv1i32.nxv1i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32mf2_t test_vwcvtu_x_x_v_u32mf2_tumu(vbool64_t mask, vuint32mf2_t maskedoff, vuint16mf4_t src, size_t vl) { @@ -379,7 +379,7 @@ vuint32mf2_t test_vwcvtu_x_x_v_u32mf2_tumu(vbool64_t mask, vuint32mf2_t maskedof // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32m1_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv2i32.nxv2i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv2i32.nxv2i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m1_t test_vwcvtu_x_x_v_u32m1_tumu(vbool32_t mask, vuint32m1_t maskedoff, vuint16mf2_t src, size_t vl) { @@ -389,7 +389,7 @@ vuint32m1_t test_vwcvtu_x_x_v_u32m1_tumu(vbool32_t mask, vuint32m1_t maskedoff, // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32m2_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv4i32.nxv4i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv4i32.nxv4i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 0) // 
CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m2_t test_vwcvtu_x_x_v_u32m2_tumu(vbool16_t mask, vuint32m2_t maskedoff, vuint16m1_t src, size_t vl) { @@ -399,7 +399,7 @@ vuint32m2_t test_vwcvtu_x_x_v_u32m2_tumu(vbool16_t mask, vuint32m2_t maskedoff, // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32m4_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv8i32.nxv8i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv8i32.nxv8i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m4_t test_vwcvtu_x_x_v_u32m4_tumu(vbool8_t mask, vuint32m4_t maskedoff, vuint16m2_t src, size_t vl) { @@ -409,7 +409,7 @@ vuint32m4_t test_vwcvtu_x_x_v_u32m4_tumu(vbool8_t mask, vuint32m4_t maskedoff, v // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32m8_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv16i32.nxv16i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv16i32.nxv16i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m8_t test_vwcvtu_x_x_v_u32m8_tumu(vbool4_t mask, vuint32m8_t maskedoff, vuint16m4_t src, size_t vl) { @@ -419,7 +419,7 @@ vuint32m8_t test_vwcvtu_x_x_v_u32m8_tumu(vbool4_t mask, vuint32m8_t maskedoff, v // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u64m1_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
@llvm.riscv.vwaddu.mask.nxv1i64.nxv1i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv1i64.nxv1i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m1_t test_vwcvtu_x_x_v_u64m1_tumu(vbool64_t mask, vuint64m1_t maskedoff, vuint32mf2_t src, size_t vl) { @@ -429,7 +429,7 @@ vuint64m1_t test_vwcvtu_x_x_v_u64m1_tumu(vbool64_t mask, vuint64m1_t maskedoff, // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u64m2_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv2i64.nxv2i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv2i64.nxv2i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m2_t test_vwcvtu_x_x_v_u64m2_tumu(vbool32_t mask, vuint64m2_t maskedoff, vuint32m1_t src, size_t vl) { @@ -439,7 +439,7 @@ vuint64m2_t test_vwcvtu_x_x_v_u64m2_tumu(vbool32_t mask, vuint64m2_t maskedoff, // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u64m4_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv4i64.nxv4i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv4i64.nxv4i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m4_t test_vwcvtu_x_x_v_u64m4_tumu(vbool16_t mask, vuint64m4_t maskedoff, vuint32m2_t src, size_t vl) { @@ -449,7 +449,7 @@ vuint64m4_t test_vwcvtu_x_x_v_u64m4_tumu(vbool16_t mask, vuint64m4_t maskedoff, 
// CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u64m8_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv8i64.nxv8i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv8i64.nxv8i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m8_t test_vwcvtu_x_x_v_u64m8_tumu(vbool8_t mask, vuint64m8_t maskedoff, vuint32m4_t src, size_t vl) { @@ -459,7 +459,7 @@ vuint64m8_t test_vwcvtu_x_x_v_u64m8_tumu(vbool8_t mask, vuint64m8_t maskedoff, v // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16mf4_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv1i16.nxv1i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv1i16.nxv1i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16mf4_t test_vwcvtu_x_x_v_u16mf4_mu(vbool64_t mask, vuint16mf4_t maskedoff, vuint8mf8_t src, size_t vl) { @@ -469,7 +469,7 @@ vuint16mf4_t test_vwcvtu_x_x_v_u16mf4_mu(vbool64_t mask, vuint16mf4_t maskedoff, // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16mf2_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv2i16.nxv2i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv2i16.nxv2i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 1) // 
CHECK-RV64-NEXT: ret [[TMP0]] // vuint16mf2_t test_vwcvtu_x_x_v_u16mf2_mu(vbool32_t mask, vuint16mf2_t maskedoff, vuint8mf4_t src, size_t vl) { @@ -479,7 +479,7 @@ vuint16mf2_t test_vwcvtu_x_x_v_u16mf2_mu(vbool32_t mask, vuint16mf2_t maskedoff, // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16m1_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv4i16.nxv4i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv4i16.nxv4i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m1_t test_vwcvtu_x_x_v_u16m1_mu(vbool16_t mask, vuint16m1_t maskedoff, vuint8mf2_t src, size_t vl) { @@ -489,7 +489,7 @@ vuint16m1_t test_vwcvtu_x_x_v_u16m1_mu(vbool16_t mask, vuint16m1_t maskedoff, vu // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16m2_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv8i16.nxv8i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv8i16.nxv8i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m2_t test_vwcvtu_x_x_v_u16m2_mu(vbool8_t mask, vuint16m2_t maskedoff, vuint8m1_t src, size_t vl) { @@ -499,7 +499,7 @@ vuint16m2_t test_vwcvtu_x_x_v_u16m2_mu(vbool8_t mask, vuint16m2_t maskedoff, vui // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16m4_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
@llvm.riscv.vwaddu.mask.nxv16i16.nxv16i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv16i16.nxv16i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m4_t test_vwcvtu_x_x_v_u16m4_mu(vbool4_t mask, vuint16m4_t maskedoff, vuint8m2_t src, size_t vl) { @@ -509,7 +509,7 @@ vuint16m4_t test_vwcvtu_x_x_v_u16m4_mu(vbool4_t mask, vuint16m4_t maskedoff, vui // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16m8_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv32i16.nxv32i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv32i16.nxv32i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m8_t test_vwcvtu_x_x_v_u16m8_mu(vbool2_t mask, vuint16m8_t maskedoff, vuint8m4_t src, size_t vl) { @@ -519,7 +519,7 @@ vuint16m8_t test_vwcvtu_x_x_v_u16m8_mu(vbool2_t mask, vuint16m8_t maskedoff, vui // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32mf2_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv1i32.nxv1i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv1i32.nxv1i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32mf2_t test_vwcvtu_x_x_v_u32mf2_mu(vbool64_t mask, vuint32mf2_t maskedoff, vuint16mf4_t src, size_t vl) { @@ -529,7 +529,7 @@ vuint32mf2_t test_vwcvtu_x_x_v_u32mf2_mu(vbool64_t mask, vuint32mf2_t maskedoff, // 
CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32m1_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv2i32.nxv2i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv2i32.nxv2i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m1_t test_vwcvtu_x_x_v_u32m1_mu(vbool32_t mask, vuint32m1_t maskedoff, vuint16mf2_t src, size_t vl) { @@ -539,7 +539,7 @@ vuint32m1_t test_vwcvtu_x_x_v_u32m1_mu(vbool32_t mask, vuint32m1_t maskedoff, vu // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32m2_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv4i32.nxv4i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv4i32.nxv4i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m2_t test_vwcvtu_x_x_v_u32m2_mu(vbool16_t mask, vuint32m2_t maskedoff, vuint16m1_t src, size_t vl) { @@ -549,7 +549,7 @@ vuint32m2_t test_vwcvtu_x_x_v_u32m2_mu(vbool16_t mask, vuint32m2_t maskedoff, vu // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32m4_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv8i32.nxv8i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv8i32.nxv8i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 1) // 
CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m4_t test_vwcvtu_x_x_v_u32m4_mu(vbool8_t mask, vuint32m4_t maskedoff, vuint16m2_t src, size_t vl) { @@ -559,7 +559,7 @@ vuint32m4_t test_vwcvtu_x_x_v_u32m4_mu(vbool8_t mask, vuint32m4_t maskedoff, vui // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32m8_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv16i32.nxv16i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv16i32.nxv16i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m8_t test_vwcvtu_x_x_v_u32m8_mu(vbool4_t mask, vuint32m8_t maskedoff, vuint16m4_t src, size_t vl) { @@ -569,7 +569,7 @@ vuint32m8_t test_vwcvtu_x_x_v_u32m8_mu(vbool4_t mask, vuint32m8_t maskedoff, vui // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u64m1_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv1i64.nxv1i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv1i64.nxv1i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m1_t test_vwcvtu_x_x_v_u64m1_mu(vbool64_t mask, vuint64m1_t maskedoff, vuint32mf2_t src, size_t vl) { @@ -579,7 +579,7 @@ vuint64m1_t test_vwcvtu_x_x_v_u64m1_mu(vbool64_t mask, vuint64m1_t maskedoff, vu // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u64m2_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
@llvm.riscv.vwaddu.mask.nxv2i64.nxv2i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv2i64.nxv2i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m2_t test_vwcvtu_x_x_v_u64m2_mu(vbool32_t mask, vuint64m2_t maskedoff, vuint32m1_t src, size_t vl) { @@ -589,7 +589,7 @@ vuint64m2_t test_vwcvtu_x_x_v_u64m2_mu(vbool32_t mask, vuint64m2_t maskedoff, vu // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u64m4_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv4i64.nxv4i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv4i64.nxv4i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m4_t test_vwcvtu_x_x_v_u64m4_mu(vbool16_t mask, vuint64m4_t maskedoff, vuint32m2_t src, size_t vl) { @@ -599,7 +599,7 @@ vuint64m4_t test_vwcvtu_x_x_v_u64m4_mu(vbool16_t mask, vuint64m4_t maskedoff, vu // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u64m8_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv8i64.nxv8i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv8i64.nxv8i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m8_t test_vwcvtu_x_x_v_u64m8_mu(vbool8_t mask, vuint64m8_t maskedoff, vuint32m4_t src, size_t vl) { diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/vwcvt.c 
b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/vwcvt.c index 4376c5d9860e0..4e40521d59137 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/vwcvt.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/vwcvt.c @@ -9,7 +9,7 @@ // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16mf4_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv1i16.nxv1i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv1i16.nxv1i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16mf4_t test_vwcvt_x_x_v_i16mf4_tu(vint16mf4_t maskedoff, vint8mf8_t src, size_t vl) { @@ -19,7 +19,7 @@ vint16mf4_t test_vwcvt_x_x_v_i16mf4_tu(vint16mf4_t maskedoff, vint8mf8_t src, si // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16mf2_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv2i16.nxv2i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv2i16.nxv2i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16mf2_t test_vwcvt_x_x_v_i16mf2_tu(vint16mf2_t maskedoff, vint8mf4_t src, size_t vl) { @@ -29,7 +29,7 @@ vint16mf2_t test_vwcvt_x_x_v_i16mf2_tu(vint16mf2_t maskedoff, vint8mf4_t src, si // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16m1_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv4i16.nxv4i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
@llvm.riscv.vwadd.nxv4i16.nxv4i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m1_t test_vwcvt_x_x_v_i16m1_tu(vint16m1_t maskedoff, vint8mf2_t src, size_t vl) { @@ -39,7 +39,7 @@ vint16m1_t test_vwcvt_x_x_v_i16m1_tu(vint16m1_t maskedoff, vint8mf2_t src, size_ // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16m2_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv8i16.nxv8i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv8i16.nxv8i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m2_t test_vwcvt_x_x_v_i16m2_tu(vint16m2_t maskedoff, vint8m1_t src, size_t vl) { @@ -49,7 +49,7 @@ vint16m2_t test_vwcvt_x_x_v_i16m2_tu(vint16m2_t maskedoff, vint8m1_t src, size_t // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16m4_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv16i16.nxv16i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv16i16.nxv16i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m4_t test_vwcvt_x_x_v_i16m4_tu(vint16m4_t maskedoff, vint8m2_t src, size_t vl) { @@ -59,7 +59,7 @@ vint16m4_t test_vwcvt_x_x_v_i16m4_tu(vint16m4_t maskedoff, vint8m2_t src, size_t // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16m8_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv32i16.nxv32i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
@llvm.riscv.vwadd.nxv32i16.nxv32i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m8_t test_vwcvt_x_x_v_i16m8_tu(vint16m8_t maskedoff, vint8m4_t src, size_t vl) { @@ -69,7 +69,7 @@ vint16m8_t test_vwcvt_x_x_v_i16m8_tu(vint16m8_t maskedoff, vint8m4_t src, size_t // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32mf2_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv1i32.nxv1i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv1i32.nxv1i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32mf2_t test_vwcvt_x_x_v_i32mf2_tu(vint32mf2_t maskedoff, vint16mf4_t src, size_t vl) { @@ -79,7 +79,7 @@ vint32mf2_t test_vwcvt_x_x_v_i32mf2_tu(vint32mf2_t maskedoff, vint16mf4_t src, s // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32m1_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv2i32.nxv2i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv2i32.nxv2i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m1_t test_vwcvt_x_x_v_i32m1_tu(vint32m1_t maskedoff, vint16mf2_t src, size_t vl) { @@ -89,7 +89,7 @@ vint32m1_t test_vwcvt_x_x_v_i32m1_tu(vint32m1_t maskedoff, vint16mf2_t src, size // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32m2_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv4i32.nxv4i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
@llvm.riscv.vwadd.nxv4i32.nxv4i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m2_t test_vwcvt_x_x_v_i32m2_tu(vint32m2_t maskedoff, vint16m1_t src, size_t vl) { @@ -99,7 +99,7 @@ vint32m2_t test_vwcvt_x_x_v_i32m2_tu(vint32m2_t maskedoff, vint16m1_t src, size_ // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32m4_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv8i32.nxv8i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv8i32.nxv8i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m4_t test_vwcvt_x_x_v_i32m4_tu(vint32m4_t maskedoff, vint16m2_t src, size_t vl) { @@ -109,7 +109,7 @@ vint32m4_t test_vwcvt_x_x_v_i32m4_tu(vint32m4_t maskedoff, vint16m2_t src, size_ // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32m8_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv16i32.nxv16i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv16i32.nxv16i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m8_t test_vwcvt_x_x_v_i32m8_tu(vint32m8_t maskedoff, vint16m4_t src, size_t vl) { @@ -119,7 +119,7 @@ vint32m8_t test_vwcvt_x_x_v_i32m8_tu(vint32m8_t maskedoff, vint16m4_t src, size_ // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i64m1_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv1i64.nxv1i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
@llvm.riscv.vwadd.nxv1i64.nxv1i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m1_t test_vwcvt_x_x_v_i64m1_tu(vint64m1_t maskedoff, vint32mf2_t src, size_t vl) { @@ -129,7 +129,7 @@ vint64m1_t test_vwcvt_x_x_v_i64m1_tu(vint64m1_t maskedoff, vint32mf2_t src, size // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i64m2_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv2i64.nxv2i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv2i64.nxv2i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m2_t test_vwcvt_x_x_v_i64m2_tu(vint64m2_t maskedoff, vint32m1_t src, size_t vl) { @@ -139,7 +139,7 @@ vint64m2_t test_vwcvt_x_x_v_i64m2_tu(vint64m2_t maskedoff, vint32m1_t src, size_ // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i64m4_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv4i64.nxv4i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv4i64.nxv4i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m4_t test_vwcvt_x_x_v_i64m4_tu(vint64m4_t maskedoff, vint32m2_t src, size_t vl) { @@ -149,7 +149,7 @@ vint64m4_t test_vwcvt_x_x_v_i64m4_tu(vint64m4_t maskedoff, vint32m2_t src, size_ // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i64m8_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.nxv8i64.nxv8i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
@llvm.riscv.vwadd.nxv8i64.nxv8i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m8_t test_vwcvt_x_x_v_i64m8_tu(vint64m8_t maskedoff, vint32m4_t src, size_t vl) { @@ -159,7 +159,7 @@ vint64m8_t test_vwcvt_x_x_v_i64m8_tu(vint64m8_t maskedoff, vint32m4_t src, size_ // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16mf4_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv1i16.nxv1i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv1i16.nxv1i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16mf4_t test_vwcvt_x_x_v_i16mf4_tum(vbool64_t mask, vint16mf4_t maskedoff, vint8mf8_t src, size_t vl) { @@ -169,7 +169,7 @@ vint16mf4_t test_vwcvt_x_x_v_i16mf4_tum(vbool64_t mask, vint16mf4_t maskedoff, v // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16mf2_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv2i16.nxv2i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv2i16.nxv2i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16mf2_t test_vwcvt_x_x_v_i16mf2_tum(vbool32_t mask, vint16mf2_t maskedoff, vint8mf4_t src, size_t vl) { @@ -179,7 +179,7 @@ vint16mf2_t test_vwcvt_x_x_v_i16mf2_tum(vbool32_t mask, vint16mf2_t maskedoff, v // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16m1_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// 
CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv4i16.nxv4i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv4i16.nxv4i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m1_t test_vwcvt_x_x_v_i16m1_tum(vbool16_t mask, vint16m1_t maskedoff, vint8mf2_t src, size_t vl) { @@ -189,7 +189,7 @@ vint16m1_t test_vwcvt_x_x_v_i16m1_tum(vbool16_t mask, vint16m1_t maskedoff, vint // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16m2_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv8i16.nxv8i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv8i16.nxv8i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m2_t test_vwcvt_x_x_v_i16m2_tum(vbool8_t mask, vint16m2_t maskedoff, vint8m1_t src, size_t vl) { @@ -199,7 +199,7 @@ vint16m2_t test_vwcvt_x_x_v_i16m2_tum(vbool8_t mask, vint16m2_t maskedoff, vint8 // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16m4_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv16i16.nxv16i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv16i16.nxv16i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m4_t test_vwcvt_x_x_v_i16m4_tum(vbool4_t mask, vint16m4_t maskedoff, vint8m2_t src, size_t vl) { @@ -209,7 +209,7 @@ vint16m4_t test_vwcvt_x_x_v_i16m4_tum(vbool4_t mask, vint16m4_t maskedoff, vint8 
// CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16m8_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv32i16.nxv32i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv32i16.nxv32i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m8_t test_vwcvt_x_x_v_i16m8_tum(vbool2_t mask, vint16m8_t maskedoff, vint8m4_t src, size_t vl) { @@ -219,7 +219,7 @@ vint16m8_t test_vwcvt_x_x_v_i16m8_tum(vbool2_t mask, vint16m8_t maskedoff, vint8 // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32mf2_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv1i32.nxv1i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv1i32.nxv1i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32mf2_t test_vwcvt_x_x_v_i32mf2_tum(vbool64_t mask, vint32mf2_t maskedoff, vint16mf4_t src, size_t vl) { @@ -229,7 +229,7 @@ vint32mf2_t test_vwcvt_x_x_v_i32mf2_tum(vbool64_t mask, vint32mf2_t maskedoff, v // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32m1_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv2i32.nxv2i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv2i32.nxv2i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret 
[[TMP0]] // vint32m1_t test_vwcvt_x_x_v_i32m1_tum(vbool32_t mask, vint32m1_t maskedoff, vint16mf2_t src, size_t vl) { @@ -239,7 +239,7 @@ vint32m1_t test_vwcvt_x_x_v_i32m1_tum(vbool32_t mask, vint32m1_t maskedoff, vint // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32m2_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv4i32.nxv4i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv4i32.nxv4i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m2_t test_vwcvt_x_x_v_i32m2_tum(vbool16_t mask, vint32m2_t maskedoff, vint16m1_t src, size_t vl) { @@ -249,7 +249,7 @@ vint32m2_t test_vwcvt_x_x_v_i32m2_tum(vbool16_t mask, vint32m2_t maskedoff, vint // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32m4_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv8i32.nxv8i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv8i32.nxv8i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m4_t test_vwcvt_x_x_v_i32m4_tum(vbool8_t mask, vint32m4_t maskedoff, vint16m2_t src, size_t vl) { @@ -259,7 +259,7 @@ vint32m4_t test_vwcvt_x_x_v_i32m4_tum(vbool8_t mask, vint32m4_t maskedoff, vint1 // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32m8_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv16i32.nxv16i16.i32.i64( [[MASKEDOFF]], 
[[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv16i32.nxv16i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m8_t test_vwcvt_x_x_v_i32m8_tum(vbool4_t mask, vint32m8_t maskedoff, vint16m4_t src, size_t vl) { @@ -269,7 +269,7 @@ vint32m8_t test_vwcvt_x_x_v_i32m8_tum(vbool4_t mask, vint32m8_t maskedoff, vint1 // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i64m1_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv1i64.nxv1i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv1i64.nxv1i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m1_t test_vwcvt_x_x_v_i64m1_tum(vbool64_t mask, vint64m1_t maskedoff, vint32mf2_t src, size_t vl) { @@ -279,7 +279,7 @@ vint64m1_t test_vwcvt_x_x_v_i64m1_tum(vbool64_t mask, vint64m1_t maskedoff, vint // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i64m2_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv2i64.nxv2i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv2i64.nxv2i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m2_t test_vwcvt_x_x_v_i64m2_tum(vbool32_t mask, vint64m2_t maskedoff, vint32m1_t src, size_t vl) { @@ -289,7 +289,7 @@ vint64m2_t test_vwcvt_x_x_v_i64m2_tum(vbool32_t mask, vint64m2_t maskedoff, vint // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i64m4_tum // CHECK-RV64-SAME: 
( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv4i64.nxv4i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv4i64.nxv4i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m4_t test_vwcvt_x_x_v_i64m4_tum(vbool16_t mask, vint64m4_t maskedoff, vint32m2_t src, size_t vl) { @@ -299,7 +299,7 @@ vint64m4_t test_vwcvt_x_x_v_i64m4_tum(vbool16_t mask, vint64m4_t maskedoff, vint // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i64m8_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv8i64.nxv8i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv8i64.nxv8i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m8_t test_vwcvt_x_x_v_i64m8_tum(vbool8_t mask, vint64m8_t maskedoff, vint32m4_t src, size_t vl) { @@ -309,7 +309,7 @@ vint64m8_t test_vwcvt_x_x_v_i64m8_tum(vbool8_t mask, vint64m8_t maskedoff, vint3 // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16mf4_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv1i16.nxv1i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv1i16.nxv1i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16mf4_t test_vwcvt_x_x_v_i16mf4_tumu(vbool64_t mask, vint16mf4_t maskedoff, 
vint8mf8_t src, size_t vl) { @@ -319,7 +319,7 @@ vint16mf4_t test_vwcvt_x_x_v_i16mf4_tumu(vbool64_t mask, vint16mf4_t maskedoff, // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16mf2_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv2i16.nxv2i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv2i16.nxv2i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16mf2_t test_vwcvt_x_x_v_i16mf2_tumu(vbool32_t mask, vint16mf2_t maskedoff, vint8mf4_t src, size_t vl) { @@ -329,7 +329,7 @@ vint16mf2_t test_vwcvt_x_x_v_i16mf2_tumu(vbool32_t mask, vint16mf2_t maskedoff, // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16m1_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv4i16.nxv4i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv4i16.nxv4i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m1_t test_vwcvt_x_x_v_i16m1_tumu(vbool16_t mask, vint16m1_t maskedoff, vint8mf2_t src, size_t vl) { @@ -339,7 +339,7 @@ vint16m1_t test_vwcvt_x_x_v_i16m1_tumu(vbool16_t mask, vint16m1_t maskedoff, vin // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16m2_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv8i16.nxv8i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
@llvm.riscv.vwadd.mask.nxv8i16.nxv8i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m2_t test_vwcvt_x_x_v_i16m2_tumu(vbool8_t mask, vint16m2_t maskedoff, vint8m1_t src, size_t vl) { @@ -349,7 +349,7 @@ vint16m2_t test_vwcvt_x_x_v_i16m2_tumu(vbool8_t mask, vint16m2_t maskedoff, vint // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16m4_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv16i16.nxv16i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv16i16.nxv16i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m4_t test_vwcvt_x_x_v_i16m4_tumu(vbool4_t mask, vint16m4_t maskedoff, vint8m2_t src, size_t vl) { @@ -359,7 +359,7 @@ vint16m4_t test_vwcvt_x_x_v_i16m4_tumu(vbool4_t mask, vint16m4_t maskedoff, vint // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16m8_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv32i16.nxv32i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv32i16.nxv32i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m8_t test_vwcvt_x_x_v_i16m8_tumu(vbool2_t mask, vint16m8_t maskedoff, vint8m4_t src, size_t vl) { @@ -369,7 +369,7 @@ vint16m8_t test_vwcvt_x_x_v_i16m8_tumu(vbool2_t mask, vint16m8_t maskedoff, vint // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32mf2_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // 
CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv1i32.nxv1i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv1i32.nxv1i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32mf2_t test_vwcvt_x_x_v_i32mf2_tumu(vbool64_t mask, vint32mf2_t maskedoff, vint16mf4_t src, size_t vl) { @@ -379,7 +379,7 @@ vint32mf2_t test_vwcvt_x_x_v_i32mf2_tumu(vbool64_t mask, vint32mf2_t maskedoff, // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32m1_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv2i32.nxv2i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv2i32.nxv2i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m1_t test_vwcvt_x_x_v_i32m1_tumu(vbool32_t mask, vint32m1_t maskedoff, vint16mf2_t src, size_t vl) { @@ -389,7 +389,7 @@ vint32m1_t test_vwcvt_x_x_v_i32m1_tumu(vbool32_t mask, vint32m1_t maskedoff, vin // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32m2_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv4i32.nxv4i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv4i32.nxv4i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m2_t test_vwcvt_x_x_v_i32m2_tumu(vbool16_t mask, vint32m2_t maskedoff, vint16m1_t src, size_t vl) { @@ -399,7 +399,7 @@ vint32m2_t 
test_vwcvt_x_x_v_i32m2_tumu(vbool16_t mask, vint32m2_t maskedoff, vin // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32m4_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv8i32.nxv8i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv8i32.nxv8i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m4_t test_vwcvt_x_x_v_i32m4_tumu(vbool8_t mask, vint32m4_t maskedoff, vint16m2_t src, size_t vl) { @@ -409,7 +409,7 @@ vint32m4_t test_vwcvt_x_x_v_i32m4_tumu(vbool8_t mask, vint32m4_t maskedoff, vint // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32m8_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv16i32.nxv16i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv16i32.nxv16i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m8_t test_vwcvt_x_x_v_i32m8_tumu(vbool4_t mask, vint32m8_t maskedoff, vint16m4_t src, size_t vl) { @@ -419,7 +419,7 @@ vint32m8_t test_vwcvt_x_x_v_i32m8_tumu(vbool4_t mask, vint32m8_t maskedoff, vint // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i64m1_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv1i64.nxv1i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv1i64.nxv1i32.i32.i64( 
[[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m1_t test_vwcvt_x_x_v_i64m1_tumu(vbool64_t mask, vint64m1_t maskedoff, vint32mf2_t src, size_t vl) { @@ -429,7 +429,7 @@ vint64m1_t test_vwcvt_x_x_v_i64m1_tumu(vbool64_t mask, vint64m1_t maskedoff, vin // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i64m2_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv2i64.nxv2i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv2i64.nxv2i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m2_t test_vwcvt_x_x_v_i64m2_tumu(vbool32_t mask, vint64m2_t maskedoff, vint32m1_t src, size_t vl) { @@ -439,7 +439,7 @@ vint64m2_t test_vwcvt_x_x_v_i64m2_tumu(vbool32_t mask, vint64m2_t maskedoff, vin // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i64m4_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv4i64.nxv4i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv4i64.nxv4i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m4_t test_vwcvt_x_x_v_i64m4_tumu(vbool16_t mask, vint64m4_t maskedoff, vint32m2_t src, size_t vl) { @@ -449,7 +449,7 @@ vint64m4_t test_vwcvt_x_x_v_i64m4_tumu(vbool16_t mask, vint64m4_t maskedoff, vin // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i64m8_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// 
CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv8i64.nxv8i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv8i64.nxv8i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m8_t test_vwcvt_x_x_v_i64m8_tumu(vbool8_t mask, vint64m8_t maskedoff, vint32m4_t src, size_t vl) { @@ -459,7 +459,7 @@ vint64m8_t test_vwcvt_x_x_v_i64m8_tumu(vbool8_t mask, vint64m8_t maskedoff, vint // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16mf4_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv1i16.nxv1i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv1i16.nxv1i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16mf4_t test_vwcvt_x_x_v_i16mf4_mu(vbool64_t mask, vint16mf4_t maskedoff, vint8mf8_t src, size_t vl) { @@ -469,7 +469,7 @@ vint16mf4_t test_vwcvt_x_x_v_i16mf4_mu(vbool64_t mask, vint16mf4_t maskedoff, vi // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16mf2_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv2i16.nxv2i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv2i16.nxv2i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16mf2_t test_vwcvt_x_x_v_i16mf2_mu(vbool32_t mask, vint16mf2_t maskedoff, vint8mf4_t src, size_t vl) { @@ -479,7 +479,7 @@ vint16mf2_t test_vwcvt_x_x_v_i16mf2_mu(vbool32_t mask, vint16mf2_t 
maskedoff, vi // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16m1_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv4i16.nxv4i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv4i16.nxv4i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m1_t test_vwcvt_x_x_v_i16m1_mu(vbool16_t mask, vint16m1_t maskedoff, vint8mf2_t src, size_t vl) { @@ -489,7 +489,7 @@ vint16m1_t test_vwcvt_x_x_v_i16m1_mu(vbool16_t mask, vint16m1_t maskedoff, vint8 // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16m2_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv8i16.nxv8i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv8i16.nxv8i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m2_t test_vwcvt_x_x_v_i16m2_mu(vbool8_t mask, vint16m2_t maskedoff, vint8m1_t src, size_t vl) { @@ -499,7 +499,7 @@ vint16m2_t test_vwcvt_x_x_v_i16m2_mu(vbool8_t mask, vint16m2_t maskedoff, vint8m // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16m4_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv16i16.nxv16i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv16i16.nxv16i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret 
[[TMP0]] // vint16m4_t test_vwcvt_x_x_v_i16m4_mu(vbool4_t mask, vint16m4_t maskedoff, vint8m2_t src, size_t vl) { @@ -509,7 +509,7 @@ vint16m4_t test_vwcvt_x_x_v_i16m4_mu(vbool4_t mask, vint16m4_t maskedoff, vint8m // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i16m8_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv32i16.nxv32i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv32i16.nxv32i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vint16m8_t test_vwcvt_x_x_v_i16m8_mu(vbool2_t mask, vint16m8_t maskedoff, vint8m4_t src, size_t vl) { @@ -519,7 +519,7 @@ vint16m8_t test_vwcvt_x_x_v_i16m8_mu(vbool2_t mask, vint16m8_t maskedoff, vint8m // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32mf2_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv1i32.nxv1i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv1i32.nxv1i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32mf2_t test_vwcvt_x_x_v_i32mf2_mu(vbool64_t mask, vint32mf2_t maskedoff, vint16mf4_t src, size_t vl) { @@ -529,7 +529,7 @@ vint32mf2_t test_vwcvt_x_x_v_i32mf2_mu(vbool64_t mask, vint32mf2_t maskedoff, vi // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32m1_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv2i32.nxv2i16.i32.i64( [[MASKEDOFF]], [[SRC]], 
i32 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv2i32.nxv2i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m1_t test_vwcvt_x_x_v_i32m1_mu(vbool32_t mask, vint32m1_t maskedoff, vint16mf2_t src, size_t vl) { @@ -539,7 +539,7 @@ vint32m1_t test_vwcvt_x_x_v_i32m1_mu(vbool32_t mask, vint32m1_t maskedoff, vint1 // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32m2_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv4i32.nxv4i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv4i32.nxv4i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m2_t test_vwcvt_x_x_v_i32m2_mu(vbool16_t mask, vint32m2_t maskedoff, vint16m1_t src, size_t vl) { @@ -549,7 +549,7 @@ vint32m2_t test_vwcvt_x_x_v_i32m2_mu(vbool16_t mask, vint32m2_t maskedoff, vint1 // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32m4_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv8i32.nxv8i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv8i32.nxv8i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m4_t test_vwcvt_x_x_v_i32m4_mu(vbool8_t mask, vint32m4_t maskedoff, vint16m2_t src, size_t vl) { @@ -559,7 +559,7 @@ vint32m4_t test_vwcvt_x_x_v_i32m4_mu(vbool8_t mask, vint32m4_t maskedoff, vint16 // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i32m8_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], 
[[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv16i32.nxv16i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv16i32.nxv16i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vint32m8_t test_vwcvt_x_x_v_i32m8_mu(vbool4_t mask, vint32m8_t maskedoff, vint16m4_t src, size_t vl) { @@ -569,7 +569,7 @@ vint32m8_t test_vwcvt_x_x_v_i32m8_mu(vbool4_t mask, vint32m8_t maskedoff, vint16 // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i64m1_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv1i64.nxv1i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv1i64.nxv1i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m1_t test_vwcvt_x_x_v_i64m1_mu(vbool64_t mask, vint64m1_t maskedoff, vint32mf2_t src, size_t vl) { @@ -579,7 +579,7 @@ vint64m1_t test_vwcvt_x_x_v_i64m1_mu(vbool64_t mask, vint64m1_t maskedoff, vint3 // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i64m2_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv2i64.nxv2i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv2i64.nxv2i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m2_t test_vwcvt_x_x_v_i64m2_mu(vbool32_t mask, vint64m2_t maskedoff, vint32m1_t src, 
size_t vl) { @@ -589,7 +589,7 @@ vint64m2_t test_vwcvt_x_x_v_i64m2_mu(vbool32_t mask, vint64m2_t maskedoff, vint3 // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i64m4_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv4i64.nxv4i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv4i64.nxv4i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m4_t test_vwcvt_x_x_v_i64m4_mu(vbool16_t mask, vint64m4_t maskedoff, vint32m2_t src, size_t vl) { @@ -599,7 +599,7 @@ vint64m4_t test_vwcvt_x_x_v_i64m4_mu(vbool16_t mask, vint64m4_t maskedoff, vint3 // CHECK-RV64-LABEL: define dso_local @test_vwcvt_x_x_v_i64m8_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv8i64.nxv8i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwadd.mask.nxv8i64.nxv8i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vint64m8_t test_vwcvt_x_x_v_i64m8_mu(vbool8_t mask, vint64m8_t maskedoff, vint32m4_t src, size_t vl) { diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/vwcvtu.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/vwcvtu.c index a0175dfc4ddbb..006e51b499e24 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/vwcvtu.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/vwcvtu.c @@ -9,7 +9,7 @@ // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16mf4_tu // CHECK-RV64-SAME: ( 
[[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv1i16.nxv1i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv1i16.nxv1i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16mf4_t test_vwcvtu_x_x_v_u16mf4_tu(vuint16mf4_t maskedoff, vuint8mf8_t src, size_t vl) { @@ -19,7 +19,7 @@ vuint16mf4_t test_vwcvtu_x_x_v_u16mf4_tu(vuint16mf4_t maskedoff, vuint8mf8_t src // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16mf2_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv2i16.nxv2i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv2i16.nxv2i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16mf2_t test_vwcvtu_x_x_v_u16mf2_tu(vuint16mf2_t maskedoff, vuint8mf4_t src, size_t vl) { @@ -29,7 +29,7 @@ vuint16mf2_t test_vwcvtu_x_x_v_u16mf2_tu(vuint16mf2_t maskedoff, vuint8mf4_t src // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16m1_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv4i16.nxv4i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv4i16.nxv4i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m1_t test_vwcvtu_x_x_v_u16m1_tu(vuint16m1_t maskedoff, vuint8mf2_t src, size_t vl) { @@ -39,7 +39,7 @@ vuint16m1_t test_vwcvtu_x_x_v_u16m1_tu(vuint16m1_t maskedoff, vuint8mf2_t src, s // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16m2_tu // 
CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv8i16.nxv8i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv8i16.nxv8i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m2_t test_vwcvtu_x_x_v_u16m2_tu(vuint16m2_t maskedoff, vuint8m1_t src, size_t vl) { @@ -49,7 +49,7 @@ vuint16m2_t test_vwcvtu_x_x_v_u16m2_tu(vuint16m2_t maskedoff, vuint8m1_t src, si // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16m4_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv16i16.nxv16i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv16i16.nxv16i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m4_t test_vwcvtu_x_x_v_u16m4_tu(vuint16m4_t maskedoff, vuint8m2_t src, size_t vl) { @@ -59,7 +59,7 @@ vuint16m4_t test_vwcvtu_x_x_v_u16m4_tu(vuint16m4_t maskedoff, vuint8m2_t src, si // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16m8_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv32i16.nxv32i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv32i16.nxv32i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m8_t test_vwcvtu_x_x_v_u16m8_tu(vuint16m8_t maskedoff, vuint8m4_t src, size_t vl) { @@ -69,7 +69,7 @@ vuint16m8_t test_vwcvtu_x_x_v_u16m8_tu(vuint16m8_t maskedoff, vuint8m4_t src, si // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32mf2_tu // 
CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv1i32.nxv1i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv1i32.nxv1i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32mf2_t test_vwcvtu_x_x_v_u32mf2_tu(vuint32mf2_t maskedoff, vuint16mf4_t src, size_t vl) { @@ -79,7 +79,7 @@ vuint32mf2_t test_vwcvtu_x_x_v_u32mf2_tu(vuint32mf2_t maskedoff, vuint16mf4_t sr // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32m1_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv2i32.nxv2i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv2i32.nxv2i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m1_t test_vwcvtu_x_x_v_u32m1_tu(vuint32m1_t maskedoff, vuint16mf2_t src, size_t vl) { @@ -89,7 +89,7 @@ vuint32m1_t test_vwcvtu_x_x_v_u32m1_tu(vuint32m1_t maskedoff, vuint16mf2_t src, // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32m2_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv4i32.nxv4i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv4i32.nxv4i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m2_t test_vwcvtu_x_x_v_u32m2_tu(vuint32m2_t maskedoff, vuint16m1_t src, size_t vl) { @@ -99,7 +99,7 @@ vuint32m2_t test_vwcvtu_x_x_v_u32m2_tu(vuint32m2_t maskedoff, vuint16m1_t src, s // CHECK-RV64-LABEL: define dso_local 
@test_vwcvtu_x_x_v_u32m4_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv8i32.nxv8i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv8i32.nxv8i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m4_t test_vwcvtu_x_x_v_u32m4_tu(vuint32m4_t maskedoff, vuint16m2_t src, size_t vl) { @@ -109,7 +109,7 @@ vuint32m4_t test_vwcvtu_x_x_v_u32m4_tu(vuint32m4_t maskedoff, vuint16m2_t src, s // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32m8_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv16i32.nxv16i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv16i32.nxv16i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m8_t test_vwcvtu_x_x_v_u32m8_tu(vuint32m8_t maskedoff, vuint16m4_t src, size_t vl) { @@ -119,7 +119,7 @@ vuint32m8_t test_vwcvtu_x_x_v_u32m8_tu(vuint32m8_t maskedoff, vuint16m4_t src, s // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u64m1_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv1i64.nxv1i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv1i64.nxv1i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m1_t test_vwcvtu_x_x_v_u64m1_tu(vuint64m1_t maskedoff, vuint32mf2_t src, size_t vl) { @@ -129,7 +129,7 @@ vuint64m1_t test_vwcvtu_x_x_v_u64m1_tu(vuint64m1_t maskedoff, vuint32mf2_t src, // CHECK-RV64-LABEL: 
define dso_local @test_vwcvtu_x_x_v_u64m2_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv2i64.nxv2i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv2i64.nxv2i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m2_t test_vwcvtu_x_x_v_u64m2_tu(vuint64m2_t maskedoff, vuint32m1_t src, size_t vl) { @@ -139,7 +139,7 @@ vuint64m2_t test_vwcvtu_x_x_v_u64m2_tu(vuint64m2_t maskedoff, vuint32m1_t src, s // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u64m4_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv4i64.nxv4i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv4i64.nxv4i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m4_t test_vwcvtu_x_x_v_u64m4_tu(vuint64m4_t maskedoff, vuint32m2_t src, size_t vl) { @@ -149,7 +149,7 @@ vuint64m4_t test_vwcvtu_x_x_v_u64m4_tu(vuint64m4_t maskedoff, vuint32m2_t src, s // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u64m8_tu // CHECK-RV64-SAME: ( [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv8i64.nxv8i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.nxv8i64.nxv8i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, i64 [[VL]]) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m8_t test_vwcvtu_x_x_v_u64m8_tu(vuint64m8_t maskedoff, vuint32m4_t src, size_t vl) { @@ -159,7 +159,7 @@ vuint64m8_t test_vwcvtu_x_x_v_u64m8_tu(vuint64m8_t maskedoff, vuint32m4_t src, s // 
CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16mf4_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv1i16.nxv1i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv1i16.nxv1i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16mf4_t test_vwcvtu_x_x_v_u16mf4_tum(vbool64_t mask, vuint16mf4_t maskedoff, vuint8mf8_t src, size_t vl) { @@ -169,7 +169,7 @@ vuint16mf4_t test_vwcvtu_x_x_v_u16mf4_tum(vbool64_t mask, vuint16mf4_t maskedoff // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16mf2_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv2i16.nxv2i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv2i16.nxv2i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16mf2_t test_vwcvtu_x_x_v_u16mf2_tum(vbool32_t mask, vuint16mf2_t maskedoff, vuint8mf4_t src, size_t vl) { @@ -179,7 +179,7 @@ vuint16mf2_t test_vwcvtu_x_x_v_u16mf2_tum(vbool32_t mask, vuint16mf2_t maskedoff // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16m1_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv4i16.nxv4i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv4i16.nxv4i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 2) // 
CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m1_t test_vwcvtu_x_x_v_u16m1_tum(vbool16_t mask, vuint16m1_t maskedoff, vuint8mf2_t src, size_t vl) { @@ -189,7 +189,7 @@ vuint16m1_t test_vwcvtu_x_x_v_u16m1_tum(vbool16_t mask, vuint16m1_t maskedoff, v // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16m2_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv8i16.nxv8i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv8i16.nxv8i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m2_t test_vwcvtu_x_x_v_u16m2_tum(vbool8_t mask, vuint16m2_t maskedoff, vuint8m1_t src, size_t vl) { @@ -199,7 +199,7 @@ vuint16m2_t test_vwcvtu_x_x_v_u16m2_tum(vbool8_t mask, vuint16m2_t maskedoff, vu // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16m4_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv16i16.nxv16i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv16i16.nxv16i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m4_t test_vwcvtu_x_x_v_u16m4_tum(vbool4_t mask, vuint16m4_t maskedoff, vuint8m2_t src, size_t vl) { @@ -209,7 +209,7 @@ vuint16m4_t test_vwcvtu_x_x_v_u16m4_tum(vbool4_t mask, vuint16m4_t maskedoff, vu // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16m8_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
@llvm.riscv.vwaddu.mask.nxv32i16.nxv32i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv32i16.nxv32i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m8_t test_vwcvtu_x_x_v_u16m8_tum(vbool2_t mask, vuint16m8_t maskedoff, vuint8m4_t src, size_t vl) { @@ -219,7 +219,7 @@ vuint16m8_t test_vwcvtu_x_x_v_u16m8_tum(vbool2_t mask, vuint16m8_t maskedoff, vu // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32mf2_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv1i32.nxv1i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv1i32.nxv1i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32mf2_t test_vwcvtu_x_x_v_u32mf2_tum(vbool64_t mask, vuint32mf2_t maskedoff, vuint16mf4_t src, size_t vl) { @@ -229,7 +229,7 @@ vuint32mf2_t test_vwcvtu_x_x_v_u32mf2_tum(vbool64_t mask, vuint32mf2_t maskedoff // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32m1_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv2i32.nxv2i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv2i32.nxv2i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m1_t test_vwcvtu_x_x_v_u32m1_tum(vbool32_t mask, vuint32m1_t maskedoff, vuint16mf2_t src, size_t vl) { @@ -239,7 +239,7 @@ vuint32m1_t test_vwcvtu_x_x_v_u32m1_tum(vbool32_t mask, vuint32m1_t maskedoff, v 
// CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32m2_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv4i32.nxv4i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv4i32.nxv4i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m2_t test_vwcvtu_x_x_v_u32m2_tum(vbool16_t mask, vuint32m2_t maskedoff, vuint16m1_t src, size_t vl) { @@ -249,7 +249,7 @@ vuint32m2_t test_vwcvtu_x_x_v_u32m2_tum(vbool16_t mask, vuint32m2_t maskedoff, v // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32m4_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv8i32.nxv8i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv8i32.nxv8i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m4_t test_vwcvtu_x_x_v_u32m4_tum(vbool8_t mask, vuint32m4_t maskedoff, vuint16m2_t src, size_t vl) { @@ -259,7 +259,7 @@ vuint32m4_t test_vwcvtu_x_x_v_u32m4_tum(vbool8_t mask, vuint32m4_t maskedoff, vu // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32m8_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv16i32.nxv16i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv16i32.nxv16i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 2) // 
CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m8_t test_vwcvtu_x_x_v_u32m8_tum(vbool4_t mask, vuint32m8_t maskedoff, vuint16m4_t src, size_t vl) { @@ -269,7 +269,7 @@ vuint32m8_t test_vwcvtu_x_x_v_u32m8_tum(vbool4_t mask, vuint32m8_t maskedoff, vu // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u64m1_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv1i64.nxv1i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv1i64.nxv1i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m1_t test_vwcvtu_x_x_v_u64m1_tum(vbool64_t mask, vuint64m1_t maskedoff, vuint32mf2_t src, size_t vl) { @@ -279,7 +279,7 @@ vuint64m1_t test_vwcvtu_x_x_v_u64m1_tum(vbool64_t mask, vuint64m1_t maskedoff, v // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u64m2_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv2i64.nxv2i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv2i64.nxv2i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m2_t test_vwcvtu_x_x_v_u64m2_tum(vbool32_t mask, vuint64m2_t maskedoff, vuint32m1_t src, size_t vl) { @@ -289,7 +289,7 @@ vuint64m2_t test_vwcvtu_x_x_v_u64m2_tum(vbool32_t mask, vuint64m2_t maskedoff, v // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u64m4_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
@llvm.riscv.vwaddu.mask.nxv4i64.nxv4i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv4i64.nxv4i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m4_t test_vwcvtu_x_x_v_u64m4_tum(vbool16_t mask, vuint64m4_t maskedoff, vuint32m2_t src, size_t vl) { @@ -299,7 +299,7 @@ vuint64m4_t test_vwcvtu_x_x_v_u64m4_tum(vbool16_t mask, vuint64m4_t maskedoff, v // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u64m8_tum // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv8i64.nxv8i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv8i64.nxv8i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 2) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m8_t test_vwcvtu_x_x_v_u64m8_tum(vbool8_t mask, vuint64m8_t maskedoff, vuint32m4_t src, size_t vl) { @@ -309,7 +309,7 @@ vuint64m8_t test_vwcvtu_x_x_v_u64m8_tum(vbool8_t mask, vuint64m8_t maskedoff, vu // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16mf4_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv1i16.nxv1i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv1i16.nxv1i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16mf4_t test_vwcvtu_x_x_v_u16mf4_tumu(vbool64_t mask, vuint16mf4_t maskedoff, vuint8mf8_t src, size_t vl) { @@ -319,7 +319,7 @@ vuint16mf4_t test_vwcvtu_x_x_v_u16mf4_tumu(vbool64_t mask, vuint16mf4_t maskedof // 
CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16mf2_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv2i16.nxv2i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv2i16.nxv2i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16mf2_t test_vwcvtu_x_x_v_u16mf2_tumu(vbool32_t mask, vuint16mf2_t maskedoff, vuint8mf4_t src, size_t vl) { @@ -329,7 +329,7 @@ vuint16mf2_t test_vwcvtu_x_x_v_u16mf2_tumu(vbool32_t mask, vuint16mf2_t maskedof // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16m1_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv4i16.nxv4i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv4i16.nxv4i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m1_t test_vwcvtu_x_x_v_u16m1_tumu(vbool16_t mask, vuint16m1_t maskedoff, vuint8mf2_t src, size_t vl) { @@ -339,7 +339,7 @@ vuint16m1_t test_vwcvtu_x_x_v_u16m1_tumu(vbool16_t mask, vuint16m1_t maskedoff, // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16m2_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv8i16.nxv8i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv8i16.nxv8i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 0) // 
CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m2_t test_vwcvtu_x_x_v_u16m2_tumu(vbool8_t mask, vuint16m2_t maskedoff, vuint8m1_t src, size_t vl) { @@ -349,7 +349,7 @@ vuint16m2_t test_vwcvtu_x_x_v_u16m2_tumu(vbool8_t mask, vuint16m2_t maskedoff, v // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16m4_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv16i16.nxv16i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv16i16.nxv16i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m4_t test_vwcvtu_x_x_v_u16m4_tumu(vbool4_t mask, vuint16m4_t maskedoff, vuint8m2_t src, size_t vl) { @@ -359,7 +359,7 @@ vuint16m4_t test_vwcvtu_x_x_v_u16m4_tumu(vbool4_t mask, vuint16m4_t maskedoff, v // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16m8_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv32i16.nxv32i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv32i16.nxv32i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m8_t test_vwcvtu_x_x_v_u16m8_tumu(vbool2_t mask, vuint16m8_t maskedoff, vuint8m4_t src, size_t vl) { @@ -369,7 +369,7 @@ vuint16m8_t test_vwcvtu_x_x_v_u16m8_tumu(vbool2_t mask, vuint16m8_t maskedoff, v // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32mf2_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
@llvm.riscv.vwaddu.mask.nxv1i32.nxv1i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv1i32.nxv1i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32mf2_t test_vwcvtu_x_x_v_u32mf2_tumu(vbool64_t mask, vuint32mf2_t maskedoff, vuint16mf4_t src, size_t vl) { @@ -379,7 +379,7 @@ vuint32mf2_t test_vwcvtu_x_x_v_u32mf2_tumu(vbool64_t mask, vuint32mf2_t maskedof // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32m1_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv2i32.nxv2i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv2i32.nxv2i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m1_t test_vwcvtu_x_x_v_u32m1_tumu(vbool32_t mask, vuint32m1_t maskedoff, vuint16mf2_t src, size_t vl) { @@ -389,7 +389,7 @@ vuint32m1_t test_vwcvtu_x_x_v_u32m1_tumu(vbool32_t mask, vuint32m1_t maskedoff, // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32m2_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv4i32.nxv4i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv4i32.nxv4i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m2_t test_vwcvtu_x_x_v_u32m2_tumu(vbool16_t mask, vuint32m2_t maskedoff, vuint16m1_t src, size_t vl) { @@ -399,7 +399,7 @@ vuint32m2_t test_vwcvtu_x_x_v_u32m2_tumu(vbool16_t mask, vuint32m2_t 
maskedoff, // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32m4_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv8i32.nxv8i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv8i32.nxv8i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m4_t test_vwcvtu_x_x_v_u32m4_tumu(vbool8_t mask, vuint32m4_t maskedoff, vuint16m2_t src, size_t vl) { @@ -409,7 +409,7 @@ vuint32m4_t test_vwcvtu_x_x_v_u32m4_tumu(vbool8_t mask, vuint32m4_t maskedoff, v // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32m8_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv16i32.nxv16i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv16i32.nxv16i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m8_t test_vwcvtu_x_x_v_u32m8_tumu(vbool4_t mask, vuint32m8_t maskedoff, vuint16m4_t src, size_t vl) { @@ -419,7 +419,7 @@ vuint32m8_t test_vwcvtu_x_x_v_u32m8_tumu(vbool4_t mask, vuint32m8_t maskedoff, v // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u64m1_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv1i64.nxv1i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv1i64.nxv1i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], 
i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m1_t test_vwcvtu_x_x_v_u64m1_tumu(vbool64_t mask, vuint64m1_t maskedoff, vuint32mf2_t src, size_t vl) { @@ -429,7 +429,7 @@ vuint64m1_t test_vwcvtu_x_x_v_u64m1_tumu(vbool64_t mask, vuint64m1_t maskedoff, // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u64m2_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv2i64.nxv2i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv2i64.nxv2i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m2_t test_vwcvtu_x_x_v_u64m2_tumu(vbool32_t mask, vuint64m2_t maskedoff, vuint32m1_t src, size_t vl) { @@ -439,7 +439,7 @@ vuint64m2_t test_vwcvtu_x_x_v_u64m2_tumu(vbool32_t mask, vuint64m2_t maskedoff, // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u64m4_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv4i64.nxv4i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv4i64.nxv4i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m4_t test_vwcvtu_x_x_v_u64m4_tumu(vbool16_t mask, vuint64m4_t maskedoff, vuint32m2_t src, size_t vl) { @@ -449,7 +449,7 @@ vuint64m4_t test_vwcvtu_x_x_v_u64m4_tumu(vbool16_t mask, vuint64m4_t maskedoff, // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u64m8_tumu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
@llvm.riscv.vwaddu.mask.nxv8i64.nxv8i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv8i64.nxv8i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 0) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m8_t test_vwcvtu_x_x_v_u64m8_tumu(vbool8_t mask, vuint64m8_t maskedoff, vuint32m4_t src, size_t vl) { @@ -459,7 +459,7 @@ vuint64m8_t test_vwcvtu_x_x_v_u64m8_tumu(vbool8_t mask, vuint64m8_t maskedoff, v // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16mf4_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv1i16.nxv1i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv1i16.nxv1i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16mf4_t test_vwcvtu_x_x_v_u16mf4_mu(vbool64_t mask, vuint16mf4_t maskedoff, vuint8mf8_t src, size_t vl) { @@ -469,7 +469,7 @@ vuint16mf4_t test_vwcvtu_x_x_v_u16mf4_mu(vbool64_t mask, vuint16mf4_t maskedoff, // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16mf2_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv2i16.nxv2i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv2i16.nxv2i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16mf2_t test_vwcvtu_x_x_v_u16mf2_mu(vbool32_t mask, vuint16mf2_t maskedoff, vuint8mf4_t src, size_t vl) { @@ -479,7 +479,7 @@ vuint16mf2_t test_vwcvtu_x_x_v_u16mf2_mu(vbool32_t mask, vuint16mf2_t maskedoff, // 
CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16m1_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv4i16.nxv4i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv4i16.nxv4i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m1_t test_vwcvtu_x_x_v_u16m1_mu(vbool16_t mask, vuint16m1_t maskedoff, vuint8mf2_t src, size_t vl) { @@ -489,7 +489,7 @@ vuint16m1_t test_vwcvtu_x_x_v_u16m1_mu(vbool16_t mask, vuint16m1_t maskedoff, vu // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16m2_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv8i16.nxv8i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv8i16.nxv8i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m2_t test_vwcvtu_x_x_v_u16m2_mu(vbool8_t mask, vuint16m2_t maskedoff, vuint8m1_t src, size_t vl) { @@ -499,7 +499,7 @@ vuint16m2_t test_vwcvtu_x_x_v_u16m2_mu(vbool8_t mask, vuint16m2_t maskedoff, vui // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16m4_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv16i16.nxv16i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv16i16.nxv16i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret 
[[TMP0]] // vuint16m4_t test_vwcvtu_x_x_v_u16m4_mu(vbool4_t mask, vuint16m4_t maskedoff, vuint8m2_t src, size_t vl) { @@ -509,7 +509,7 @@ vuint16m4_t test_vwcvtu_x_x_v_u16m4_mu(vbool4_t mask, vuint16m4_t maskedoff, vui // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u16m8_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv32i16.nxv32i8.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv32i16.nxv32i8.i8.i64( [[MASKEDOFF]], [[SRC]], i8 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint16m8_t test_vwcvtu_x_x_v_u16m8_mu(vbool2_t mask, vuint16m8_t maskedoff, vuint8m4_t src, size_t vl) { @@ -519,7 +519,7 @@ vuint16m8_t test_vwcvtu_x_x_v_u16m8_mu(vbool2_t mask, vuint16m8_t maskedoff, vui // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32mf2_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv1i32.nxv1i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv1i32.nxv1i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32mf2_t test_vwcvtu_x_x_v_u32mf2_mu(vbool64_t mask, vuint32mf2_t maskedoff, vuint16mf4_t src, size_t vl) { @@ -529,7 +529,7 @@ vuint32mf2_t test_vwcvtu_x_x_v_u32mf2_mu(vbool64_t mask, vuint32mf2_t maskedoff, // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32m1_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv2i32.nxv2i16.i32.i64( 
[[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv2i32.nxv2i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m1_t test_vwcvtu_x_x_v_u32m1_mu(vbool32_t mask, vuint32m1_t maskedoff, vuint16mf2_t src, size_t vl) { @@ -539,7 +539,7 @@ vuint32m1_t test_vwcvtu_x_x_v_u32m1_mu(vbool32_t mask, vuint32m1_t maskedoff, vu // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32m2_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv4i32.nxv4i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv4i32.nxv4i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m2_t test_vwcvtu_x_x_v_u32m2_mu(vbool16_t mask, vuint32m2_t maskedoff, vuint16m1_t src, size_t vl) { @@ -549,7 +549,7 @@ vuint32m2_t test_vwcvtu_x_x_v_u32m2_mu(vbool16_t mask, vuint32m2_t maskedoff, vu // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u32m4_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv8i32.nxv8i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv8i32.nxv8i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m4_t test_vwcvtu_x_x_v_u32m4_mu(vbool8_t mask, vuint32m4_t maskedoff, vuint16m2_t src, size_t vl) { @@ -559,7 +559,7 @@ vuint32m4_t test_vwcvtu_x_x_v_u32m4_mu(vbool8_t mask, vuint32m4_t maskedoff, vui // CHECK-RV64-LABEL: define dso_local 
@test_vwcvtu_x_x_v_u32m8_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv16i32.nxv16i16.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv16i32.nxv16i16.i16.i64( [[MASKEDOFF]], [[SRC]], i16 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint32m8_t test_vwcvtu_x_x_v_u32m8_mu(vbool4_t mask, vuint32m8_t maskedoff, vuint16m4_t src, size_t vl) { @@ -569,7 +569,7 @@ vuint32m8_t test_vwcvtu_x_x_v_u32m8_mu(vbool4_t mask, vuint32m8_t maskedoff, vui // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u64m1_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv1i64.nxv1i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv1i64.nxv1i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m1_t test_vwcvtu_x_x_v_u64m1_mu(vbool64_t mask, vuint64m1_t maskedoff, vuint32mf2_t src, size_t vl) { @@ -579,7 +579,7 @@ vuint64m1_t test_vwcvtu_x_x_v_u64m1_mu(vbool64_t mask, vuint64m1_t maskedoff, vu // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u64m2_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv2i64.nxv2i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv2i64.nxv2i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m2_t 
test_vwcvtu_x_x_v_u64m2_mu(vbool32_t mask, vuint64m2_t maskedoff, vuint32m1_t src, size_t vl) { @@ -589,7 +589,7 @@ vuint64m2_t test_vwcvtu_x_x_v_u64m2_mu(vbool32_t mask, vuint64m2_t maskedoff, vu // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u64m4_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv4i64.nxv4i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv4i64.nxv4i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m4_t test_vwcvtu_x_x_v_u64m4_mu(vbool16_t mask, vuint64m4_t maskedoff, vuint32m2_t src, size_t vl) { @@ -599,7 +599,7 @@ vuint64m4_t test_vwcvtu_x_x_v_u64m4_mu(vbool16_t mask, vuint64m4_t maskedoff, vu // CHECK-RV64-LABEL: define dso_local @test_vwcvtu_x_x_v_u64m8_mu // CHECK-RV64-SAME: ( [[MASK:%.*]], [[MASKEDOFF:%.*]], [[SRC:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv8i64.nxv8i32.i64.i64( [[MASKEDOFF]], [[SRC]], i64 0, [[MASK]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vwaddu.mask.nxv8i64.nxv8i32.i32.i64( [[MASKEDOFF]], [[SRC]], i32 0, [[MASK]], i64 [[VL]], i64 1) // CHECK-RV64-NEXT: ret [[TMP0]] // vuint64m8_t test_vwcvtu_x_x_v_u64m8_mu(vbool8_t mask, vuint64m8_t maskedoff, vuint32m4_t src, size_t vl) { diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxei32.c new file mode 100644 index 0000000000000..7bf0a4e5b7b1f --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxei32.c @@ -0,0 +1,118 @@ +// NOTE: 
Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16mf4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv1bf16.p0.nxv1i32.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vloxei32_v_bf16mf4(const __bf16 *rs1, vuint32mf2_t rs2, + size_t vl) { + return __riscv_vloxei32_v_bf16mf4(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16mf2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv2bf16.p0.nxv2i32.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vloxei32_v_bf16mf2(const __bf16 *rs1, vuint32m1_t rs2, + size_t vl) { + return __riscv_vloxei32_v_bf16mf2(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16m1( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv4bf16.p0.nxv4i32.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vloxei32_v_bf16m1(const __bf16 *rs1, vuint32m2_t rs2, + size_t vl) { + return __riscv_vloxei32_v_bf16m1(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16m2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], 
i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv8bf16.p0.nxv8i32.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vloxei32_v_bf16m2(const __bf16 *rs1, vuint32m4_t rs2, + size_t vl) { + return __riscv_vloxei32_v_bf16m2(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16m4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv16bf16.p0.nxv16i32.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m4_t test_vloxei32_v_bf16m4(const __bf16 *rs1, vuint32m8_t rs2, + size_t vl) { + return __riscv_vloxei32_v_bf16m4(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16mf4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv1bf16.p0.nxv1i32.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vloxei32_v_bf16mf4_m(vbool64_t vm, const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxei32_v_bf16mf4_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16mf2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv2bf16.p0.nxv2i32.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vloxei32_v_bf16mf2_m(vbool32_t vm, const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxei32_v_bf16mf2_m(vm, rs1, rs2, vl); +} + +// 
CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16m1_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv4bf16.p0.nxv4i32.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vloxei32_v_bf16m1_m(vbool16_t vm, const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vloxei32_v_bf16m1_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16m2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv8bf16.p0.nxv8i32.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vloxei32_v_bf16m2_m(vbool8_t vm, const __bf16 *rs1, + vuint32m4_t rs2, size_t vl) { + return __riscv_vloxei32_v_bf16m2_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16m4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv16bf16.p0.nxv16i32.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m4_t test_vloxei32_v_bf16m4_m(vbool4_t vm, const __bf16 *rs1, + vuint32m8_t rs2, size_t vl) { + return __riscv_vloxei32_v_bf16m4_m(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxei64.c new file mode 100644 index 0000000000000..be42373070f9d --- /dev/null +++ 
b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxei64.c @@ -0,0 +1,96 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local @test_vloxei64_v_bf16mf4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv1bf16.p0.nxv1i64.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vloxei64_v_bf16mf4(const __bf16 *rs1, vuint64m1_t rs2, + size_t vl) { + return __riscv_vloxei64_v_bf16mf4(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei64_v_bf16mf2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv2bf16.p0.nxv2i64.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vloxei64_v_bf16mf2(const __bf16 *rs1, vuint64m2_t rs2, + size_t vl) { + return __riscv_vloxei64_v_bf16mf2(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei64_v_bf16m1( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv4bf16.p0.nxv4i64.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vloxei64_v_bf16m1(const __bf16 *rs1, vuint64m4_t rs2, + size_t vl) { + return __riscv_vloxei64_v_bf16m1(rs1, rs2, vl); +} 
+ +// CHECK-RV64-LABEL: define dso_local @test_vloxei64_v_bf16m2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv8bf16.p0.nxv8i64.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vloxei64_v_bf16m2(const __bf16 *rs1, vuint64m8_t rs2, + size_t vl) { + return __riscv_vloxei64_v_bf16m2(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei64_v_bf16mf4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv1bf16.p0.nxv1i64.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vloxei64_v_bf16mf4_m(vbool64_t vm, const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxei64_v_bf16mf4_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei64_v_bf16mf2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv2bf16.p0.nxv2i64.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vloxei64_v_bf16mf2_m(vbool32_t vm, const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vloxei64_v_bf16mf2_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei64_v_bf16m1_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv4bf16.p0.nxv4i64.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// 
+vbfloat16m1_t test_vloxei64_v_bf16m1_m(vbool16_t vm, const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vloxei64_v_bf16m1_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei64_v_bf16m2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv8bf16.p0.nxv8i64.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vloxei64_v_bf16m2_m(vbool8_t vm, const __bf16 *rs1, + vuint64m8_t rs2, size_t vl) { + return __riscv_vloxei64_v_bf16m2_m(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxei8.c new file mode 100644 index 0000000000000..767405acfde03 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxei8.c @@ -0,0 +1,140 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16mf4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv1bf16.p0.nxv1i8.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vloxei8_v_bf16mf4(const __bf16 *rs1, vuint8mf8_t rs2, + size_t vl) { + return __riscv_vloxei8_v_bf16mf4(rs1, rs2, vl); 
+} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16mf2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv2bf16.p0.nxv2i8.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vloxei8_v_bf16mf2(const __bf16 *rs1, vuint8mf4_t rs2, + size_t vl) { + return __riscv_vloxei8_v_bf16mf2(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16m1( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv4bf16.p0.nxv4i8.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vloxei8_v_bf16m1(const __bf16 *rs1, vuint8mf2_t rs2, + size_t vl) { + return __riscv_vloxei8_v_bf16m1(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16m2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv8bf16.p0.nxv8i8.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vloxei8_v_bf16m2(const __bf16 *rs1, vuint8m1_t rs2, + size_t vl) { + return __riscv_vloxei8_v_bf16m2(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16m4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv16bf16.p0.nxv16i8.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m4_t test_vloxei8_v_bf16m4(const __bf16 *rs1, vuint8m2_t rs2, + size_t vl) { + return __riscv_vloxei8_v_bf16m4(rs1, rs2, vl); +} + +// 
CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16m8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv32bf16.p0.nxv32i8.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m8_t test_vloxei8_v_bf16m8(const __bf16 *rs1, vuint8m4_t rs2, + size_t vl) { + return __riscv_vloxei8_v_bf16m8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16mf4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv1bf16.p0.nxv1i8.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vloxei8_v_bf16mf4_m(vbool64_t vm, const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxei8_v_bf16mf4_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16mf2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv2bf16.p0.nxv2i8.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vloxei8_v_bf16mf2_m(vbool32_t vm, const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxei8_v_bf16mf2_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16m1_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv4bf16.p0.nxv4i8.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t 
test_vloxei8_v_bf16m1_m(vbool16_t vm, const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxei8_v_bf16m1_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16m2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv8bf16.p0.nxv8i8.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vloxei8_v_bf16m2_m(vbool8_t vm, const __bf16 *rs1, + vuint8m1_t rs2, size_t vl) { + return __riscv_vloxei8_v_bf16m2_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16m4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv16bf16.p0.nxv16i8.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m4_t test_vloxei8_v_bf16m4_m(vbool4_t vm, const __bf16 *rs1, + vuint8m2_t rs2, size_t vl) { + return __riscv_vloxei8_v_bf16m4_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16m8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv32bf16.p0.nxv32i8.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m8_t test_vloxei8_v_bf16m8_m(vbool2_t vm, const __bf16 *rs1, + vuint8m4_t rs2, size_t vl) { + return __riscv_vloxei8_v_bf16m8_m(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg2ei32.c 
b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg2ei32.c new file mode 100644 index 0000000000000..adafe97dff8b8 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg2ei32.c @@ -0,0 +1,120 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16mf4x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vloxseg2ei32_v_bf16mf4x2(const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg2ei32_v_bf16mf4x2(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16mf2x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vloxseg2ei32_v_bf16mf2x2(const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { 
+ return __riscv_vloxseg2ei32_v_bf16mf2x2(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16m1x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vloxseg2ei32_v_bf16m1x2(const __bf16 *rs1, vuint32m2_t rs2, + size_t vl) { + return __riscv_vloxseg2ei32_v_bf16m1x2(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16m2x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i32.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vloxseg2ei32_v_bf16m2x2(const __bf16 *rs1, vuint32m4_t rs2, + size_t vl) { + return __riscv_vloxseg2ei32_v_bf16m2x2(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16m4x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i32.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m4x2_t 
test_vloxseg2ei32_v_bf16m4x2(const __bf16 *rs1, vuint32m8_t rs2, + size_t vl) { + return __riscv_vloxseg2ei32_v_bf16m4x2(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16mf4x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vloxseg2ei32_v_bf16mf4x2_m(vbool64_t vm, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg2ei32_v_bf16mf4x2_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16mf2x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vloxseg2ei32_v_bf16mf2x2_m(vbool32_t vm, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei32_v_bf16mf2x2_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16m1x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) 
@llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vloxseg2ei32_v_bf16m1x2_m(vbool16_t vm, const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg2ei32_v_bf16m1x2_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16m2x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vloxseg2ei32_v_bf16m2x2_m(vbool8_t vm, const __bf16 *rs1, + vuint32m4_t rs2, size_t vl) { + return __riscv_vloxseg2ei32_v_bf16m2x2_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16m4x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i32.nxv16i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m4x2_t test_vloxseg2ei32_v_bf16m4x2_m(vbool4_t vm, const __bf16 *rs1, + vuint32m8_t rs2, size_t vl) { + return __riscv_vloxseg2ei32_v_bf16m4x2_m(vm, rs1, rs2, vl); +} diff --git 
a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg2ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg2ei64.c new file mode 100644 index 0000000000000..0be1d3fb19ae8 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg2ei64.c @@ -0,0 +1,98 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei64_v_bf16mf4x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vloxseg2ei64_v_bf16mf4x2(const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei64_v_bf16mf4x2(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei64_v_bf16mf2x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] 
+// +vbfloat16mf2x2_t test_vloxseg2ei64_v_bf16mf2x2(const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg2ei64_v_bf16mf2x2(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei64_v_bf16m1x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vloxseg2ei64_v_bf16m1x2(const __bf16 *rs1, vuint64m4_t rs2, + size_t vl) { + return __riscv_vloxseg2ei64_v_bf16m1x2(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei64_v_bf16m2x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i64.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vloxseg2ei64_v_bf16m2x2(const __bf16 *rs1, vuint64m8_t rs2, + size_t vl) { + return __riscv_vloxseg2ei64_v_bf16m2x2(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei64_v_bf16mf4x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], 
i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vloxseg2ei64_v_bf16mf4x2_m(vbool64_t vm, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei64_v_bf16mf4x2_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei64_v_bf16mf2x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vloxseg2ei64_v_bf16mf2x2_m(vbool32_t vm, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg2ei64_v_bf16mf2x2_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei64_v_bf16m1x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vloxseg2ei64_v_bf16m1x2_m(vbool16_t vm, const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg2ei64_v_bf16m1x2_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei64_v_bf16m2x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// 
CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vloxseg2ei64_v_bf16m2x2_m(vbool8_t vm, const __bf16 *rs1, + vuint64m8_t rs2, size_t vl) { + return __riscv_vloxseg2ei64_v_bf16m2x2_m(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg2ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg2ei8.c new file mode 100644 index 0000000000000..8a6d93d429dff --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg2ei8.c @@ -0,0 +1,118 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16mf4x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vloxseg2ei8_v_bf16mf4x2(const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg2ei8_v_bf16mf4x2(rs1, rs2, 
vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16mf2x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vloxseg2ei8_v_bf16mf2x2(const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg2ei8_v_bf16mf2x2(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16m1x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vloxseg2ei8_v_bf16m1x2(const __bf16 *rs1, vuint8mf2_t rs2, + size_t vl) { + return __riscv_vloxseg2ei8_v_bf16m1x2(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16m2x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i8.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vloxseg2ei8_v_bf16m2x2(const __bf16 *rs1, vuint8m1_t rs2, + size_t vl) { + return 
__riscv_vloxseg2ei8_v_bf16m2x2(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16m4x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i8.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m4x2_t test_vloxseg2ei8_v_bf16m4x2(const __bf16 *rs1, vuint8m2_t rs2, + size_t vl) { + return __riscv_vloxseg2ei8_v_bf16m4x2(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16mf4x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vloxseg2ei8_v_bf16mf4x2_m(vbool64_t vm, const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg2ei8_v_bf16mf4x2_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16mf2x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret 
target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vloxseg2ei8_v_bf16mf2x2_m(vbool32_t vm, const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg2ei8_v_bf16mf2x2_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16m1x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vloxseg2ei8_v_bf16m1x2_m(vbool16_t vm, const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg2ei8_v_bf16m1x2_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16m2x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vloxseg2ei8_v_bf16m2x2_m(vbool8_t vm, const __bf16 *rs1, + vuint8m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei8_v_bf16m2x2_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16m4x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i8.nxv16i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m4x2_t test_vloxseg2ei8_v_bf16m4x2_m(vbool4_t vm, const __bf16 *rs1, + vuint8m2_t rs2, size_t vl) { + return __riscv_vloxseg2ei8_v_bf16m4x2_m(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg3ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg3ei32.c new file mode 100644 index 0000000000000..00d0043a5e81b --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg3ei32.c @@ -0,0 +1,98 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei32_v_bf16mf4x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vloxseg3ei32_v_bf16mf4x3(const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg3ei32_v_bf16mf4x3(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local 
target("riscv.vector.tuple", , 3) @test_vloxseg3ei32_v_bf16mf2x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vloxseg3ei32_v_bf16mf2x3(const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg3ei32_v_bf16mf2x3(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei32_v_bf16m1x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vloxseg3ei32_v_bf16m1x3(const __bf16 *rs1, vuint32m2_t rs2, + size_t vl) { + return __riscv_vloxseg3ei32_v_bf16m1x3(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei32_v_bf16m2x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i32.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vloxseg3ei32_v_bf16m2x3(const __bf16 *rs1, vuint32m4_t rs2, + size_t vl) { + return __riscv_vloxseg3ei32_v_bf16m2x3(rs1, 
rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei32_v_bf16mf4x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vloxseg3ei32_v_bf16mf4x3_m(vbool64_t vm, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg3ei32_v_bf16mf4x3_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei32_v_bf16mf2x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vloxseg3ei32_v_bf16mf2x3_m(vbool32_t vm, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg3ei32_v_bf16mf2x3_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei32_v_bf16m1x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], 
i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vloxseg3ei32_v_bf16m1x3_m(vbool16_t vm, const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg3ei32_v_bf16m1x3_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei32_v_bf16m2x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vloxseg3ei32_v_bf16m2x3_m(vbool8_t vm, const __bf16 *rs1, + vuint32m4_t rs2, size_t vl) { + return __riscv_vloxseg3ei32_v_bf16m2x3_m(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg3ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg3ei64.c new file mode 100644 index 0000000000000..0685d0cab692d --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg3ei64.c @@ -0,0 +1,98 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei64_v_bf16mf4x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) 
#[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vloxseg3ei64_v_bf16mf4x3(const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg3ei64_v_bf16mf4x3(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei64_v_bf16mf2x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vloxseg3ei64_v_bf16mf2x3(const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg3ei64_v_bf16mf2x3(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei64_v_bf16m1x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vloxseg3ei64_v_bf16m1x3(const __bf16 *rs1, vuint64m4_t rs2, + size_t vl) { + return __riscv_vloxseg3ei64_v_bf16m1x3(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei64_v_bf16m2x3( +// CHECK-RV64-SAME: 
ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i64.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vloxseg3ei64_v_bf16m2x3(const __bf16 *rs1, vuint64m8_t rs2, + size_t vl) { + return __riscv_vloxseg3ei64_v_bf16m2x3(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei64_v_bf16mf4x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vloxseg3ei64_v_bf16mf4x3_m(vbool64_t vm, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg3ei64_v_bf16mf4x3_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei64_v_bf16mf2x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vloxseg3ei64_v_bf16mf2x3_m(vbool32_t vm, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + 
return __riscv_vloxseg3ei64_v_bf16mf2x3_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei64_v_bf16m1x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vloxseg3ei64_v_bf16m1x3_m(vbool16_t vm, const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg3ei64_v_bf16m1x3_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei64_v_bf16m2x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vloxseg3ei64_v_bf16m2x3_m(vbool8_t vm, const __bf16 *rs1, + vuint64m8_t rs2, size_t vl) { + return __riscv_vloxseg3ei64_v_bf16m2x3_m(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg3ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg3ei8.c new file mode 100644 index 0000000000000..b68c5f56a65b8 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg3ei8.c @@ -0,0 +1,96 @@ +// NOTE: Assertions have 
been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei8_v_bf16mf4x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vloxseg3ei8_v_bf16mf4x3(const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg3ei8_v_bf16mf4x3(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei8_v_bf16mf2x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vloxseg3ei8_v_bf16mf2x3(const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg3ei8_v_bf16mf2x3(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei8_v_bf16m1x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vloxseg3ei8_v_bf16m1x3(const __bf16 *rs1, vuint8mf2_t rs2, + size_t vl) { + return __riscv_vloxseg3ei8_v_bf16m1x3(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei8_v_bf16m2x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i8.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vloxseg3ei8_v_bf16m2x3(const __bf16 *rs1, vuint8m1_t rs2, + size_t vl) { + return __riscv_vloxseg3ei8_v_bf16m2x3(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei8_v_bf16mf4x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vloxseg3ei8_v_bf16mf4x3_m(vbool64_t vm, const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg3ei8_v_bf16mf4x3_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei8_v_bf16mf2x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], 
[[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vloxseg3ei8_v_bf16mf2x3_m(vbool32_t vm, const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg3ei8_v_bf16mf2x3_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei8_v_bf16m1x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vloxseg3ei8_v_bf16m1x3_m(vbool16_t vm, const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg3ei8_v_bf16m1x3_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei8_v_bf16m2x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vloxseg3ei8_v_bf16m2x3_m(vbool8_t vm, const __bf16 *rs1, + vuint8m1_t rs2, size_t vl) { + 
return __riscv_vloxseg3ei8_v_bf16m2x3_m(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg4ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg4ei32.c new file mode 100644 index 0000000000000..0f65998c8f30b --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg4ei32.c @@ -0,0 +1,98 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei32_v_bf16mf4x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vloxseg4ei32_v_bf16mf4x4(const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg4ei32_v_bf16mf4x4(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei32_v_bf16mf2x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], 
i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vloxseg4ei32_v_bf16mf2x4(const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei32_v_bf16mf2x4(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei32_v_bf16m1x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vloxseg4ei32_v_bf16m1x4(const __bf16 *rs1, vuint32m2_t rs2, + size_t vl) { + return __riscv_vloxseg4ei32_v_bf16m1x4(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei32_v_bf16m2x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i32.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vloxseg4ei32_v_bf16m2x4(const __bf16 *rs1, vuint32m4_t rs2, + size_t vl) { + return __riscv_vloxseg4ei32_v_bf16m2x4(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei32_v_bf16mf4x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) 
@llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vloxseg4ei32_v_bf16mf4x4_m(vbool64_t vm, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg4ei32_v_bf16mf4x4_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei32_v_bf16mf2x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vloxseg4ei32_v_bf16mf2x4_m(vbool32_t vm, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei32_v_bf16mf2x4_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei32_v_bf16m1x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vloxseg4ei32_v_bf16m1x4_m(vbool16_t vm, const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg4ei32_v_bf16m1x4_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 
4) @test_vloxseg4ei32_v_bf16m2x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vloxseg4ei32_v_bf16m2x4_m(vbool8_t vm, const __bf16 *rs1, + vuint32m4_t rs2, size_t vl) { + return __riscv_vloxseg4ei32_v_bf16m2x4_m(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg4ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg4ei64.c new file mode 100644 index 0000000000000..9c120fc68fa82 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg4ei64.c @@ -0,0 +1,98 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei64_v_bf16mf4x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] 
+// +vbfloat16mf4x4_t test_vloxseg4ei64_v_bf16mf4x4(const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei64_v_bf16mf4x4(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei64_v_bf16mf2x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vloxseg4ei64_v_bf16mf2x4(const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg4ei64_v_bf16mf2x4(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei64_v_bf16m1x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vloxseg4ei64_v_bf16m1x4(const __bf16 *rs1, vuint64m4_t rs2, + size_t vl) { + return __riscv_vloxseg4ei64_v_bf16m1x4(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei64_v_bf16m2x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i64.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// 
CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vloxseg4ei64_v_bf16m2x4(const __bf16 *rs1, vuint64m8_t rs2, + size_t vl) { + return __riscv_vloxseg4ei64_v_bf16m2x4(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei64_v_bf16mf4x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vloxseg4ei64_v_bf16mf4x4_m(vbool64_t vm, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei64_v_bf16mf4x4_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei64_v_bf16mf2x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vloxseg4ei64_v_bf16mf2x4_m(vbool32_t vm, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg4ei64_v_bf16mf2x4_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei64_v_bf16m1x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: 
[[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vloxseg4ei64_v_bf16m1x4_m(vbool16_t vm, const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg4ei64_v_bf16m1x4_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei64_v_bf16m2x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vloxseg4ei64_v_bf16m2x4_m(vbool8_t vm, const __bf16 *rs1, + vuint64m8_t rs2, size_t vl) { + return __riscv_vloxseg4ei64_v_bf16m2x4_m(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg4ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg4ei8.c new file mode 100644 index 0000000000000..f7011650f9ed5 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg4ei8.c @@ -0,0 +1,96 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck 
--check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei8_v_bf16mf4x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vloxseg4ei8_v_bf16mf4x4(const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg4ei8_v_bf16mf4x4(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei8_v_bf16mf2x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vloxseg4ei8_v_bf16mf2x4(const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg4ei8_v_bf16mf2x4(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei8_v_bf16m1x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vloxseg4ei8_v_bf16m1x4(const __bf16 *rs1, 
vuint8mf2_t rs2, + size_t vl) { + return __riscv_vloxseg4ei8_v_bf16m1x4(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei8_v_bf16m2x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i8.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vloxseg4ei8_v_bf16m2x4(const __bf16 *rs1, vuint8m1_t rs2, + size_t vl) { + return __riscv_vloxseg4ei8_v_bf16m2x4(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei8_v_bf16mf4x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vloxseg4ei8_v_bf16mf4x4_m(vbool64_t vm, const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg4ei8_v_bf16mf4x4_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei8_v_bf16mf2x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, 
i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vloxseg4ei8_v_bf16mf2x4_m(vbool32_t vm, const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg4ei8_v_bf16mf2x4_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei8_v_bf16m1x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vloxseg4ei8_v_bf16m1x4_m(vbool16_t vm, const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg4ei8_v_bf16m1x4_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei8_v_bf16m2x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vloxseg4ei8_v_bf16m2x4_m(vbool8_t vm, const __bf16 *rs1, + vuint8m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei8_v_bf16m2x4_m(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg5ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg5ei32.c new file mode 100644 index 
0000000000000..c100047f765c0 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg5ei32.c @@ -0,0 +1,76 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei32_v_bf16mf4x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vloxseg5ei32_v_bf16mf4x5(const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg5ei32_v_bf16mf4x5(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei32_v_bf16mf2x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vloxseg5ei32_v_bf16mf2x5(const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg5ei32_v_bf16mf2x5(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) 
@test_vloxseg5ei32_v_bf16m1x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vloxseg5ei32_v_bf16m1x5(const __bf16 *rs1, vuint32m2_t rs2, + size_t vl) { + return __riscv_vloxseg5ei32_v_bf16m1x5(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei32_v_bf16mf4x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vloxseg5ei32_v_bf16mf4x5_m(vbool64_t vm, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg5ei32_v_bf16mf4x5_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei32_v_bf16mf2x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vloxseg5ei32_v_bf16mf2x5_m(vbool32_t vm, + const 
__bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg5ei32_v_bf16mf2x5_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei32_v_bf16m1x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vloxseg5ei32_v_bf16m1x5_m(vbool16_t vm, const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg5ei32_v_bf16m1x5_m(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg5ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg5ei64.c new file mode 100644 index 0000000000000..75342b035c3a2 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg5ei64.c @@ -0,0 +1,76 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei64_v_bf16mf4x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) 
@llvm.riscv.vloxseg5.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vloxseg5ei64_v_bf16mf4x5(const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg5ei64_v_bf16mf4x5(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei64_v_bf16mf2x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vloxseg5ei64_v_bf16mf2x5(const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg5ei64_v_bf16mf2x5(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei64_v_bf16m1x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vloxseg5ei64_v_bf16m1x5(const __bf16 *rs1, vuint64m4_t rs2, + size_t vl) { + return __riscv_vloxseg5ei64_v_bf16m1x5(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei64_v_bf16mf4x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// 
CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vloxseg5ei64_v_bf16mf4x5_m(vbool64_t vm, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg5ei64_v_bf16mf4x5_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei64_v_bf16mf2x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vloxseg5ei64_v_bf16mf2x5_m(vbool32_t vm, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg5ei64_v_bf16mf2x5_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei64_v_bf16m1x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vloxseg5ei64_v_bf16m1x5_m(vbool16_t vm, const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg5ei64_v_bf16m1x5_m(vm, rs1, rs2, vl); +} 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg5ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg5ei8.c new file mode 100644 index 0000000000000..0e0b2ab502669 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg5ei8.c @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei8_v_bf16mf4x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vloxseg5ei8_v_bf16mf4x5(const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg5ei8_v_bf16mf4x5(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei8_v_bf16mf2x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) 
[[TMP0]] +// +vbfloat16mf2x5_t test_vloxseg5ei8_v_bf16mf2x5(const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg5ei8_v_bf16mf2x5(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei8_v_bf16m1x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vloxseg5ei8_v_bf16m1x5(const __bf16 *rs1, vuint8mf2_t rs2, + size_t vl) { + return __riscv_vloxseg5ei8_v_bf16m1x5(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei8_v_bf16mf4x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vloxseg5ei8_v_bf16mf4x5_m(vbool64_t vm, const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg5ei8_v_bf16mf4x5_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei8_v_bf16mf2x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) 
@llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vloxseg5ei8_v_bf16mf2x5_m(vbool32_t vm, const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg5ei8_v_bf16mf2x5_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei8_v_bf16m1x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vloxseg5ei8_v_bf16m1x5_m(vbool16_t vm, const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg5ei8_v_bf16m1x5_m(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg6ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg6ei32.c new file mode 100644 index 0000000000000..5d686a64b3ff6 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg6ei32.c @@ -0,0 +1,76 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define 
dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei32_v_bf16mf4x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vloxseg6ei32_v_bf16mf4x6(const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg6ei32_v_bf16mf4x6(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei32_v_bf16mf2x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vloxseg6ei32_v_bf16mf2x6(const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg6ei32_v_bf16mf2x6(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei32_v_bf16m1x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vloxseg6ei32_v_bf16m1x6(const __bf16 *rs1, vuint32m2_t rs2, + size_t vl) { + return 
__riscv_vloxseg6ei32_v_bf16m1x6(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei32_v_bf16mf4x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vloxseg6ei32_v_bf16mf4x6_m(vbool64_t vm, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg6ei32_v_bf16mf4x6_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei32_v_bf16mf2x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vloxseg6ei32_v_bf16mf2x6_m(vbool32_t vm, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg6ei32_v_bf16mf2x6_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei32_v_bf16m1x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 6) poison, ptr 
[[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vloxseg6ei32_v_bf16m1x6_m(vbool16_t vm, const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg6ei32_v_bf16m1x6_m(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg6ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg6ei64.c new file mode 100644 index 0000000000000..180e0e3946c57 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg6ei64.c @@ -0,0 +1,76 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei64_v_bf16mf4x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vloxseg6ei64_v_bf16mf4x6(const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg6ei64_v_bf16mf4x6(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei64_v_bf16mf2x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// 
CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vloxseg6ei64_v_bf16mf2x6(const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg6ei64_v_bf16mf2x6(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei64_v_bf16m1x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vloxseg6ei64_v_bf16m1x6(const __bf16 *rs1, vuint64m4_t rs2, + size_t vl) { + return __riscv_vloxseg6ei64_v_bf16m1x6(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei64_v_bf16mf4x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vloxseg6ei64_v_bf16mf4x6_m(vbool64_t vm, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg6ei64_v_bf16mf4x6_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) 
@test_vloxseg6ei64_v_bf16mf2x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vloxseg6ei64_v_bf16mf2x6_m(vbool32_t vm, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg6ei64_v_bf16mf2x6_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei64_v_bf16m1x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vloxseg6ei64_v_bf16m1x6_m(vbool16_t vm, const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg6ei64_v_bf16m1x6_m(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg6ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg6ei8.c new file mode 100644 index 0000000000000..cb25709918f66 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg6ei8.c @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 
-triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei8_v_bf16mf4x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vloxseg6ei8_v_bf16mf4x6(const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg6ei8_v_bf16mf4x6(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei8_v_bf16mf2x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vloxseg6ei8_v_bf16mf2x6(const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg6ei8_v_bf16mf2x6(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei8_v_bf16m1x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 6) poison, 
ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vloxseg6ei8_v_bf16m1x6(const __bf16 *rs1, vuint8mf2_t rs2, + size_t vl) { + return __riscv_vloxseg6ei8_v_bf16m1x6(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei8_v_bf16mf4x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vloxseg6ei8_v_bf16mf4x6_m(vbool64_t vm, const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg6ei8_v_bf16mf4x6_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei8_v_bf16mf2x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vloxseg6ei8_v_bf16mf2x6_m(vbool32_t vm, const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg6ei8_v_bf16mf2x6_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei8_v_bf16m1x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: 
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vloxseg6ei8_v_bf16m1x6_m(vbool16_t vm, const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg6ei8_v_bf16m1x6_m(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg7ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg7ei32.c new file mode 100644 index 0000000000000..a9c095a02ee80 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg7ei32.c @@ -0,0 +1,76 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei32_v_bf16mf4x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vloxseg7ei32_v_bf16mf4x7(const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg7ei32_v_bf16mf4x7(rs1, rs2, vl); +} + +// 
CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei32_v_bf16mf2x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vloxseg7ei32_v_bf16mf2x7(const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg7ei32_v_bf16mf2x7(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei32_v_bf16m1x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vloxseg7ei32_v_bf16m1x7(const __bf16 *rs1, vuint32m2_t rs2, + size_t vl) { + return __riscv_vloxseg7ei32_v_bf16m1x7(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei32_v_bf16mf4x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vloxseg7ei32_v_bf16mf4x7_m(vbool64_t vm, + 
const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg7ei32_v_bf16mf4x7_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei32_v_bf16mf2x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vloxseg7ei32_v_bf16mf2x7_m(vbool32_t vm, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg7ei32_v_bf16mf2x7_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei32_v_bf16m1x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vloxseg7ei32_v_bf16m1x7_m(vbool16_t vm, const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg7ei32_v_bf16m1x7_m(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg7ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg7ei64.c new file mode 100644 index 0000000000000..89d8f75f74b0b --- /dev/null +++ 
b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg7ei64.c @@ -0,0 +1,76 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei64_v_bf16mf4x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vloxseg7ei64_v_bf16mf4x7(const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg7ei64_v_bf16mf4x7(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei64_v_bf16mf2x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vloxseg7ei64_v_bf16mf2x7(const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg7ei64_v_bf16mf2x7(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei64_v_bf16m1x7( +// 
CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vloxseg7ei64_v_bf16m1x7(const __bf16 *rs1, vuint64m4_t rs2, + size_t vl) { + return __riscv_vloxseg7ei64_v_bf16m1x7(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei64_v_bf16mf4x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vloxseg7ei64_v_bf16mf4x7_m(vbool64_t vm, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg7ei64_v_bf16mf4x7_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei64_v_bf16mf2x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vloxseg7ei64_v_bf16mf2x7_m(vbool32_t vm, + const __bf16 *rs1, + vuint64m2_t rs2, 
size_t vl) { + return __riscv_vloxseg7ei64_v_bf16mf2x7_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei64_v_bf16m1x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vloxseg7ei64_v_bf16m1x7_m(vbool16_t vm, const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg7ei64_v_bf16m1x7_m(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg7ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg7ei8.c new file mode 100644 index 0000000000000..e4aeaadd629c0 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg7ei8.c @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei8_v_bf16mf4x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) 
@llvm.riscv.vloxseg7.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vloxseg7ei8_v_bf16mf4x7(const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg7ei8_v_bf16mf4x7(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei8_v_bf16mf2x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vloxseg7ei8_v_bf16mf2x7(const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg7ei8_v_bf16mf2x7(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei8_v_bf16m1x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vloxseg7ei8_v_bf16m1x7(const __bf16 *rs1, vuint8mf2_t rs2, + size_t vl) { + return __riscv_vloxseg7ei8_v_bf16m1x7(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei8_v_bf16mf4x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// 
CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vloxseg7ei8_v_bf16mf4x7_m(vbool64_t vm, const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg7ei8_v_bf16mf4x7_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei8_v_bf16mf2x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vloxseg7ei8_v_bf16mf2x7_m(vbool32_t vm, const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg7ei8_v_bf16mf2x7_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei8_v_bf16m1x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vloxseg7ei8_v_bf16m1x7_m(vbool16_t vm, const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg7ei8_v_bf16m1x7_m(vm, rs1, rs2, vl); +} diff --git 
a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg8ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg8ei32.c new file mode 100644 index 0000000000000..552880c5d24b7 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg8ei32.c @@ -0,0 +1,76 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei32_v_bf16mf4x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vloxseg8ei32_v_bf16mf4x8(const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg8ei32_v_bf16mf4x8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei32_v_bf16mf2x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) 
[[TMP0]] +// +vbfloat16mf2x8_t test_vloxseg8ei32_v_bf16mf2x8(const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg8ei32_v_bf16mf2x8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei32_v_bf16m1x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vloxseg8ei32_v_bf16m1x8(const __bf16 *rs1, vuint32m2_t rs2, + size_t vl) { + return __riscv_vloxseg8ei32_v_bf16m1x8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei32_v_bf16mf4x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vloxseg8ei32_v_bf16mf4x8_m(vbool64_t vm, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg8ei32_v_bf16mf4x8_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei32_v_bf16mf2x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) 
@llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vloxseg8ei32_v_bf16mf2x8_m(vbool32_t vm, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg8ei32_v_bf16mf2x8_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei32_v_bf16m1x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vloxseg8ei32_v_bf16m1x8_m(vbool16_t vm, const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg8ei32_v_bf16m1x8_m(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg8ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg8ei64.c new file mode 100644 index 0000000000000..f57756c4afb3c --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg8ei64.c @@ -0,0 +1,76 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: 
define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei64_v_bf16mf4x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vloxseg8ei64_v_bf16mf4x8(const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg8ei64_v_bf16mf4x8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei64_v_bf16mf2x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vloxseg8ei64_v_bf16mf2x8(const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg8ei64_v_bf16mf2x8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei64_v_bf16m1x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vloxseg8ei64_v_bf16m1x8(const __bf16 *rs1, vuint64m4_t rs2, + size_t vl) { + return 
__riscv_vloxseg8ei64_v_bf16m1x8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei64_v_bf16mf4x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vloxseg8ei64_v_bf16mf4x8_m(vbool64_t vm, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg8ei64_v_bf16mf4x8_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei64_v_bf16mf2x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vloxseg8ei64_v_bf16mf2x8_m(vbool32_t vm, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg8ei64_v_bf16mf2x8_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei64_v_bf16m1x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 8) poison, ptr 
[[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vloxseg8ei64_v_bf16m1x8_m(vbool16_t vm, const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg8ei64_v_bf16m1x8_m(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg8ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg8ei8.c new file mode 100644 index 0000000000000..6ae89de151b40 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vloxseg8ei8.c @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei8_v_bf16mf4x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vloxseg8ei8_v_bf16mf4x8(const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg8ei8_v_bf16mf4x8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei8_v_bf16mf2x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// 
CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vloxseg8ei8_v_bf16mf2x8(const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg8ei8_v_bf16mf2x8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei8_v_bf16m1x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vloxseg8ei8_v_bf16m1x8(const __bf16 *rs1, vuint8mf2_t rs2, + size_t vl) { + return __riscv_vloxseg8ei8_v_bf16m1x8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei8_v_bf16mf4x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vloxseg8ei8_v_bf16mf4x8_m(vbool64_t vm, const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg8ei8_v_bf16mf4x8_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) 
@test_vloxseg8ei8_v_bf16mf2x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vloxseg8ei8_v_bf16mf2x8_m(vbool32_t vm, const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg8ei8_v_bf16mf2x8_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei8_v_bf16m1x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vloxseg8ei8_v_bf16m1x8_m(vbool16_t vm, const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg8ei8_v_bf16m1x8_m(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxei32.c new file mode 100644 index 0000000000000..44a77b981ceb6 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxei32.c @@ -0,0 +1,118 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 
-target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16mf4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv1bf16.p0.nxv1i32.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vluxei32_v_bf16mf4(const __bf16 *rs1, vuint32mf2_t rs2, + size_t vl) { + return __riscv_vluxei32_v_bf16mf4(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16mf2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv2bf16.p0.nxv2i32.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vluxei32_v_bf16mf2(const __bf16 *rs1, vuint32m1_t rs2, + size_t vl) { + return __riscv_vluxei32_v_bf16mf2(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16m1( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv4bf16.p0.nxv4i32.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vluxei32_v_bf16m1(const __bf16 *rs1, vuint32m2_t rs2, + size_t vl) { + return __riscv_vluxei32_v_bf16m1(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16m2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv8bf16.p0.nxv8i32.i64( poison, ptr 
[[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vluxei32_v_bf16m2(const __bf16 *rs1, vuint32m4_t rs2, + size_t vl) { + return __riscv_vluxei32_v_bf16m2(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16m4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv16bf16.p0.nxv16i32.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m4_t test_vluxei32_v_bf16m4(const __bf16 *rs1, vuint32m8_t rs2, + size_t vl) { + return __riscv_vluxei32_v_bf16m4(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16mf4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv1bf16.p0.nxv1i32.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vluxei32_v_bf16mf4_m(vbool64_t vm, const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxei32_v_bf16mf4_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16mf2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv2bf16.p0.nxv2i32.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vluxei32_v_bf16mf2_m(vbool32_t vm, const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxei32_v_bf16mf2_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16m1_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { 
+// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv4bf16.p0.nxv4i32.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vluxei32_v_bf16m1_m(vbool16_t vm, const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vluxei32_v_bf16m1_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16m2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv8bf16.p0.nxv8i32.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vluxei32_v_bf16m2_m(vbool8_t vm, const __bf16 *rs1, + vuint32m4_t rs2, size_t vl) { + return __riscv_vluxei32_v_bf16m2_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16m4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv16bf16.p0.nxv16i32.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m4_t test_vluxei32_v_bf16m4_m(vbool4_t vm, const __bf16 *rs1, + vuint32m8_t rs2, size_t vl) { + return __riscv_vluxei32_v_bf16m4_m(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxei64.c new file mode 100644 index 0000000000000..67645b8ba5cb7 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxei64.c @@ -0,0 +1,96 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// 
REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local @test_vluxei64_v_bf16mf4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv1bf16.p0.nxv1i64.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vluxei64_v_bf16mf4(const __bf16 *rs1, vuint64m1_t rs2, + size_t vl) { + return __riscv_vluxei64_v_bf16mf4(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei64_v_bf16mf2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv2bf16.p0.nxv2i64.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vluxei64_v_bf16mf2(const __bf16 *rs1, vuint64m2_t rs2, + size_t vl) { + return __riscv_vluxei64_v_bf16mf2(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei64_v_bf16m1( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv4bf16.p0.nxv4i64.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vluxei64_v_bf16m1(const __bf16 *rs1, vuint64m4_t rs2, + size_t vl) { + return __riscv_vluxei64_v_bf16m1(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei64_v_bf16m2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] 
= call @llvm.riscv.vluxei.nxv8bf16.p0.nxv8i64.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vluxei64_v_bf16m2(const __bf16 *rs1, vuint64m8_t rs2, + size_t vl) { + return __riscv_vluxei64_v_bf16m2(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei64_v_bf16mf4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv1bf16.p0.nxv1i64.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vluxei64_v_bf16mf4_m(vbool64_t vm, const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vluxei64_v_bf16mf4_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei64_v_bf16mf2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv2bf16.p0.nxv2i64.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vluxei64_v_bf16mf2_m(vbool32_t vm, const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vluxei64_v_bf16mf2_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei64_v_bf16m1_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv4bf16.p0.nxv4i64.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vluxei64_v_bf16m1_m(vbool16_t vm, const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vluxei64_v_bf16m1_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local 
@test_vluxei64_v_bf16m2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv8bf16.p0.nxv8i64.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vluxei64_v_bf16m2_m(vbool8_t vm, const __bf16 *rs1, + vuint64m8_t rs2, size_t vl) { + return __riscv_vluxei64_v_bf16m2_m(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxei8.c new file mode 100644 index 0000000000000..f7ac2be80e08d --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxei8.c @@ -0,0 +1,140 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16mf4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv1bf16.p0.nxv1i8.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vluxei8_v_bf16mf4(const __bf16 *rs1, vuint8mf8_t rs2, + size_t vl) { + return __riscv_vluxei8_v_bf16mf4(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16mf2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// 
CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv2bf16.p0.nxv2i8.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vluxei8_v_bf16mf2(const __bf16 *rs1, vuint8mf4_t rs2, + size_t vl) { + return __riscv_vluxei8_v_bf16mf2(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16m1( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv4bf16.p0.nxv4i8.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vluxei8_v_bf16m1(const __bf16 *rs1, vuint8mf2_t rs2, + size_t vl) { + return __riscv_vluxei8_v_bf16m1(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16m2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv8bf16.p0.nxv8i8.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vluxei8_v_bf16m2(const __bf16 *rs1, vuint8m1_t rs2, + size_t vl) { + return __riscv_vluxei8_v_bf16m2(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16m4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv16bf16.p0.nxv16i8.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m4_t test_vluxei8_v_bf16m4(const __bf16 *rs1, vuint8m2_t rs2, + size_t vl) { + return __riscv_vluxei8_v_bf16m4(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16m8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: 
[[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv32bf16.p0.nxv32i8.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m8_t test_vluxei8_v_bf16m8(const __bf16 *rs1, vuint8m4_t rs2, + size_t vl) { + return __riscv_vluxei8_v_bf16m8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16mf4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv1bf16.p0.nxv1i8.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vluxei8_v_bf16mf4_m(vbool64_t vm, const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxei8_v_bf16mf4_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16mf2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv2bf16.p0.nxv2i8.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vluxei8_v_bf16mf2_m(vbool32_t vm, const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxei8_v_bf16mf2_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16m1_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv4bf16.p0.nxv4i8.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vluxei8_v_bf16m1_m(vbool16_t vm, const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxei8_v_bf16m1_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local 
@test_vluxei8_v_bf16m2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv8bf16.p0.nxv8i8.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vluxei8_v_bf16m2_m(vbool8_t vm, const __bf16 *rs1, + vuint8m1_t rs2, size_t vl) { + return __riscv_vluxei8_v_bf16m2_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16m4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv16bf16.p0.nxv16i8.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m4_t test_vluxei8_v_bf16m4_m(vbool4_t vm, const __bf16 *rs1, + vuint8m2_t rs2, size_t vl) { + return __riscv_vluxei8_v_bf16m4_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16m8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv32bf16.p0.nxv32i8.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m8_t test_vluxei8_v_bf16m8_m(vbool2_t vm, const __bf16 *rs1, + vuint8m4_t rs2, size_t vl) { + return __riscv_vluxei8_v_bf16m8_m(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg2ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg2ei32.c new file mode 100644 index 0000000000000..e570c09397e3d --- /dev/null +++ 
b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg2ei32.c @@ -0,0 +1,120 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16mf4x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vluxseg2ei32_v_bf16mf4x2(const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg2ei32_v_bf16mf4x2(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16mf2x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vluxseg2ei32_v_bf16mf2x2(const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei32_v_bf16mf2x2(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16m1x2( +// 
CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vluxseg2ei32_v_bf16m1x2(const __bf16 *rs1, vuint32m2_t rs2, + size_t vl) { + return __riscv_vluxseg2ei32_v_bf16m1x2(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16m2x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i32.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vluxseg2ei32_v_bf16m2x2(const __bf16 *rs1, vuint32m4_t rs2, + size_t vl) { + return __riscv_vluxseg2ei32_v_bf16m2x2(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16m4x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i32.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m4x2_t test_vluxseg2ei32_v_bf16m4x2(const __bf16 *rs1, vuint32m8_t rs2, + size_t vl) { + return __riscv_vluxseg2ei32_v_bf16m4x2(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local 
target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16mf4x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vluxseg2ei32_v_bf16mf4x2_m(vbool64_t vm, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg2ei32_v_bf16mf4x2_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16mf2x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vluxseg2ei32_v_bf16mf2x2_m(vbool32_t vm, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei32_v_bf16mf2x2_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16m1x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret 
target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vluxseg2ei32_v_bf16m1x2_m(vbool16_t vm, const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei32_v_bf16m1x2_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16m2x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vluxseg2ei32_v_bf16m2x2_m(vbool8_t vm, const __bf16 *rs1, + vuint32m4_t rs2, size_t vl) { + return __riscv_vluxseg2ei32_v_bf16m2x2_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16m4x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i32.nxv16i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m4x2_t test_vluxseg2ei32_v_bf16m4x2_m(vbool4_t vm, const __bf16 *rs1, + vuint32m8_t rs2, size_t vl) { + return __riscv_vluxseg2ei32_v_bf16m4x2_m(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg2ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg2ei64.c new file mode 100644 index 0000000000000..cc806af77d848 --- 
/dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg2ei64.c @@ -0,0 +1,98 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei64_v_bf16mf4x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vluxseg2ei64_v_bf16mf4x2(const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei64_v_bf16mf4x2(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei64_v_bf16mf2x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vluxseg2ei64_v_bf16mf2x2(const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei64_v_bf16mf2x2(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei64_v_bf16m1x2( 
+// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vluxseg2ei64_v_bf16m1x2(const __bf16 *rs1, vuint64m4_t rs2, + size_t vl) { + return __riscv_vluxseg2ei64_v_bf16m1x2(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei64_v_bf16m2x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i64.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vluxseg2ei64_v_bf16m2x2(const __bf16 *rs1, vuint64m8_t rs2, + size_t vl) { + return __riscv_vluxseg2ei64_v_bf16m2x2(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei64_v_bf16mf4x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vluxseg2ei64_v_bf16mf4x2_m(vbool64_t vm, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei64_v_bf16mf4x2_m(vm, rs1, 
rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei64_v_bf16mf2x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vluxseg2ei64_v_bf16mf2x2_m(vbool32_t vm, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei64_v_bf16mf2x2_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei64_v_bf16m1x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vluxseg2ei64_v_bf16m1x2_m(vbool16_t vm, const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg2ei64_v_bf16m1x2_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei64_v_bf16m2x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, 
i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vluxseg2ei64_v_bf16m2x2_m(vbool8_t vm, const __bf16 *rs1, + vuint64m8_t rs2, size_t vl) { + return __riscv_vluxseg2ei64_v_bf16m2x2_m(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg2ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg2ei8.c new file mode 100644 index 0000000000000..871567122ce16 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg2ei8.c @@ -0,0 +1,118 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16mf4x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vluxseg2ei8_v_bf16mf4x2(const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg2ei8_v_bf16mf4x2(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16mf2x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: 
[[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vluxseg2ei8_v_bf16mf2x2(const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg2ei8_v_bf16mf2x2(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16m1x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vluxseg2ei8_v_bf16m1x2(const __bf16 *rs1, vuint8mf2_t rs2, + size_t vl) { + return __riscv_vluxseg2ei8_v_bf16m1x2(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16m2x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i8.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vluxseg2ei8_v_bf16m2x2(const __bf16 *rs1, vuint8m1_t rs2, + size_t vl) { + return __riscv_vluxseg2ei8_v_bf16m2x2(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16m4x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// 
CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i8.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m4x2_t test_vluxseg2ei8_v_bf16m4x2(const __bf16 *rs1, vuint8m2_t rs2, + size_t vl) { + return __riscv_vluxseg2ei8_v_bf16m4x2(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16mf4x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vluxseg2ei8_v_bf16mf4x2_m(vbool64_t vm, const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg2ei8_v_bf16mf4x2_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16mf2x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vluxseg2ei8_v_bf16mf2x2_m(vbool32_t vm, const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg2ei8_v_bf16mf2x2_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define 
dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16m1x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vluxseg2ei8_v_bf16m1x2_m(vbool16_t vm, const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg2ei8_v_bf16m1x2_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16m2x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vluxseg2ei8_v_bf16m2x2_m(vbool8_t vm, const __bf16 *rs1, + vuint8m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei8_v_bf16m2x2_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16m4x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i8.nxv16i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", 
, 2) [[TMP0]] +// +vbfloat16m4x2_t test_vluxseg2ei8_v_bf16m4x2_m(vbool4_t vm, const __bf16 *rs1, + vuint8m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei8_v_bf16m4x2_m(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg3ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg3ei32.c new file mode 100644 index 0000000000000..770d7f2c255f8 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg3ei32.c @@ -0,0 +1,98 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei32_v_bf16mf4x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vluxseg3ei32_v_bf16mf4x3(const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg3ei32_v_bf16mf4x3(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei32_v_bf16mf2x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) 
@llvm.riscv.vluxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vluxseg3ei32_v_bf16mf2x3(const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei32_v_bf16mf2x3(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei32_v_bf16m1x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vluxseg3ei32_v_bf16m1x3(const __bf16 *rs1, vuint32m2_t rs2, + size_t vl) { + return __riscv_vluxseg3ei32_v_bf16m1x3(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei32_v_bf16m2x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i32.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vluxseg3ei32_v_bf16m2x3(const __bf16 *rs1, vuint32m4_t rs2, + size_t vl) { + return __riscv_vluxseg3ei32_v_bf16m2x3(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei32_v_bf16mf4x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// 
CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vluxseg3ei32_v_bf16mf4x3_m(vbool64_t vm, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg3ei32_v_bf16mf4x3_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei32_v_bf16mf2x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vluxseg3ei32_v_bf16mf2x3_m(vbool32_t vm, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei32_v_bf16mf2x3_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei32_v_bf16m1x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vluxseg3ei32_v_bf16m1x3_m(vbool16_t vm, const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg3ei32_v_bf16m1x3_m(vm, rs1, rs2, vl); +} 
+ +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei32_v_bf16m2x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vluxseg3ei32_v_bf16m2x3_m(vbool8_t vm, const __bf16 *rs1, + vuint32m4_t rs2, size_t vl) { + return __riscv_vluxseg3ei32_v_bf16m2x3_m(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg3ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg3ei64.c new file mode 100644 index 0000000000000..505b2096aaf13 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg3ei64.c @@ -0,0 +1,98 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei64_v_bf16mf4x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 
4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vluxseg3ei64_v_bf16mf4x3(const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei64_v_bf16mf4x3(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei64_v_bf16mf2x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vluxseg3ei64_v_bf16mf2x3(const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg3ei64_v_bf16mf2x3(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei64_v_bf16m1x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vluxseg3ei64_v_bf16m1x3(const __bf16 *rs1, vuint64m4_t rs2, + size_t vl) { + return __riscv_vluxseg3ei64_v_bf16m1x3(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei64_v_bf16m2x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) 
@llvm.riscv.vluxseg3.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i64.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vluxseg3ei64_v_bf16m2x3(const __bf16 *rs1, vuint64m8_t rs2, + size_t vl) { + return __riscv_vluxseg3ei64_v_bf16m2x3(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei64_v_bf16mf4x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vluxseg3ei64_v_bf16mf4x3_m(vbool64_t vm, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei64_v_bf16mf4x3_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei64_v_bf16mf2x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vluxseg3ei64_v_bf16mf2x3_m(vbool32_t vm, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg3ei64_v_bf16mf2x3_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei64_v_bf16m1x3_m( +// 
CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vluxseg3ei64_v_bf16m1x3_m(vbool16_t vm, const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg3ei64_v_bf16m1x3_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei64_v_bf16m2x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vluxseg3ei64_v_bf16m2x3_m(vbool8_t vm, const __bf16 *rs1, + vuint64m8_t rs2, size_t vl) { + return __riscv_vluxseg3ei64_v_bf16m2x3_m(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg3ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg3ei8.c new file mode 100644 index 0000000000000..708f28667f8ab --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg3ei8.c @@ -0,0 +1,96 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// 
RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei8_v_bf16mf4x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vluxseg3ei8_v_bf16mf4x3(const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg3ei8_v_bf16mf4x3(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei8_v_bf16mf2x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vluxseg3ei8_v_bf16mf2x3(const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg3ei8_v_bf16mf2x3(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei8_v_bf16m1x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// 
CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vluxseg3ei8_v_bf16m1x3(const __bf16 *rs1, vuint8mf2_t rs2, + size_t vl) { + return __riscv_vluxseg3ei8_v_bf16m1x3(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei8_v_bf16m2x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i8.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vluxseg3ei8_v_bf16m2x3(const __bf16 *rs1, vuint8m1_t rs2, + size_t vl) { + return __riscv_vluxseg3ei8_v_bf16m2x3(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei8_v_bf16mf4x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vluxseg3ei8_v_bf16mf4x3_m(vbool64_t vm, const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg3ei8_v_bf16mf4x3_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei8_v_bf16mf2x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) 
@llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vluxseg3ei8_v_bf16mf2x3_m(vbool32_t vm, const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg3ei8_v_bf16mf2x3_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei8_v_bf16m1x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vluxseg3ei8_v_bf16m1x3_m(vbool16_t vm, const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg3ei8_v_bf16m1x3_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei8_v_bf16m2x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vluxseg3ei8_v_bf16m2x3_m(vbool8_t vm, const __bf16 *rs1, + vuint8m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei8_v_bf16m2x3_m(vm, rs1, rs2, vl); +} diff --git 
a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg4ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg4ei32.c new file mode 100644 index 0000000000000..7c300c57ad9ae --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg4ei32.c @@ -0,0 +1,98 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei32_v_bf16mf4x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vluxseg4ei32_v_bf16mf4x4(const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg4ei32_v_bf16mf4x4(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei32_v_bf16mf2x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) 
[[TMP0]] +// +vbfloat16mf2x4_t test_vluxseg4ei32_v_bf16mf2x4(const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg4ei32_v_bf16mf2x4(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei32_v_bf16m1x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vluxseg4ei32_v_bf16m1x4(const __bf16 *rs1, vuint32m2_t rs2, + size_t vl) { + return __riscv_vluxseg4ei32_v_bf16m1x4(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei32_v_bf16m2x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i32.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vluxseg4ei32_v_bf16m2x4(const __bf16 *rs1, vuint32m4_t rs2, + size_t vl) { + return __riscv_vluxseg4ei32_v_bf16m2x4(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei32_v_bf16mf4x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], 
[[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vluxseg4ei32_v_bf16mf4x4_m(vbool64_t vm, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg4ei32_v_bf16mf4x4_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei32_v_bf16mf2x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vluxseg4ei32_v_bf16mf2x4_m(vbool32_t vm, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg4ei32_v_bf16mf2x4_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei32_v_bf16m1x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vluxseg4ei32_v_bf16m1x4_m(vbool16_t vm, const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg4ei32_v_bf16m1x4_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei32_v_bf16m2x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] 
{ +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vluxseg4ei32_v_bf16m2x4_m(vbool8_t vm, const __bf16 *rs1, + vuint32m4_t rs2, size_t vl) { + return __riscv_vluxseg4ei32_v_bf16m2x4_m(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg4ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg4ei64.c new file mode 100644 index 0000000000000..db177f38000ed --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg4ei64.c @@ -0,0 +1,98 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei64_v_bf16mf4x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vluxseg4ei64_v_bf16mf4x4(const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return 
__riscv_vluxseg4ei64_v_bf16mf4x4(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei64_v_bf16mf2x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vluxseg4ei64_v_bf16mf2x4(const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg4ei64_v_bf16mf2x4(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei64_v_bf16m1x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vluxseg4ei64_v_bf16m1x4(const __bf16 *rs1, vuint64m4_t rs2, + size_t vl) { + return __riscv_vluxseg4ei64_v_bf16m1x4(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei64_v_bf16m2x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i64.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vluxseg4ei64_v_bf16m2x4(const 
__bf16 *rs1, vuint64m8_t rs2, + size_t vl) { + return __riscv_vluxseg4ei64_v_bf16m2x4(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei64_v_bf16mf4x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vluxseg4ei64_v_bf16mf4x4_m(vbool64_t vm, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg4ei64_v_bf16mf4x4_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei64_v_bf16mf2x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vluxseg4ei64_v_bf16mf2x4_m(vbool32_t vm, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg4ei64_v_bf16mf2x4_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei64_v_bf16m1x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) 
@llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vluxseg4ei64_v_bf16m1x4_m(vbool16_t vm, const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg4ei64_v_bf16m1x4_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei64_v_bf16m2x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vluxseg4ei64_v_bf16m2x4_m(vbool8_t vm, const __bf16 *rs1, + vuint64m8_t rs2, size_t vl) { + return __riscv_vluxseg4ei64_v_bf16m2x4_m(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg4ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg4ei8.c new file mode 100644 index 0000000000000..fa52ffcb0a53f --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg4ei8.c @@ -0,0 +1,96 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define 
dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei8_v_bf16mf4x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vluxseg4ei8_v_bf16mf4x4(const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg4ei8_v_bf16mf4x4(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei8_v_bf16mf2x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vluxseg4ei8_v_bf16mf2x4(const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg4ei8_v_bf16mf2x4(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei8_v_bf16m1x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vluxseg4ei8_v_bf16m1x4(const __bf16 *rs1, vuint8mf2_t rs2, + size_t vl) { + return 
__riscv_vluxseg4ei8_v_bf16m1x4(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei8_v_bf16m2x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i8.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vluxseg4ei8_v_bf16m2x4(const __bf16 *rs1, vuint8m1_t rs2, + size_t vl) { + return __riscv_vluxseg4ei8_v_bf16m2x4(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei8_v_bf16mf4x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vluxseg4ei8_v_bf16mf4x4_m(vbool64_t vm, const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg4ei8_v_bf16mf4x4_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei8_v_bf16mf2x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret 
target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vluxseg4ei8_v_bf16mf2x4_m(vbool32_t vm, const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg4ei8_v_bf16mf2x4_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei8_v_bf16m1x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vluxseg4ei8_v_bf16m1x4_m(vbool16_t vm, const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg4ei8_v_bf16m1x4_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei8_v_bf16m2x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vluxseg4ei8_v_bf16m2x4_m(vbool8_t vm, const __bf16 *rs1, + vuint8m1_t rs2, size_t vl) { + return __riscv_vluxseg4ei8_v_bf16m2x4_m(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg5ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg5ei32.c new file mode 100644 index 0000000000000..393cc317cef38 --- /dev/null 
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg5ei32.c @@ -0,0 +1,76 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei32_v_bf16mf4x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vluxseg5ei32_v_bf16mf4x5(const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg5ei32_v_bf16mf4x5(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei32_v_bf16mf2x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vluxseg5ei32_v_bf16mf2x5(const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg5ei32_v_bf16mf2x5(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei32_v_bf16m1x5( +// 
CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vluxseg5ei32_v_bf16m1x5(const __bf16 *rs1, vuint32m2_t rs2, + size_t vl) { + return __riscv_vluxseg5ei32_v_bf16m1x5(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei32_v_bf16mf4x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vluxseg5ei32_v_bf16mf4x5_m(vbool64_t vm, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg5ei32_v_bf16mf4x5_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei32_v_bf16mf2x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vluxseg5ei32_v_bf16mf2x5_m(vbool32_t vm, + const __bf16 *rs1, + vuint32m1_t rs2, 
size_t vl) { + return __riscv_vluxseg5ei32_v_bf16mf2x5_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei32_v_bf16m1x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vluxseg5ei32_v_bf16m1x5_m(vbool16_t vm, const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg5ei32_v_bf16m1x5_m(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg5ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg5ei64.c new file mode 100644 index 0000000000000..8048dbed774bf --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg5ei64.c @@ -0,0 +1,76 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei64_v_bf16mf4x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) 
@llvm.riscv.vluxseg5.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vluxseg5ei64_v_bf16mf4x5(const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg5ei64_v_bf16mf4x5(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei64_v_bf16mf2x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vluxseg5ei64_v_bf16mf2x5(const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg5ei64_v_bf16mf2x5(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei64_v_bf16m1x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vluxseg5ei64_v_bf16m1x5(const __bf16 *rs1, vuint64m4_t rs2, + size_t vl) { + return __riscv_vluxseg5ei64_v_bf16m1x5(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei64_v_bf16mf4x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// 
CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vluxseg5ei64_v_bf16mf4x5_m(vbool64_t vm, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg5ei64_v_bf16mf4x5_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei64_v_bf16mf2x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vluxseg5ei64_v_bf16mf2x5_m(vbool32_t vm, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg5ei64_v_bf16mf2x5_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei64_v_bf16m1x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vluxseg5ei64_v_bf16m1x5_m(vbool16_t vm, const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg5ei64_v_bf16m1x5_m(vm, rs1, rs2, vl); +} 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg5ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg5ei8.c new file mode 100644 index 0000000000000..8fc02319cfba5 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg5ei8.c @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei8_v_bf16mf4x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vluxseg5ei8_v_bf16mf4x5(const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg5ei8_v_bf16mf4x5(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei8_v_bf16mf2x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) 
[[TMP0]] +// +vbfloat16mf2x5_t test_vluxseg5ei8_v_bf16mf2x5(const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg5ei8_v_bf16mf2x5(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei8_v_bf16m1x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vluxseg5ei8_v_bf16m1x5(const __bf16 *rs1, vuint8mf2_t rs2, + size_t vl) { + return __riscv_vluxseg5ei8_v_bf16m1x5(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei8_v_bf16mf4x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vluxseg5ei8_v_bf16mf4x5_m(vbool64_t vm, const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg5ei8_v_bf16mf4x5_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei8_v_bf16mf2x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) 
@llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vluxseg5ei8_v_bf16mf2x5_m(vbool32_t vm, const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg5ei8_v_bf16mf2x5_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei8_v_bf16m1x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vluxseg5ei8_v_bf16m1x5_m(vbool16_t vm, const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg5ei8_v_bf16m1x5_m(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg6ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg6ei32.c new file mode 100644 index 0000000000000..a5f680c8b7662 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg6ei32.c @@ -0,0 +1,76 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define 
dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei32_v_bf16mf4x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vluxseg6ei32_v_bf16mf4x6(const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg6ei32_v_bf16mf4x6(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei32_v_bf16mf2x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vluxseg6ei32_v_bf16mf2x6(const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg6ei32_v_bf16mf2x6(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei32_v_bf16m1x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vluxseg6ei32_v_bf16m1x6(const __bf16 *rs1, vuint32m2_t rs2, + size_t vl) { + return 
__riscv_vluxseg6ei32_v_bf16m1x6(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei32_v_bf16mf4x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vluxseg6ei32_v_bf16mf4x6_m(vbool64_t vm, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg6ei32_v_bf16mf4x6_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei32_v_bf16mf2x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vluxseg6ei32_v_bf16mf2x6_m(vbool32_t vm, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg6ei32_v_bf16mf2x6_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei32_v_bf16m1x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 6) poison, ptr 
[[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vluxseg6ei32_v_bf16m1x6_m(vbool16_t vm, const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg6ei32_v_bf16m1x6_m(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg6ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg6ei64.c new file mode 100644 index 0000000000000..50a5933e228ac --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg6ei64.c @@ -0,0 +1,76 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei64_v_bf16mf4x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vluxseg6ei64_v_bf16mf4x6(const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg6ei64_v_bf16mf4x6(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei64_v_bf16mf2x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// 
CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vluxseg6ei64_v_bf16mf2x6(const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg6ei64_v_bf16mf2x6(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei64_v_bf16m1x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vluxseg6ei64_v_bf16m1x6(const __bf16 *rs1, vuint64m4_t rs2, + size_t vl) { + return __riscv_vluxseg6ei64_v_bf16m1x6(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei64_v_bf16mf4x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vluxseg6ei64_v_bf16mf4x6_m(vbool64_t vm, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg6ei64_v_bf16mf4x6_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) 
@test_vluxseg6ei64_v_bf16mf2x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vluxseg6ei64_v_bf16mf2x6_m(vbool32_t vm, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg6ei64_v_bf16mf2x6_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei64_v_bf16m1x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vluxseg6ei64_v_bf16m1x6_m(vbool16_t vm, const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg6ei64_v_bf16m1x6_m(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg6ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg6ei8.c new file mode 100644 index 0000000000000..8684080d2d362 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg6ei8.c @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 
-triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei8_v_bf16mf4x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vluxseg6ei8_v_bf16mf4x6(const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg6ei8_v_bf16mf4x6(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei8_v_bf16mf2x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vluxseg6ei8_v_bf16mf2x6(const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg6ei8_v_bf16mf2x6(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei8_v_bf16m1x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 6) poison, 
ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vluxseg6ei8_v_bf16m1x6(const __bf16 *rs1, vuint8mf2_t rs2, + size_t vl) { + return __riscv_vluxseg6ei8_v_bf16m1x6(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei8_v_bf16mf4x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vluxseg6ei8_v_bf16mf4x6_m(vbool64_t vm, const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg6ei8_v_bf16mf4x6_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei8_v_bf16mf2x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vluxseg6ei8_v_bf16mf2x6_m(vbool32_t vm, const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg6ei8_v_bf16mf2x6_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei8_v_bf16m1x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: 
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vluxseg6ei8_v_bf16m1x6_m(vbool16_t vm, const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg6ei8_v_bf16m1x6_m(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg7ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg7ei32.c new file mode 100644 index 0000000000000..6bdcf10de0d34 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg7ei32.c @@ -0,0 +1,76 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei32_v_bf16mf4x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vluxseg7ei32_v_bf16mf4x7(const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg7ei32_v_bf16mf4x7(rs1, rs2, vl); +} + +// 
CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei32_v_bf16mf2x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vluxseg7ei32_v_bf16mf2x7(const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg7ei32_v_bf16mf2x7(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei32_v_bf16m1x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vluxseg7ei32_v_bf16m1x7(const __bf16 *rs1, vuint32m2_t rs2, + size_t vl) { + return __riscv_vluxseg7ei32_v_bf16m1x7(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei32_v_bf16mf4x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vluxseg7ei32_v_bf16mf4x7_m(vbool64_t vm, + 
const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg7ei32_v_bf16mf4x7_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei32_v_bf16mf2x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vluxseg7ei32_v_bf16mf2x7_m(vbool32_t vm, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg7ei32_v_bf16mf2x7_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei32_v_bf16m1x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vluxseg7ei32_v_bf16m1x7_m(vbool16_t vm, const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg7ei32_v_bf16m1x7_m(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg7ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg7ei64.c new file mode 100644 index 0000000000000..7cdd26a6aa481 --- /dev/null +++ 
b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg7ei64.c @@ -0,0 +1,76 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei64_v_bf16mf4x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vluxseg7ei64_v_bf16mf4x7(const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg7ei64_v_bf16mf4x7(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei64_v_bf16mf2x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vluxseg7ei64_v_bf16mf2x7(const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg7ei64_v_bf16mf2x7(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei64_v_bf16m1x7( +// 
CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vluxseg7ei64_v_bf16m1x7(const __bf16 *rs1, vuint64m4_t rs2, + size_t vl) { + return __riscv_vluxseg7ei64_v_bf16m1x7(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei64_v_bf16mf4x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vluxseg7ei64_v_bf16mf4x7_m(vbool64_t vm, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg7ei64_v_bf16mf4x7_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei64_v_bf16mf2x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vluxseg7ei64_v_bf16mf2x7_m(vbool32_t vm, + const __bf16 *rs1, + vuint64m2_t rs2, 
size_t vl) { + return __riscv_vluxseg7ei64_v_bf16mf2x7_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei64_v_bf16m1x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vluxseg7ei64_v_bf16m1x7_m(vbool16_t vm, const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg7ei64_v_bf16m1x7_m(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg7ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg7ei8.c new file mode 100644 index 0000000000000..6ae6bd3e631d8 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg7ei8.c @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei8_v_bf16mf4x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) 
@llvm.riscv.vluxseg7.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vluxseg7ei8_v_bf16mf4x7(const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg7ei8_v_bf16mf4x7(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei8_v_bf16mf2x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vluxseg7ei8_v_bf16mf2x7(const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg7ei8_v_bf16mf2x7(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei8_v_bf16m1x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vluxseg7ei8_v_bf16m1x7(const __bf16 *rs1, vuint8mf2_t rs2, + size_t vl) { + return __riscv_vluxseg7ei8_v_bf16m1x7(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei8_v_bf16mf4x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// 
CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vluxseg7ei8_v_bf16mf4x7_m(vbool64_t vm, const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg7ei8_v_bf16mf4x7_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei8_v_bf16mf2x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vluxseg7ei8_v_bf16mf2x7_m(vbool32_t vm, const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg7ei8_v_bf16mf2x7_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei8_v_bf16m1x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vluxseg7ei8_v_bf16m1x7_m(vbool16_t vm, const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg7ei8_v_bf16m1x7_m(vm, rs1, rs2, vl); +} diff --git 
a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg8ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg8ei32.c new file mode 100644 index 0000000000000..bbcae0625be42 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg8ei32.c @@ -0,0 +1,76 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei32_v_bf16mf4x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vluxseg8ei32_v_bf16mf4x8(const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg8ei32_v_bf16mf4x8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei32_v_bf16mf2x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) 
[[TMP0]] +// +vbfloat16mf2x8_t test_vluxseg8ei32_v_bf16mf2x8(const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg8ei32_v_bf16mf2x8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei32_v_bf16m1x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vluxseg8ei32_v_bf16m1x8(const __bf16 *rs1, vuint32m2_t rs2, + size_t vl) { + return __riscv_vluxseg8ei32_v_bf16m1x8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei32_v_bf16mf4x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vluxseg8ei32_v_bf16mf4x8_m(vbool64_t vm, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg8ei32_v_bf16mf4x8_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei32_v_bf16mf2x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) 
@llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vluxseg8ei32_v_bf16mf2x8_m(vbool32_t vm, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg8ei32_v_bf16mf2x8_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei32_v_bf16m1x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vluxseg8ei32_v_bf16m1x8_m(vbool16_t vm, const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg8ei32_v_bf16m1x8_m(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg8ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg8ei64.c new file mode 100644 index 0000000000000..42b3365d1116f --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg8ei64.c @@ -0,0 +1,76 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: 
define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei64_v_bf16mf4x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vluxseg8ei64_v_bf16mf4x8(const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg8ei64_v_bf16mf4x8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei64_v_bf16mf2x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vluxseg8ei64_v_bf16mf2x8(const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg8ei64_v_bf16mf2x8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei64_v_bf16m1x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vluxseg8ei64_v_bf16m1x8(const __bf16 *rs1, vuint64m4_t rs2, + size_t vl) { + return 
__riscv_vluxseg8ei64_v_bf16m1x8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei64_v_bf16mf4x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vluxseg8ei64_v_bf16mf4x8_m(vbool64_t vm, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg8ei64_v_bf16mf4x8_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei64_v_bf16mf2x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vluxseg8ei64_v_bf16mf2x8_m(vbool32_t vm, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg8ei64_v_bf16mf2x8_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei64_v_bf16m1x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 8) poison, ptr 
[[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vluxseg8ei64_v_bf16m1x8_m(vbool16_t vm, const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg8ei64_v_bf16m1x8_m(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg8ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg8ei8.c new file mode 100644 index 0000000000000..a50bb4cedc6d6 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vluxseg8ei8.c @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei8_v_bf16mf4x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vluxseg8ei8_v_bf16mf4x8(const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg8ei8_v_bf16mf4x8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei8_v_bf16mf2x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// 
CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vluxseg8ei8_v_bf16mf2x8(const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg8ei8_v_bf16mf2x8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei8_v_bf16m1x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vluxseg8ei8_v_bf16m1x8(const __bf16 *rs1, vuint8mf2_t rs2, + size_t vl) { + return __riscv_vluxseg8ei8_v_bf16m1x8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei8_v_bf16mf4x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vluxseg8ei8_v_bf16mf4x8_m(vbool64_t vm, const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg8ei8_v_bf16mf4x8_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) 
@test_vluxseg8ei8_v_bf16mf2x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vluxseg8ei8_v_bf16mf2x8_m(vbool32_t vm, const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg8ei8_v_bf16mf2x8_m(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei8_v_bf16m1x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vluxseg8ei8_v_bf16m1x8_m(vbool16_t vm, const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg8ei8_v_bf16m1x8_m(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxei32.c new file mode 100644 index 0000000000000..775a28a4fe1fd --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxei32.c @@ -0,0 +1,118 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 
-target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei32_v_bf16mf4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.nxv1bf16.p0.nxv1i32.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei32_v_bf16mf4(__bf16 *rs1, vuint32mf2_t rs2, vbfloat16mf4_t vs3, + size_t vl) { + return __riscv_vsoxei32_v_bf16mf4(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei32_v_bf16mf2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.nxv2bf16.p0.nxv2i32.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei32_v_bf16mf2(__bf16 *rs1, vuint32m1_t rs2, vbfloat16mf2_t vs3, + size_t vl) { + return __riscv_vsoxei32_v_bf16mf2(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei32_v_bf16m1( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.nxv4bf16.p0.nxv4i32.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei32_v_bf16m1(__bf16 *rs1, vuint32m2_t rs2, vbfloat16m1_t vs3, + size_t vl) { + return __riscv_vsoxei32_v_bf16m1(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei32_v_bf16m2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void 
@llvm.riscv.vsoxei.nxv8bf16.p0.nxv8i32.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei32_v_bf16m2(__bf16 *rs1, vuint32m4_t rs2, vbfloat16m2_t vs3, + size_t vl) { + return __riscv_vsoxei32_v_bf16m2(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei32_v_bf16m4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.nxv16bf16.p0.nxv16i32.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei32_v_bf16m4(__bf16 *rs1, vuint32m8_t rs2, vbfloat16m4_t vs3, + size_t vl) { + return __riscv_vsoxei32_v_bf16m4(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei32_v_bf16mf4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.mask.nxv1bf16.p0.nxv1i32.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei32_v_bf16mf4_m(vbool64_t vm, __bf16 *rs1, vuint32mf2_t rs2, + vbfloat16mf4_t vs3, size_t vl) { + return __riscv_vsoxei32_v_bf16mf4_m(vm, rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei32_v_bf16mf2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.mask.nxv2bf16.p0.nxv2i32.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei32_v_bf16mf2_m(vbool32_t vm, __bf16 *rs1, vuint32m1_t rs2, + vbfloat16mf2_t vs3, size_t vl) { + return __riscv_vsoxei32_v_bf16mf2_m(vm, rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei32_v_bf16m1_m( +// 
CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.mask.nxv4bf16.p0.nxv4i32.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei32_v_bf16m1_m(vbool16_t vm, __bf16 *rs1, vuint32m2_t rs2, + vbfloat16m1_t vs3, size_t vl) { + return __riscv_vsoxei32_v_bf16m1_m(vm, rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei32_v_bf16m2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.mask.nxv8bf16.p0.nxv8i32.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei32_v_bf16m2_m(vbool8_t vm, __bf16 *rs1, vuint32m4_t rs2, + vbfloat16m2_t vs3, size_t vl) { + return __riscv_vsoxei32_v_bf16m2_m(vm, rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei32_v_bf16m4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.mask.nxv16bf16.p0.nxv16i32.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei32_v_bf16m4_m(vbool4_t vm, __bf16 *rs1, vuint32m8_t rs2, + vbfloat16m4_t vs3, size_t vl) { + return __riscv_vsoxei32_v_bf16m4_m(vm, rs1, rs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxei64.c new file mode 100644 index 0000000000000..d69aa335b05b5 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxei64.c @@ 
-0,0 +1,96 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei64_v_bf16mf4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.nxv1bf16.p0.nxv1i64.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei64_v_bf16mf4(__bf16 *rs1, vuint64m1_t rs2, vbfloat16mf4_t vs3, + size_t vl) { + return __riscv_vsoxei64_v_bf16mf4(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei64_v_bf16mf2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.nxv2bf16.p0.nxv2i64.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei64_v_bf16mf2(__bf16 *rs1, vuint64m2_t rs2, vbfloat16mf2_t vs3, + size_t vl) { + return __riscv_vsoxei64_v_bf16mf2(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei64_v_bf16m1( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.nxv4bf16.p0.nxv4i64.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei64_v_bf16m1(__bf16 *rs1, vuint64m4_t rs2, vbfloat16m1_t vs3, + size_t vl) { + return __riscv_vsoxei64_v_bf16m1(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void 
@test_vsoxei64_v_bf16m2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.nxv8bf16.p0.nxv8i64.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei64_v_bf16m2(__bf16 *rs1, vuint64m8_t rs2, vbfloat16m2_t vs3, + size_t vl) { + return __riscv_vsoxei64_v_bf16m2(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei64_v_bf16mf4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.mask.nxv1bf16.p0.nxv1i64.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei64_v_bf16mf4_m(vbool64_t vm, __bf16 *rs1, vuint64m1_t rs2, + vbfloat16mf4_t vs3, size_t vl) { + return __riscv_vsoxei64_v_bf16mf4_m(vm, rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei64_v_bf16mf2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.mask.nxv2bf16.p0.nxv2i64.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei64_v_bf16mf2_m(vbool32_t vm, __bf16 *rs1, vuint64m2_t rs2, + vbfloat16mf2_t vs3, size_t vl) { + return __riscv_vsoxei64_v_bf16mf2_m(vm, rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei64_v_bf16m1_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.mask.nxv4bf16.p0.nxv4i64.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void 
test_vsoxei64_v_bf16m1_m(vbool16_t vm, __bf16 *rs1, vuint64m4_t rs2, + vbfloat16m1_t vs3, size_t vl) { + return __riscv_vsoxei64_v_bf16m1_m(vm, rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei64_v_bf16m2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.mask.nxv8bf16.p0.nxv8i64.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei64_v_bf16m2_m(vbool8_t vm, __bf16 *rs1, vuint64m8_t rs2, + vbfloat16m2_t vs3, size_t vl) { + return __riscv_vsoxei64_v_bf16m2_m(vm, rs1, rs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxei8.c new file mode 100644 index 0000000000000..e0e8376297555 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxei8.c @@ -0,0 +1,140 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei8_v_bf16mf4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.nxv1bf16.p0.nxv1i8.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei8_v_bf16mf4(__bf16 *rs1, vuint8mf8_t rs2, vbfloat16mf4_t vs3, + size_t vl) { + return 
__riscv_vsoxei8_v_bf16mf4(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei8_v_bf16mf2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.nxv2bf16.p0.nxv2i8.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei8_v_bf16mf2(__bf16 *rs1, vuint8mf4_t rs2, vbfloat16mf2_t vs3, + size_t vl) { + return __riscv_vsoxei8_v_bf16mf2(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei8_v_bf16m1( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.nxv4bf16.p0.nxv4i8.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei8_v_bf16m1(__bf16 *rs1, vuint8mf2_t rs2, vbfloat16m1_t vs3, + size_t vl) { + return __riscv_vsoxei8_v_bf16m1(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei8_v_bf16m2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.nxv8bf16.p0.nxv8i8.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei8_v_bf16m2(__bf16 *rs1, vuint8m1_t rs2, vbfloat16m2_t vs3, + size_t vl) { + return __riscv_vsoxei8_v_bf16m2(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei8_v_bf16m4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.nxv16bf16.p0.nxv16i8.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei8_v_bf16m4(__bf16 *rs1, vuint8m2_t rs2, 
vbfloat16m4_t vs3, + size_t vl) { + return __riscv_vsoxei8_v_bf16m4(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei8_v_bf16m8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.nxv32bf16.p0.nxv32i8.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei8_v_bf16m8(__bf16 *rs1, vuint8m4_t rs2, vbfloat16m8_t vs3, + size_t vl) { + return __riscv_vsoxei8_v_bf16m8(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei8_v_bf16mf4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.mask.nxv1bf16.p0.nxv1i8.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei8_v_bf16mf4_m(vbool64_t vm, __bf16 *rs1, vuint8mf8_t rs2, + vbfloat16mf4_t vs3, size_t vl) { + return __riscv_vsoxei8_v_bf16mf4_m(vm, rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei8_v_bf16mf2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.mask.nxv2bf16.p0.nxv2i8.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei8_v_bf16mf2_m(vbool32_t vm, __bf16 *rs1, vuint8mf4_t rs2, + vbfloat16mf2_t vs3, size_t vl) { + return __riscv_vsoxei8_v_bf16mf2_m(vm, rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei8_v_bf16m1_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void 
@llvm.riscv.vsoxei.mask.nxv4bf16.p0.nxv4i8.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei8_v_bf16m1_m(vbool16_t vm, __bf16 *rs1, vuint8mf2_t rs2, + vbfloat16m1_t vs3, size_t vl) { + return __riscv_vsoxei8_v_bf16m1_m(vm, rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei8_v_bf16m2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.mask.nxv8bf16.p0.nxv8i8.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei8_v_bf16m2_m(vbool8_t vm, __bf16 *rs1, vuint8m1_t rs2, + vbfloat16m2_t vs3, size_t vl) { + return __riscv_vsoxei8_v_bf16m2_m(vm, rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei8_v_bf16m4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.mask.nxv16bf16.p0.nxv16i8.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei8_v_bf16m4_m(vbool4_t vm, __bf16 *rs1, vuint8m2_t rs2, + vbfloat16m4_t vs3, size_t vl) { + return __riscv_vsoxei8_v_bf16m4_m(vm, rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei8_v_bf16m8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.mask.nxv32bf16.p0.nxv32i8.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei8_v_bf16m8_m(vbool2_t vm, __bf16 *rs1, vuint8m4_t rs2, + vbfloat16m8_t vs3, size_t vl) { + return __riscv_vsoxei8_v_bf16m8_m(vm, rs1, rs2, vs3, vl); +} diff --git 
a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg2ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg2ei32.c new file mode 100644 index 0000000000000..560d303933a90 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg2ei32.c @@ -0,0 +1,119 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei32_v_bf16mf4x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei32_v_bf16mf4x2(__bf16 *rs1, vuint32mf2_t vs2, + vbfloat16mf4x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei32_v_bf16mf4x2(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei32_v_bf16mf2x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei32_v_bf16mf2x2(__bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x2_t vs3, size_t vl) { + 
return __riscv_vsoxseg2ei32_v_bf16mf2x2(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei32_v_bf16m1x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei32_v_bf16m1x2(__bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei32_v_bf16m1x2(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei32_v_bf16m2x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i32.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei32_v_bf16m2x2(__bf16 *rs1, vuint32m4_t vs2, + vbfloat16m2x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei32_v_bf16m2x2(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei32_v_bf16m4x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i32.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei32_v_bf16m4x2(__bf16 *rs1, vuint32m8_t vs2, + vbfloat16m4x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei32_v_bf16m4x2(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void 
@test_vsoxseg2ei32_v_bf16mf4x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei32_v_bf16mf4x2_m(vbool64_t vm, __bf16 *rs1, + vuint32mf2_t vs2, vbfloat16mf4x2_t vs3, + size_t vl) { + return __riscv_vsoxseg2ei32_v_bf16mf4x2_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei32_v_bf16mf2x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei32_v_bf16mf2x2_m(vbool32_t vm, __bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei32_v_bf16mf2x2_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei32_v_bf16m1x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei32_v_bf16m1x2_m(vbool16_t vm, __bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei32_v_bf16m1x2_m(vm, rs1, 
vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei32_v_bf16m2x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei32_v_bf16m2x2_m(vbool8_t vm, __bf16 *rs1, vuint32m4_t vs2, + vbfloat16m2x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei32_v_bf16m2x2_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei32_v_bf16m4x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.mask.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i32.nxv16i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei32_v_bf16m4x2_m(vbool4_t vm, __bf16 *rs1, vuint32m8_t vs2, + vbfloat16m4x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei32_v_bf16m4x2_m(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg2ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg2ei64.c new file mode 100644 index 0000000000000..41debc8613635 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg2ei64.c @@ -0,0 +1,96 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature 
+zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei64_v_bf16mf4x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei64_v_bf16mf4x2(__bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei64_v_bf16mf4x2(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei64_v_bf16mf2x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei64_v_bf16mf2x2(__bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei64_v_bf16mf2x2(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei64_v_bf16m1x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei64_v_bf16m1x2(__bf16 *rs1, vuint64m4_t vs2, + 
vbfloat16m1x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei64_v_bf16m1x2(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei64_v_bf16m2x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i64.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei64_v_bf16m2x2(__bf16 *rs1, vuint64m8_t vs2, + vbfloat16m2x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei64_v_bf16m2x2(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei64_v_bf16mf4x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei64_v_bf16mf4x2_m(vbool64_t vm, __bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei64_v_bf16mf4x2_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei64_v_bf16mf2x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei64_v_bf16mf2x2_m(vbool32_t vm, __bf16 *rs1, vuint64m2_t vs2, + 
vbfloat16mf2x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei64_v_bf16mf2x2_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei64_v_bf16m1x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei64_v_bf16m1x2_m(vbool16_t vm, __bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei64_v_bf16m1x2_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei64_v_bf16m2x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei64_v_bf16m2x2_m(vbool8_t vm, __bf16 *rs1, vuint64m8_t vs2, + vbfloat16m2x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei64_v_bf16m2x2_m(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg2ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg2ei8.c new file mode 100644 index 0000000000000..55b3ff48537a4 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg2ei8.c @@ -0,0 +1,118 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// 
REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei8_v_bf16mf4x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei8_v_bf16mf4x2(__bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei8_v_bf16mf4x2(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei8_v_bf16mf2x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei8_v_bf16mf2x2(__bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei8_v_bf16mf2x2(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei8_v_bf16m1x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void 
test_vsoxseg2ei8_v_bf16m1x2(__bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei8_v_bf16m1x2(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei8_v_bf16m2x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i8.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei8_v_bf16m2x2(__bf16 *rs1, vuint8m1_t vs2, + vbfloat16m2x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei8_v_bf16m2x2(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei8_v_bf16m4x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i8.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei8_v_bf16m4x2(__bf16 *rs1, vuint8m2_t vs2, + vbfloat16m4x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei8_v_bf16m4x2(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei8_v_bf16mf4x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei8_v_bf16mf4x2_m(vbool64_t vm, __bf16 *rs1, vuint8mf8_t vs2, + 
vbfloat16mf4x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei8_v_bf16mf4x2_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei8_v_bf16mf2x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei8_v_bf16mf2x2_m(vbool32_t vm, __bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei8_v_bf16mf2x2_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei8_v_bf16m1x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei8_v_bf16m1x2_m(vbool16_t vm, __bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei8_v_bf16m1x2_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei8_v_bf16m2x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void 
test_vsoxseg2ei8_v_bf16m2x2_m(vbool8_t vm, __bf16 *rs1, vuint8m1_t vs2, + vbfloat16m2x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei8_v_bf16m2x2_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei8_v_bf16m4x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.mask.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i8.nxv16i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei8_v_bf16m4x2_m(vbool4_t vm, __bf16 *rs1, vuint8m2_t vs2, + vbfloat16m4x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei8_v_bf16m4x2_m(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg3ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg3ei32.c new file mode 100644 index 0000000000000..9da13af17a33f --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg3ei32.c @@ -0,0 +1,97 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg3ei32_v_bf16mf4x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void 
@llvm.riscv.vsoxseg3.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg3ei32_v_bf16mf4x3(__bf16 *rs1, vuint32mf2_t vs2, + vbfloat16mf4x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei32_v_bf16mf4x3(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg3ei32_v_bf16mf2x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg3ei32_v_bf16mf2x3(__bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei32_v_bf16mf2x3(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg3ei32_v_bf16m1x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg3.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg3ei32_v_bf16m1x3(__bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei32_v_bf16m1x3(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg3ei32_v_bf16m2x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg3.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i32.i64(target("riscv.vector.tuple", , 3) 
[[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg3ei32_v_bf16m2x3(__bf16 *rs1, vuint32m4_t vs2, + vbfloat16m2x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei32_v_bf16m2x3(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg3ei32_v_bf16mf4x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg3ei32_v_bf16mf4x3_m(vbool64_t vm, __bf16 *rs1, + vuint32mf2_t vs2, vbfloat16mf4x3_t vs3, + size_t vl) { + return __riscv_vsoxseg3ei32_v_bf16mf4x3_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg3ei32_v_bf16mf2x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg3ei32_v_bf16mf2x3_m(vbool32_t vm, __bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei32_v_bf16mf2x3_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg3ei32_v_bf16m1x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void 
@llvm.riscv.vsoxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg3ei32_v_bf16m1x3_m(vbool16_t vm, __bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei32_v_bf16m1x3_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg3ei32_v_bf16m2x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg3ei32_v_bf16m2x3_m(vbool8_t vm, __bf16 *rs1, vuint32m4_t vs2, + vbfloat16m2x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei32_v_bf16m2x3_m(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg3ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg3ei64.c new file mode 100644 index 0000000000000..f8e974cff66cd --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg3ei64.c @@ -0,0 +1,96 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg3ei64_v_bf16mf4x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], 
[[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg3.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg3ei64_v_bf16mf4x3(__bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei64_v_bf16mf4x3(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg3ei64_v_bf16mf2x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg3ei64_v_bf16mf2x3(__bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei64_v_bf16mf2x3(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg3ei64_v_bf16m1x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg3.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg3ei64_v_bf16m1x3(__bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei64_v_bf16m1x3(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg3ei64_v_bf16m2x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// 
CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg3.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i64.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg3ei64_v_bf16m2x3(__bf16 *rs1, vuint64m8_t vs2, + vbfloat16m2x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei64_v_bf16m2x3(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg3ei64_v_bf16mf4x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg3ei64_v_bf16mf4x3_m(vbool64_t vm, __bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei64_v_bf16mf4x3_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg3ei64_v_bf16mf2x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg3ei64_v_bf16mf2x3_m(vbool32_t vm, __bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei64_v_bf16mf2x3_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg3ei64_v_bf16m1x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef 
[[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg3ei64_v_bf16m1x3_m(vbool16_t vm, __bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei64_v_bf16m1x3_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg3ei64_v_bf16m2x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg3ei64_v_bf16m2x3_m(vbool8_t vm, __bf16 *rs1, vuint64m8_t vs2, + vbfloat16m2x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei64_v_bf16m2x3_m(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg3ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg3ei8.c new file mode 100644 index 0000000000000..d80f01903e5c0 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg3ei8.c @@ -0,0 +1,96 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void 
@test_vsoxseg3ei8_v_bf16mf4x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg3.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg3ei8_v_bf16mf4x3(__bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei8_v_bf16mf4x3(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg3ei8_v_bf16mf2x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg3ei8_v_bf16mf2x3(__bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei8_v_bf16mf2x3(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg3ei8_v_bf16m1x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg3.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg3ei8_v_bf16m1x3(__bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei8_v_bf16m1x3(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg3ei8_v_bf16m2x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], 
target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg3.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i8.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg3ei8_v_bf16m2x3(__bf16 *rs1, vuint8m1_t vs2, + vbfloat16m2x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei8_v_bf16m2x3(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg3ei8_v_bf16mf4x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg3ei8_v_bf16mf4x3_m(vbool64_t vm, __bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei8_v_bf16mf4x3_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg3ei8_v_bf16mf2x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg3ei8_v_bf16mf2x3_m(vbool32_t vm, __bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei8_v_bf16mf2x3_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg3ei8_v_bf16m1x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], 
[[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg3ei8_v_bf16m1x3_m(vbool16_t vm, __bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei8_v_bf16m1x3_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg3ei8_v_bf16m2x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg3ei8_v_bf16m2x3_m(vbool8_t vm, __bf16 *rs1, vuint8m1_t vs2, + vbfloat16m2x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei8_v_bf16m2x3_m(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg4ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg4ei32.c new file mode 100644 index 0000000000000..dec74165c9e87 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg4ei32.c @@ -0,0 +1,97 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck 
--check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg4ei32_v_bf16mf4x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg4.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg4ei32_v_bf16mf4x4(__bf16 *rs1, vuint32mf2_t vs2, + vbfloat16mf4x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei32_v_bf16mf4x4(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg4ei32_v_bf16mf2x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg4ei32_v_bf16mf2x4(__bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei32_v_bf16mf2x4(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg4ei32_v_bf16m1x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg4.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg4ei32_v_bf16m1x4(__bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei32_v_bf16m1x4(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void 
@test_vsoxseg4ei32_v_bf16m2x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg4.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i32.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg4ei32_v_bf16m2x4(__bf16 *rs1, vuint32m4_t vs2, + vbfloat16m2x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei32_v_bf16m2x4(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg4ei32_v_bf16mf4x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg4ei32_v_bf16mf4x4_m(vbool64_t vm, __bf16 *rs1, + vuint32mf2_t vs2, vbfloat16mf4x4_t vs3, + size_t vl) { + return __riscv_vsoxseg4ei32_v_bf16mf4x4_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg4ei32_v_bf16mf2x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg4ei32_v_bf16mf2x4_m(vbool32_t vm, __bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei32_v_bf16mf2x4_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define 
dso_local void @test_vsoxseg4ei32_v_bf16m1x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg4ei32_v_bf16m1x4_m(vbool16_t vm, __bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei32_v_bf16m1x4_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg4ei32_v_bf16m2x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg4ei32_v_bf16m2x4_m(vbool8_t vm, __bf16 *rs1, vuint32m4_t vs2, + vbfloat16m2x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei32_v_bf16m2x4_m(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg4ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg4ei64.c new file mode 100644 index 0000000000000..9b042b97ba547 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg4ei64.c @@ -0,0 +1,96 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin 
-disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg4ei64_v_bf16mf4x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg4.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg4ei64_v_bf16mf4x4(__bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei64_v_bf16mf4x4(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg4ei64_v_bf16mf2x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg4ei64_v_bf16mf2x4(__bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei64_v_bf16mf2x4(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg4ei64_v_bf16m1x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg4.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg4ei64_v_bf16m1x4(__bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x4_t vs3, size_t vl) { + return 
__riscv_vsoxseg4ei64_v_bf16m1x4(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg4ei64_v_bf16m2x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg4.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i64.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg4ei64_v_bf16m2x4(__bf16 *rs1, vuint64m8_t vs2, + vbfloat16m2x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei64_v_bf16m2x4(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg4ei64_v_bf16mf4x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg4ei64_v_bf16mf4x4_m(vbool64_t vm, __bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei64_v_bf16mf4x4_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg4ei64_v_bf16mf2x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg4ei64_v_bf16mf2x4_m(vbool32_t vm, __bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x4_t vs3, size_t vl) { + return 
__riscv_vsoxseg4ei64_v_bf16mf2x4_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg4ei64_v_bf16m1x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg4ei64_v_bf16m1x4_m(vbool16_t vm, __bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei64_v_bf16m1x4_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg4ei64_v_bf16m2x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg4ei64_v_bf16m2x4_m(vbool8_t vm, __bf16 *rs1, vuint64m8_t vs2, + vbfloat16m2x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei64_v_bf16m2x4_m(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg4ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg4ei8.c new file mode 100644 index 0000000000000..5c1ec6cbfd3ff --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg4ei8.c @@ -0,0 +1,96 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: 
%clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg4ei8_v_bf16mf4x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg4.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg4ei8_v_bf16mf4x4(__bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei8_v_bf16mf4x4(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg4ei8_v_bf16mf2x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg4ei8_v_bf16mf2x4(__bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei8_v_bf16mf2x4(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg4ei8_v_bf16m1x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg4.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg4ei8_v_bf16m1x4(__bf16 *rs1, 
vuint8mf2_t vs2, + vbfloat16m1x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei8_v_bf16m1x4(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg4ei8_v_bf16m2x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg4.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i8.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg4ei8_v_bf16m2x4(__bf16 *rs1, vuint8m1_t vs2, + vbfloat16m2x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei8_v_bf16m2x4(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg4ei8_v_bf16mf4x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg4ei8_v_bf16mf4x4_m(vbool64_t vm, __bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei8_v_bf16mf4x4_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg4ei8_v_bf16mf2x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg4ei8_v_bf16mf2x4_m(vbool32_t vm, __bf16 *rs1, vuint8mf4_t 
vs2, + vbfloat16mf2x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei8_v_bf16mf2x4_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg4ei8_v_bf16m1x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg4ei8_v_bf16m1x4_m(vbool16_t vm, __bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei8_v_bf16m1x4_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg4ei8_v_bf16m2x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg4ei8_v_bf16m2x4_m(vbool8_t vm, __bf16 *rs1, vuint8m1_t vs2, + vbfloat16m2x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei8_v_bf16m2x4_m(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg5ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg5ei32.c new file mode 100644 index 0000000000000..e278c9da207ec --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg5ei32.c @@ -0,0 +1,75 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// 
REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg5ei32_v_bf16mf4x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg5.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg5ei32_v_bf16mf4x5(__bf16 *rs1, vuint32mf2_t vs2, + vbfloat16mf4x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei32_v_bf16mf4x5(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg5ei32_v_bf16mf2x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg5ei32_v_bf16mf2x5(__bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei32_v_bf16mf2x5(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg5ei32_v_bf16m1x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg5.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret 
void +// +void test_vsoxseg5ei32_v_bf16m1x5(__bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei32_v_bf16m1x5(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg5ei32_v_bf16mf4x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg5ei32_v_bf16mf4x5_m(vbool64_t vm, __bf16 *rs1, + vuint32mf2_t vs2, vbfloat16mf4x5_t vs3, + size_t vl) { + return __riscv_vsoxseg5ei32_v_bf16mf4x5_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg5ei32_v_bf16mf2x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg5ei32_v_bf16mf2x5_m(vbool32_t vm, __bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei32_v_bf16mf2x5_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg5ei32_v_bf16m1x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], 
i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg5ei32_v_bf16m1x5_m(vbool16_t vm, __bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei32_v_bf16m1x5_m(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg5ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg5ei64.c new file mode 100644 index 0000000000000..1439ab40b5be8 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg5ei64.c @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg5ei64_v_bf16mf4x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg5.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg5ei64_v_bf16mf4x5(__bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei64_v_bf16mf4x5(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg5ei64_v_bf16mf2x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void 
@llvm.riscv.vsoxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg5ei64_v_bf16mf2x5(__bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei64_v_bf16mf2x5(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg5ei64_v_bf16m1x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg5.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg5ei64_v_bf16m1x5(__bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei64_v_bf16m1x5(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg5ei64_v_bf16mf4x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg5ei64_v_bf16mf4x5_m(vbool64_t vm, __bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei64_v_bf16mf4x5_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg5ei64_v_bf16mf2x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void 
@llvm.riscv.vsoxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg5ei64_v_bf16mf2x5_m(vbool32_t vm, __bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei64_v_bf16mf2x5_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg5ei64_v_bf16m1x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg5ei64_v_bf16m1x5_m(vbool16_t vm, __bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei64_v_bf16m1x5_m(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg5ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg5ei8.c new file mode 100644 index 0000000000000..1b72716f25088 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg5ei8.c @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg5ei8_v_bf16mf4x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], 
[[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg5.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg5ei8_v_bf16mf4x5(__bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei8_v_bf16mf4x5(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg5ei8_v_bf16mf2x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg5ei8_v_bf16mf2x5(__bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei8_v_bf16mf2x5(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg5ei8_v_bf16m1x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg5.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg5ei8_v_bf16m1x5(__bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei8_v_bf16m1x5(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg5ei8_v_bf16mf4x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { 
+// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg5ei8_v_bf16mf4x5_m(vbool64_t vm, __bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei8_v_bf16mf4x5_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg5ei8_v_bf16mf2x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg5ei8_v_bf16mf2x5_m(vbool32_t vm, __bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei8_v_bf16mf2x5_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg5ei8_v_bf16m1x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg5ei8_v_bf16m1x5_m(vbool16_t vm, __bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei8_v_bf16m1x5_m(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg6ei32.c 
b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg6ei32.c new file mode 100644 index 0000000000000..7c659d353fdc4 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg6ei32.c @@ -0,0 +1,75 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg6ei32_v_bf16mf4x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg6.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg6ei32_v_bf16mf4x6(__bf16 *rs1, vuint32mf2_t vs2, + vbfloat16mf4x6_t vs3, size_t vl) { + return __riscv_vsoxseg6ei32_v_bf16mf4x6(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg6ei32_v_bf16mf2x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg6ei32_v_bf16mf2x6(__bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x6_t vs3, size_t vl) { + return __riscv_vsoxseg6ei32_v_bf16mf2x6(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void 
@test_vsoxseg6ei32_v_bf16m1x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg6.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg6ei32_v_bf16m1x6(__bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x6_t vs3, size_t vl) { + return __riscv_vsoxseg6ei32_v_bf16m1x6(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg6ei32_v_bf16mf4x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg6ei32_v_bf16mf4x6_m(vbool64_t vm, __bf16 *rs1, + vuint32mf2_t vs2, vbfloat16mf4x6_t vs3, + size_t vl) { + return __riscv_vsoxseg6ei32_v_bf16mf4x6_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg6ei32_v_bf16mf2x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg6ei32_v_bf16mf2x6_m(vbool32_t vm, __bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x6_t vs3, size_t vl) { + return __riscv_vsoxseg6ei32_v_bf16mf2x6_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define 
dso_local void @test_vsoxseg6ei32_v_bf16m1x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg6ei32_v_bf16m1x6_m(vbool16_t vm, __bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x6_t vs3, size_t vl) { + return __riscv_vsoxseg6ei32_v_bf16m1x6_m(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg6ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg6ei64.c new file mode 100644 index 0000000000000..b15b31e58fd01 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg6ei64.c @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg6ei64_v_bf16mf4x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg6.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg6ei64_v_bf16mf4x6(__bf16 *rs1, vuint64m1_t vs2, + 
vbfloat16mf4x6_t vs3, size_t vl) { + return __riscv_vsoxseg6ei64_v_bf16mf4x6(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg6ei64_v_bf16mf2x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg6ei64_v_bf16mf2x6(__bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x6_t vs3, size_t vl) { + return __riscv_vsoxseg6ei64_v_bf16mf2x6(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg6ei64_v_bf16m1x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg6.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg6ei64_v_bf16m1x6(__bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x6_t vs3, size_t vl) { + return __riscv_vsoxseg6ei64_v_bf16m1x6(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg6ei64_v_bf16mf4x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg6ei64_v_bf16mf4x6_m(vbool64_t vm, __bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x6_t vs3, size_t vl) { + return 
__riscv_vsoxseg6ei64_v_bf16mf4x6_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg6ei64_v_bf16mf2x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg6ei64_v_bf16mf2x6_m(vbool32_t vm, __bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x6_t vs3, size_t vl) { + return __riscv_vsoxseg6ei64_v_bf16mf2x6_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg6ei64_v_bf16m1x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg6ei64_v_bf16m1x6_m(vbool16_t vm, __bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x6_t vs3, size_t vl) { + return __riscv_vsoxseg6ei64_v_bf16m1x6_m(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg6ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg6ei8.c new file mode 100644 index 0000000000000..a18dc0cdc31cf --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg6ei8.c @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: 
%clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg6ei8_v_bf16mf4x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg6.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg6ei8_v_bf16mf4x6(__bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x6_t vs3, size_t vl) { + return __riscv_vsoxseg6ei8_v_bf16mf4x6(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg6ei8_v_bf16mf2x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg6ei8_v_bf16mf2x6(__bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x6_t vs3, size_t vl) { + return __riscv_vsoxseg6ei8_v_bf16mf2x6(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg6ei8_v_bf16m1x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg6.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg6ei8_v_bf16m1x6(__bf16 *rs1, 
vuint8mf2_t vs2, + vbfloat16m1x6_t vs3, size_t vl) { + return __riscv_vsoxseg6ei8_v_bf16m1x6(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg6ei8_v_bf16mf4x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg6ei8_v_bf16mf4x6_m(vbool64_t vm, __bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x6_t vs3, size_t vl) { + return __riscv_vsoxseg6ei8_v_bf16mf4x6_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg6ei8_v_bf16mf2x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg6ei8_v_bf16mf2x6_m(vbool32_t vm, __bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x6_t vs3, size_t vl) { + return __riscv_vsoxseg6ei8_v_bf16mf2x6_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg6ei8_v_bf16m1x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void 
test_vsoxseg6ei8_v_bf16m1x6_m(vbool16_t vm, __bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x6_t vs3, size_t vl) { + return __riscv_vsoxseg6ei8_v_bf16m1x6_m(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg7ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg7ei32.c new file mode 100644 index 0000000000000..6e41b5491682e --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg7ei32.c @@ -0,0 +1,75 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg7ei32_v_bf16mf4x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg7.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg7ei32_v_bf16mf4x7(__bf16 *rs1, vuint32mf2_t vs2, + vbfloat16mf4x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei32_v_bf16mf4x7(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg7ei32_v_bf16mf2x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 7) 
[[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg7ei32_v_bf16mf2x7(__bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei32_v_bf16mf2x7(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg7ei32_v_bf16m1x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg7.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg7ei32_v_bf16m1x7(__bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei32_v_bf16m1x7(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg7ei32_v_bf16mf4x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg7ei32_v_bf16mf4x7_m(vbool64_t vm, __bf16 *rs1, + vuint32mf2_t vs2, vbfloat16mf4x7_t vs3, + size_t vl) { + return __riscv_vsoxseg7ei32_v_bf16mf4x7_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg7ei32_v_bf16mf2x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], 
[[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg7ei32_v_bf16mf2x7_m(vbool32_t vm, __bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei32_v_bf16mf2x7_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg7ei32_v_bf16m1x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg7ei32_v_bf16m1x7_m(vbool16_t vm, __bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei32_v_bf16m1x7_m(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg7ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg7ei64.c new file mode 100644 index 0000000000000..ee8cedf02225f --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg7ei64.c @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg7ei64_v_bf16mf4x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// 
CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg7.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg7ei64_v_bf16mf4x7(__bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei64_v_bf16mf4x7(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg7ei64_v_bf16mf2x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg7ei64_v_bf16mf2x7(__bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei64_v_bf16mf2x7(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg7ei64_v_bf16m1x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg7.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg7ei64_v_bf16m1x7(__bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei64_v_bf16m1x7(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg7ei64_v_bf16mf4x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void 
@llvm.riscv.vsoxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg7ei64_v_bf16mf4x7_m(vbool64_t vm, __bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei64_v_bf16mf4x7_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg7ei64_v_bf16mf2x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg7ei64_v_bf16mf2x7_m(vbool32_t vm, __bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei64_v_bf16mf2x7_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg7ei64_v_bf16m1x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg7ei64_v_bf16m1x7_m(vbool16_t vm, __bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei64_v_bf16m1x7_m(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg7ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg7ei8.c new file 
mode 100644 index 0000000000000..ed07ca1803b75 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg7ei8.c @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg7ei8_v_bf16mf4x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg7.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg7ei8_v_bf16mf4x7(__bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei8_v_bf16mf4x7(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg7ei8_v_bf16mf2x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg7ei8_v_bf16mf2x7(__bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei8_v_bf16mf2x7(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg7ei8_v_bf16m1x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) 
[[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg7.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg7ei8_v_bf16m1x7(__bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei8_v_bf16m1x7(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg7ei8_v_bf16mf4x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg7ei8_v_bf16mf4x7_m(vbool64_t vm, __bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei8_v_bf16mf4x7_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg7ei8_v_bf16mf2x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg7ei8_v_bf16mf2x7_m(vbool32_t vm, __bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei8_v_bf16mf2x7_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg7ei8_v_bf16m1x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], 
target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg7ei8_v_bf16m1x7_m(vbool16_t vm, __bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei8_v_bf16m1x7_m(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg8ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg8ei32.c new file mode 100644 index 0000000000000..c2af8bdb067dc --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg8ei32.c @@ -0,0 +1,75 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg8ei32_v_bf16mf4x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg8.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg8ei32_v_bf16mf4x8(__bf16 *rs1, vuint32mf2_t vs2, + vbfloat16mf4x8_t vs3, size_t vl) { + return __riscv_vsoxseg8ei32_v_bf16mf4x8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define 
dso_local void @test_vsoxseg8ei32_v_bf16mf2x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg8ei32_v_bf16mf2x8(__bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x8_t vs3, size_t vl) { + return __riscv_vsoxseg8ei32_v_bf16mf2x8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg8ei32_v_bf16m1x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg8.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg8ei32_v_bf16m1x8(__bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x8_t vs3, size_t vl) { + return __riscv_vsoxseg8ei32_v_bf16m1x8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg8ei32_v_bf16mf4x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg8ei32_v_bf16mf4x8_m(vbool64_t vm, __bf16 *rs1, + vuint32mf2_t vs2, vbfloat16mf4x8_t vs3, + size_t vl) { + return __riscv_vsoxseg8ei32_v_bf16mf4x8_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg8ei32_v_bf16mf2x8_m( 
+// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg8ei32_v_bf16mf2x8_m(vbool32_t vm, __bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x8_t vs3, size_t vl) { + return __riscv_vsoxseg8ei32_v_bf16mf2x8_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg8ei32_v_bf16m1x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg8ei32_v_bf16m1x8_m(vbool16_t vm, __bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x8_t vs3, size_t vl) { + return __riscv_vsoxseg8ei32_v_bf16m1x8_m(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg8ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg8ei64.c new file mode 100644 index 0000000000000..ec8bc85ac4f86 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg8ei64.c @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | 
opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg8ei64_v_bf16mf4x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg8.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg8ei64_v_bf16mf4x8(__bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x8_t vs3, size_t vl) { + return __riscv_vsoxseg8ei64_v_bf16mf4x8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg8ei64_v_bf16mf2x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg8ei64_v_bf16mf2x8(__bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x8_t vs3, size_t vl) { + return __riscv_vsoxseg8ei64_v_bf16mf2x8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg8ei64_v_bf16m1x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg8.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg8ei64_v_bf16m1x8(__bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x8_t vs3, size_t vl) { + return __riscv_vsoxseg8ei64_v_bf16m1x8(rs1, vs2, vs3, vl); +} + +// 
CHECK-RV64-LABEL: define dso_local void @test_vsoxseg8ei64_v_bf16mf4x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg8ei64_v_bf16mf4x8_m(vbool64_t vm, __bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x8_t vs3, size_t vl) { + return __riscv_vsoxseg8ei64_v_bf16mf4x8_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg8ei64_v_bf16mf2x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg8ei64_v_bf16mf2x8_m(vbool32_t vm, __bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x8_t vs3, size_t vl) { + return __riscv_vsoxseg8ei64_v_bf16mf2x8_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg8ei64_v_bf16m1x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg8ei64_v_bf16m1x8_m(vbool16_t vm, __bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x8_t vs3, size_t vl) { + return 
__riscv_vsoxseg8ei64_v_bf16m1x8_m(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg8ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg8ei8.c new file mode 100644 index 0000000000000..5ecd7ff291b8c --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsoxseg8ei8.c @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg8ei8_v_bf16mf4x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg8.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg8ei8_v_bf16mf4x8(__bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x8_t vs3, size_t vl) { + return __riscv_vsoxseg8ei8_v_bf16mf4x8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg8ei8_v_bf16mf2x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg8ei8_v_bf16mf2x8(__bf16 *rs1, 
vuint8mf4_t vs2, + vbfloat16mf2x8_t vs3, size_t vl) { + return __riscv_vsoxseg8ei8_v_bf16mf2x8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg8ei8_v_bf16m1x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg8.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg8ei8_v_bf16m1x8(__bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x8_t vs3, size_t vl) { + return __riscv_vsoxseg8ei8_v_bf16m1x8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg8ei8_v_bf16mf4x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg8ei8_v_bf16mf4x8_m(vbool64_t vm, __bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x8_t vs3, size_t vl) { + return __riscv_vsoxseg8ei8_v_bf16mf4x8_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg8ei8_v_bf16mf2x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg8ei8_v_bf16mf2x8_m(vbool32_t vm, __bf16 *rs1, vuint8mf4_t 
vs2, + vbfloat16mf2x8_t vs3, size_t vl) { + return __riscv_vsoxseg8ei8_v_bf16mf2x8_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg8ei8_v_bf16m1x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg8ei8_v_bf16m1x8_m(vbool16_t vm, __bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x8_t vs3, size_t vl) { + return __riscv_vsoxseg8ei8_v_bf16m1x8_m(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxei32.c new file mode 100644 index 0000000000000..af8509247cf26 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxei32.c @@ -0,0 +1,118 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei32_v_bf16mf4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.nxv1bf16.p0.nxv1i32.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei32_v_bf16mf4(__bf16 *rs1, vuint32mf2_t 
rs2, vbfloat16mf4_t vs3, + size_t vl) { + return __riscv_vsuxei32_v_bf16mf4(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei32_v_bf16mf2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.nxv2bf16.p0.nxv2i32.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei32_v_bf16mf2(__bf16 *rs1, vuint32m1_t rs2, vbfloat16mf2_t vs3, + size_t vl) { + return __riscv_vsuxei32_v_bf16mf2(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei32_v_bf16m1( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.nxv4bf16.p0.nxv4i32.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei32_v_bf16m1(__bf16 *rs1, vuint32m2_t rs2, vbfloat16m1_t vs3, + size_t vl) { + return __riscv_vsuxei32_v_bf16m1(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei32_v_bf16m2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.nxv8bf16.p0.nxv8i32.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei32_v_bf16m2(__bf16 *rs1, vuint32m4_t rs2, vbfloat16m2_t vs3, + size_t vl) { + return __riscv_vsuxei32_v_bf16m2(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei32_v_bf16m4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.nxv16bf16.p0.nxv16i32.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// 
+void test_vsuxei32_v_bf16m4(__bf16 *rs1, vuint32m8_t rs2, vbfloat16m4_t vs3, + size_t vl) { + return __riscv_vsuxei32_v_bf16m4(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei32_v_bf16mf4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.mask.nxv1bf16.p0.nxv1i32.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei32_v_bf16mf4_m(vbool64_t vm, __bf16 *rs1, vuint32mf2_t rs2, + vbfloat16mf4_t vs3, size_t vl) { + return __riscv_vsuxei32_v_bf16mf4_m(vm, rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei32_v_bf16mf2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.mask.nxv2bf16.p0.nxv2i32.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei32_v_bf16mf2_m(vbool32_t vm, __bf16 *rs1, vuint32m1_t rs2, + vbfloat16mf2_t vs3, size_t vl) { + return __riscv_vsuxei32_v_bf16mf2_m(vm, rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei32_v_bf16m1_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.mask.nxv4bf16.p0.nxv4i32.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei32_v_bf16m1_m(vbool16_t vm, __bf16 *rs1, vuint32m2_t rs2, + vbfloat16m1_t vs3, size_t vl) { + return __riscv_vsuxei32_v_bf16m1_m(vm, rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei32_v_bf16m2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], 
i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.mask.nxv8bf16.p0.nxv8i32.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei32_v_bf16m2_m(vbool8_t vm, __bf16 *rs1, vuint32m4_t rs2, + vbfloat16m2_t vs3, size_t vl) { + return __riscv_vsuxei32_v_bf16m2_m(vm, rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei32_v_bf16m4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.mask.nxv16bf16.p0.nxv16i32.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei32_v_bf16m4_m(vbool4_t vm, __bf16 *rs1, vuint32m8_t rs2, + vbfloat16m4_t vs3, size_t vl) { + return __riscv_vsuxei32_v_bf16m4_m(vm, rs1, rs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxei64.c new file mode 100644 index 0000000000000..d230cd6b4b757 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxei64.c @@ -0,0 +1,96 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei64_v_bf16mf4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void 
@llvm.riscv.vsuxei.nxv1bf16.p0.nxv1i64.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei64_v_bf16mf4(__bf16 *rs1, vuint64m1_t rs2, vbfloat16mf4_t vs3, + size_t vl) { + return __riscv_vsuxei64_v_bf16mf4(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei64_v_bf16mf2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.nxv2bf16.p0.nxv2i64.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei64_v_bf16mf2(__bf16 *rs1, vuint64m2_t rs2, vbfloat16mf2_t vs3, + size_t vl) { + return __riscv_vsuxei64_v_bf16mf2(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei64_v_bf16m1( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.nxv4bf16.p0.nxv4i64.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei64_v_bf16m1(__bf16 *rs1, vuint64m4_t rs2, vbfloat16m1_t vs3, + size_t vl) { + return __riscv_vsuxei64_v_bf16m1(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei64_v_bf16m2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.nxv8bf16.p0.nxv8i64.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei64_v_bf16m2(__bf16 *rs1, vuint64m8_t rs2, vbfloat16m2_t vs3, + size_t vl) { + return __riscv_vsuxei64_v_bf16m2(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei64_v_bf16mf4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) 
#[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.mask.nxv1bf16.p0.nxv1i64.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei64_v_bf16mf4_m(vbool64_t vm, __bf16 *rs1, vuint64m1_t rs2, + vbfloat16mf4_t vs3, size_t vl) { + return __riscv_vsuxei64_v_bf16mf4_m(vm, rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei64_v_bf16mf2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.mask.nxv2bf16.p0.nxv2i64.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei64_v_bf16mf2_m(vbool32_t vm, __bf16 *rs1, vuint64m2_t rs2, + vbfloat16mf2_t vs3, size_t vl) { + return __riscv_vsuxei64_v_bf16mf2_m(vm, rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei64_v_bf16m1_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.mask.nxv4bf16.p0.nxv4i64.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei64_v_bf16m1_m(vbool16_t vm, __bf16 *rs1, vuint64m4_t rs2, + vbfloat16m1_t vs3, size_t vl) { + return __riscv_vsuxei64_v_bf16m1_m(vm, rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei64_v_bf16m2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.mask.nxv8bf16.p0.nxv8i64.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei64_v_bf16m2_m(vbool8_t vm, __bf16 *rs1, vuint64m8_t rs2, + vbfloat16m2_t vs3, size_t vl) 
{ + return __riscv_vsuxei64_v_bf16m2_m(vm, rs1, rs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxei8.c new file mode 100644 index 0000000000000..6c91f5feb6d16 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxei8.c @@ -0,0 +1,140 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei8_v_bf16mf4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.nxv1bf16.p0.nxv1i8.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei8_v_bf16mf4(__bf16 *rs1, vuint8mf8_t rs2, vbfloat16mf4_t vs3, + size_t vl) { + return __riscv_vsuxei8_v_bf16mf4(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei8_v_bf16mf2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.nxv2bf16.p0.nxv2i8.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei8_v_bf16mf2(__bf16 *rs1, vuint8mf4_t rs2, vbfloat16mf2_t vs3, + size_t vl) { + return __riscv_vsuxei8_v_bf16mf2(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei8_v_bf16m1( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], 
[[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.nxv4bf16.p0.nxv4i8.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei8_v_bf16m1(__bf16 *rs1, vuint8mf2_t rs2, vbfloat16m1_t vs3, + size_t vl) { + return __riscv_vsuxei8_v_bf16m1(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei8_v_bf16m2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.nxv8bf16.p0.nxv8i8.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei8_v_bf16m2(__bf16 *rs1, vuint8m1_t rs2, vbfloat16m2_t vs3, + size_t vl) { + return __riscv_vsuxei8_v_bf16m2(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei8_v_bf16m4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.nxv16bf16.p0.nxv16i8.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei8_v_bf16m4(__bf16 *rs1, vuint8m2_t rs2, vbfloat16m4_t vs3, + size_t vl) { + return __riscv_vsuxei8_v_bf16m4(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei8_v_bf16m8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.nxv32bf16.p0.nxv32i8.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei8_v_bf16m8(__bf16 *rs1, vuint8m4_t rs2, vbfloat16m8_t vs3, + size_t vl) { + return __riscv_vsuxei8_v_bf16m8(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei8_v_bf16mf4_m( +// 
CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.mask.nxv1bf16.p0.nxv1i8.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei8_v_bf16mf4_m(vbool64_t vm, __bf16 *rs1, vuint8mf8_t rs2, + vbfloat16mf4_t vs3, size_t vl) { + return __riscv_vsuxei8_v_bf16mf4_m(vm, rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei8_v_bf16mf2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.mask.nxv2bf16.p0.nxv2i8.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei8_v_bf16mf2_m(vbool32_t vm, __bf16 *rs1, vuint8mf4_t rs2, + vbfloat16mf2_t vs3, size_t vl) { + return __riscv_vsuxei8_v_bf16mf2_m(vm, rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei8_v_bf16m1_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.mask.nxv4bf16.p0.nxv4i8.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei8_v_bf16m1_m(vbool16_t vm, __bf16 *rs1, vuint8mf2_t rs2, + vbfloat16m1_t vs3, size_t vl) { + return __riscv_vsuxei8_v_bf16m1_m(vm, rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei8_v_bf16m2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.mask.nxv8bf16.p0.nxv8i8.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void 
test_vsuxei8_v_bf16m2_m(vbool8_t vm, __bf16 *rs1, vuint8m1_t rs2, + vbfloat16m2_t vs3, size_t vl) { + return __riscv_vsuxei8_v_bf16m2_m(vm, rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei8_v_bf16m4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.mask.nxv16bf16.p0.nxv16i8.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei8_v_bf16m4_m(vbool4_t vm, __bf16 *rs1, vuint8m2_t rs2, + vbfloat16m4_t vs3, size_t vl) { + return __riscv_vsuxei8_v_bf16m4_m(vm, rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei8_v_bf16m8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.mask.nxv32bf16.p0.nxv32i8.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei8_v_bf16m8_m(vbool2_t vm, __bf16 *rs1, vuint8m4_t rs2, + vbfloat16m8_t vs3, size_t vl) { + return __riscv_vsuxei8_v_bf16m8_m(vm, rs1, rs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg2ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg2ei32.c new file mode 100644 index 0000000000000..95e346cdd4c63 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg2ei32.c @@ -0,0 +1,119 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S 
-passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei32_v_bf16mf4x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei32_v_bf16mf4x2(__bf16 *rs1, vuint32mf2_t vs2, + vbfloat16mf4x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei32_v_bf16mf4x2(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei32_v_bf16mf2x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei32_v_bf16mf2x2(__bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei32_v_bf16mf2x2(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei32_v_bf16m1x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei32_v_bf16m1x2(__bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei32_v_bf16m1x2(rs1, vs2, vs3, vl); +} + +// 
CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei32_v_bf16m2x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i32.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei32_v_bf16m2x2(__bf16 *rs1, vuint32m4_t vs2, + vbfloat16m2x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei32_v_bf16m2x2(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei32_v_bf16m4x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i32.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei32_v_bf16m4x2(__bf16 *rs1, vuint32m8_t vs2, + vbfloat16m4x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei32_v_bf16m4x2(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei32_v_bf16mf4x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei32_v_bf16mf4x2_m(vbool64_t vm, __bf16 *rs1, + vuint32mf2_t vs2, vbfloat16mf4x2_t vs3, + size_t vl) { + return __riscv_vsuxseg2ei32_v_bf16mf4x2_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void 
@test_vsuxseg2ei32_v_bf16mf2x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei32_v_bf16mf2x2_m(vbool32_t vm, __bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei32_v_bf16mf2x2_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei32_v_bf16m1x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei32_v_bf16m1x2_m(vbool16_t vm, __bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei32_v_bf16m1x2_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei32_v_bf16m2x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei32_v_bf16m2x2_m(vbool8_t vm, __bf16 *rs1, vuint32m4_t vs2, + vbfloat16m2x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei32_v_bf16m2x2_m(vm, rs1, vs2, 
vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei32_v_bf16m4x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.mask.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i32.nxv16i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei32_v_bf16m4x2_m(vbool4_t vm, __bf16 *rs1, vuint32m8_t vs2, + vbfloat16m4x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei32_v_bf16m4x2_m(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg2ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg2ei64.c new file mode 100644 index 0000000000000..9001cb1e23da0 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg2ei64.c @@ -0,0 +1,96 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei64_v_bf16mf4x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void 
test_vsuxseg2ei64_v_bf16mf4x2(__bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei64_v_bf16mf4x2(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei64_v_bf16mf2x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei64_v_bf16mf2x2(__bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei64_v_bf16mf2x2(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei64_v_bf16m1x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei64_v_bf16m1x2(__bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei64_v_bf16m1x2(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei64_v_bf16m2x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i64.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei64_v_bf16m2x2(__bf16 *rs1, vuint64m8_t vs2, + vbfloat16m2x2_t vs3, size_t vl) { + return 
__riscv_vsuxseg2ei64_v_bf16m2x2(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei64_v_bf16mf4x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei64_v_bf16mf4x2_m(vbool64_t vm, __bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei64_v_bf16mf4x2_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei64_v_bf16mf2x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei64_v_bf16mf2x2_m(vbool32_t vm, __bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei64_v_bf16mf2x2_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei64_v_bf16m1x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei64_v_bf16m1x2_m(vbool16_t vm, __bf16 *rs1, 
vuint64m4_t vs2, + vbfloat16m1x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei64_v_bf16m1x2_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei64_v_bf16m2x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei64_v_bf16m2x2_m(vbool8_t vm, __bf16 *rs1, vuint64m8_t vs2, + vbfloat16m2x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei64_v_bf16m2x2_m(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg2ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg2ei8.c new file mode 100644 index 0000000000000..3c5490439282c --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg2ei8.c @@ -0,0 +1,118 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei8_v_bf16mf4x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr 
[[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei8_v_bf16mf4x2(__bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei8_v_bf16mf4x2(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei8_v_bf16mf2x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei8_v_bf16mf2x2(__bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei8_v_bf16mf2x2(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei8_v_bf16m1x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei8_v_bf16m1x2(__bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei8_v_bf16m1x2(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei8_v_bf16m2x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i8.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei8_v_bf16m2x2(__bf16 *rs1, 
vuint8m1_t vs2, + vbfloat16m2x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei8_v_bf16m2x2(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei8_v_bf16m4x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i8.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei8_v_bf16m4x2(__bf16 *rs1, vuint8m2_t vs2, + vbfloat16m4x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei8_v_bf16m4x2(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei8_v_bf16mf4x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei8_v_bf16mf4x2_m(vbool64_t vm, __bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei8_v_bf16mf4x2_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei8_v_bf16mf2x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei8_v_bf16mf2x2_m(vbool32_t vm, __bf16 *rs1, vuint8mf4_t 
vs2, + vbfloat16mf2x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei8_v_bf16mf2x2_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei8_v_bf16m1x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei8_v_bf16m1x2_m(vbool16_t vm, __bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei8_v_bf16m1x2_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei8_v_bf16m2x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei8_v_bf16m2x2_m(vbool8_t vm, __bf16 *rs1, vuint8m1_t vs2, + vbfloat16m2x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei8_v_bf16m2x2_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei8_v_bf16m4x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.mask.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i8.nxv16i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void 
test_vsuxseg2ei8_v_bf16m4x2_m(vbool4_t vm, __bf16 *rs1, vuint8m2_t vs2, + vbfloat16m4x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei8_v_bf16m4x2_m(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg3ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg3ei32.c new file mode 100644 index 0000000000000..8fd5dd337ea5b --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg3ei32.c @@ -0,0 +1,97 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg3ei32_v_bf16mf4x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg3.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg3ei32_v_bf16mf4x3(__bf16 *rs1, vuint32mf2_t vs2, + vbfloat16mf4x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei32_v_bf16mf4x3(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg3ei32_v_bf16mf2x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 3) 
[[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg3ei32_v_bf16mf2x3(__bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei32_v_bf16mf2x3(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg3ei32_v_bf16m1x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg3.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg3ei32_v_bf16m1x3(__bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei32_v_bf16m1x3(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg3ei32_v_bf16m2x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg3.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i32.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg3ei32_v_bf16m2x3(__bf16 *rs1, vuint32m4_t vs2, + vbfloat16m2x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei32_v_bf16m2x3(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg3ei32_v_bf16mf4x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret 
void +// +void test_vsuxseg3ei32_v_bf16mf4x3_m(vbool64_t vm, __bf16 *rs1, + vuint32mf2_t vs2, vbfloat16mf4x3_t vs3, + size_t vl) { + return __riscv_vsuxseg3ei32_v_bf16mf4x3_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg3ei32_v_bf16mf2x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg3ei32_v_bf16mf2x3_m(vbool32_t vm, __bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei32_v_bf16mf2x3_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg3ei32_v_bf16m1x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg3ei32_v_bf16m1x3_m(vbool16_t vm, __bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei32_v_bf16m1x3_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg3ei32_v_bf16m2x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr 
[[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg3ei32_v_bf16m2x3_m(vbool8_t vm, __bf16 *rs1, vuint32m4_t vs2, + vbfloat16m2x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei32_v_bf16m2x3_m(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg3ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg3ei64.c new file mode 100644 index 0000000000000..6ebaff9a8deef --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg3ei64.c @@ -0,0 +1,96 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg3ei64_v_bf16mf4x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg3.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg3ei64_v_bf16mf4x3(__bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei64_v_bf16mf4x3(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg3ei64_v_bf16mf2x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void 
@llvm.riscv.vsuxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg3ei64_v_bf16mf2x3(__bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei64_v_bf16mf2x3(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg3ei64_v_bf16m1x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg3.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg3ei64_v_bf16m1x3(__bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei64_v_bf16m1x3(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg3ei64_v_bf16m2x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg3.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i64.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg3ei64_v_bf16m2x3(__bf16 *rs1, vuint64m8_t vs2, + vbfloat16m2x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei64_v_bf16m2x3(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg3ei64_v_bf16mf4x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void 
@llvm.riscv.vsuxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg3ei64_v_bf16mf4x3_m(vbool64_t vm, __bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei64_v_bf16mf4x3_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg3ei64_v_bf16mf2x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg3ei64_v_bf16mf2x3_m(vbool32_t vm, __bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei64_v_bf16mf2x3_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg3ei64_v_bf16m1x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg3ei64_v_bf16m1x3_m(vbool16_t vm, __bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei64_v_bf16m1x3_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg3ei64_v_bf16m2x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { 
+// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg3ei64_v_bf16m2x3_m(vbool8_t vm, __bf16 *rs1, vuint64m8_t vs2, + vbfloat16m2x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei64_v_bf16m2x3_m(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg3ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg3ei8.c new file mode 100644 index 0000000000000..79ffd53eaaa68 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg3ei8.c @@ -0,0 +1,96 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg3ei8_v_bf16mf4x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg3.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg3ei8_v_bf16mf4x3(__bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei8_v_bf16mf4x3(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg3ei8_v_bf16mf2x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], 
[[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg3ei8_v_bf16mf2x3(__bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei8_v_bf16mf2x3(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg3ei8_v_bf16m1x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg3.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg3ei8_v_bf16m1x3(__bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei8_v_bf16m1x3(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg3ei8_v_bf16m2x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg3.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i8.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg3ei8_v_bf16m2x3(__bf16 *rs1, vuint8m1_t vs2, + vbfloat16m2x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei8_v_bf16m2x3(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg3ei8_v_bf16mf4x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// 
CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg3ei8_v_bf16mf4x3_m(vbool64_t vm, __bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei8_v_bf16mf4x3_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg3ei8_v_bf16mf2x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg3ei8_v_bf16mf2x3_m(vbool32_t vm, __bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei8_v_bf16mf2x3_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg3ei8_v_bf16m1x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg3ei8_v_bf16m1x3_m(vbool16_t vm, __bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei8_v_bf16m1x3_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg3ei8_v_bf16m2x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) 
[[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg3ei8_v_bf16m2x3_m(vbool8_t vm, __bf16 *rs1, vuint8m1_t vs2, + vbfloat16m2x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei8_v_bf16m2x3_m(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg4ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg4ei32.c new file mode 100644 index 0000000000000..e19555ab1e0d6 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg4ei32.c @@ -0,0 +1,97 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg4ei32_v_bf16mf4x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg4.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg4ei32_v_bf16mf4x4(__bf16 *rs1, vuint32mf2_t vs2, + vbfloat16mf4x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei32_v_bf16mf4x4(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void 
@test_vsuxseg4ei32_v_bf16mf2x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg4ei32_v_bf16mf2x4(__bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei32_v_bf16mf2x4(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg4ei32_v_bf16m1x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg4.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg4ei32_v_bf16m1x4(__bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei32_v_bf16m1x4(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg4ei32_v_bf16m2x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg4.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i32.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg4ei32_v_bf16m2x4(__bf16 *rs1, vuint32m4_t vs2, + vbfloat16m2x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei32_v_bf16m2x4(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg4ei32_v_bf16mf4x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], 
target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg4ei32_v_bf16mf4x4_m(vbool64_t vm, __bf16 *rs1, + vuint32mf2_t vs2, vbfloat16mf4x4_t vs3, + size_t vl) { + return __riscv_vsuxseg4ei32_v_bf16mf4x4_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg4ei32_v_bf16mf2x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg4ei32_v_bf16mf2x4_m(vbool32_t vm, __bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei32_v_bf16mf2x4_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg4ei32_v_bf16m1x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg4ei32_v_bf16m1x4_m(vbool16_t vm, __bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei32_v_bf16m1x4_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg4ei32_v_bf16m2x4_m( +// 
CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg4ei32_v_bf16m2x4_m(vbool8_t vm, __bf16 *rs1, vuint32m4_t vs2, + vbfloat16m2x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei32_v_bf16m2x4_m(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg4ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg4ei64.c new file mode 100644 index 0000000000000..6f89441c69faf --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg4ei64.c @@ -0,0 +1,96 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg4ei64_v_bf16mf4x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg4.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg4ei64_v_bf16mf4x4(__bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x4_t vs3, size_t vl) { + return 
__riscv_vsuxseg4ei64_v_bf16mf4x4(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg4ei64_v_bf16mf2x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg4ei64_v_bf16mf2x4(__bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei64_v_bf16mf2x4(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg4ei64_v_bf16m1x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg4.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg4ei64_v_bf16m1x4(__bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei64_v_bf16m1x4(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg4ei64_v_bf16m2x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg4.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i64.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg4ei64_v_bf16m2x4(__bf16 *rs1, vuint64m8_t vs2, + vbfloat16m2x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei64_v_bf16m2x4(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void 
@test_vsuxseg4ei64_v_bf16mf4x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg4ei64_v_bf16mf4x4_m(vbool64_t vm, __bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei64_v_bf16mf4x4_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg4ei64_v_bf16mf2x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg4ei64_v_bf16mf2x4_m(vbool32_t vm, __bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei64_v_bf16mf2x4_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg4ei64_v_bf16m1x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg4ei64_v_bf16m1x4_m(vbool16_t vm, __bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei64_v_bf16m1x4_m(vm, rs1, 
vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg4ei64_v_bf16m2x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg4ei64_v_bf16m2x4_m(vbool8_t vm, __bf16 *rs1, vuint64m8_t vs2, + vbfloat16m2x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei64_v_bf16m2x4_m(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg4ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg4ei8.c new file mode 100644 index 0000000000000..1a4d4523b2233 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg4ei8.c @@ -0,0 +1,96 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg4ei8_v_bf16mf4x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg4.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void 
test_vsuxseg4ei8_v_bf16mf4x4(__bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei8_v_bf16mf4x4(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg4ei8_v_bf16mf2x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg4ei8_v_bf16mf2x4(__bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei8_v_bf16mf2x4(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg4ei8_v_bf16m1x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg4.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg4ei8_v_bf16m1x4(__bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei8_v_bf16m1x4(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg4ei8_v_bf16m2x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg4.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i8.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg4ei8_v_bf16m2x4(__bf16 *rs1, vuint8m1_t vs2, + vbfloat16m2x4_t vs3, size_t vl) { + return 
__riscv_vsuxseg4ei8_v_bf16m2x4(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg4ei8_v_bf16mf4x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg4ei8_v_bf16mf4x4_m(vbool64_t vm, __bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei8_v_bf16mf4x4_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg4ei8_v_bf16mf2x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg4ei8_v_bf16mf2x4_m(vbool32_t vm, __bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei8_v_bf16mf2x4_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg4ei8_v_bf16m1x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg4ei8_v_bf16m1x4_m(vbool16_t vm, __bf16 *rs1, vuint8mf2_t vs2, + 
vbfloat16m1x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei8_v_bf16m1x4_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg4ei8_v_bf16m2x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg4ei8_v_bf16m2x4_m(vbool8_t vm, __bf16 *rs1, vuint8m1_t vs2, + vbfloat16m2x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei8_v_bf16m2x4_m(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg5ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg5ei32.c new file mode 100644 index 0000000000000..3a96d09995da9 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg5ei32.c @@ -0,0 +1,75 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg5ei32_v_bf16mf4x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg5.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], i64 
[[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg5ei32_v_bf16mf4x5(__bf16 *rs1, vuint32mf2_t vs2, + vbfloat16mf4x5_t vs3, size_t vl) { + return __riscv_vsuxseg5ei32_v_bf16mf4x5(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg5ei32_v_bf16mf2x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg5ei32_v_bf16mf2x5(__bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x5_t vs3, size_t vl) { + return __riscv_vsuxseg5ei32_v_bf16mf2x5(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg5ei32_v_bf16m1x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg5.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg5ei32_v_bf16m1x5(__bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x5_t vs3, size_t vl) { + return __riscv_vsuxseg5ei32_v_bf16m1x5(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg5ei32_v_bf16mf4x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void 
test_vsuxseg5ei32_v_bf16mf4x5_m(vbool64_t vm, __bf16 *rs1, + vuint32mf2_t vs2, vbfloat16mf4x5_t vs3, + size_t vl) { + return __riscv_vsuxseg5ei32_v_bf16mf4x5_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg5ei32_v_bf16mf2x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg5ei32_v_bf16mf2x5_m(vbool32_t vm, __bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x5_t vs3, size_t vl) { + return __riscv_vsuxseg5ei32_v_bf16mf2x5_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg5ei32_v_bf16m1x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg5ei32_v_bf16m1x5_m(vbool16_t vm, __bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x5_t vs3, size_t vl) { + return __riscv_vsuxseg5ei32_v_bf16m1x5_m(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg5ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg5ei64.c new file mode 100644 index 0000000000000..2d34ab4e7a36e --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg5ei64.c @@ -0,0 +1,74 @@ +// NOTE: Assertions 
have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg5ei64_v_bf16mf4x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg5.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg5ei64_v_bf16mf4x5(__bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x5_t vs3, size_t vl) { + return __riscv_vsuxseg5ei64_v_bf16mf4x5(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg5ei64_v_bf16mf2x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg5ei64_v_bf16mf2x5(__bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x5_t vs3, size_t vl) { + return __riscv_vsuxseg5ei64_v_bf16mf2x5(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg5ei64_v_bf16m1x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg5.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i64.i64(target("riscv.vector.tuple", 
, 5) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg5ei64_v_bf16m1x5(__bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x5_t vs3, size_t vl) { + return __riscv_vsuxseg5ei64_v_bf16m1x5(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg5ei64_v_bf16mf4x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg5ei64_v_bf16mf4x5_m(vbool64_t vm, __bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x5_t vs3, size_t vl) { + return __riscv_vsuxseg5ei64_v_bf16mf4x5_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg5ei64_v_bf16mf2x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg5ei64_v_bf16mf2x5_m(vbool32_t vm, __bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x5_t vs3, size_t vl) { + return __riscv_vsuxseg5ei64_v_bf16mf2x5_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg5ei64_v_bf16m1x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void 
@llvm.riscv.vsuxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg5ei64_v_bf16m1x5_m(vbool16_t vm, __bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x5_t vs3, size_t vl) { + return __riscv_vsuxseg5ei64_v_bf16m1x5_m(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg5ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg5ei8.c new file mode 100644 index 0000000000000..f6f7b4ae6b96e --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg5ei8.c @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg5ei8_v_bf16mf4x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg5.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg5ei8_v_bf16mf4x5(__bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x5_t vs3, size_t vl) { + return __riscv_vsuxseg5ei8_v_bf16mf4x5(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg5ei8_v_bf16mf2x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) 
[[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg5ei8_v_bf16mf2x5(__bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x5_t vs3, size_t vl) { + return __riscv_vsuxseg5ei8_v_bf16mf2x5(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg5ei8_v_bf16m1x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg5.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg5ei8_v_bf16m1x5(__bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x5_t vs3, size_t vl) { + return __riscv_vsuxseg5ei8_v_bf16m1x5(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg5ei8_v_bf16mf4x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg5ei8_v_bf16mf4x5_m(vbool64_t vm, __bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x5_t vs3, size_t vl) { + return __riscv_vsuxseg5ei8_v_bf16mf4x5_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg5ei8_v_bf16mf2x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { 
+// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg5ei8_v_bf16mf2x5_m(vbool32_t vm, __bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x5_t vs3, size_t vl) { + return __riscv_vsuxseg5ei8_v_bf16mf2x5_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg5ei8_v_bf16m1x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg5ei8_v_bf16m1x5_m(vbool16_t vm, __bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x5_t vs3, size_t vl) { + return __riscv_vsuxseg5ei8_v_bf16m1x5_m(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg6ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg6ei32.c new file mode 100644 index 0000000000000..7db63cb97d512 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg6ei32.c @@ -0,0 +1,75 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void 
@test_vsuxseg6ei32_v_bf16mf4x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg6.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg6ei32_v_bf16mf4x6(__bf16 *rs1, vuint32mf2_t vs2, + vbfloat16mf4x6_t vs3, size_t vl) { + return __riscv_vsuxseg6ei32_v_bf16mf4x6(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg6ei32_v_bf16mf2x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg6ei32_v_bf16mf2x6(__bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x6_t vs3, size_t vl) { + return __riscv_vsuxseg6ei32_v_bf16mf2x6(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg6ei32_v_bf16m1x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg6.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg6ei32_v_bf16m1x6(__bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x6_t vs3, size_t vl) { + return __riscv_vsuxseg6ei32_v_bf16m1x6(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg6ei32_v_bf16mf4x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], 
[[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg6ei32_v_bf16mf4x6_m(vbool64_t vm, __bf16 *rs1, + vuint32mf2_t vs2, vbfloat16mf4x6_t vs3, + size_t vl) { + return __riscv_vsuxseg6ei32_v_bf16mf4x6_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg6ei32_v_bf16mf2x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg6ei32_v_bf16mf2x6_m(vbool32_t vm, __bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x6_t vs3, size_t vl) { + return __riscv_vsuxseg6ei32_v_bf16mf2x6_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg6ei32_v_bf16m1x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg6ei32_v_bf16m1x6_m(vbool16_t vm, __bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x6_t vs3, size_t vl) { + return __riscv_vsuxseg6ei32_v_bf16m1x6_m(vm, rs1, vs2, vs3, vl); +} diff --git 
a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg6ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg6ei64.c new file mode 100644 index 0000000000000..dd6c263688e7e --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg6ei64.c @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg6ei64_v_bf16mf4x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg6.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg6ei64_v_bf16mf4x6(__bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x6_t vs3, size_t vl) { + return __riscv_vsuxseg6ei64_v_bf16mf4x6(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg6ei64_v_bf16mf2x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg6ei64_v_bf16mf2x6(__bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x6_t vs3, size_t vl) { + return 
__riscv_vsuxseg6ei64_v_bf16mf2x6(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg6ei64_v_bf16m1x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg6.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg6ei64_v_bf16m1x6(__bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x6_t vs3, size_t vl) { + return __riscv_vsuxseg6ei64_v_bf16m1x6(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg6ei64_v_bf16mf4x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg6ei64_v_bf16mf4x6_m(vbool64_t vm, __bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x6_t vs3, size_t vl) { + return __riscv_vsuxseg6ei64_v_bf16mf4x6_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg6ei64_v_bf16mf2x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg6ei64_v_bf16mf2x6_m(vbool32_t vm, __bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x6_t vs3, size_t vl) { + return 
__riscv_vsuxseg6ei64_v_bf16mf2x6_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg6ei64_v_bf16m1x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg6ei64_v_bf16m1x6_m(vbool16_t vm, __bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x6_t vs3, size_t vl) { + return __riscv_vsuxseg6ei64_v_bf16m1x6_m(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg6ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg6ei8.c new file mode 100644 index 0000000000000..157eba825dc2d --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg6ei8.c @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg6ei8_v_bf16mf4x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg6.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret 
void +// +void test_vsuxseg6ei8_v_bf16mf4x6(__bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x6_t vs3, size_t vl) { + return __riscv_vsuxseg6ei8_v_bf16mf4x6(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg6ei8_v_bf16mf2x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg6ei8_v_bf16mf2x6(__bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x6_t vs3, size_t vl) { + return __riscv_vsuxseg6ei8_v_bf16mf2x6(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg6ei8_v_bf16m1x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg6.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg6ei8_v_bf16m1x6(__bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x6_t vs3, size_t vl) { + return __riscv_vsuxseg6ei8_v_bf16m1x6(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg6ei8_v_bf16mf4x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg6ei8_v_bf16mf4x6_m(vbool64_t vm, __bf16 *rs1, vuint8mf8_t 
vs2, + vbfloat16mf4x6_t vs3, size_t vl) { + return __riscv_vsuxseg6ei8_v_bf16mf4x6_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg6ei8_v_bf16mf2x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg6ei8_v_bf16mf2x6_m(vbool32_t vm, __bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x6_t vs3, size_t vl) { + return __riscv_vsuxseg6ei8_v_bf16mf2x6_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg6ei8_v_bf16m1x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg6ei8_v_bf16m1x6_m(vbool16_t vm, __bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x6_t vs3, size_t vl) { + return __riscv_vsuxseg6ei8_v_bf16m1x6_m(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg7ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg7ei32.c new file mode 100644 index 0000000000000..8e48179147053 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg7ei32.c @@ -0,0 +1,75 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 
+// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg7ei32_v_bf16mf4x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg7.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg7ei32_v_bf16mf4x7(__bf16 *rs1, vuint32mf2_t vs2, + vbfloat16mf4x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei32_v_bf16mf4x7(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg7ei32_v_bf16mf2x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg7ei32_v_bf16mf2x7(__bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei32_v_bf16mf2x7(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg7ei32_v_bf16m1x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg7.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret 
void +// +void test_vsuxseg7ei32_v_bf16m1x7(__bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei32_v_bf16m1x7(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg7ei32_v_bf16mf4x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg7ei32_v_bf16mf4x7_m(vbool64_t vm, __bf16 *rs1, + vuint32mf2_t vs2, vbfloat16mf4x7_t vs3, + size_t vl) { + return __riscv_vsuxseg7ei32_v_bf16mf4x7_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg7ei32_v_bf16mf2x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg7ei32_v_bf16mf2x7_m(vbool32_t vm, __bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei32_v_bf16mf2x7_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg7ei32_v_bf16m1x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], 
i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg7ei32_v_bf16m1x7_m(vbool16_t vm, __bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei32_v_bf16m1x7_m(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg7ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg7ei64.c new file mode 100644 index 0000000000000..6c9a0443f425d --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg7ei64.c @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg7ei64_v_bf16mf4x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg7.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg7ei64_v_bf16mf4x7(__bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei64_v_bf16mf4x7(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg7ei64_v_bf16mf2x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void 
@llvm.riscv.vsuxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg7ei64_v_bf16mf2x7(__bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei64_v_bf16mf2x7(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg7ei64_v_bf16m1x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg7.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg7ei64_v_bf16m1x7(__bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei64_v_bf16m1x7(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg7ei64_v_bf16mf4x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg7ei64_v_bf16mf4x7_m(vbool64_t vm, __bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei64_v_bf16mf4x7_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg7ei64_v_bf16mf2x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void 
@llvm.riscv.vsuxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg7ei64_v_bf16mf2x7_m(vbool32_t vm, __bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei64_v_bf16mf2x7_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg7ei64_v_bf16m1x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg7ei64_v_bf16m1x7_m(vbool16_t vm, __bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei64_v_bf16m1x7_m(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg7ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg7ei8.c new file mode 100644 index 0000000000000..27ced38cf8407 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg7ei8.c @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg7ei8_v_bf16mf4x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], 
[[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg7.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg7ei8_v_bf16mf4x7(__bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei8_v_bf16mf4x7(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg7ei8_v_bf16mf2x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg7ei8_v_bf16mf2x7(__bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei8_v_bf16mf2x7(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg7ei8_v_bf16m1x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg7.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg7ei8_v_bf16m1x7(__bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei8_v_bf16m1x7(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg7ei8_v_bf16mf4x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { 
+// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg7ei8_v_bf16mf4x7_m(vbool64_t vm, __bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei8_v_bf16mf4x7_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg7ei8_v_bf16mf2x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg7ei8_v_bf16mf2x7_m(vbool32_t vm, __bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei8_v_bf16mf2x7_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg7ei8_v_bf16m1x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg7ei8_v_bf16m1x7_m(vbool16_t vm, __bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei8_v_bf16m1x7_m(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg8ei32.c 
b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg8ei32.c new file mode 100644 index 0000000000000..81adc3cb6ba5e --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg8ei32.c @@ -0,0 +1,75 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg8ei32_v_bf16mf4x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg8.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg8ei32_v_bf16mf4x8(__bf16 *rs1, vuint32mf2_t vs2, + vbfloat16mf4x8_t vs3, size_t vl) { + return __riscv_vsuxseg8ei32_v_bf16mf4x8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg8ei32_v_bf16mf2x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg8ei32_v_bf16mf2x8(__bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x8_t vs3, size_t vl) { + return __riscv_vsuxseg8ei32_v_bf16mf2x8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void 
@test_vsuxseg8ei32_v_bf16m1x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg8.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg8ei32_v_bf16m1x8(__bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x8_t vs3, size_t vl) { + return __riscv_vsuxseg8ei32_v_bf16m1x8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg8ei32_v_bf16mf4x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg8ei32_v_bf16mf4x8_m(vbool64_t vm, __bf16 *rs1, + vuint32mf2_t vs2, vbfloat16mf4x8_t vs3, + size_t vl) { + return __riscv_vsuxseg8ei32_v_bf16mf4x8_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg8ei32_v_bf16mf2x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg8ei32_v_bf16mf2x8_m(vbool32_t vm, __bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x8_t vs3, size_t vl) { + return __riscv_vsuxseg8ei32_v_bf16mf2x8_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define 
dso_local void @test_vsuxseg8ei32_v_bf16m1x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg8ei32_v_bf16m1x8_m(vbool16_t vm, __bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x8_t vs3, size_t vl) { + return __riscv_vsuxseg8ei32_v_bf16m1x8_m(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg8ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg8ei64.c new file mode 100644 index 0000000000000..43d76fc2dfd74 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg8ei64.c @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg8ei64_v_bf16mf4x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg8.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg8ei64_v_bf16mf4x8(__bf16 *rs1, vuint64m1_t vs2, + 
vbfloat16mf4x8_t vs3, size_t vl) { + return __riscv_vsuxseg8ei64_v_bf16mf4x8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg8ei64_v_bf16mf2x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg8ei64_v_bf16mf2x8(__bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x8_t vs3, size_t vl) { + return __riscv_vsuxseg8ei64_v_bf16mf2x8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg8ei64_v_bf16m1x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg8.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg8ei64_v_bf16m1x8(__bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x8_t vs3, size_t vl) { + return __riscv_vsuxseg8ei64_v_bf16m1x8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg8ei64_v_bf16mf4x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg8ei64_v_bf16mf4x8_m(vbool64_t vm, __bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x8_t vs3, size_t vl) { + return 
__riscv_vsuxseg8ei64_v_bf16mf4x8_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg8ei64_v_bf16mf2x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg8ei64_v_bf16mf2x8_m(vbool32_t vm, __bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x8_t vs3, size_t vl) { + return __riscv_vsuxseg8ei64_v_bf16mf2x8_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg8ei64_v_bf16m1x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg8ei64_v_bf16m1x8_m(vbool16_t vm, __bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x8_t vs3, size_t vl) { + return __riscv_vsuxseg8ei64_v_bf16m1x8_m(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg8ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg8ei8.c new file mode 100644 index 0000000000000..3976ba816dbeb --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vsuxseg8ei8.c @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: 
%clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg8ei8_v_bf16mf4x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg8.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg8ei8_v_bf16mf4x8(__bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x8_t vs3, size_t vl) { + return __riscv_vsuxseg8ei8_v_bf16mf4x8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg8ei8_v_bf16mf2x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg8ei8_v_bf16mf2x8(__bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x8_t vs3, size_t vl) { + return __riscv_vsuxseg8ei8_v_bf16mf2x8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg8ei8_v_bf16m1x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg8.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg8ei8_v_bf16m1x8(__bf16 *rs1, 
vuint8mf2_t vs2, + vbfloat16m1x8_t vs3, size_t vl) { + return __riscv_vsuxseg8ei8_v_bf16m1x8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg8ei8_v_bf16mf4x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg8ei8_v_bf16mf4x8_m(vbool64_t vm, __bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x8_t vs3, size_t vl) { + return __riscv_vsuxseg8ei8_v_bf16mf4x8_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg8ei8_v_bf16mf2x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg8ei8_v_bf16mf2x8_m(vbool32_t vm, __bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x8_t vs3, size_t vl) { + return __riscv_vsuxseg8ei8_v_bf16mf2x8_m(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg8ei8_v_bf16m1x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void 
test_vsuxseg8ei8_v_bf16m1x8_m(vbool16_t vm, __bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x8_t vs3, size_t vl) { + return __riscv_vsuxseg8ei8_v_bf16m1x8_m(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxei32.c new file mode 100644 index 0000000000000..dbf7941678b33 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxei32.c @@ -0,0 +1,118 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16mf4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv1bf16.p0.nxv1i32.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vloxei32_v_bf16mf4(const __bf16 *rs1, vuint32mf2_t rs2, + size_t vl) { + return __riscv_vloxei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16mf2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv2bf16.p0.nxv2i32.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vloxei32_v_bf16mf2(const __bf16 *rs1, vuint32m1_t rs2, + size_t vl) { + return __riscv_vloxei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local 
@test_vloxei32_v_bf16m1( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv4bf16.p0.nxv4i32.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vloxei32_v_bf16m1(const __bf16 *rs1, vuint32m2_t rs2, + size_t vl) { + return __riscv_vloxei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16m2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv8bf16.p0.nxv8i32.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vloxei32_v_bf16m2(const __bf16 *rs1, vuint32m4_t rs2, + size_t vl) { + return __riscv_vloxei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16m4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv16bf16.p0.nxv16i32.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m4_t test_vloxei32_v_bf16m4(const __bf16 *rs1, vuint32m8_t rs2, + size_t vl) { + return __riscv_vloxei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16mf4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv1bf16.p0.nxv1i32.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vloxei32_v_bf16mf4_m(vbool64_t vm, const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxei32(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: 
define dso_local @test_vloxei32_v_bf16mf2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv2bf16.p0.nxv2i32.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vloxei32_v_bf16mf2_m(vbool32_t vm, const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxei32(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16m1_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv4bf16.p0.nxv4i32.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vloxei32_v_bf16m1_m(vbool16_t vm, const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vloxei32(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16m2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv8bf16.p0.nxv8i32.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vloxei32_v_bf16m2_m(vbool8_t vm, const __bf16 *rs1, + vuint32m4_t rs2, size_t vl) { + return __riscv_vloxei32(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16m4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv16bf16.p0.nxv16i32.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// 
+vbfloat16m4_t test_vloxei32_v_bf16m4_m(vbool4_t vm, const __bf16 *rs1, + vuint32m8_t rs2, size_t vl) { + return __riscv_vloxei32(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxei64.c new file mode 100644 index 0000000000000..9a35316065afd --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxei64.c @@ -0,0 +1,96 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local @test_vloxei64_v_bf16mf4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv1bf16.p0.nxv1i64.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vloxei64_v_bf16mf4(const __bf16 *rs1, vuint64m1_t rs2, + size_t vl) { + return __riscv_vloxei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei64_v_bf16mf2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv2bf16.p0.nxv2i64.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vloxei64_v_bf16mf2(const __bf16 *rs1, vuint64m2_t rs2, + size_t vl) { + return __riscv_vloxei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei64_v_bf16m1( +// CHECK-RV64-SAME: 
ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv4bf16.p0.nxv4i64.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vloxei64_v_bf16m1(const __bf16 *rs1, vuint64m4_t rs2, + size_t vl) { + return __riscv_vloxei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei64_v_bf16m2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv8bf16.p0.nxv8i64.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vloxei64_v_bf16m2(const __bf16 *rs1, vuint64m8_t rs2, + size_t vl) { + return __riscv_vloxei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei64_v_bf16mf4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv1bf16.p0.nxv1i64.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vloxei64_v_bf16mf4_m(vbool64_t vm, const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxei64(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei64_v_bf16mf2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv2bf16.p0.nxv2i64.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vloxei64_v_bf16mf2_m(vbool32_t vm, const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vloxei64(vm, rs1, rs2, vl); +} + +// 
CHECK-RV64-LABEL: define dso_local @test_vloxei64_v_bf16m1_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv4bf16.p0.nxv4i64.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vloxei64_v_bf16m1_m(vbool16_t vm, const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vloxei64(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei64_v_bf16m2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv8bf16.p0.nxv8i64.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vloxei64_v_bf16m2_m(vbool8_t vm, const __bf16 *rs1, + vuint64m8_t rs2, size_t vl) { + return __riscv_vloxei64(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxei8.c new file mode 100644 index 0000000000000..e3a6c813660b3 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxei8.c @@ -0,0 +1,140 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16mf4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) 
#[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv1bf16.p0.nxv1i8.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vloxei8_v_bf16mf4(const __bf16 *rs1, vuint8mf8_t rs2, + size_t vl) { + return __riscv_vloxei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16mf2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv2bf16.p0.nxv2i8.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vloxei8_v_bf16mf2(const __bf16 *rs1, vuint8mf4_t rs2, + size_t vl) { + return __riscv_vloxei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16m1( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv4bf16.p0.nxv4i8.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vloxei8_v_bf16m1(const __bf16 *rs1, vuint8mf2_t rs2, + size_t vl) { + return __riscv_vloxei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16m2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv8bf16.p0.nxv8i8.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vloxei8_v_bf16m2(const __bf16 *rs1, vuint8m1_t rs2, + size_t vl) { + return __riscv_vloxei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16m4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// 
CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv16bf16.p0.nxv16i8.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m4_t test_vloxei8_v_bf16m4(const __bf16 *rs1, vuint8m2_t rs2, + size_t vl) { + return __riscv_vloxei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16m8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv32bf16.p0.nxv32i8.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m8_t test_vloxei8_v_bf16m8(const __bf16 *rs1, vuint8m4_t rs2, + size_t vl) { + return __riscv_vloxei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16mf4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv1bf16.p0.nxv1i8.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vloxei8_v_bf16mf4_m(vbool64_t vm, const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxei8(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16mf2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv2bf16.p0.nxv2i8.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vloxei8_v_bf16mf2_m(vbool32_t vm, const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxei8(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16m1_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], 
i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv4bf16.p0.nxv4i8.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vloxei8_v_bf16m1_m(vbool16_t vm, const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxei8(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16m2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv8bf16.p0.nxv8i8.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vloxei8_v_bf16m2_m(vbool8_t vm, const __bf16 *rs1, + vuint8m1_t rs2, size_t vl) { + return __riscv_vloxei8(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16m4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv16bf16.p0.nxv16i8.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m4_t test_vloxei8_v_bf16m4_m(vbool4_t vm, const __bf16 *rs1, + vuint8m2_t rs2, size_t vl) { + return __riscv_vloxei8(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16m8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv32bf16.p0.nxv32i8.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m8_t test_vloxei8_v_bf16m8_m(vbool2_t vm, const __bf16 *rs1, + vuint8m4_t rs2, size_t vl) { + return __riscv_vloxei8(vm, rs1, 
rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg2ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg2ei32.c new file mode 100644 index 0000000000000..7853a29bcfb11 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg2ei32.c @@ -0,0 +1,120 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16mf4x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vloxseg2ei32_v_bf16mf4x2(const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg2ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16mf2x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) 
[[TMP0]] +// +vbfloat16mf2x2_t test_vloxseg2ei32_v_bf16mf2x2(const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16m1x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vloxseg2ei32_v_bf16m1x2(const __bf16 *rs1, vuint32m2_t rs2, + size_t vl) { + return __riscv_vloxseg2ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16m2x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i32.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vloxseg2ei32_v_bf16m2x2(const __bf16 *rs1, vuint32m4_t rs2, + size_t vl) { + return __riscv_vloxseg2ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16m4x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i32.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret 
target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m4x2_t test_vloxseg2ei32_v_bf16m4x2(const __bf16 *rs1, vuint32m8_t rs2, + size_t vl) { + return __riscv_vloxseg2ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16mf4x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vloxseg2ei32_v_bf16mf4x2_m(vbool64_t vm, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg2ei32(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16mf2x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vloxseg2ei32_v_bf16mf2x2_m(vbool32_t vm, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei32(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16m1x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) 
@llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vloxseg2ei32_v_bf16m1x2_m(vbool16_t vm, const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg2ei32(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16m2x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vloxseg2ei32_v_bf16m2x2_m(vbool8_t vm, const __bf16 *rs1, + vuint32m4_t rs2, size_t vl) { + return __riscv_vloxseg2ei32(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16m4x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i32.nxv16i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m4x2_t test_vloxseg2ei32_v_bf16m4x2_m(vbool4_t vm, const __bf16 *rs1, + vuint32m8_t rs2, size_t vl) { + return __riscv_vloxseg2ei32(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg2ei64.c 
b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg2ei64.c new file mode 100644 index 0000000000000..0a68ccc33290c --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg2ei64.c @@ -0,0 +1,98 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei64_v_bf16mf4x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vloxseg2ei64_v_bf16mf4x2(const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei64_v_bf16mf2x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vloxseg2ei64_v_bf16mf2x2(const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return 
__riscv_vloxseg2ei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei64_v_bf16m1x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vloxseg2ei64_v_bf16m1x2(const __bf16 *rs1, vuint64m4_t rs2, + size_t vl) { + return __riscv_vloxseg2ei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei64_v_bf16m2x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i64.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vloxseg2ei64_v_bf16m2x2(const __bf16 *rs1, vuint64m8_t rs2, + size_t vl) { + return __riscv_vloxseg2ei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei64_v_bf16mf4x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t 
test_vloxseg2ei64_v_bf16mf4x2_m(vbool64_t vm, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei64(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei64_v_bf16mf2x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vloxseg2ei64_v_bf16mf2x2_m(vbool32_t vm, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg2ei64(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei64_v_bf16m1x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vloxseg2ei64_v_bf16m1x2_m(vbool16_t vm, const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg2ei64(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei64_v_bf16m2x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) 
@llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vloxseg2ei64_v_bf16m2x2_m(vbool8_t vm, const __bf16 *rs1, + vuint64m8_t rs2, size_t vl) { + return __riscv_vloxseg2ei64(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg2ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg2ei8.c new file mode 100644 index 0000000000000..05b59f3b3e259 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg2ei8.c @@ -0,0 +1,118 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16mf4x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vloxseg2ei8_v_bf16mf4x2(const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg2ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16mf2x2( +// CHECK-RV64-SAME: 
ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vloxseg2ei8_v_bf16mf2x2(const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg2ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16m1x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vloxseg2ei8_v_bf16m1x2(const __bf16 *rs1, vuint8mf2_t rs2, + size_t vl) { + return __riscv_vloxseg2ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16m2x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i8.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vloxseg2ei8_v_bf16m2x2(const __bf16 *rs1, vuint8m1_t rs2, + size_t vl) { + return __riscv_vloxseg2ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16m4x2( +// 
CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i8.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m4x2_t test_vloxseg2ei8_v_bf16m4x2(const __bf16 *rs1, vuint8m2_t rs2, + size_t vl) { + return __riscv_vloxseg2ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16mf4x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vloxseg2ei8_v_bf16mf4x2_m(vbool64_t vm, const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg2ei8(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16mf2x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vloxseg2ei8_v_bf16mf2x2_m(vbool32_t vm, const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return 
__riscv_vloxseg2ei8(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16m1x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vloxseg2ei8_v_bf16m1x2_m(vbool16_t vm, const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg2ei8(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16m2x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vloxseg2ei8_v_bf16m2x2_m(vbool8_t vm, const __bf16 *rs1, + vuint8m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei8(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16m4x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i8.nxv16i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// 
CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m4x2_t test_vloxseg2ei8_v_bf16m4x2_m(vbool4_t vm, const __bf16 *rs1, + vuint8m2_t rs2, size_t vl) { + return __riscv_vloxseg2ei8(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg3ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg3ei32.c new file mode 100644 index 0000000000000..683e30c9a6692 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg3ei32.c @@ -0,0 +1,98 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei32_v_bf16mf4x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vloxseg3ei32_v_bf16mf4x3(const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg3ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei32_v_bf16mf2x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) 
@llvm.riscv.vloxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vloxseg3ei32_v_bf16mf2x3(const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg3ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei32_v_bf16m1x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vloxseg3ei32_v_bf16m1x3(const __bf16 *rs1, vuint32m2_t rs2, + size_t vl) { + return __riscv_vloxseg3ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei32_v_bf16m2x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i32.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vloxseg3ei32_v_bf16m2x3(const __bf16 *rs1, vuint32m4_t rs2, + size_t vl) { + return __riscv_vloxseg3ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei32_v_bf16mf4x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vloxseg3ei32_v_bf16mf4x3_m(vbool64_t vm, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg3ei32(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei32_v_bf16mf2x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vloxseg3ei32_v_bf16mf2x3_m(vbool32_t vm, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg3ei32(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei32_v_bf16m1x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vloxseg3ei32_v_bf16m1x3_m(vbool16_t vm, const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg3ei32(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) 
@test_vloxseg3ei32_v_bf16m2x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vloxseg3ei32_v_bf16m2x3_m(vbool8_t vm, const __bf16 *rs1, + vuint32m4_t rs2, size_t vl) { + return __riscv_vloxseg3ei32(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg3ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg3ei64.c new file mode 100644 index 0000000000000..69b2809272eba --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg3ei64.c @@ -0,0 +1,98 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei64_v_bf16mf4x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t 
test_vloxseg3ei64_v_bf16mf4x3(const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg3ei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei64_v_bf16mf2x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vloxseg3ei64_v_bf16mf2x3(const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg3ei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei64_v_bf16m1x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vloxseg3ei64_v_bf16m1x3(const __bf16 *rs1, vuint64m4_t rs2, + size_t vl) { + return __riscv_vloxseg3ei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei64_v_bf16m2x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i64.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] 
+// +vbfloat16m2x3_t test_vloxseg3ei64_v_bf16m2x3(const __bf16 *rs1, vuint64m8_t rs2, + size_t vl) { + return __riscv_vloxseg3ei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei64_v_bf16mf4x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vloxseg3ei64_v_bf16mf4x3_m(vbool64_t vm, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg3ei64(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei64_v_bf16mf2x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vloxseg3ei64_v_bf16mf2x3_m(vbool32_t vm, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg3ei64(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei64_v_bf16m1x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) 
@llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vloxseg3ei64_v_bf16m1x3_m(vbool16_t vm, const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg3ei64(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei64_v_bf16m2x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vloxseg3ei64_v_bf16m2x3_m(vbool8_t vm, const __bf16 *rs1, + vuint64m8_t rs2, size_t vl) { + return __riscv_vloxseg3ei64(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg3ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg3ei8.c new file mode 100644 index 0000000000000..c6f00fe76ba26 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg3ei8.c @@ -0,0 +1,96 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) 
@test_vloxseg3ei8_v_bf16mf4x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vloxseg3ei8_v_bf16mf4x3(const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg3ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei8_v_bf16mf2x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vloxseg3ei8_v_bf16mf2x3(const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg3ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei8_v_bf16m1x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vloxseg3ei8_v_bf16m1x3(const __bf16 *rs1, vuint8mf2_t rs2, + size_t vl) { + return __riscv_vloxseg3ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local 
target("riscv.vector.tuple", , 3) @test_vloxseg3ei8_v_bf16m2x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i8.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vloxseg3ei8_v_bf16m2x3(const __bf16 *rs1, vuint8m1_t rs2, + size_t vl) { + return __riscv_vloxseg3ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei8_v_bf16mf4x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vloxseg3ei8_v_bf16mf4x3_m(vbool64_t vm, const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg3ei8(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei8_v_bf16mf2x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vloxseg3ei8_v_bf16mf2x3_m(vbool32_t vm, const __bf16 
*rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg3ei8(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei8_v_bf16m1x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vloxseg3ei8_v_bf16m1x3_m(vbool16_t vm, const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg3ei8(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei8_v_bf16m2x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vloxseg3ei8_v_bf16m2x3_m(vbool8_t vm, const __bf16 *rs1, + vuint8m1_t rs2, size_t vl) { + return __riscv_vloxseg3ei8(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg4ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg4ei32.c new file mode 100644 index 0000000000000..55a980a6f67d9 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg4ei32.c @@ -0,0 +1,98 @@ +// NOTE: Assertions have been autogenerated by 
utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei32_v_bf16mf4x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vloxseg4ei32_v_bf16mf4x4(const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg4ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei32_v_bf16mf2x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vloxseg4ei32_v_bf16mf2x4(const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei32_v_bf16m1x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) 
@llvm.riscv.vloxseg4.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vloxseg4ei32_v_bf16m1x4(const __bf16 *rs1, vuint32m2_t rs2, + size_t vl) { + return __riscv_vloxseg4ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei32_v_bf16m2x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i32.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vloxseg4ei32_v_bf16m2x4(const __bf16 *rs1, vuint32m4_t rs2, + size_t vl) { + return __riscv_vloxseg4ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei32_v_bf16mf4x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vloxseg4ei32_v_bf16mf4x4_m(vbool64_t vm, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg4ei32(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei32_v_bf16mf2x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// 
CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vloxseg4ei32_v_bf16mf2x4_m(vbool32_t vm, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei32(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei32_v_bf16m1x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vloxseg4ei32_v_bf16m1x4_m(vbool16_t vm, const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg4ei32(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei32_v_bf16m2x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vloxseg4ei32_v_bf16m2x4_m(vbool8_t vm, const __bf16 *rs1, + vuint32m4_t rs2, size_t vl) { + return __riscv_vloxseg4ei32(vm, rs1, rs2, vl); +} diff --git 
a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg4ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg4ei64.c new file mode 100644 index 0000000000000..75a0200476176 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg4ei64.c @@ -0,0 +1,98 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei64_v_bf16mf4x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vloxseg4ei64_v_bf16mf4x4(const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei64_v_bf16mf2x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t 
test_vloxseg4ei64_v_bf16mf2x4(const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg4ei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei64_v_bf16m1x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vloxseg4ei64_v_bf16m1x4(const __bf16 *rs1, vuint64m4_t rs2, + size_t vl) { + return __riscv_vloxseg4ei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei64_v_bf16m2x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i64.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vloxseg4ei64_v_bf16m2x4(const __bf16 *rs1, vuint64m8_t rs2, + size_t vl) { + return __riscv_vloxseg4ei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei64_v_bf16mf4x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret 
target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vloxseg4ei64_v_bf16mf4x4_m(vbool64_t vm, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei64(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei64_v_bf16mf2x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vloxseg4ei64_v_bf16mf2x4_m(vbool32_t vm, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg4ei64(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei64_v_bf16m1x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vloxseg4ei64_v_bf16m1x4_m(vbool16_t vm, const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg4ei64(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei64_v_bf16m2x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", 
, 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vloxseg4ei64_v_bf16m2x4_m(vbool8_t vm, const __bf16 *rs1, + vuint64m8_t rs2, size_t vl) { + return __riscv_vloxseg4ei64(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg4ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg4ei8.c new file mode 100644 index 0000000000000..f0c75373fc92c --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg4ei8.c @@ -0,0 +1,96 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei8_v_bf16mf4x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vloxseg4ei8_v_bf16mf4x4(const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg4ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei8_v_bf16mf2x4( +// 
CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vloxseg4ei8_v_bf16mf2x4(const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg4ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei8_v_bf16m1x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vloxseg4ei8_v_bf16m1x4(const __bf16 *rs1, vuint8mf2_t rs2, + size_t vl) { + return __riscv_vloxseg4ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei8_v_bf16m2x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i8.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vloxseg4ei8_v_bf16m2x4(const __bf16 *rs1, vuint8m1_t rs2, + size_t vl) { + return __riscv_vloxseg4ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) 
@test_vloxseg4ei8_v_bf16mf4x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vloxseg4ei8_v_bf16mf4x4_m(vbool64_t vm, const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg4ei8(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei8_v_bf16mf2x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vloxseg4ei8_v_bf16mf2x4_m(vbool32_t vm, const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg4ei8(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei8_v_bf16m1x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t 
test_vloxseg4ei8_v_bf16m1x4_m(vbool16_t vm, const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg4ei8(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei8_v_bf16m2x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vloxseg4ei8_v_bf16m2x4_m(vbool8_t vm, const __bf16 *rs1, + vuint8m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei8(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg5ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg5ei32.c new file mode 100644 index 0000000000000..132e27cd557c4 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg5ei32.c @@ -0,0 +1,76 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei32_v_bf16mf4x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) 
@llvm.riscv.vloxseg5.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vloxseg5ei32_v_bf16mf4x5(const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg5ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei32_v_bf16mf2x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vloxseg5ei32_v_bf16mf2x5(const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg5ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei32_v_bf16m1x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vloxseg5ei32_v_bf16m1x5(const __bf16 *rs1, vuint32m2_t rs2, + size_t vl) { + return __riscv_vloxseg5ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei32_v_bf16mf4x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vloxseg5ei32_v_bf16mf4x5_m(vbool64_t vm, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg5ei32(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei32_v_bf16mf2x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vloxseg5ei32_v_bf16mf2x5_m(vbool32_t vm, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg5ei32(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei32_v_bf16m1x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vloxseg5ei32_v_bf16m1x5_m(vbool16_t vm, const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg5ei32(vm, rs1, rs2, vl); +} diff --git 
a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg5ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg5ei64.c new file mode 100644 index 0000000000000..ce1cfeddba1ca --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg5ei64.c @@ -0,0 +1,76 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei64_v_bf16mf4x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vloxseg5ei64_v_bf16mf4x5(const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg5ei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei64_v_bf16mf2x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t 
test_vloxseg5ei64_v_bf16mf2x5(const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg5ei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei64_v_bf16m1x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vloxseg5ei64_v_bf16m1x5(const __bf16 *rs1, vuint64m4_t rs2, + size_t vl) { + return __riscv_vloxseg5ei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei64_v_bf16mf4x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vloxseg5ei64_v_bf16mf4x5_m(vbool64_t vm, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg5ei64(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei64_v_bf16mf2x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], 
[[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vloxseg5ei64_v_bf16mf2x5_m(vbool32_t vm, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg5ei64(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei64_v_bf16m1x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vloxseg5ei64_v_bf16m1x5_m(vbool16_t vm, const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg5ei64(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg5ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg5ei8.c new file mode 100644 index 0000000000000..55b835f3a5421 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg5ei8.c @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei8_v_bf16mf4x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// 
CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vloxseg5ei8_v_bf16mf4x5(const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg5ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei8_v_bf16mf2x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vloxseg5ei8_v_bf16mf2x5(const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg5ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei8_v_bf16m1x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vloxseg5ei8_v_bf16m1x5(const __bf16 *rs1, vuint8mf2_t rs2, + size_t vl) { + return __riscv_vloxseg5ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei8_v_bf16mf4x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef 
[[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vloxseg5ei8_v_bf16mf4x5_m(vbool64_t vm, const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg5ei8(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei8_v_bf16mf2x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vloxseg5ei8_v_bf16mf2x5_m(vbool32_t vm, const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg5ei8(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei8_v_bf16m1x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vloxseg5ei8_v_bf16m1x5_m(vbool16_t vm, const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg5ei8(vm, rs1, rs2, vl); +} 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg6ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg6ei32.c new file mode 100644 index 0000000000000..c8df8d0c6b907 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg6ei32.c @@ -0,0 +1,76 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei32_v_bf16mf4x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vloxseg6ei32_v_bf16mf4x6(const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg6ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei32_v_bf16mf2x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// 
+vbfloat16mf2x6_t test_vloxseg6ei32_v_bf16mf2x6(const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg6ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei32_v_bf16m1x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vloxseg6ei32_v_bf16m1x6(const __bf16 *rs1, vuint32m2_t rs2, + size_t vl) { + return __riscv_vloxseg6ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei32_v_bf16mf4x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vloxseg6ei32_v_bf16mf4x6_m(vbool64_t vm, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg6ei32(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei32_v_bf16mf2x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 6) poison, ptr 
[[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vloxseg6ei32_v_bf16mf2x6_m(vbool32_t vm, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg6ei32(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei32_v_bf16m1x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vloxseg6ei32_v_bf16m1x6_m(vbool16_t vm, const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg6ei32(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg6ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg6ei64.c new file mode 100644 index 0000000000000..217b89ce831d1 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg6ei64.c @@ -0,0 +1,76 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei64_v_bf16mf4x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) 
#[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vloxseg6ei64_v_bf16mf4x6(const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg6ei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei64_v_bf16mf2x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vloxseg6ei64_v_bf16mf2x6(const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg6ei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei64_v_bf16m1x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vloxseg6ei64_v_bf16m1x6(const __bf16 *rs1, vuint64m4_t rs2, + size_t vl) { + return __riscv_vloxseg6ei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei64_v_bf16mf4x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef 
[[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vloxseg6ei64_v_bf16mf4x6_m(vbool64_t vm, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg6ei64(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei64_v_bf16mf2x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vloxseg6ei64_v_bf16mf2x6_m(vbool32_t vm, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg6ei64(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei64_v_bf16m1x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vloxseg6ei64_v_bf16m1x6_m(vbool16_t vm, const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { 
+ return __riscv_vloxseg6ei64(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg6ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg6ei8.c new file mode 100644 index 0000000000000..4684ebad695a1 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg6ei8.c @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei8_v_bf16mf4x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vloxseg6ei8_v_bf16mf4x6(const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg6ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei8_v_bf16mf2x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret 
target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vloxseg6ei8_v_bf16mf2x6(const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg6ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei8_v_bf16m1x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vloxseg6ei8_v_bf16m1x6(const __bf16 *rs1, vuint8mf2_t rs2, + size_t vl) { + return __riscv_vloxseg6ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei8_v_bf16mf4x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vloxseg6ei8_v_bf16mf4x6_m(vbool64_t vm, const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg6ei8(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei8_v_bf16mf2x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) 
@llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vloxseg6ei8_v_bf16mf2x6_m(vbool32_t vm, const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg6ei8(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei8_v_bf16m1x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vloxseg6ei8_v_bf16m1x6_m(vbool16_t vm, const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg6ei8(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg7ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg7ei32.c new file mode 100644 index 0000000000000..d9e2e9b41fed2 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg7ei32.c @@ -0,0 +1,76 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) 
@test_vloxseg7ei32_v_bf16mf4x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vloxseg7ei32_v_bf16mf4x7(const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg7ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei32_v_bf16mf2x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vloxseg7ei32_v_bf16mf2x7(const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg7ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei32_v_bf16m1x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vloxseg7ei32_v_bf16m1x7(const __bf16 *rs1, vuint32m2_t rs2, + size_t vl) { + return __riscv_vloxseg7ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local 
target("riscv.vector.tuple", , 7) @test_vloxseg7ei32_v_bf16mf4x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vloxseg7ei32_v_bf16mf4x7_m(vbool64_t vm, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg7ei32(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei32_v_bf16mf2x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vloxseg7ei32_v_bf16mf2x7_m(vbool32_t vm, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg7ei32(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei32_v_bf16m1x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] 
+// +vbfloat16m1x7_t test_vloxseg7ei32_v_bf16m1x7_m(vbool16_t vm, const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg7ei32(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg7ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg7ei64.c new file mode 100644 index 0000000000000..461eebb092c54 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg7ei64.c @@ -0,0 +1,76 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei64_v_bf16mf4x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vloxseg7ei64_v_bf16mf4x7(const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg7ei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei64_v_bf16mf2x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) 
@llvm.riscv.vloxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vloxseg7ei64_v_bf16mf2x7(const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg7ei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei64_v_bf16m1x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vloxseg7ei64_v_bf16m1x7(const __bf16 *rs1, vuint64m4_t rs2, + size_t vl) { + return __riscv_vloxseg7ei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei64_v_bf16mf4x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vloxseg7ei64_v_bf16mf4x7_m(vbool64_t vm, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg7ei64(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei64_v_bf16mf2x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// 
CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vloxseg7ei64_v_bf16mf2x7_m(vbool32_t vm, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg7ei64(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei64_v_bf16m1x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vloxseg7ei64_v_bf16m1x7_m(vbool16_t vm, const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg7ei64(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg7ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg7ei8.c new file mode 100644 index 0000000000000..71da2628c9fd6 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg7ei8.c @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck 
--check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei8_v_bf16mf4x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vloxseg7ei8_v_bf16mf4x7(const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg7ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei8_v_bf16mf2x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vloxseg7ei8_v_bf16mf2x7(const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg7ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei8_v_bf16m1x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vloxseg7ei8_v_bf16m1x7(const __bf16 *rs1, vuint8mf2_t rs2, + 
size_t vl) { + return __riscv_vloxseg7ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei8_v_bf16mf4x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vloxseg7ei8_v_bf16mf4x7_m(vbool64_t vm, const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg7ei8(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei8_v_bf16mf2x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vloxseg7ei8_v_bf16mf2x7_m(vbool32_t vm, const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg7ei8(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei8_v_bf16m1x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 
[[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vloxseg7ei8_v_bf16m1x7_m(vbool16_t vm, const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg7ei8(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg8ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg8ei32.c new file mode 100644 index 0000000000000..7848efaecaec1 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg8ei32.c @@ -0,0 +1,76 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei32_v_bf16mf4x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vloxseg8ei32_v_bf16mf4x8(const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg8ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei32_v_bf16mf2x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vloxseg8ei32_v_bf16mf2x8(const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg8ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei32_v_bf16m1x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vloxseg8ei32_v_bf16m1x8(const __bf16 *rs1, vuint32m2_t rs2, + size_t vl) { + return __riscv_vloxseg8ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei32_v_bf16mf4x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vloxseg8ei32_v_bf16mf4x8_m(vbool64_t vm, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg8ei32(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei32_v_bf16mf2x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 
noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vloxseg8ei32_v_bf16mf2x8_m(vbool32_t vm, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg8ei32(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei32_v_bf16m1x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vloxseg8ei32_v_bf16m1x8_m(vbool16_t vm, const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg8ei32(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg8ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg8ei64.c new file mode 100644 index 0000000000000..4e3e388d260b8 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg8ei64.c @@ -0,0 +1,76 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S 
-passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei64_v_bf16mf4x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vloxseg8ei64_v_bf16mf4x8(const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg8ei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei64_v_bf16mf2x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vloxseg8ei64_v_bf16mf2x8(const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg8ei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei64_v_bf16m1x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t 
test_vloxseg8ei64_v_bf16m1x8(const __bf16 *rs1, vuint64m4_t rs2, + size_t vl) { + return __riscv_vloxseg8ei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei64_v_bf16mf4x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vloxseg8ei64_v_bf16mf4x8_m(vbool64_t vm, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg8ei64(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei64_v_bf16mf2x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vloxseg8ei64_v_bf16mf2x8_m(vbool32_t vm, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg8ei64(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei64_v_bf16m1x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) 
@llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vloxseg8ei64_v_bf16m1x8_m(vbool16_t vm, const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg8ei64(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg8ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg8ei8.c new file mode 100644 index 0000000000000..399a03afa0c0e --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vloxseg8ei8.c @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei8_v_bf16mf4x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vloxseg8ei8_v_bf16mf4x8(const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg8ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei8_v_bf16mf2x8( +// CHECK-RV64-SAME: 
ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vloxseg8ei8_v_bf16mf2x8(const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg8ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei8_v_bf16m1x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vloxseg8ei8_v_bf16m1x8(const __bf16 *rs1, vuint8mf2_t rs2, + size_t vl) { + return __riscv_vloxseg8ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei8_v_bf16mf4x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vloxseg8ei8_v_bf16mf4x8_m(vbool64_t vm, const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg8ei8(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local 
target("riscv.vector.tuple", , 8) @test_vloxseg8ei8_v_bf16mf2x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vloxseg8ei8_v_bf16mf2x8_m(vbool32_t vm, const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg8ei8(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei8_v_bf16m1x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vloxseg8ei8_v_bf16m1x8_m(vbool16_t vm, const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg8ei8(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxei32.c new file mode 100644 index 0000000000000..20bcca1a3fd38 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxei32.c @@ -0,0 +1,118 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 
-target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16mf4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv1bf16.p0.nxv1i32.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vluxei32_v_bf16mf4(const __bf16 *rs1, vuint32mf2_t rs2, + size_t vl) { + return __riscv_vluxei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16mf2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv2bf16.p0.nxv2i32.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vluxei32_v_bf16mf2(const __bf16 *rs1, vuint32m1_t rs2, + size_t vl) { + return __riscv_vluxei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16m1( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv4bf16.p0.nxv4i32.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vluxei32_v_bf16m1(const __bf16 *rs1, vuint32m2_t rs2, + size_t vl) { + return __riscv_vluxei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16m2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv8bf16.p0.nxv8i32.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// 
CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vluxei32_v_bf16m2(const __bf16 *rs1, vuint32m4_t rs2, + size_t vl) { + return __riscv_vluxei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16m4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv16bf16.p0.nxv16i32.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m4_t test_vluxei32_v_bf16m4(const __bf16 *rs1, vuint32m8_t rs2, + size_t vl) { + return __riscv_vluxei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16mf4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv1bf16.p0.nxv1i32.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vluxei32_v_bf16mf4_m(vbool64_t vm, const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxei32(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16mf2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv2bf16.p0.nxv2i32.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vluxei32_v_bf16mf2_m(vbool32_t vm, const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxei32(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16m1_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
@llvm.riscv.vluxei.mask.nxv4bf16.p0.nxv4i32.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vluxei32_v_bf16m1_m(vbool16_t vm, const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vluxei32(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16m2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv8bf16.p0.nxv8i32.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vluxei32_v_bf16m2_m(vbool8_t vm, const __bf16 *rs1, + vuint32m4_t rs2, size_t vl) { + return __riscv_vluxei32(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16m4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv16bf16.p0.nxv16i32.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m4_t test_vluxei32_v_bf16m4_m(vbool4_t vm, const __bf16 *rs1, + vuint32m8_t rs2, size_t vl) { + return __riscv_vluxei32(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxei64.c new file mode 100644 index 0000000000000..62012fd029307 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxei64.c @@ -0,0 +1,96 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: 
-target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local @test_vluxei64_v_bf16mf4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv1bf16.p0.nxv1i64.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vluxei64_v_bf16mf4(const __bf16 *rs1, vuint64m1_t rs2, + size_t vl) { + return __riscv_vluxei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei64_v_bf16mf2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv2bf16.p0.nxv2i64.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vluxei64_v_bf16mf2(const __bf16 *rs1, vuint64m2_t rs2, + size_t vl) { + return __riscv_vluxei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei64_v_bf16m1( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv4bf16.p0.nxv4i64.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vluxei64_v_bf16m1(const __bf16 *rs1, vuint64m4_t rs2, + size_t vl) { + return __riscv_vluxei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei64_v_bf16m2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv8bf16.p0.nxv8i64.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// 
+vbfloat16m2_t test_vluxei64_v_bf16m2(const __bf16 *rs1, vuint64m8_t rs2, + size_t vl) { + return __riscv_vluxei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei64_v_bf16mf4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv1bf16.p0.nxv1i64.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vluxei64_v_bf16mf4_m(vbool64_t vm, const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vluxei64(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei64_v_bf16mf2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv2bf16.p0.nxv2i64.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vluxei64_v_bf16mf2_m(vbool32_t vm, const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vluxei64(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei64_v_bf16m1_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv4bf16.p0.nxv4i64.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vluxei64_v_bf16m1_m(vbool16_t vm, const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vluxei64(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei64_v_bf16m2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: 
[[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv8bf16.p0.nxv8i64.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vluxei64_v_bf16m2_m(vbool8_t vm, const __bf16 *rs1, + vuint64m8_t rs2, size_t vl) { + return __riscv_vluxei64(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxei8.c new file mode 100644 index 0000000000000..c4bf30d7319c5 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxei8.c @@ -0,0 +1,140 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16mf4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv1bf16.p0.nxv1i8.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vluxei8_v_bf16mf4(const __bf16 *rs1, vuint8mf8_t rs2, + size_t vl) { + return __riscv_vluxei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16mf2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv2bf16.p0.nxv2i8.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vluxei8_v_bf16mf2(const __bf16 *rs1, 
vuint8mf4_t rs2, + size_t vl) { + return __riscv_vluxei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16m1( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv4bf16.p0.nxv4i8.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vluxei8_v_bf16m1(const __bf16 *rs1, vuint8mf2_t rs2, + size_t vl) { + return __riscv_vluxei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16m2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv8bf16.p0.nxv8i8.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vluxei8_v_bf16m2(const __bf16 *rs1, vuint8m1_t rs2, + size_t vl) { + return __riscv_vluxei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16m4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv16bf16.p0.nxv16i8.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m4_t test_vluxei8_v_bf16m4(const __bf16 *rs1, vuint8m2_t rs2, + size_t vl) { + return __riscv_vluxei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16m8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv32bf16.p0.nxv32i8.i64( poison, ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m8_t test_vluxei8_v_bf16m8(const __bf16 *rs1, vuint8m4_t rs2, + size_t vl) { + return __riscv_vluxei8(rs1, 
rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16mf4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv1bf16.p0.nxv1i8.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vluxei8_v_bf16mf4_m(vbool64_t vm, const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxei8(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16mf2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv2bf16.p0.nxv2i8.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vluxei8_v_bf16mf2_m(vbool32_t vm, const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxei8(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16m1_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv4bf16.p0.nxv4i8.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vluxei8_v_bf16m1_m(vbool16_t vm, const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxei8(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16m2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv8bf16.p0.nxv8i8.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// 
CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vluxei8_v_bf16m2_m(vbool8_t vm, const __bf16 *rs1, + vuint8m1_t rs2, size_t vl) { + return __riscv_vluxei8(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16m4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv16bf16.p0.nxv16i8.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m4_t test_vluxei8_v_bf16m4_m(vbool4_t vm, const __bf16 *rs1, + vuint8m2_t rs2, size_t vl) { + return __riscv_vluxei8(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16m8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv32bf16.p0.nxv32i8.i64( poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m8_t test_vluxei8_v_bf16m8_m(vbool2_t vm, const __bf16 *rs1, + vuint8m4_t rs2, size_t vl) { + return __riscv_vluxei8(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg2ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg2ei32.c new file mode 100644 index 0000000000000..85934361676dc --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg2ei32.c @@ -0,0 +1,120 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: 
FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16mf4x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vluxseg2ei32_v_bf16mf4x2(const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg2ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16mf2x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vluxseg2ei32_v_bf16mf2x2(const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16m1x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vluxseg2ei32_v_bf16m1x2(const __bf16 *rs1, 
vuint32m2_t rs2, + size_t vl) { + return __riscv_vluxseg2ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16m2x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i32.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vluxseg2ei32_v_bf16m2x2(const __bf16 *rs1, vuint32m4_t rs2, + size_t vl) { + return __riscv_vluxseg2ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16m4x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i32.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m4x2_t test_vluxseg2ei32_v_bf16m4x2(const __bf16 *rs1, vuint32m8_t rs2, + size_t vl) { + return __riscv_vluxseg2ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16mf4x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// 
+vbfloat16mf4x2_t test_vluxseg2ei32_v_bf16mf4x2_m(vbool64_t vm, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg2ei32(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16mf2x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vluxseg2ei32_v_bf16mf2x2_m(vbool32_t vm, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei32(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16m1x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vluxseg2ei32_v_bf16m1x2_m(vbool16_t vm, const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei32(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16m2x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) 
@llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vluxseg2ei32_v_bf16m2x2_m(vbool8_t vm, const __bf16 *rs1, + vuint32m4_t rs2, size_t vl) { + return __riscv_vluxseg2ei32(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16m4x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i32.nxv16i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m4x2_t test_vluxseg2ei32_v_bf16m4x2_m(vbool4_t vm, const __bf16 *rs1, + vuint32m8_t rs2, size_t vl) { + return __riscv_vluxseg2ei32(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg2ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg2ei64.c new file mode 100644 index 0000000000000..7def329db7be3 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg2ei64.c @@ -0,0 +1,98 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", 
, 2) @test_vluxseg2ei64_v_bf16mf4x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vluxseg2ei64_v_bf16mf4x2(const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei64_v_bf16mf2x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vluxseg2ei64_v_bf16mf2x2(const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei64_v_bf16m1x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vluxseg2ei64_v_bf16m1x2(const __bf16 *rs1, vuint64m4_t rs2, + size_t vl) { + return __riscv_vluxseg2ei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local 
target("riscv.vector.tuple", , 2) @test_vluxseg2ei64_v_bf16m2x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i64.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vluxseg2ei64_v_bf16m2x2(const __bf16 *rs1, vuint64m8_t rs2, + size_t vl) { + return __riscv_vluxseg2ei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei64_v_bf16mf4x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vluxseg2ei64_v_bf16mf4x2_m(vbool64_t vm, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei64(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei64_v_bf16mf2x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vluxseg2ei64_v_bf16mf2x2_m(vbool32_t vm, 
+ const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei64(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei64_v_bf16m1x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vluxseg2ei64_v_bf16m1x2_m(vbool16_t vm, const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg2ei64(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei64_v_bf16m2x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vluxseg2ei64_v_bf16m2x2_m(vbool8_t vm, const __bf16 *rs1, + vuint64m8_t rs2, size_t vl) { + return __riscv_vluxseg2ei64(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg2ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg2ei8.c new file mode 100644 index 0000000000000..bc6f1ffab972c --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg2ei8.c @@ -0,0 +1,118 @@ +// NOTE: Assertions 
have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16mf4x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vluxseg2ei8_v_bf16mf4x2(const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg2ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16mf2x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vluxseg2ei8_v_bf16mf2x2(const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg2ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16m1x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", 
, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vluxseg2ei8_v_bf16m1x2(const __bf16 *rs1, vuint8mf2_t rs2, + size_t vl) { + return __riscv_vluxseg2ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16m2x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i8.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vluxseg2ei8_v_bf16m2x2(const __bf16 *rs1, vuint8m1_t rs2, + size_t vl) { + return __riscv_vluxseg2ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16m4x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i8.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m4x2_t test_vluxseg2ei8_v_bf16m4x2(const __bf16 *rs1, vuint8m2_t rs2, + size_t vl) { + return __riscv_vluxseg2ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16mf4x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vluxseg2ei8_v_bf16mf4x2_m(vbool64_t vm, const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg2ei8(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16mf2x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vluxseg2ei8_v_bf16mf2x2_m(vbool32_t vm, const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg2ei8(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16m1x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vluxseg2ei8_v_bf16m1x2_m(vbool16_t vm, const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg2ei8(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) 
@test_vluxseg2ei8_v_bf16m2x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vluxseg2ei8_v_bf16m2x2_m(vbool8_t vm, const __bf16 *rs1, + vuint8m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei8(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16m4x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i8.nxv16i1.i64(target("riscv.vector.tuple", , 2) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m4x2_t test_vluxseg2ei8_v_bf16m4x2_m(vbool4_t vm, const __bf16 *rs1, + vuint8m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei8(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg3ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg3ei32.c new file mode 100644 index 0000000000000..8a700b75b4299 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg3ei32.c @@ -0,0 +1,98 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: 
-target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei32_v_bf16mf4x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vluxseg3ei32_v_bf16mf4x3(const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg3ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei32_v_bf16mf2x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vluxseg3ei32_v_bf16mf2x3(const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei32_v_bf16m1x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret 
target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vluxseg3ei32_v_bf16m1x3(const __bf16 *rs1, vuint32m2_t rs2, + size_t vl) { + return __riscv_vluxseg3ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei32_v_bf16m2x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i32.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vluxseg3ei32_v_bf16m2x3(const __bf16 *rs1, vuint32m4_t rs2, + size_t vl) { + return __riscv_vluxseg3ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei32_v_bf16mf4x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vluxseg3ei32_v_bf16mf4x3_m(vbool64_t vm, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg3ei32(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei32_v_bf16mf2x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) 
@llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vluxseg3ei32_v_bf16mf2x3_m(vbool32_t vm, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei32(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei32_v_bf16m1x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vluxseg3ei32_v_bf16m1x3_m(vbool16_t vm, const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg3ei32(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei32_v_bf16m2x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vluxseg3ei32_v_bf16m2x3_m(vbool8_t vm, const __bf16 *rs1, + vuint32m4_t rs2, size_t vl) { + return __riscv_vluxseg3ei32(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg3ei64.c 
b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg3ei64.c new file mode 100644 index 0000000000000..d1d8943310e0b --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg3ei64.c @@ -0,0 +1,98 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei64_v_bf16mf4x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vluxseg3ei64_v_bf16mf4x3(const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei64_v_bf16mf2x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vluxseg3ei64_v_bf16mf2x3(const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return 
__riscv_vluxseg3ei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei64_v_bf16m1x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vluxseg3ei64_v_bf16m1x3(const __bf16 *rs1, vuint64m4_t rs2, + size_t vl) { + return __riscv_vluxseg3ei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei64_v_bf16m2x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i64.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vluxseg3ei64_v_bf16m2x3(const __bf16 *rs1, vuint64m8_t rs2, + size_t vl) { + return __riscv_vluxseg3ei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei64_v_bf16mf4x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t 
test_vluxseg3ei64_v_bf16mf4x3_m(vbool64_t vm, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei64(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei64_v_bf16mf2x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vluxseg3ei64_v_bf16mf2x3_m(vbool32_t vm, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg3ei64(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei64_v_bf16m1x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vluxseg3ei64_v_bf16m1x3_m(vbool16_t vm, const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg3ei64(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei64_v_bf16m2x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) 
@llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vluxseg3ei64_v_bf16m2x3_m(vbool8_t vm, const __bf16 *rs1, + vuint64m8_t rs2, size_t vl) { + return __riscv_vluxseg3ei64(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg3ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg3ei8.c new file mode 100644 index 0000000000000..4952e0230a8ea --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg3ei8.c @@ -0,0 +1,96 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei8_v_bf16mf4x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vluxseg3ei8_v_bf16mf4x3(const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg3ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei8_v_bf16mf2x3( +// CHECK-RV64-SAME: 
ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vluxseg3ei8_v_bf16mf2x3(const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg3ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei8_v_bf16m1x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vluxseg3ei8_v_bf16m1x3(const __bf16 *rs1, vuint8mf2_t rs2, + size_t vl) { + return __riscv_vluxseg3ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei8_v_bf16m2x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i8.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vluxseg3ei8_v_bf16m2x3(const __bf16 *rs1, vuint8m1_t rs2, + size_t vl) { + return __riscv_vluxseg3ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei8_v_bf16mf4x3_m( +// 
CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vluxseg3ei8_v_bf16mf4x3_m(vbool64_t vm, const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg3ei8(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei8_v_bf16mf2x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vluxseg3ei8_v_bf16mf2x3_m(vbool32_t vm, const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg3ei8(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei8_v_bf16m1x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vluxseg3ei8_v_bf16m1x3_m(vbool16_t vm, const __bf16 *rs1, + 
vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg3ei8(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei8_v_bf16m2x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 3) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vluxseg3ei8_v_bf16m2x3_m(vbool8_t vm, const __bf16 *rs1, + vuint8m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei8(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg4ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg4ei32.c new file mode 100644 index 0000000000000..d853b86e3dda8 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg4ei32.c @@ -0,0 +1,98 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei32_v_bf16mf4x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i32.i64(target("riscv.vector.tuple", 
, 4) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vluxseg4ei32_v_bf16mf4x4(const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg4ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei32_v_bf16mf2x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vluxseg4ei32_v_bf16mf2x4(const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg4ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei32_v_bf16m1x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vluxseg4ei32_v_bf16m1x4(const __bf16 *rs1, vuint32m2_t rs2, + size_t vl) { + return __riscv_vluxseg4ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei32_v_bf16m2x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) 
@llvm.riscv.vluxseg4.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i32.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vluxseg4ei32_v_bf16m2x4(const __bf16 *rs1, vuint32m4_t rs2, + size_t vl) { + return __riscv_vluxseg4ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei32_v_bf16mf4x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vluxseg4ei32_v_bf16mf4x4_m(vbool64_t vm, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg4ei32(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei32_v_bf16mf2x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vluxseg4ei32_v_bf16mf2x4_m(vbool32_t vm, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg4ei32(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei32_v_bf16m1x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef 
[[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vluxseg4ei32_v_bf16m1x4_m(vbool16_t vm, const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg4ei32(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei32_v_bf16m2x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vluxseg4ei32_v_bf16m2x4_m(vbool8_t vm, const __bf16 *rs1, + vuint32m4_t rs2, size_t vl) { + return __riscv_vluxseg4ei32(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg4ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg4ei64.c new file mode 100644 index 0000000000000..97dbdd705fc9f --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg4ei64.c @@ -0,0 +1,98 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s 
-o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei64_v_bf16mf4x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vluxseg4ei64_v_bf16mf4x4(const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg4ei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei64_v_bf16mf2x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vluxseg4ei64_v_bf16mf2x4(const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg4ei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei64_v_bf16m1x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t 
test_vluxseg4ei64_v_bf16m1x4(const __bf16 *rs1, vuint64m4_t rs2, + size_t vl) { + return __riscv_vluxseg4ei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei64_v_bf16m2x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i64.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vluxseg4ei64_v_bf16m2x4(const __bf16 *rs1, vuint64m8_t rs2, + size_t vl) { + return __riscv_vluxseg4ei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei64_v_bf16mf4x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vluxseg4ei64_v_bf16mf4x4_m(vbool64_t vm, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg4ei64(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei64_v_bf16mf2x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], 
[[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vluxseg4ei64_v_bf16mf2x4_m(vbool32_t vm, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg4ei64(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei64_v_bf16m1x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vluxseg4ei64_v_bf16m1x4_m(vbool16_t vm, const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg4ei64(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei64_v_bf16m2x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vluxseg4ei64_v_bf16m2x4_m(vbool8_t vm, const __bf16 *rs1, + vuint64m8_t rs2, size_t vl) { + return __riscv_vluxseg4ei64(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg4ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg4ei8.c new file mode 100644 index 
0000000000000..db435cf18daa4 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg4ei8.c @@ -0,0 +1,96 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei8_v_bf16mf4x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vluxseg4ei8_v_bf16mf4x4(const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg4ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei8_v_bf16mf2x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vluxseg4ei8_v_bf16mf2x4(const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg4ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei8_v_bf16m1x4( +// 
CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vluxseg4ei8_v_bf16m1x4(const __bf16 *rs1, vuint8mf2_t rs2, + size_t vl) { + return __riscv_vluxseg4ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei8_v_bf16m2x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i8.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vluxseg4ei8_v_bf16m2x4(const __bf16 *rs1, vuint8m1_t rs2, + size_t vl) { + return __riscv_vluxseg4ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei8_v_bf16mf4x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vluxseg4ei8_v_bf16mf4x4_m(vbool64_t vm, const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg4ei8(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local 
target("riscv.vector.tuple", , 4) @test_vluxseg4ei8_v_bf16mf2x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vluxseg4ei8_v_bf16mf2x4_m(vbool32_t vm, const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg4ei8(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei8_v_bf16m1x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vluxseg4ei8_v_bf16m1x4_m(vbool16_t vm, const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg4ei8(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei8_v_bf16m2x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 4) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// 
+vbfloat16m2x4_t test_vluxseg4ei8_v_bf16m2x4_m(vbool8_t vm, const __bf16 *rs1, + vuint8m1_t rs2, size_t vl) { + return __riscv_vluxseg4ei8(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg5ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg5ei32.c new file mode 100644 index 0000000000000..3719e841581d1 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg5ei32.c @@ -0,0 +1,76 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei32_v_bf16mf4x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vluxseg5ei32_v_bf16mf4x5(const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg5ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei32_v_bf16mf2x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) 
@llvm.riscv.vluxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vluxseg5ei32_v_bf16mf2x5(const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg5ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei32_v_bf16m1x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vluxseg5ei32_v_bf16m1x5(const __bf16 *rs1, vuint32m2_t rs2, + size_t vl) { + return __riscv_vluxseg5ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei32_v_bf16mf4x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vluxseg5ei32_v_bf16mf4x5_m(vbool64_t vm, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg5ei32(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei32_v_bf16mf2x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// 
CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vluxseg5ei32_v_bf16mf2x5_m(vbool32_t vm, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg5ei32(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei32_v_bf16m1x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vluxseg5ei32_v_bf16m1x5_m(vbool16_t vm, const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg5ei32(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg5ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg5ei64.c new file mode 100644 index 0000000000000..66743a528fd52 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg5ei64.c @@ -0,0 +1,76 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck 
--check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei64_v_bf16mf4x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vluxseg5ei64_v_bf16mf4x5(const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg5ei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei64_v_bf16mf2x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vluxseg5ei64_v_bf16mf2x5(const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg5ei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei64_v_bf16m1x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vluxseg5ei64_v_bf16m1x5(const __bf16 *rs1, vuint64m4_t 
rs2, + size_t vl) { + return __riscv_vluxseg5ei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei64_v_bf16mf4x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vluxseg5ei64_v_bf16mf4x5_m(vbool64_t vm, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg5ei64(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei64_v_bf16mf2x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vluxseg5ei64_v_bf16mf2x5_m(vbool32_t vm, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg5ei64(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei64_v_bf16m1x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], 
[[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vluxseg5ei64_v_bf16m1x5_m(vbool16_t vm, const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg5ei64(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg5ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg5ei8.c new file mode 100644 index 0000000000000..f97f5b7597170 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg5ei8.c @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei8_v_bf16mf4x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vluxseg5ei8_v_bf16mf4x5(const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg5ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei8_v_bf16mf2x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: 
[[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vluxseg5ei8_v_bf16mf2x5(const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg5ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei8_v_bf16m1x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vluxseg5ei8_v_bf16m1x5(const __bf16 *rs1, vuint8mf2_t rs2, + size_t vl) { + return __riscv_vluxseg5ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei8_v_bf16mf4x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vluxseg5ei8_v_bf16mf4x5_m(vbool64_t vm, const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg5ei8(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei8_v_bf16mf2x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 
noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vluxseg5ei8_v_bf16mf2x5_m(vbool32_t vm, const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg5ei8(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei8_v_bf16m1x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 5) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vluxseg5ei8_v_bf16m1x5_m(vbool16_t vm, const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg5ei8(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg6ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg6ei32.c new file mode 100644 index 0000000000000..6d760e51245c1 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg6ei32.c @@ -0,0 +1,76 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ 
+// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei32_v_bf16mf4x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vluxseg6ei32_v_bf16mf4x6(const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg6ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei32_v_bf16mf2x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vluxseg6ei32_v_bf16mf2x6(const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg6ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei32_v_bf16m1x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vluxseg6ei32_v_bf16m1x6(const 
__bf16 *rs1, vuint32m2_t rs2, + size_t vl) { + return __riscv_vluxseg6ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei32_v_bf16mf4x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vluxseg6ei32_v_bf16mf4x6_m(vbool64_t vm, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg6ei32(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei32_v_bf16mf2x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vluxseg6ei32_v_bf16mf2x6_m(vbool32_t vm, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg6ei32(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei32_v_bf16m1x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 6) 
poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vluxseg6ei32_v_bf16m1x6_m(vbool16_t vm, const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg6ei32(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg6ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg6ei64.c new file mode 100644 index 0000000000000..3b2ffacfcda62 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg6ei64.c @@ -0,0 +1,76 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei64_v_bf16mf4x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vluxseg6ei64_v_bf16mf4x6(const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg6ei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei64_v_bf16mf2x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// 
CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vluxseg6ei64_v_bf16mf2x6(const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg6ei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei64_v_bf16m1x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vluxseg6ei64_v_bf16m1x6(const __bf16 *rs1, vuint64m4_t rs2, + size_t vl) { + return __riscv_vluxseg6ei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei64_v_bf16mf4x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vluxseg6ei64_v_bf16mf4x6_m(vbool64_t vm, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg6ei64(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei64_v_bf16mf2x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr 
noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vluxseg6ei64_v_bf16mf2x6_m(vbool32_t vm, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg6ei64(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei64_v_bf16m1x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vluxseg6ei64_v_bf16m1x6_m(vbool16_t vm, const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg6ei64(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg6ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg6ei8.c new file mode 100644 index 0000000000000..1fef58ed84851 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg6ei8.c @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: 
-emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei8_v_bf16mf4x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vluxseg6ei8_v_bf16mf4x6(const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg6ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei8_v_bf16mf2x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vluxseg6ei8_v_bf16mf2x6(const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg6ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei8_v_bf16m1x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t 
test_vluxseg6ei8_v_bf16m1x6(const __bf16 *rs1, vuint8mf2_t rs2, + size_t vl) { + return __riscv_vluxseg6ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei8_v_bf16mf4x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vluxseg6ei8_v_bf16mf4x6_m(vbool64_t vm, const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg6ei8(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei8_v_bf16mf2x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vluxseg6ei8_v_bf16mf2x6_m(vbool32_t vm, const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg6ei8(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei8_v_bf16m1x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) 
@llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 6) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vluxseg6ei8_v_bf16m1x6_m(vbool16_t vm, const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg6ei8(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg7ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg7ei32.c new file mode 100644 index 0000000000000..14d7ca9f34800 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg7ei32.c @@ -0,0 +1,76 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei32_v_bf16mf4x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vluxseg7ei32_v_bf16mf4x7(const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg7ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei32_v_bf16mf2x7( +// 
CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vluxseg7ei32_v_bf16mf2x7(const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg7ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei32_v_bf16m1x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vluxseg7ei32_v_bf16m1x7(const __bf16 *rs1, vuint32m2_t rs2, + size_t vl) { + return __riscv_vluxseg7ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei32_v_bf16mf4x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vluxseg7ei32_v_bf16mf4x7_m(vbool64_t vm, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg7ei32(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: 
define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei32_v_bf16mf2x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vluxseg7ei32_v_bf16mf2x7_m(vbool32_t vm, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg7ei32(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei32_v_bf16m1x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vluxseg7ei32_v_bf16m1x7_m(vbool16_t vm, const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg7ei32(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg7ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg7ei64.c new file mode 100644 index 0000000000000..50d06a0f4b856 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg7ei64.c @@ -0,0 +1,76 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// 
RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei64_v_bf16mf4x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vluxseg7ei64_v_bf16mf4x7(const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg7ei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei64_v_bf16mf2x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vluxseg7ei64_v_bf16mf2x7(const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg7ei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei64_v_bf16m1x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 7) poison, 
ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vluxseg7ei64_v_bf16m1x7(const __bf16 *rs1, vuint64m4_t rs2, + size_t vl) { + return __riscv_vluxseg7ei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei64_v_bf16mf4x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vluxseg7ei64_v_bf16mf4x7_m(vbool64_t vm, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg7ei64(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei64_v_bf16mf2x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vluxseg7ei64_v_bf16mf2x7_m(vbool32_t vm, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg7ei64(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei64_v_bf16m1x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: 
[[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vluxseg7ei64_v_bf16m1x7_m(vbool16_t vm, const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg7ei64(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg7ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg7ei8.c new file mode 100644 index 0000000000000..cdd05faeeded6 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg7ei8.c @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei8_v_bf16mf4x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vluxseg7ei8_v_bf16mf4x7(const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg7ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) 
@test_vluxseg7ei8_v_bf16mf2x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vluxseg7ei8_v_bf16mf2x7(const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg7ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei8_v_bf16m1x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vluxseg7ei8_v_bf16m1x7(const __bf16 *rs1, vuint8mf2_t rs2, + size_t vl) { + return __riscv_vluxseg7ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei8_v_bf16mf4x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vluxseg7ei8_v_bf16mf4x7_m(vbool64_t vm, const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg7ei8(vm, rs1, rs2, vl); +} + +// 
CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei8_v_bf16mf2x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vluxseg7ei8_v_bf16mf2x7_m(vbool32_t vm, const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg7ei8(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei8_v_bf16m1x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 7) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vluxseg7ei8_v_bf16m1x7_m(vbool16_t vm, const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg7ei8(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg8ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg8ei32.c new file mode 100644 index 0000000000000..8929f3348ba59 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg8ei32.c @@ -0,0 +1,76 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target 
+// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei32_v_bf16mf4x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vluxseg8ei32_v_bf16mf4x8(const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg8ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei32_v_bf16mf2x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vluxseg8ei32_v_bf16mf2x8(const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg8ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei32_v_bf16m1x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 8) 
poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vluxseg8ei32_v_bf16m1x8(const __bf16 *rs1, vuint32m2_t rs2, + size_t vl) { + return __riscv_vluxseg8ei32(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei32_v_bf16mf4x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vluxseg8ei32_v_bf16mf4x8_m(vbool64_t vm, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg8ei32(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei32_v_bf16mf2x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vluxseg8ei32_v_bf16mf2x8_m(vbool32_t vm, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg8ei32(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei32_v_bf16m1x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// 
CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vluxseg8ei32_v_bf16m1x8_m(vbool16_t vm, const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg8ei32(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg8ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg8ei64.c new file mode 100644 index 0000000000000..ece3490dfcc37 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg8ei64.c @@ -0,0 +1,76 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei64_v_bf16mf4x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vluxseg8ei64_v_bf16mf4x8(const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg8ei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local 
target("riscv.vector.tuple", , 8) @test_vluxseg8ei64_v_bf16mf2x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vluxseg8ei64_v_bf16mf2x8(const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg8ei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei64_v_bf16m1x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vluxseg8ei64_v_bf16m1x8(const __bf16 *rs1, vuint64m4_t rs2, + size_t vl) { + return __riscv_vluxseg8ei64(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei64_v_bf16mf4x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vluxseg8ei64_v_bf16mf4x8_m(vbool64_t vm, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return 
__riscv_vluxseg8ei64(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei64_v_bf16mf2x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vluxseg8ei64_v_bf16mf2x8_m(vbool32_t vm, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg8ei64(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei64_v_bf16m1x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vluxseg8ei64_v_bf16m1x8_m(vbool16_t vm, const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg8ei64(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg8ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg8ei8.c new file mode 100644 index 0000000000000..9ee1ce5f28820 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vluxseg8ei8.c @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei8_v_bf16mf4x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vluxseg8ei8_v_bf16mf4x8(const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg8ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei8_v_bf16mf2x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vluxseg8ei8_v_bf16mf2x8(const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg8ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei8_v_bf16m1x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) 
@llvm.riscv.vluxseg8.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vluxseg8ei8_v_bf16m1x8(const __bf16 *rs1, vuint8mf2_t rs2, + size_t vl) { + return __riscv_vluxseg8ei8(rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei8_v_bf16mf4x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vluxseg8ei8_v_bf16mf4x8_m(vbool64_t vm, const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg8ei8(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei8_v_bf16mf2x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vluxseg8ei8_v_bf16mf2x8_m(vbool32_t vm, const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg8ei8(vm, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei8_v_bf16m1x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], 
[[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 8) poison, ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 3, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vluxseg8ei8_v_bf16m1x8_m(vbool16_t vm, const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg8ei8(vm, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxei32.c new file mode 100644 index 0000000000000..db440e9a49157 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxei32.c @@ -0,0 +1,118 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei32_v_bf16mf4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.nxv1bf16.p0.nxv1i32.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei32_v_bf16mf4(__bf16 *rs1, vuint32mf2_t rs2, vbfloat16mf4_t vs3, + size_t vl) { + return __riscv_vsoxei32(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei32_v_bf16mf2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 
noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.nxv2bf16.p0.nxv2i32.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei32_v_bf16mf2(__bf16 *rs1, vuint32m1_t rs2, vbfloat16mf2_t vs3, + size_t vl) { + return __riscv_vsoxei32(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei32_v_bf16m1( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.nxv4bf16.p0.nxv4i32.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei32_v_bf16m1(__bf16 *rs1, vuint32m2_t rs2, vbfloat16m1_t vs3, + size_t vl) { + return __riscv_vsoxei32(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei32_v_bf16m2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.nxv8bf16.p0.nxv8i32.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei32_v_bf16m2(__bf16 *rs1, vuint32m4_t rs2, vbfloat16m2_t vs3, + size_t vl) { + return __riscv_vsoxei32(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei32_v_bf16m4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.nxv16bf16.p0.nxv16i32.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei32_v_bf16m4(__bf16 *rs1, vuint32m8_t rs2, vbfloat16m4_t vs3, + size_t vl) { + return __riscv_vsoxei32(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei32_v_bf16mf4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], 
[[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.mask.nxv1bf16.p0.nxv1i32.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei32_v_bf16mf4_m(vbool64_t vm, __bf16 *rs1, vuint32mf2_t rs2, + vbfloat16mf4_t vs3, size_t vl) { + return __riscv_vsoxei32(vm, rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei32_v_bf16mf2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.mask.nxv2bf16.p0.nxv2i32.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei32_v_bf16mf2_m(vbool32_t vm, __bf16 *rs1, vuint32m1_t rs2, + vbfloat16mf2_t vs3, size_t vl) { + return __riscv_vsoxei32(vm, rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei32_v_bf16m1_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.mask.nxv4bf16.p0.nxv4i32.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei32_v_bf16m1_m(vbool16_t vm, __bf16 *rs1, vuint32m2_t rs2, + vbfloat16m1_t vs3, size_t vl) { + return __riscv_vsoxei32(vm, rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei32_v_bf16m2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.mask.nxv8bf16.p0.nxv8i32.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei32_v_bf16m2_m(vbool8_t vm, __bf16 *rs1, vuint32m4_t rs2, + vbfloat16m2_t 
vs3, size_t vl) { + return __riscv_vsoxei32(vm, rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei32_v_bf16m4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.mask.nxv16bf16.p0.nxv16i32.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei32_v_bf16m4_m(vbool4_t vm, __bf16 *rs1, vuint32m8_t rs2, + vbfloat16m4_t vs3, size_t vl) { + return __riscv_vsoxei32(vm, rs1, rs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxei64.c new file mode 100644 index 0000000000000..a9c28dd6af613 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxei64.c @@ -0,0 +1,96 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei64_v_bf16mf4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.nxv1bf16.p0.nxv1i64.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei64_v_bf16mf4(__bf16 *rs1, vuint64m1_t rs2, vbfloat16mf4_t vs3, + size_t vl) { + return __riscv_vsoxei64(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei64_v_bf16mf2( +// CHECK-RV64-SAME: 
ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.nxv2bf16.p0.nxv2i64.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei64_v_bf16mf2(__bf16 *rs1, vuint64m2_t rs2, vbfloat16mf2_t vs3, + size_t vl) { + return __riscv_vsoxei64(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei64_v_bf16m1( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.nxv4bf16.p0.nxv4i64.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei64_v_bf16m1(__bf16 *rs1, vuint64m4_t rs2, vbfloat16m1_t vs3, + size_t vl) { + return __riscv_vsoxei64(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei64_v_bf16m2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.nxv8bf16.p0.nxv8i64.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei64_v_bf16m2(__bf16 *rs1, vuint64m8_t rs2, vbfloat16m2_t vs3, + size_t vl) { + return __riscv_vsoxei64(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei64_v_bf16mf4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.mask.nxv1bf16.p0.nxv1i64.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei64_v_bf16mf4_m(vbool64_t vm, __bf16 *rs1, vuint64m1_t rs2, + vbfloat16mf4_t vs3, size_t vl) { + return __riscv_vsoxei64(vm, rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define 
dso_local void @test_vsoxei64_v_bf16mf2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.mask.nxv2bf16.p0.nxv2i64.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei64_v_bf16mf2_m(vbool32_t vm, __bf16 *rs1, vuint64m2_t rs2, + vbfloat16mf2_t vs3, size_t vl) { + return __riscv_vsoxei64(vm, rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei64_v_bf16m1_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.mask.nxv4bf16.p0.nxv4i64.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei64_v_bf16m1_m(vbool16_t vm, __bf16 *rs1, vuint64m4_t rs2, + vbfloat16m1_t vs3, size_t vl) { + return __riscv_vsoxei64(vm, rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei64_v_bf16m2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.mask.nxv8bf16.p0.nxv8i64.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei64_v_bf16m2_m(vbool8_t vm, __bf16 *rs1, vuint64m8_t rs2, + vbfloat16m2_t vs3, size_t vl) { + return __riscv_vsoxei64(vm, rs1, rs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxei8.c new file mode 100644 index 0000000000000..7106538c741d3 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxei8.c @@ 
-0,0 +1,140 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei8_v_bf16mf4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.nxv1bf16.p0.nxv1i8.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei8_v_bf16mf4(__bf16 *rs1, vuint8mf8_t rs2, vbfloat16mf4_t vs3, + size_t vl) { + return __riscv_vsoxei8(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei8_v_bf16mf2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.nxv2bf16.p0.nxv2i8.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei8_v_bf16mf2(__bf16 *rs1, vuint8mf4_t rs2, vbfloat16mf2_t vs3, + size_t vl) { + return __riscv_vsoxei8(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei8_v_bf16m1( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.nxv4bf16.p0.nxv4i8.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei8_v_bf16m1(__bf16 *rs1, vuint8mf2_t rs2, vbfloat16m1_t vs3, + size_t vl) { + return __riscv_vsoxei8(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei8_v_bf16m2( +// CHECK-RV64-SAME: ptr noundef 
[[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.nxv8bf16.p0.nxv8i8.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei8_v_bf16m2(__bf16 *rs1, vuint8m1_t rs2, vbfloat16m2_t vs3, + size_t vl) { + return __riscv_vsoxei8(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei8_v_bf16m4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.nxv16bf16.p0.nxv16i8.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei8_v_bf16m4(__bf16 *rs1, vuint8m2_t rs2, vbfloat16m4_t vs3, + size_t vl) { + return __riscv_vsoxei8(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei8_v_bf16m8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.nxv32bf16.p0.nxv32i8.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei8_v_bf16m8(__bf16 *rs1, vuint8m4_t rs2, vbfloat16m8_t vs3, + size_t vl) { + return __riscv_vsoxei8(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei8_v_bf16mf4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.mask.nxv1bf16.p0.nxv1i8.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei8_v_bf16mf4_m(vbool64_t vm, __bf16 *rs1, vuint8mf8_t rs2, + vbfloat16mf4_t vs3, size_t vl) { + return __riscv_vsoxei8(vm, rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void 
@test_vsoxei8_v_bf16mf2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.mask.nxv2bf16.p0.nxv2i8.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei8_v_bf16mf2_m(vbool32_t vm, __bf16 *rs1, vuint8mf4_t rs2, + vbfloat16mf2_t vs3, size_t vl) { + return __riscv_vsoxei8(vm, rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei8_v_bf16m1_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.mask.nxv4bf16.p0.nxv4i8.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei8_v_bf16m1_m(vbool16_t vm, __bf16 *rs1, vuint8mf2_t rs2, + vbfloat16m1_t vs3, size_t vl) { + return __riscv_vsoxei8(vm, rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei8_v_bf16m2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.mask.nxv8bf16.p0.nxv8i8.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei8_v_bf16m2_m(vbool8_t vm, __bf16 *rs1, vuint8m1_t rs2, + vbfloat16m2_t vs3, size_t vl) { + return __riscv_vsoxei8(vm, rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei8_v_bf16m4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.mask.nxv16bf16.p0.nxv16i8.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void 
test_vsoxei8_v_bf16m4_m(vbool4_t vm, __bf16 *rs1, vuint8m2_t rs2, + vbfloat16m4_t vs3, size_t vl) { + return __riscv_vsoxei8(vm, rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxei8_v_bf16m8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxei.mask.nxv32bf16.p0.nxv32i8.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxei8_v_bf16m8_m(vbool2_t vm, __bf16 *rs1, vuint8m4_t rs2, + vbfloat16m8_t vs3, size_t vl) { + return __riscv_vsoxei8(vm, rs1, rs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg2ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg2ei32.c new file mode 100644 index 0000000000000..cc3954a0eecf2 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg2ei32.c @@ -0,0 +1,119 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei32_v_bf16mf4x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei32_v_bf16mf4x2(__bf16 
*rs1, vuint32mf2_t vs2, + vbfloat16mf4x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei32_v_bf16mf2x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei32_v_bf16mf2x2(__bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei32_v_bf16m1x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei32_v_bf16m1x2(__bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei32_v_bf16m2x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i32.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei32_v_bf16m2x2(__bf16 *rs1, vuint32m4_t vs2, + vbfloat16m2x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define 
dso_local void @test_vsoxseg2ei32_v_bf16m4x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i32.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei32_v_bf16m4x2(__bf16 *rs1, vuint32m8_t vs2, + vbfloat16m4x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei32_v_bf16mf4x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei32_v_bf16mf4x2_m(vbool64_t vm, __bf16 *rs1, + vuint32mf2_t vs2, vbfloat16mf4x2_t vs3, + size_t vl) { + return __riscv_vsoxseg2ei32(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei32_v_bf16mf2x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei32_v_bf16mf2x2_m(vbool32_t vm, __bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei32(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void 
@test_vsoxseg2ei32_v_bf16m1x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei32_v_bf16m1x2_m(vbool16_t vm, __bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei32(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei32_v_bf16m2x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei32_v_bf16m2x2_m(vbool8_t vm, __bf16 *rs1, vuint32m4_t vs2, + vbfloat16m2x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei32(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei32_v_bf16m4x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.mask.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i32.nxv16i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei32_v_bf16m4x2_m(vbool4_t vm, __bf16 *rs1, vuint32m8_t vs2, + vbfloat16m4x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei32(vm, rs1, vs2, vs3, vl); +} diff --git 
a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg2ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg2ei64.c new file mode 100644 index 0000000000000..5b716beb149b7 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg2ei64.c @@ -0,0 +1,96 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei64_v_bf16mf4x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei64_v_bf16mf4x2(__bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei64(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei64_v_bf16mf2x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei64_v_bf16mf2x2(__bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x2_t vs3, size_t vl) { + return 
__riscv_vsoxseg2ei64(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei64_v_bf16m1x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei64_v_bf16m1x2(__bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei64(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei64_v_bf16m2x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i64.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei64_v_bf16m2x2(__bf16 *rs1, vuint64m8_t vs2, + vbfloat16m2x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei64(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei64_v_bf16mf4x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei64_v_bf16mf4x2_m(vbool64_t vm, __bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei64(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void 
@test_vsoxseg2ei64_v_bf16mf2x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei64_v_bf16mf2x2_m(vbool32_t vm, __bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei64(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei64_v_bf16m1x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei64_v_bf16m1x2_m(vbool16_t vm, __bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei64(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei64_v_bf16m2x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei64_v_bf16m2x2_m(vbool8_t vm, __bf16 *rs1, vuint64m8_t vs2, + vbfloat16m2x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei64(vm, rs1, vs2, vs3, vl); +} diff --git 
a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg2ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg2ei8.c new file mode 100644 index 0000000000000..c346040ae6da3 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg2ei8.c @@ -0,0 +1,118 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei8_v_bf16mf4x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei8_v_bf16mf4x2(__bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei8_v_bf16mf2x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei8_v_bf16mf2x2(__bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei8(rs1, vs2, 
vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei8_v_bf16m1x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei8_v_bf16m1x2(__bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei8_v_bf16m2x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i8.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei8_v_bf16m2x2(__bf16 *rs1, vuint8m1_t vs2, + vbfloat16m2x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei8_v_bf16m4x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i8.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei8_v_bf16m4x2(__bf16 *rs1, vuint8m2_t vs2, + vbfloat16m4x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei8_v_bf16mf4x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], 
[[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei8_v_bf16mf4x2_m(vbool64_t vm, __bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei8(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei8_v_bf16mf2x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei8_v_bf16mf2x2_m(vbool32_t vm, __bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei8(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei8_v_bf16m1x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei8_v_bf16m1x2_m(vbool16_t vm, __bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei8(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei8_v_bf16m2x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef 
[[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei8_v_bf16m2x2_m(vbool8_t vm, __bf16 *rs1, vuint8m1_t vs2, + vbfloat16m2x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei8(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei8_v_bf16m4x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.mask.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i8.nxv16i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei8_v_bf16m4x2_m(vbool4_t vm, __bf16 *rs1, vuint8m2_t vs2, + vbfloat16m4x2_t vs3, size_t vl) { + return __riscv_vsoxseg2ei8(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg3ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg3ei32.c new file mode 100644 index 0000000000000..c283b82c2f9f9 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg3ei32.c @@ -0,0 +1,97 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// 
CHECK-RV64-LABEL: define dso_local void @test_vsoxseg3ei32_v_bf16mf4x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg3.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg3ei32_v_bf16mf4x3(__bf16 *rs1, vuint32mf2_t vs2, + vbfloat16mf4x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg3ei32_v_bf16mf2x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg3ei32_v_bf16mf2x3(__bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg3ei32_v_bf16m1x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg3.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg3ei32_v_bf16m1x3(__bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg3ei32_v_bf16m2x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], 
target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg3.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i32.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg3ei32_v_bf16m2x3(__bf16 *rs1, vuint32m4_t vs2, + vbfloat16m2x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg3ei32_v_bf16mf4x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg3ei32_v_bf16mf4x3_m(vbool64_t vm, __bf16 *rs1, + vuint32mf2_t vs2, vbfloat16mf4x3_t vs3, + size_t vl) { + return __riscv_vsoxseg3ei32(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg3ei32_v_bf16mf2x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg3ei32_v_bf16mf2x3_m(vbool32_t vm, __bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei32(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg3ei32_v_bf16m1x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], 
target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg3ei32_v_bf16m1x3_m(vbool16_t vm, __bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei32(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg3ei32_v_bf16m2x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg3ei32_v_bf16m2x3_m(vbool8_t vm, __bf16 *rs1, vuint32m4_t vs2, + vbfloat16m2x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei32(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg3ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg3ei64.c new file mode 100644 index 0000000000000..3a7dd53a4999b --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg3ei64.c @@ -0,0 +1,96 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: 
define dso_local void @test_vsoxseg3ei64_v_bf16mf4x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg3.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg3ei64_v_bf16mf4x3(__bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei64(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg3ei64_v_bf16mf2x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg3ei64_v_bf16mf2x3(__bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei64(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg3ei64_v_bf16m1x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg3.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg3ei64_v_bf16m1x3(__bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei64(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg3ei64_v_bf16m2x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], 
target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg3.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i64.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg3ei64_v_bf16m2x3(__bf16 *rs1, vuint64m8_t vs2, + vbfloat16m2x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei64(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg3ei64_v_bf16mf4x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg3ei64_v_bf16mf4x3_m(vbool64_t vm, __bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei64(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg3ei64_v_bf16mf2x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg3ei64_v_bf16mf2x3_m(vbool32_t vm, __bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei64(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg3ei64_v_bf16m1x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], 
target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg3ei64_v_bf16m1x3_m(vbool16_t vm, __bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei64(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg3ei64_v_bf16m2x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg3ei64_v_bf16m2x3_m(vbool8_t vm, __bf16 *rs1, vuint64m8_t vs2, + vbfloat16m2x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei64(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg3ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg3ei8.c new file mode 100644 index 0000000000000..5ae57d256056e --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg3ei8.c @@ -0,0 +1,96 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: 
define dso_local void @test_vsoxseg3ei8_v_bf16mf4x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg3.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg3ei8_v_bf16mf4x3(__bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg3ei8_v_bf16mf2x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg3ei8_v_bf16mf2x3(__bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg3ei8_v_bf16m1x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg3.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg3ei8_v_bf16m1x3(__bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg3ei8_v_bf16m2x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) 
[[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg3.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i8.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg3ei8_v_bf16m2x3(__bf16 *rs1, vuint8m1_t vs2, + vbfloat16m2x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg3ei8_v_bf16mf4x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg3ei8_v_bf16mf4x3_m(vbool64_t vm, __bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei8(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg3ei8_v_bf16mf2x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg3ei8_v_bf16mf2x3_m(vbool32_t vm, __bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei8(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg3ei8_v_bf16m1x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef 
[[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg3ei8_v_bf16m1x3_m(vbool16_t vm, __bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei8(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg3ei8_v_bf16m2x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg3ei8_v_bf16m2x3_m(vbool8_t vm, __bf16 *rs1, vuint8m1_t vs2, + vbfloat16m2x3_t vs3, size_t vl) { + return __riscv_vsoxseg3ei8(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg4ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg4ei32.c new file mode 100644 index 0000000000000..23fe189930964 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg4ei32.c @@ -0,0 +1,97 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg4ei32_v_bf16mf4x4( +// 
CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg4.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg4ei32_v_bf16mf4x4(__bf16 *rs1, vuint32mf2_t vs2, + vbfloat16mf4x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg4ei32_v_bf16mf2x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg4ei32_v_bf16mf2x4(__bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg4ei32_v_bf16m1x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg4.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg4ei32_v_bf16m1x4(__bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg4ei32_v_bf16m2x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) 
#[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg4.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i32.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg4ei32_v_bf16m2x4(__bf16 *rs1, vuint32m4_t vs2, + vbfloat16m2x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg4ei32_v_bf16mf4x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg4ei32_v_bf16mf4x4_m(vbool64_t vm, __bf16 *rs1, + vuint32mf2_t vs2, vbfloat16mf4x4_t vs3, + size_t vl) { + return __riscv_vsoxseg4ei32(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg4ei32_v_bf16mf2x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg4ei32_v_bf16mf2x4_m(vbool32_t vm, __bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei32(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg4ei32_v_bf16m1x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// 
CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg4ei32_v_bf16m1x4_m(vbool16_t vm, __bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei32(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg4ei32_v_bf16m2x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg4ei32_v_bf16m2x4_m(vbool8_t vm, __bf16 *rs1, vuint32m4_t vs2, + vbfloat16m2x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei32(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg4ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg4ei64.c new file mode 100644 index 0000000000000..5411c6af8ae7a --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg4ei64.c @@ -0,0 +1,96 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg4ei64_v_bf16mf4x4( +// CHECK-RV64-SAME: ptr noundef 
[[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg4.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg4ei64_v_bf16mf4x4(__bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei64(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg4ei64_v_bf16mf2x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg4ei64_v_bf16mf2x4(__bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei64(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg4ei64_v_bf16m1x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg4.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg4ei64_v_bf16m1x4(__bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei64(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg4ei64_v_bf16m2x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: 
entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg4.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i64.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg4ei64_v_bf16m2x4(__bf16 *rs1, vuint64m8_t vs2, + vbfloat16m2x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei64(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg4ei64_v_bf16mf4x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg4ei64_v_bf16mf4x4_m(vbool64_t vm, __bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei64(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg4ei64_v_bf16mf2x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg4ei64_v_bf16mf2x4_m(vbool32_t vm, __bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei64(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg4ei64_v_bf16m1x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// 
CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg4ei64_v_bf16m1x4_m(vbool16_t vm, __bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei64(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg4ei64_v_bf16m2x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg4ei64_v_bf16m2x4_m(vbool8_t vm, __bf16 *rs1, vuint64m8_t vs2, + vbfloat16m2x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei64(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg4ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg4ei8.c new file mode 100644 index 0000000000000..783029bc125a6 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg4ei8.c @@ -0,0 +1,96 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg4ei8_v_bf16mf4x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], 
target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg4.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg4ei8_v_bf16mf4x4(__bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg4ei8_v_bf16mf2x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg4ei8_v_bf16mf2x4(__bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg4ei8_v_bf16m1x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg4.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg4ei8_v_bf16m1x4(__bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg4ei8_v_bf16m2x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void 
@llvm.riscv.vsoxseg4.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i8.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg4ei8_v_bf16m2x4(__bf16 *rs1, vuint8m1_t vs2, + vbfloat16m2x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg4ei8_v_bf16mf4x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg4ei8_v_bf16mf4x4_m(vbool64_t vm, __bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei8(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg4ei8_v_bf16mf2x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg4ei8_v_bf16mf2x4_m(vbool32_t vm, __bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei8(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg4ei8_v_bf16m1x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void 
@llvm.riscv.vsoxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg4ei8_v_bf16m1x4_m(vbool16_t vm, __bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei8(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg4ei8_v_bf16m2x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg4ei8_v_bf16m2x4_m(vbool8_t vm, __bf16 *rs1, vuint8m1_t vs2, + vbfloat16m2x4_t vs3, size_t vl) { + return __riscv_vsoxseg4ei8(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg5ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg5ei32.c new file mode 100644 index 0000000000000..9f18e9c1a965c --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg5ei32.c @@ -0,0 +1,75 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg5ei32_v_bf16mf4x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) 
[[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg5.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg5ei32_v_bf16mf4x5(__bf16 *rs1, vuint32mf2_t vs2, + vbfloat16mf4x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg5ei32_v_bf16mf2x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg5ei32_v_bf16mf2x5(__bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg5ei32_v_bf16m1x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg5.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg5ei32_v_bf16m1x5(__bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg5ei32_v_bf16mf4x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void 
@llvm.riscv.vsoxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg5ei32_v_bf16mf4x5_m(vbool64_t vm, __bf16 *rs1, + vuint32mf2_t vs2, vbfloat16mf4x5_t vs3, + size_t vl) { + return __riscv_vsoxseg5ei32(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg5ei32_v_bf16mf2x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg5ei32_v_bf16mf2x5_m(vbool32_t vm, __bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei32(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg5ei32_v_bf16m1x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg5ei32_v_bf16m1x5_m(vbool16_t vm, __bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei32(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg5ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg5ei64.c new file mode 100644 index 0000000000000..8ea144f8d9882 
--- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg5ei64.c @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg5ei64_v_bf16mf4x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg5.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg5ei64_v_bf16mf4x5(__bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei64(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg5ei64_v_bf16mf2x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg5ei64_v_bf16mf2x5(__bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei64(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg5ei64_v_bf16m1x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: 
entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg5.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg5ei64_v_bf16m1x5(__bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei64(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg5ei64_v_bf16mf4x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg5ei64_v_bf16mf4x5_m(vbool64_t vm, __bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei64(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg5ei64_v_bf16mf2x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg5ei64_v_bf16mf2x5_m(vbool32_t vm, __bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei64(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg5ei64_v_bf16m1x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// 
CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg5ei64_v_bf16m1x5_m(vbool16_t vm, __bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei64(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg5ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg5ei8.c new file mode 100644 index 0000000000000..d61d35ec7ef22 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg5ei8.c @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg5ei8_v_bf16mf4x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg5.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg5ei8_v_bf16mf4x5(__bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg5ei8_v_bf16mf2x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 
noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg5ei8_v_bf16mf2x5(__bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg5ei8_v_bf16m1x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg5.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg5ei8_v_bf16m1x5(__bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg5ei8_v_bf16mf4x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg5ei8_v_bf16mf4x5_m(vbool64_t vm, __bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei8(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg5ei8_v_bf16mf2x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call 
void @llvm.riscv.vsoxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg5ei8_v_bf16mf2x5_m(vbool32_t vm, __bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei8(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg5ei8_v_bf16m1x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg5ei8_v_bf16m1x5_m(vbool16_t vm, __bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x5_t vs3, size_t vl) { + return __riscv_vsoxseg5ei8(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg6ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg6ei32.c new file mode 100644 index 0000000000000..4050a61ec1d66 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg6ei32.c @@ -0,0 +1,75 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg6ei32_v_bf16mf4x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 
6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg6.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg6ei32_v_bf16mf4x6(__bf16 *rs1, vuint32mf2_t vs2, + vbfloat16mf4x6_t vs3, size_t vl) { + return __riscv_vsoxseg6ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg6ei32_v_bf16mf2x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg6ei32_v_bf16mf2x6(__bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x6_t vs3, size_t vl) { + return __riscv_vsoxseg6ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg6ei32_v_bf16m1x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg6.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg6ei32_v_bf16m1x6(__bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x6_t vs3, size_t vl) { + return __riscv_vsoxseg6ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg6ei32_v_bf16mf4x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void 
@llvm.riscv.vsoxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg6ei32_v_bf16mf4x6_m(vbool64_t vm, __bf16 *rs1, + vuint32mf2_t vs2, vbfloat16mf4x6_t vs3, + size_t vl) { + return __riscv_vsoxseg6ei32(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg6ei32_v_bf16mf2x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg6ei32_v_bf16mf2x6_m(vbool32_t vm, __bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x6_t vs3, size_t vl) { + return __riscv_vsoxseg6ei32(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg6ei32_v_bf16m1x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg6ei32_v_bf16m1x6_m(vbool16_t vm, __bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x6_t vs3, size_t vl) { + return __riscv_vsoxseg6ei32(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg6ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg6ei64.c new file mode 100644 index 0000000000000..90fc1a66802fb 
--- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg6ei64.c @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg6ei64_v_bf16mf4x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg6.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg6ei64_v_bf16mf4x6(__bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x6_t vs3, size_t vl) { + return __riscv_vsoxseg6ei64(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg6ei64_v_bf16mf2x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg6ei64_v_bf16mf2x6(__bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x6_t vs3, size_t vl) { + return __riscv_vsoxseg6ei64(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg6ei64_v_bf16m1x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: 
entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg6.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg6ei64_v_bf16m1x6(__bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x6_t vs3, size_t vl) { + return __riscv_vsoxseg6ei64(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg6ei64_v_bf16mf4x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg6ei64_v_bf16mf4x6_m(vbool64_t vm, __bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x6_t vs3, size_t vl) { + return __riscv_vsoxseg6ei64(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg6ei64_v_bf16mf2x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg6ei64_v_bf16mf2x6_m(vbool32_t vm, __bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x6_t vs3, size_t vl) { + return __riscv_vsoxseg6ei64(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg6ei64_v_bf16m1x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// 
CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg6ei64_v_bf16m1x6_m(vbool16_t vm, __bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x6_t vs3, size_t vl) { + return __riscv_vsoxseg6ei64(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg6ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg6ei8.c new file mode 100644 index 0000000000000..7c9b7fd261ef2 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg6ei8.c @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg6ei8_v_bf16mf4x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg6.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg6ei8_v_bf16mf4x6(__bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x6_t vs3, size_t vl) { + return __riscv_vsoxseg6ei8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg6ei8_v_bf16mf2x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 
noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg6ei8_v_bf16mf2x6(__bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x6_t vs3, size_t vl) { + return __riscv_vsoxseg6ei8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg6ei8_v_bf16m1x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg6.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg6ei8_v_bf16m1x6(__bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x6_t vs3, size_t vl) { + return __riscv_vsoxseg6ei8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg6ei8_v_bf16mf4x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg6ei8_v_bf16mf4x6_m(vbool64_t vm, __bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x6_t vs3, size_t vl) { + return __riscv_vsoxseg6ei8(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg6ei8_v_bf16mf2x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call 
void @llvm.riscv.vsoxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg6ei8_v_bf16mf2x6_m(vbool32_t vm, __bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x6_t vs3, size_t vl) { + return __riscv_vsoxseg6ei8(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg6ei8_v_bf16m1x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg6ei8_v_bf16m1x6_m(vbool16_t vm, __bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x6_t vs3, size_t vl) { + return __riscv_vsoxseg6ei8(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg7ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg7ei32.c new file mode 100644 index 0000000000000..2fa45968720e7 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg7ei32.c @@ -0,0 +1,75 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg7ei32_v_bf16mf4x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 
7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg7.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg7ei32_v_bf16mf4x7(__bf16 *rs1, vuint32mf2_t vs2, + vbfloat16mf4x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg7ei32_v_bf16mf2x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg7ei32_v_bf16mf2x7(__bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg7ei32_v_bf16m1x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg7.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg7ei32_v_bf16m1x7(__bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg7ei32_v_bf16mf4x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void 
@llvm.riscv.vsoxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg7ei32_v_bf16mf4x7_m(vbool64_t vm, __bf16 *rs1, + vuint32mf2_t vs2, vbfloat16mf4x7_t vs3, + size_t vl) { + return __riscv_vsoxseg7ei32(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg7ei32_v_bf16mf2x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg7ei32_v_bf16mf2x7_m(vbool32_t vm, __bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei32(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg7ei32_v_bf16m1x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg7ei32_v_bf16m1x7_m(vbool16_t vm, __bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei32(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg7ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg7ei64.c new file mode 100644 index 0000000000000..26462c20ab1a1 
--- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg7ei64.c @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg7ei64_v_bf16mf4x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg7.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg7ei64_v_bf16mf4x7(__bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei64(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg7ei64_v_bf16mf2x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg7ei64_v_bf16mf2x7(__bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei64(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg7ei64_v_bf16m1x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: 
entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg7.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg7ei64_v_bf16m1x7(__bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei64(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg7ei64_v_bf16mf4x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg7ei64_v_bf16mf4x7_m(vbool64_t vm, __bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei64(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg7ei64_v_bf16mf2x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg7ei64_v_bf16mf2x7_m(vbool32_t vm, __bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei64(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg7ei64_v_bf16m1x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// 
CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg7ei64_v_bf16m1x7_m(vbool16_t vm, __bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei64(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg7ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg7ei8.c new file mode 100644 index 0000000000000..b3709517fd352 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg7ei8.c @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg7ei8_v_bf16mf4x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg7.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg7ei8_v_bf16mf4x7(__bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg7ei8_v_bf16mf2x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 
noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg7ei8_v_bf16mf2x7(__bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg7ei8_v_bf16m1x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg7.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg7ei8_v_bf16m1x7(__bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg7ei8_v_bf16mf4x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg7ei8_v_bf16mf4x7_m(vbool64_t vm, __bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei8(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg7ei8_v_bf16mf2x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call 
void @llvm.riscv.vsoxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg7ei8_v_bf16mf2x7_m(vbool32_t vm, __bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei8(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg7ei8_v_bf16m1x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg7ei8_v_bf16m1x7_m(vbool16_t vm, __bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x7_t vs3, size_t vl) { + return __riscv_vsoxseg7ei8(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg8ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg8ei32.c new file mode 100644 index 0000000000000..412fdb9e37643 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg8ei32.c @@ -0,0 +1,75 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg8ei32_v_bf16mf4x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 
8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg8.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg8ei32_v_bf16mf4x8(__bf16 *rs1, vuint32mf2_t vs2, + vbfloat16mf4x8_t vs3, size_t vl) { + return __riscv_vsoxseg8ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg8ei32_v_bf16mf2x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg8ei32_v_bf16mf2x8(__bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x8_t vs3, size_t vl) { + return __riscv_vsoxseg8ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg8ei32_v_bf16m1x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg8.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg8ei32_v_bf16m1x8(__bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x8_t vs3, size_t vl) { + return __riscv_vsoxseg8ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg8ei32_v_bf16mf4x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void 
@llvm.riscv.vsoxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg8ei32_v_bf16mf4x8_m(vbool64_t vm, __bf16 *rs1, + vuint32mf2_t vs2, vbfloat16mf4x8_t vs3, + size_t vl) { + return __riscv_vsoxseg8ei32(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg8ei32_v_bf16mf2x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg8ei32_v_bf16mf2x8_m(vbool32_t vm, __bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x8_t vs3, size_t vl) { + return __riscv_vsoxseg8ei32(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg8ei32_v_bf16m1x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg8ei32_v_bf16m1x8_m(vbool16_t vm, __bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x8_t vs3, size_t vl) { + return __riscv_vsoxseg8ei32(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg8ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg8ei64.c new file mode 100644 index 0000000000000..7f34e54b0a1fb 
--- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg8ei64.c @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg8ei64_v_bf16mf4x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg8.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg8ei64_v_bf16mf4x8(__bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x8_t vs3, size_t vl) { + return __riscv_vsoxseg8ei64(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg8ei64_v_bf16mf2x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg8ei64_v_bf16mf2x8(__bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x8_t vs3, size_t vl) { + return __riscv_vsoxseg8ei64(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg8ei64_v_bf16m1x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: 
entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg8.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg8ei64_v_bf16m1x8(__bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x8_t vs3, size_t vl) { + return __riscv_vsoxseg8ei64(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg8ei64_v_bf16mf4x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg8ei64_v_bf16mf4x8_m(vbool64_t vm, __bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x8_t vs3, size_t vl) { + return __riscv_vsoxseg8ei64(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg8ei64_v_bf16mf2x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg8ei64_v_bf16mf2x8_m(vbool32_t vm, __bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x8_t vs3, size_t vl) { + return __riscv_vsoxseg8ei64(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg8ei64_v_bf16m1x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// 
CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg8ei64_v_bf16m1x8_m(vbool16_t vm, __bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x8_t vs3, size_t vl) { + return __riscv_vsoxseg8ei64(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg8ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg8ei8.c new file mode 100644 index 0000000000000..088ca08fca7f4 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsoxseg8ei8.c @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg8ei8_v_bf16mf4x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg8.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg8ei8_v_bf16mf4x8(__bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x8_t vs3, size_t vl) { + return __riscv_vsoxseg8ei8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg8ei8_v_bf16mf2x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 
noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg8ei8_v_bf16mf2x8(__bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x8_t vs3, size_t vl) { + return __riscv_vsoxseg8ei8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg8ei8_v_bf16m1x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg8.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg8ei8_v_bf16m1x8(__bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x8_t vs3, size_t vl) { + return __riscv_vsoxseg8ei8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg8ei8_v_bf16mf4x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg8ei8_v_bf16mf4x8_m(vbool64_t vm, __bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x8_t vs3, size_t vl) { + return __riscv_vsoxseg8ei8(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg8ei8_v_bf16mf2x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call 
void @llvm.riscv.vsoxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg8ei8_v_bf16mf2x8_m(vbool32_t vm, __bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x8_t vs3, size_t vl) { + return __riscv_vsoxseg8ei8(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg8ei8_v_bf16m1x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg8ei8_v_bf16m1x8_m(vbool16_t vm, __bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x8_t vs3, size_t vl) { + return __riscv_vsoxseg8ei8(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxei32.c new file mode 100644 index 0000000000000..13a2d299c8f73 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxei32.c @@ -0,0 +1,118 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei32_v_bf16mf4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) 
#[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.nxv1bf16.p0.nxv1i32.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei32_v_bf16mf4(__bf16 *rs1, vuint32mf2_t rs2, vbfloat16mf4_t vs3, + size_t vl) { + return __riscv_vsuxei32(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei32_v_bf16mf2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.nxv2bf16.p0.nxv2i32.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei32_v_bf16mf2(__bf16 *rs1, vuint32m1_t rs2, vbfloat16mf2_t vs3, + size_t vl) { + return __riscv_vsuxei32(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei32_v_bf16m1( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.nxv4bf16.p0.nxv4i32.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei32_v_bf16m1(__bf16 *rs1, vuint32m2_t rs2, vbfloat16m1_t vs3, + size_t vl) { + return __riscv_vsuxei32(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei32_v_bf16m2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.nxv8bf16.p0.nxv8i32.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei32_v_bf16m2(__bf16 *rs1, vuint32m4_t rs2, vbfloat16m2_t vs3, + size_t vl) { + return __riscv_vsuxei32(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei32_v_bf16m4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], 
i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.nxv16bf16.p0.nxv16i32.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei32_v_bf16m4(__bf16 *rs1, vuint32m8_t rs2, vbfloat16m4_t vs3, + size_t vl) { + return __riscv_vsuxei32(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei32_v_bf16mf4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.mask.nxv1bf16.p0.nxv1i32.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei32_v_bf16mf4_m(vbool64_t vm, __bf16 *rs1, vuint32mf2_t rs2, + vbfloat16mf4_t vs3, size_t vl) { + return __riscv_vsuxei32(vm, rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei32_v_bf16mf2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.mask.nxv2bf16.p0.nxv2i32.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei32_v_bf16mf2_m(vbool32_t vm, __bf16 *rs1, vuint32m1_t rs2, + vbfloat16mf2_t vs3, size_t vl) { + return __riscv_vsuxei32(vm, rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei32_v_bf16m1_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.mask.nxv4bf16.p0.nxv4i32.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei32_v_bf16m1_m(vbool16_t vm, __bf16 *rs1, vuint32m2_t rs2, + vbfloat16m1_t vs3, size_t vl) { + return __riscv_vsuxei32(vm, rs1, 
rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei32_v_bf16m2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.mask.nxv8bf16.p0.nxv8i32.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei32_v_bf16m2_m(vbool8_t vm, __bf16 *rs1, vuint32m4_t rs2, + vbfloat16m2_t vs3, size_t vl) { + return __riscv_vsuxei32(vm, rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei32_v_bf16m4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.mask.nxv16bf16.p0.nxv16i32.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei32_v_bf16m4_m(vbool4_t vm, __bf16 *rs1, vuint32m8_t rs2, + vbfloat16m4_t vs3, size_t vl) { + return __riscv_vsuxei32(vm, rs1, rs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxei64.c new file mode 100644 index 0000000000000..a6c618a2fb760 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxei64.c @@ -0,0 +1,96 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei64_v_bf16mf4( +// CHECK-RV64-SAME: ptr noundef 
[[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.nxv1bf16.p0.nxv1i64.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei64_v_bf16mf4(__bf16 *rs1, vuint64m1_t rs2, vbfloat16mf4_t vs3, + size_t vl) { + return __riscv_vsuxei64(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei64_v_bf16mf2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.nxv2bf16.p0.nxv2i64.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei64_v_bf16mf2(__bf16 *rs1, vuint64m2_t rs2, vbfloat16mf2_t vs3, + size_t vl) { + return __riscv_vsuxei64(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei64_v_bf16m1( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.nxv4bf16.p0.nxv4i64.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei64_v_bf16m1(__bf16 *rs1, vuint64m4_t rs2, vbfloat16m1_t vs3, + size_t vl) { + return __riscv_vsuxei64(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei64_v_bf16m2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.nxv8bf16.p0.nxv8i64.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei64_v_bf16m2(__bf16 *rs1, vuint64m8_t rs2, vbfloat16m2_t vs3, + size_t vl) { + return __riscv_vsuxei64(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei64_v_bf16mf4_m( +// 
CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.mask.nxv1bf16.p0.nxv1i64.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei64_v_bf16mf4_m(vbool64_t vm, __bf16 *rs1, vuint64m1_t rs2, + vbfloat16mf4_t vs3, size_t vl) { + return __riscv_vsuxei64(vm, rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei64_v_bf16mf2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.mask.nxv2bf16.p0.nxv2i64.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei64_v_bf16mf2_m(vbool32_t vm, __bf16 *rs1, vuint64m2_t rs2, + vbfloat16mf2_t vs3, size_t vl) { + return __riscv_vsuxei64(vm, rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei64_v_bf16m1_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.mask.nxv4bf16.p0.nxv4i64.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei64_v_bf16m1_m(vbool16_t vm, __bf16 *rs1, vuint64m4_t rs2, + vbfloat16m1_t vs3, size_t vl) { + return __riscv_vsuxei64(vm, rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei64_v_bf16m2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.mask.nxv8bf16.p0.nxv8i64.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void 
test_vsuxei64_v_bf16m2_m(vbool8_t vm, __bf16 *rs1, vuint64m8_t rs2, + vbfloat16m2_t vs3, size_t vl) { + return __riscv_vsuxei64(vm, rs1, rs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxei8.c new file mode 100644 index 0000000000000..67c4d4b952a3a --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxei8.c @@ -0,0 +1,140 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei8_v_bf16mf4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.nxv1bf16.p0.nxv1i8.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei8_v_bf16mf4(__bf16 *rs1, vuint8mf8_t rs2, vbfloat16mf4_t vs3, + size_t vl) { + return __riscv_vsuxei8(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei8_v_bf16mf2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.nxv2bf16.p0.nxv2i8.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei8_v_bf16mf2(__bf16 *rs1, vuint8mf4_t rs2, vbfloat16mf2_t vs3, + size_t vl) { + return __riscv_vsuxei8(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void 
@test_vsuxei8_v_bf16m1( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.nxv4bf16.p0.nxv4i8.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei8_v_bf16m1(__bf16 *rs1, vuint8mf2_t rs2, vbfloat16m1_t vs3, + size_t vl) { + return __riscv_vsuxei8(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei8_v_bf16m2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.nxv8bf16.p0.nxv8i8.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei8_v_bf16m2(__bf16 *rs1, vuint8m1_t rs2, vbfloat16m2_t vs3, + size_t vl) { + return __riscv_vsuxei8(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei8_v_bf16m4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.nxv16bf16.p0.nxv16i8.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei8_v_bf16m4(__bf16 *rs1, vuint8m2_t rs2, vbfloat16m4_t vs3, + size_t vl) { + return __riscv_vsuxei8(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei8_v_bf16m8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.nxv32bf16.p0.nxv32i8.i64( [[VS3]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei8_v_bf16m8(__bf16 *rs1, vuint8m4_t rs2, vbfloat16m8_t vs3, + size_t vl) { + return __riscv_vsuxei8(rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void 
@test_vsuxei8_v_bf16mf4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.mask.nxv1bf16.p0.nxv1i8.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei8_v_bf16mf4_m(vbool64_t vm, __bf16 *rs1, vuint8mf8_t rs2, + vbfloat16mf4_t vs3, size_t vl) { + return __riscv_vsuxei8(vm, rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei8_v_bf16mf2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.mask.nxv2bf16.p0.nxv2i8.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei8_v_bf16mf2_m(vbool32_t vm, __bf16 *rs1, vuint8mf4_t rs2, + vbfloat16mf2_t vs3, size_t vl) { + return __riscv_vsuxei8(vm, rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei8_v_bf16m1_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.mask.nxv4bf16.p0.nxv4i8.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei8_v_bf16m1_m(vbool16_t vm, __bf16 *rs1, vuint8mf2_t rs2, + vbfloat16m1_t vs3, size_t vl) { + return __riscv_vsuxei8(vm, rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei8_v_bf16m2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.mask.nxv8bf16.p0.nxv8i8.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void 
test_vsuxei8_v_bf16m2_m(vbool8_t vm, __bf16 *rs1, vuint8m1_t rs2, + vbfloat16m2_t vs3, size_t vl) { + return __riscv_vsuxei8(vm, rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei8_v_bf16m4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.mask.nxv16bf16.p0.nxv16i8.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei8_v_bf16m4_m(vbool4_t vm, __bf16 *rs1, vuint8m2_t rs2, + vbfloat16m4_t vs3, size_t vl) { + return __riscv_vsuxei8(vm, rs1, rs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxei8_v_bf16m8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxei.mask.nxv32bf16.p0.nxv32i8.i64( [[VS3]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxei8_v_bf16m8_m(vbool2_t vm, __bf16 *rs1, vuint8m4_t rs2, + vbfloat16m8_t vs3, size_t vl) { + return __riscv_vsuxei8(vm, rs1, rs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg2ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg2ei32.c new file mode 100644 index 0000000000000..9a3b30c9eff1a --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg2ei32.c @@ -0,0 +1,119 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck 
--check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei32_v_bf16mf4x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei32_v_bf16mf4x2(__bf16 *rs1, vuint32mf2_t vs2, + vbfloat16mf4x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei32_v_bf16mf2x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei32_v_bf16mf2x2(__bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei32_v_bf16m1x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei32_v_bf16m1x2(__bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei32_v_bf16m2x2( +// 
CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i32.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei32_v_bf16m2x2(__bf16 *rs1, vuint32m4_t vs2, + vbfloat16m2x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei32_v_bf16m4x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i32.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei32_v_bf16m4x2(__bf16 *rs1, vuint32m8_t vs2, + vbfloat16m4x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei32_v_bf16mf4x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei32_v_bf16mf4x2_m(vbool64_t vm, __bf16 *rs1, + vuint32mf2_t vs2, vbfloat16mf4x2_t vs3, + size_t vl) { + return __riscv_vsuxseg2ei32(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei32_v_bf16mf2x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], 
target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei32_v_bf16mf2x2_m(vbool32_t vm, __bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei32(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei32_v_bf16m1x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei32_v_bf16m1x2_m(vbool16_t vm, __bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei32(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei32_v_bf16m2x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei32_v_bf16m2x2_m(vbool8_t vm, __bf16 *rs1, vuint32m4_t vs2, + vbfloat16m2x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei32(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei32_v_bf16m4x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef 
[[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.mask.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i32.nxv16i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei32_v_bf16m4x2_m(vbool4_t vm, __bf16 *rs1, vuint32m8_t vs2, + vbfloat16m4x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei32(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg2ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg2ei64.c new file mode 100644 index 0000000000000..b2c445730d2fb --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg2ei64.c @@ -0,0 +1,96 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei64_v_bf16mf4x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei64_v_bf16mf4x2(__bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei64(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local 
void @test_vsuxseg2ei64_v_bf16mf2x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei64_v_bf16mf2x2(__bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei64(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei64_v_bf16m1x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei64_v_bf16m1x2(__bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei64(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei64_v_bf16m2x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i64.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei64_v_bf16m2x2(__bf16 *rs1, vuint64m8_t vs2, + vbfloat16m2x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei64(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei64_v_bf16mf4x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) 
[[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei64_v_bf16mf4x2_m(vbool64_t vm, __bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei64(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei64_v_bf16mf2x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei64_v_bf16mf2x2_m(vbool32_t vm, __bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei64(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei64_v_bf16m1x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei64_v_bf16m1x2_m(vbool16_t vm, __bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei64(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei64_v_bf16m2x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], 
target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei64_v_bf16m2x2_m(vbool8_t vm, __bf16 *rs1, vuint64m8_t vs2, + vbfloat16m2x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei64(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg2ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg2ei8.c new file mode 100644 index 0000000000000..6b9774c560521 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg2ei8.c @@ -0,0 +1,118 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei8_v_bf16mf4x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei8_v_bf16mf4x2(__bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void 
@test_vsuxseg2ei8_v_bf16mf2x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei8_v_bf16mf2x2(__bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei8_v_bf16m1x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei8_v_bf16m1x2(__bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei8_v_bf16m2x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i8.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei8_v_bf16m2x2(__bf16 *rs1, vuint8m1_t vs2, + vbfloat16m2x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei8_v_bf16m4x2( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef 
[[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i8.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei8_v_bf16m4x2(__bf16 *rs1, vuint8m2_t vs2, + vbfloat16m4x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei8_v_bf16mf4x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei8_v_bf16mf4x2_m(vbool64_t vm, __bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei8(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei8_v_bf16mf2x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei8_v_bf16mf2x2_m(vbool32_t vm, __bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei8(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei8_v_bf16m1x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// 
CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei8_v_bf16m1x2_m(vbool16_t vm, __bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei8(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei8_v_bf16m2x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei8_v_bf16m2x2_m(vbool8_t vm, __bf16 *rs1, vuint8m1_t vs2, + vbfloat16m2x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei8(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei8_v_bf16m4x2_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 2) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.mask.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i8.nxv16i1.i64(target("riscv.vector.tuple", , 2) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei8_v_bf16m4x2_m(vbool4_t vm, __bf16 *rs1, vuint8m2_t vs2, + vbfloat16m4x2_t vs3, size_t vl) { + return __riscv_vsuxseg2ei8(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg3ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg3ei32.c new file mode 100644 
index 0000000000000..402045661c33f --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg3ei32.c @@ -0,0 +1,97 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg3ei32_v_bf16mf4x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg3.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg3ei32_v_bf16mf4x3(__bf16 *rs1, vuint32mf2_t vs2, + vbfloat16mf4x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg3ei32_v_bf16mf2x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg3ei32_v_bf16mf2x3(__bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg3ei32_v_bf16m1x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef 
[[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg3.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg3ei32_v_bf16m1x3(__bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg3ei32_v_bf16m2x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg3.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i32.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg3ei32_v_bf16m2x3(__bf16 *rs1, vuint32m4_t vs2, + vbfloat16m2x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg3ei32_v_bf16mf4x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg3ei32_v_bf16mf4x3_m(vbool64_t vm, __bf16 *rs1, + vuint32mf2_t vs2, vbfloat16mf4x3_t vs3, + size_t vl) { + return __riscv_vsuxseg3ei32(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg3ei32_v_bf16mf2x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// 
CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg3ei32_v_bf16mf2x3_m(vbool32_t vm, __bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei32(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg3ei32_v_bf16m1x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg3ei32_v_bf16m1x3_m(vbool16_t vm, __bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei32(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg3ei32_v_bf16m2x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg3ei32_v_bf16m2x3_m(vbool8_t vm, __bf16 *rs1, vuint32m4_t vs2, + vbfloat16m2x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei32(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg3ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg3ei64.c new file mode 100644 index 
0000000000000..f1ebe0b61966a --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg3ei64.c @@ -0,0 +1,96 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg3ei64_v_bf16mf4x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg3.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg3ei64_v_bf16mf4x3(__bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei64(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg3ei64_v_bf16mf2x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg3ei64_v_bf16mf2x3(__bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei64(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg3ei64_v_bf16m1x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) 
#[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg3.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg3ei64_v_bf16m1x3(__bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei64(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg3ei64_v_bf16m2x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg3.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i64.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg3ei64_v_bf16m2x3(__bf16 *rs1, vuint64m8_t vs2, + vbfloat16m2x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei64(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg3ei64_v_bf16mf4x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg3ei64_v_bf16mf4x3_m(vbool64_t vm, __bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei64(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg3ei64_v_bf16mf2x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void 
@llvm.riscv.vsuxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg3ei64_v_bf16mf2x3_m(vbool32_t vm, __bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei64(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg3ei64_v_bf16m1x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg3ei64_v_bf16m1x3_m(vbool16_t vm, __bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei64(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg3ei64_v_bf16m2x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg3ei64_v_bf16m2x3_m(vbool8_t vm, __bf16 *rs1, vuint64m8_t vs2, + vbfloat16m2x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei64(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg3ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg3ei8.c new file mode 100644 index 0000000000000..d0c36534ca869 --- 
/dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg3ei8.c @@ -0,0 +1,96 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg3ei8_v_bf16mf4x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg3.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg3ei8_v_bf16mf4x3(__bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg3ei8_v_bf16mf2x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg3ei8_v_bf16mf2x3(__bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg3ei8_v_bf16m1x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// 
CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg3.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg3ei8_v_bf16m1x3(__bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg3ei8_v_bf16m2x3( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg3.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i8.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg3ei8_v_bf16m2x3(__bf16 *rs1, vuint8m1_t vs2, + vbfloat16m2x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg3ei8_v_bf16mf4x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg3ei8_v_bf16mf4x3_m(vbool64_t vm, __bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei8(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg3ei8_v_bf16mf2x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void 
@llvm.riscv.vsuxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg3ei8_v_bf16mf2x3_m(vbool32_t vm, __bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei8(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg3ei8_v_bf16m1x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg3ei8_v_bf16m1x3_m(vbool16_t vm, __bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei8(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg3ei8_v_bf16m2x3_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 3) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 3) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg3ei8_v_bf16m2x3_m(vbool8_t vm, __bf16 *rs1, vuint8m1_t vs2, + vbfloat16m2x3_t vs3, size_t vl) { + return __riscv_vsuxseg3ei8(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg4ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg4ei32.c new file mode 100644 index 0000000000000..6b1c29dae50ad --- /dev/null +++ 
b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg4ei32.c @@ -0,0 +1,97 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg4ei32_v_bf16mf4x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg4.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg4ei32_v_bf16mf4x4(__bf16 *rs1, vuint32mf2_t vs2, + vbfloat16mf4x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg4ei32_v_bf16mf2x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg4ei32_v_bf16mf2x4(__bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg4ei32_v_bf16m1x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// 
CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg4.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg4ei32_v_bf16m1x4(__bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg4ei32_v_bf16m2x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg4.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i32.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg4ei32_v_bf16m2x4(__bf16 *rs1, vuint32m4_t vs2, + vbfloat16m2x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg4ei32_v_bf16mf4x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg4ei32_v_bf16mf4x4_m(vbool64_t vm, __bf16 *rs1, + vuint32mf2_t vs2, vbfloat16mf4x4_t vs3, + size_t vl) { + return __riscv_vsuxseg4ei32(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg4ei32_v_bf16mf2x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void 
@llvm.riscv.vsuxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg4ei32_v_bf16mf2x4_m(vbool32_t vm, __bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei32(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg4ei32_v_bf16m1x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg4ei32_v_bf16m1x4_m(vbool16_t vm, __bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei32(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg4ei32_v_bf16m2x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg4ei32_v_bf16m2x4_m(vbool8_t vm, __bf16 *rs1, vuint32m4_t vs2, + vbfloat16m2x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei32(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg4ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg4ei64.c new file mode 100644 index 0000000000000..8c3c04ee76a50 --- 
/dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg4ei64.c @@ -0,0 +1,96 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg4ei64_v_bf16mf4x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg4.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg4ei64_v_bf16mf4x4(__bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei64(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg4ei64_v_bf16mf2x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg4ei64_v_bf16mf2x4(__bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei64(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg4ei64_v_bf16m1x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: 
entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg4.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg4ei64_v_bf16m1x4(__bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei64(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg4ei64_v_bf16m2x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg4.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i64.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg4ei64_v_bf16m2x4(__bf16 *rs1, vuint64m8_t vs2, + vbfloat16m2x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei64(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg4ei64_v_bf16mf4x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg4ei64_v_bf16mf4x4_m(vbool64_t vm, __bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei64(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg4ei64_v_bf16mf2x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void 
@llvm.riscv.vsuxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg4ei64_v_bf16mf2x4_m(vbool32_t vm, __bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei64(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg4ei64_v_bf16m1x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg4ei64_v_bf16m1x4_m(vbool16_t vm, __bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei64(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg4ei64_v_bf16m2x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg4ei64_v_bf16m2x4_m(vbool8_t vm, __bf16 *rs1, vuint64m8_t vs2, + vbfloat16m2x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei64(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg4ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg4ei8.c new file mode 100644 index 0000000000000..b052914453463 --- 
/dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg4ei8.c @@ -0,0 +1,96 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg4ei8_v_bf16mf4x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg4.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg4ei8_v_bf16mf4x4(__bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg4ei8_v_bf16mf2x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg4ei8_v_bf16mf2x4(__bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg4ei8_v_bf16m1x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// 
CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg4.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg4ei8_v_bf16m1x4(__bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg4ei8_v_bf16m2x4( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg4.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i8.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg4ei8_v_bf16m2x4(__bf16 *rs1, vuint8m1_t vs2, + vbfloat16m2x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg4ei8_v_bf16mf4x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg4ei8_v_bf16mf4x4_m(vbool64_t vm, __bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei8(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg4ei8_v_bf16mf2x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void 
@llvm.riscv.vsuxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg4ei8_v_bf16mf2x4_m(vbool32_t vm, __bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei8(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg4ei8_v_bf16m1x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg4ei8_v_bf16m1x4_m(vbool16_t vm, __bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei8(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg4ei8_v_bf16m2x4_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 4) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 4) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg4ei8_v_bf16m2x4_m(vbool8_t vm, __bf16 *rs1, vuint8m1_t vs2, + vbfloat16m2x4_t vs3, size_t vl) { + return __riscv_vsuxseg4ei8(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg5ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg5ei32.c new file mode 100644 index 0000000000000..8e2b11b0bda6f --- /dev/null +++ 
b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg5ei32.c @@ -0,0 +1,75 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg5ei32_v_bf16mf4x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg5.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg5ei32_v_bf16mf4x5(__bf16 *rs1, vuint32mf2_t vs2, + vbfloat16mf4x5_t vs3, size_t vl) { + return __riscv_vsuxseg5ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg5ei32_v_bf16mf2x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg5ei32_v_bf16mf2x5(__bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x5_t vs3, size_t vl) { + return __riscv_vsuxseg5ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg5ei32_v_bf16m1x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// 
CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg5.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg5ei32_v_bf16m1x5(__bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x5_t vs3, size_t vl) { + return __riscv_vsuxseg5ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg5ei32_v_bf16mf4x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg5ei32_v_bf16mf4x5_m(vbool64_t vm, __bf16 *rs1, + vuint32mf2_t vs2, vbfloat16mf4x5_t vs3, + size_t vl) { + return __riscv_vsuxseg5ei32(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg5ei32_v_bf16mf2x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg5ei32_v_bf16mf2x5_m(vbool32_t vm, __bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x5_t vs3, size_t vl) { + return __riscv_vsuxseg5ei32(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg5ei32_v_bf16m1x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: 
call void @llvm.riscv.vsuxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg5ei32_v_bf16m1x5_m(vbool16_t vm, __bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x5_t vs3, size_t vl) { + return __riscv_vsuxseg5ei32(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg5ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg5ei64.c new file mode 100644 index 0000000000000..e3f24c791744e --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg5ei64.c @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg5ei64_v_bf16mf4x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg5.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg5ei64_v_bf16mf4x5(__bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x5_t vs3, size_t vl) { + return __riscv_vsuxseg5ei64(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg5ei64_v_bf16mf2x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef 
[[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg5ei64_v_bf16mf2x5(__bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x5_t vs3, size_t vl) { + return __riscv_vsuxseg5ei64(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg5ei64_v_bf16m1x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg5.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg5ei64_v_bf16m1x5(__bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x5_t vs3, size_t vl) { + return __riscv_vsuxseg5ei64(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg5ei64_v_bf16mf4x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg5ei64_v_bf16mf4x5_m(vbool64_t vm, __bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x5_t vs3, size_t vl) { + return __riscv_vsuxseg5ei64(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg5ei64_v_bf16mf2x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: 
call void @llvm.riscv.vsuxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg5ei64_v_bf16mf2x5_m(vbool32_t vm, __bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x5_t vs3, size_t vl) { + return __riscv_vsuxseg5ei64(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg5ei64_v_bf16m1x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg5ei64_v_bf16m1x5_m(vbool16_t vm, __bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x5_t vs3, size_t vl) { + return __riscv_vsuxseg5ei64(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg5ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg5ei8.c new file mode 100644 index 0000000000000..1130456ee6cfd --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg5ei8.c @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg5ei8_v_bf16mf4x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], 
target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg5.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg5ei8_v_bf16mf4x5(__bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x5_t vs3, size_t vl) { + return __riscv_vsuxseg5ei8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg5ei8_v_bf16mf2x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg5ei8_v_bf16mf2x5(__bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x5_t vs3, size_t vl) { + return __riscv_vsuxseg5ei8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg5ei8_v_bf16m1x5( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg5.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg5ei8_v_bf16m1x5(__bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x5_t vs3, size_t vl) { + return __riscv_vsuxseg5ei8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg5ei8_v_bf16mf4x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: 
call void @llvm.riscv.vsuxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg5ei8_v_bf16mf4x5_m(vbool64_t vm, __bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x5_t vs3, size_t vl) { + return __riscv_vsuxseg5ei8(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg5ei8_v_bf16mf2x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg5ei8_v_bf16mf2x5_m(vbool32_t vm, __bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x5_t vs3, size_t vl) { + return __riscv_vsuxseg5ei8(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg5ei8_v_bf16m1x5_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 5) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 5) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg5ei8_v_bf16m1x5_m(vbool16_t vm, __bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x5_t vs3, size_t vl) { + return __riscv_vsuxseg5ei8(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg6ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg6ei32.c new file mode 100644 index 0000000000000..dc619aa60aef0 --- 
/dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg6ei32.c @@ -0,0 +1,75 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg6ei32_v_bf16mf4x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg6.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg6ei32_v_bf16mf4x6(__bf16 *rs1, vuint32mf2_t vs2, + vbfloat16mf4x6_t vs3, size_t vl) { + return __riscv_vsuxseg6ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg6ei32_v_bf16mf2x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg6ei32_v_bf16mf2x6(__bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x6_t vs3, size_t vl) { + return __riscv_vsuxseg6ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg6ei32_v_bf16m1x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: 
entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg6.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg6ei32_v_bf16m1x6(__bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x6_t vs3, size_t vl) { + return __riscv_vsuxseg6ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg6ei32_v_bf16mf4x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg6ei32_v_bf16mf4x6_m(vbool64_t vm, __bf16 *rs1, + vuint32mf2_t vs2, vbfloat16mf4x6_t vs3, + size_t vl) { + return __riscv_vsuxseg6ei32(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg6ei32_v_bf16mf2x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg6ei32_v_bf16mf2x6_m(vbool32_t vm, __bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x6_t vs3, size_t vl) { + return __riscv_vsuxseg6ei32(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg6ei32_v_bf16m1x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// 
CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg6ei32_v_bf16m1x6_m(vbool16_t vm, __bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x6_t vs3, size_t vl) { + return __riscv_vsuxseg6ei32(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg6ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg6ei64.c new file mode 100644 index 0000000000000..35a776bcda289 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg6ei64.c @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg6ei64_v_bf16mf4x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg6.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg6ei64_v_bf16mf4x6(__bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x6_t vs3, size_t vl) { + return __riscv_vsuxseg6ei64(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg6ei64_v_bf16mf2x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], 
i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg6ei64_v_bf16mf2x6(__bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x6_t vs3, size_t vl) { + return __riscv_vsuxseg6ei64(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg6ei64_v_bf16m1x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg6.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg6ei64_v_bf16m1x6(__bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x6_t vs3, size_t vl) { + return __riscv_vsuxseg6ei64(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg6ei64_v_bf16mf4x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg6ei64_v_bf16mf4x6_m(vbool64_t vm, __bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x6_t vs3, size_t vl) { + return __riscv_vsuxseg6ei64(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg6ei64_v_bf16mf2x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// 
CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg6ei64_v_bf16mf2x6_m(vbool32_t vm, __bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x6_t vs3, size_t vl) { + return __riscv_vsuxseg6ei64(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg6ei64_v_bf16m1x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg6ei64_v_bf16m1x6_m(vbool16_t vm, __bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x6_t vs3, size_t vl) { + return __riscv_vsuxseg6ei64(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg6ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg6ei8.c new file mode 100644 index 0000000000000..d474a5682b17c --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg6ei8.c @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg6ei8_v_bf16mf4x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], 
target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg6.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg6ei8_v_bf16mf4x6(__bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x6_t vs3, size_t vl) { + return __riscv_vsuxseg6ei8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg6ei8_v_bf16mf2x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg6ei8_v_bf16mf2x6(__bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x6_t vs3, size_t vl) { + return __riscv_vsuxseg6ei8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg6ei8_v_bf16m1x6( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg6.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg6ei8_v_bf16m1x6(__bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x6_t vs3, size_t vl) { + return __riscv_vsuxseg6ei8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg6ei8_v_bf16mf4x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: 
call void @llvm.riscv.vsuxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg6ei8_v_bf16mf4x6_m(vbool64_t vm, __bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x6_t vs3, size_t vl) { + return __riscv_vsuxseg6ei8(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg6ei8_v_bf16mf2x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg6ei8_v_bf16mf2x6_m(vbool32_t vm, __bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x6_t vs3, size_t vl) { + return __riscv_vsuxseg6ei8(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg6ei8_v_bf16m1x6_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 6) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 6) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg6ei8_v_bf16m1x6_m(vbool16_t vm, __bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x6_t vs3, size_t vl) { + return __riscv_vsuxseg6ei8(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg7ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg7ei32.c new file mode 100644 index 0000000000000..d2e1859fa300f --- 
/dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg7ei32.c @@ -0,0 +1,75 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg7ei32_v_bf16mf4x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg7.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg7ei32_v_bf16mf4x7(__bf16 *rs1, vuint32mf2_t vs2, + vbfloat16mf4x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg7ei32_v_bf16mf2x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg7ei32_v_bf16mf2x7(__bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg7ei32_v_bf16m1x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: 
entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg7.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg7ei32_v_bf16m1x7(__bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg7ei32_v_bf16mf4x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg7ei32_v_bf16mf4x7_m(vbool64_t vm, __bf16 *rs1, + vuint32mf2_t vs2, vbfloat16mf4x7_t vs3, + size_t vl) { + return __riscv_vsuxseg7ei32(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg7ei32_v_bf16mf2x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg7ei32_v_bf16mf2x7_m(vbool32_t vm, __bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei32(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg7ei32_v_bf16m1x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// 
CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg7ei32_v_bf16m1x7_m(vbool16_t vm, __bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei32(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg7ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg7ei64.c new file mode 100644 index 0000000000000..cfc1cfca208c1 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg7ei64.c @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg7ei64_v_bf16mf4x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg7.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg7ei64_v_bf16mf4x7(__bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei64(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg7ei64_v_bf16mf2x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], 
i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg7ei64_v_bf16mf2x7(__bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei64(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg7ei64_v_bf16m1x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg7.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg7ei64_v_bf16m1x7(__bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei64(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg7ei64_v_bf16mf4x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg7ei64_v_bf16mf4x7_m(vbool64_t vm, __bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei64(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg7ei64_v_bf16mf2x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// 
CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg7ei64_v_bf16mf2x7_m(vbool32_t vm, __bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei64(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg7ei64_v_bf16m1x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg7ei64_v_bf16m1x7_m(vbool16_t vm, __bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei64(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg7ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg7ei8.c new file mode 100644 index 0000000000000..a887bf12fd5bf --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg7ei8.c @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg7ei8_v_bf16mf4x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], 
target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg7.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg7ei8_v_bf16mf4x7(__bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg7ei8_v_bf16mf2x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg7ei8_v_bf16mf2x7(__bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg7ei8_v_bf16m1x7( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg7.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg7ei8_v_bf16m1x7(__bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg7ei8_v_bf16mf4x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: 
call void @llvm.riscv.vsuxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg7ei8_v_bf16mf4x7_m(vbool64_t vm, __bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei8(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg7ei8_v_bf16mf2x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg7ei8_v_bf16mf2x7_m(vbool32_t vm, __bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei8(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg7ei8_v_bf16m1x7_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 7) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 7) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg7ei8_v_bf16m1x7_m(vbool16_t vm, __bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x7_t vs3, size_t vl) { + return __riscv_vsuxseg7ei8(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg8ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg8ei32.c new file mode 100644 index 0000000000000..38f3f8a3a96e2 --- 
/dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg8ei32.c @@ -0,0 +1,75 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg8ei32_v_bf16mf4x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg8.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg8ei32_v_bf16mf4x8(__bf16 *rs1, vuint32mf2_t vs2, + vbfloat16mf4x8_t vs3, size_t vl) { + return __riscv_vsuxseg8ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg8ei32_v_bf16mf2x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg8ei32_v_bf16mf2x8(__bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x8_t vs3, size_t vl) { + return __riscv_vsuxseg8ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg8ei32_v_bf16m1x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: 
entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg8.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg8ei32_v_bf16m1x8(__bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x8_t vs3, size_t vl) { + return __riscv_vsuxseg8ei32(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg8ei32_v_bf16mf4x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg8ei32_v_bf16mf4x8_m(vbool64_t vm, __bf16 *rs1, + vuint32mf2_t vs2, vbfloat16mf4x8_t vs3, + size_t vl) { + return __riscv_vsuxseg8ei32(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg8ei32_v_bf16mf2x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg8ei32_v_bf16mf2x8_m(vbool32_t vm, __bf16 *rs1, vuint32m1_t vs2, + vbfloat16mf2x8_t vs3, size_t vl) { + return __riscv_vsuxseg8ei32(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg8ei32_v_bf16m1x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// 
CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg8ei32_v_bf16m1x8_m(vbool16_t vm, __bf16 *rs1, vuint32m2_t vs2, + vbfloat16m1x8_t vs3, size_t vl) { + return __riscv_vsuxseg8ei32(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg8ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg8ei64.c new file mode 100644 index 0000000000000..4adeaf94608eb --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg8ei64.c @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg8ei64_v_bf16mf4x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg8.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg8ei64_v_bf16mf4x8(__bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x8_t vs3, size_t vl) { + return __riscv_vsuxseg8ei64(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg8ei64_v_bf16mf2x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], 
i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg8ei64_v_bf16mf2x8(__bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x8_t vs3, size_t vl) { + return __riscv_vsuxseg8ei64(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg8ei64_v_bf16m1x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg8.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg8ei64_v_bf16m1x8(__bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x8_t vs3, size_t vl) { + return __riscv_vsuxseg8ei64(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg8ei64_v_bf16mf4x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg8ei64_v_bf16mf4x8_m(vbool64_t vm, __bf16 *rs1, vuint64m1_t vs2, + vbfloat16mf4x8_t vs3, size_t vl) { + return __riscv_vsuxseg8ei64(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg8ei64_v_bf16mf2x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// 
CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg8ei64_v_bf16mf2x8_m(vbool32_t vm, __bf16 *rs1, vuint64m2_t vs2, + vbfloat16mf2x8_t vs3, size_t vl) { + return __riscv_vsuxseg8ei64(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg8ei64_v_bf16m1x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg8ei64_v_bf16m1x8_m(vbool16_t vm, __bf16 *rs1, vuint64m4_t vs2, + vbfloat16m1x8_t vs3, size_t vl) { + return __riscv_vsuxseg8ei64(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg8ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg8ei8.c new file mode 100644 index 0000000000000..25cbcf6887063 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vsuxseg8ei8.c @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg8ei8_v_bf16mf4x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], 
target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg8.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg8ei8_v_bf16mf4x8(__bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x8_t vs3, size_t vl) { + return __riscv_vsuxseg8ei8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg8ei8_v_bf16mf2x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg8ei8_v_bf16mf2x8(__bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x8_t vs3, size_t vl) { + return __riscv_vsuxseg8ei8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg8ei8_v_bf16m1x8( +// CHECK-RV64-SAME: ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg8.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg8ei8_v_bf16m1x8(__bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x8_t vs3, size_t vl) { + return __riscv_vsuxseg8ei8(rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg8ei8_v_bf16mf4x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: 
call void @llvm.riscv.vsuxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg8ei8_v_bf16mf4x8_m(vbool64_t vm, __bf16 *rs1, vuint8mf8_t vs2, + vbfloat16mf4x8_t vs3, size_t vl) { + return __riscv_vsuxseg8ei8(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg8ei8_v_bf16mf2x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg8ei8_v_bf16mf2x8_m(vbool32_t vm, __bf16 *rs1, vuint8mf4_t vs2, + vbfloat16mf2x8_t vs3, size_t vl) { + return __riscv_vsuxseg8ei8(vm, rs1, vs2, vs3, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg8ei8_v_bf16m1x8_m( +// CHECK-RV64-SAME: [[VM:%.*]], ptr noundef [[RS1:%.*]], [[VS2:%.*]], target("riscv.vector.tuple", , 8) [[VS3:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 8) [[VS3]], ptr [[RS1]], [[VS2]], [[VM]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg8ei8_v_bf16m1x8_m(vbool16_t vm, __bf16 *rs1, vuint8mf2_t vs2, + vbfloat16m1x8_t vs3, size_t vl) { + return __riscv_vsuxseg8ei8(vm, rs1, vs2, vs3, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxei32.c new file mode 100644 index 0000000000000..ec107fa023afa --- 
/dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxei32.c @@ -0,0 +1,208 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16mf4_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv1bf16.p0.nxv1i32.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vloxei32_v_bf16mf4_tu(vbfloat16mf4_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxei32_v_bf16mf4_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16mf2_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv2bf16.p0.nxv2i32.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vloxei32_v_bf16mf2_tu(vbfloat16mf2_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vloxei32_v_bf16mf2_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16m1_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv4bf16.p0.nxv4i32.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t 
test_vloxei32_v_bf16m1_tu(vbfloat16m1_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vloxei32_v_bf16m1_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16m2_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv8bf16.p0.nxv8i32.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vloxei32_v_bf16m2_tu(vbfloat16m2_t vd, const __bf16 *rs1, vuint32m4_t rs2, size_t vl) { + return __riscv_vloxei32_v_bf16m2_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16m4_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv16bf16.p0.nxv16i32.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m4_t test_vloxei32_v_bf16m4_tu(vbfloat16m4_t vd, const __bf16 *rs1, vuint32m8_t rs2, size_t vl) { + return __riscv_vloxei32_v_bf16m4_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16mf4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv1bf16.p0.nxv1i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vloxei32_v_bf16mf4_tum(vbool64_t vm, vbfloat16mf4_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxei32_v_bf16mf4_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16mf2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef 
[[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv2bf16.p0.nxv2i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vloxei32_v_bf16mf2_tum(vbool32_t vm, vbfloat16mf2_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vloxei32_v_bf16mf2_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16m1_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv4bf16.p0.nxv4i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vloxei32_v_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vloxei32_v_bf16m1_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16m2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv8bf16.p0.nxv8i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vloxei32_v_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd, const __bf16 *rs1, vuint32m4_t rs2, size_t vl) { + return __riscv_vloxei32_v_bf16m2_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16m4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv16bf16.p0.nxv16i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: 
ret [[TMP0]] +// +vbfloat16m4_t test_vloxei32_v_bf16m4_tum(vbool4_t vm, vbfloat16m4_t vd, const __bf16 *rs1, vuint32m8_t rs2, size_t vl) { + return __riscv_vloxei32_v_bf16m4_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16mf4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv1bf16.p0.nxv1i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vloxei32_v_bf16mf4_tumu(vbool64_t vm, vbfloat16mf4_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxei32_v_bf16mf4_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16mf2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv2bf16.p0.nxv2i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vloxei32_v_bf16mf2_tumu(vbool32_t vm, vbfloat16mf2_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vloxei32_v_bf16mf2_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16m1_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv4bf16.p0.nxv4i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vloxei32_v_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vloxei32_v_bf16m1_tumu(vm, vd, rs1, rs2, vl); +} + 
+// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16m2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv8bf16.p0.nxv8i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vloxei32_v_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t vd, const __bf16 *rs1, vuint32m4_t rs2, size_t vl) { + return __riscv_vloxei32_v_bf16m2_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16m4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv16bf16.p0.nxv16i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m4_t test_vloxei32_v_bf16m4_tumu(vbool4_t vm, vbfloat16m4_t vd, const __bf16 *rs1, vuint32m8_t rs2, size_t vl) { + return __riscv_vloxei32_v_bf16m4_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16mf4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv1bf16.p0.nxv1i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vloxei32_v_bf16mf4_mu(vbool64_t vm, vbfloat16mf4_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxei32_v_bf16mf4_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16mf2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: 
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv2bf16.p0.nxv2i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vloxei32_v_bf16mf2_mu(vbool32_t vm, vbfloat16mf2_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vloxei32_v_bf16mf2_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16m1_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv4bf16.p0.nxv4i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vloxei32_v_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vloxei32_v_bf16m1_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16m2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv8bf16.p0.nxv8i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vloxei32_v_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd, const __bf16 *rs1, vuint32m4_t rs2, size_t vl) { + return __riscv_vloxei32_v_bf16m2_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16m4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv16bf16.p0.nxv16i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m4_t 
test_vloxei32_v_bf16m4_mu(vbool4_t vm, vbfloat16m4_t vd, const __bf16 *rs1, vuint32m8_t rs2, size_t vl) { + return __riscv_vloxei32_v_bf16m4_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxei64.c new file mode 100644 index 0000000000000..22081708baf15 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxei64.c @@ -0,0 +1,168 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local @test_vloxei64_v_bf16mf4_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv1bf16.p0.nxv1i64.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vloxei64_v_bf16mf4_tu(vbfloat16mf4_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vloxei64_v_bf16mf4_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei64_v_bf16mf2_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv2bf16.p0.nxv2i64.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vloxei64_v_bf16mf2_tu(vbfloat16mf2_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return 
__riscv_vloxei64_v_bf16mf2_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei64_v_bf16m1_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv4bf16.p0.nxv4i64.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vloxei64_v_bf16m1_tu(vbfloat16m1_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vloxei64_v_bf16m1_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei64_v_bf16m2_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv8bf16.p0.nxv8i64.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vloxei64_v_bf16m2_tu(vbfloat16m2_t vd, const __bf16 *rs1, vuint64m8_t rs2, size_t vl) { + return __riscv_vloxei64_v_bf16m2_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei64_v_bf16mf4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv1bf16.p0.nxv1i64.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vloxei64_v_bf16mf4_tum(vbool64_t vm, vbfloat16mf4_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vloxei64_v_bf16mf4_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei64_v_bf16mf2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
@llvm.riscv.vloxei.mask.nxv2bf16.p0.nxv2i64.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vloxei64_v_bf16mf2_tum(vbool32_t vm, vbfloat16mf2_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vloxei64_v_bf16mf2_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei64_v_bf16m1_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv4bf16.p0.nxv4i64.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vloxei64_v_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vloxei64_v_bf16m1_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei64_v_bf16m2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv8bf16.p0.nxv8i64.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vloxei64_v_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd, const __bf16 *rs1, vuint64m8_t rs2, size_t vl) { + return __riscv_vloxei64_v_bf16m2_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei64_v_bf16mf4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv1bf16.p0.nxv1i64.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vloxei64_v_bf16mf4_tumu(vbool64_t vm, vbfloat16mf4_t vd, 
const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vloxei64_v_bf16mf4_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei64_v_bf16mf2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv2bf16.p0.nxv2i64.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vloxei64_v_bf16mf2_tumu(vbool32_t vm, vbfloat16mf2_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vloxei64_v_bf16mf2_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei64_v_bf16m1_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv4bf16.p0.nxv4i64.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vloxei64_v_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vloxei64_v_bf16m1_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei64_v_bf16m2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv8bf16.p0.nxv8i64.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vloxei64_v_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t vd, const __bf16 *rs1, vuint64m8_t rs2, size_t vl) { + return __riscv_vloxei64_v_bf16m2_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei64_v_bf16mf4_mu( +// CHECK-RV64-SAME: 
[[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv1bf16.p0.nxv1i64.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vloxei64_v_bf16mf4_mu(vbool64_t vm, vbfloat16mf4_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vloxei64_v_bf16mf4_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei64_v_bf16mf2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv2bf16.p0.nxv2i64.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vloxei64_v_bf16mf2_mu(vbool32_t vm, vbfloat16mf2_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vloxei64_v_bf16mf2_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei64_v_bf16m1_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv4bf16.p0.nxv4i64.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vloxei64_v_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vloxei64_v_bf16m1_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei64_v_bf16m2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv8bf16.p0.nxv8i64.i64( [[VD]], 
ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vloxei64_v_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd, const __bf16 *rs1, vuint64m8_t rs2, size_t vl) { + return __riscv_vloxei64_v_bf16m2_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxei8.c new file mode 100644 index 0000000000000..b0b97875ac3ba --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxei8.c @@ -0,0 +1,248 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16mf4_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv1bf16.p0.nxv1i8.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vloxei8_v_bf16mf4_tu(vbfloat16mf4_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxei8_v_bf16mf4_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16mf2_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv2bf16.p0.nxv2i8.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t 
test_vloxei8_v_bf16mf2_tu(vbfloat16mf2_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxei8_v_bf16mf2_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16m1_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv4bf16.p0.nxv4i8.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vloxei8_v_bf16m1_tu(vbfloat16m1_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxei8_v_bf16m1_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16m2_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv8bf16.p0.nxv8i8.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vloxei8_v_bf16m2_tu(vbfloat16m2_t vd, const __bf16 *rs1, vuint8m1_t rs2, size_t vl) { + return __riscv_vloxei8_v_bf16m2_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16m4_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv16bf16.p0.nxv16i8.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m4_t test_vloxei8_v_bf16m4_tu(vbfloat16m4_t vd, const __bf16 *rs1, vuint8m2_t rs2, size_t vl) { + return __riscv_vloxei8_v_bf16m4_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16m8_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = 
call @llvm.riscv.vloxei.nxv32bf16.p0.nxv32i8.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m8_t test_vloxei8_v_bf16m8_tu(vbfloat16m8_t vd, const __bf16 *rs1, vuint8m4_t rs2, size_t vl) { + return __riscv_vloxei8_v_bf16m8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16mf4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv1bf16.p0.nxv1i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vloxei8_v_bf16mf4_tum(vbool64_t vm, vbfloat16mf4_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxei8_v_bf16mf4_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16mf2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv2bf16.p0.nxv2i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vloxei8_v_bf16mf2_tum(vbool32_t vm, vbfloat16mf2_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxei8_v_bf16mf2_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16m1_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv4bf16.p0.nxv4i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vloxei8_v_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) 
{ + return __riscv_vloxei8_v_bf16m1_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16m2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv8bf16.p0.nxv8i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vloxei8_v_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd, const __bf16 *rs1, vuint8m1_t rs2, size_t vl) { + return __riscv_vloxei8_v_bf16m2_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16m4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv16bf16.p0.nxv16i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m4_t test_vloxei8_v_bf16m4_tum(vbool4_t vm, vbfloat16m4_t vd, const __bf16 *rs1, vuint8m2_t rs2, size_t vl) { + return __riscv_vloxei8_v_bf16m4_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16m8_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv32bf16.p0.nxv32i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m8_t test_vloxei8_v_bf16m8_tum(vbool2_t vm, vbfloat16m8_t vd, const __bf16 *rs1, vuint8m4_t rs2, size_t vl) { + return __riscv_vloxei8_v_bf16m8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16mf4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef 
[[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv1bf16.p0.nxv1i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vloxei8_v_bf16mf4_tumu(vbool64_t vm, vbfloat16mf4_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxei8_v_bf16mf4_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16mf2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv2bf16.p0.nxv2i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vloxei8_v_bf16mf2_tumu(vbool32_t vm, vbfloat16mf2_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxei8_v_bf16mf2_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16m1_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv4bf16.p0.nxv4i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vloxei8_v_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxei8_v_bf16m1_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16m2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv8bf16.p0.nxv8i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: 
ret [[TMP0]] +// +vbfloat16m2_t test_vloxei8_v_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t vd, const __bf16 *rs1, vuint8m1_t rs2, size_t vl) { + return __riscv_vloxei8_v_bf16m2_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16m4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv16bf16.p0.nxv16i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m4_t test_vloxei8_v_bf16m4_tumu(vbool4_t vm, vbfloat16m4_t vd, const __bf16 *rs1, vuint8m2_t rs2, size_t vl) { + return __riscv_vloxei8_v_bf16m4_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16m8_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv32bf16.p0.nxv32i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m8_t test_vloxei8_v_bf16m8_tumu(vbool2_t vm, vbfloat16m8_t vd, const __bf16 *rs1, vuint8m4_t rs2, size_t vl) { + return __riscv_vloxei8_v_bf16m8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16mf4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv1bf16.p0.nxv1i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vloxei8_v_bf16mf4_mu(vbool64_t vm, vbfloat16mf4_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxei8_v_bf16mf4_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: 
define dso_local @test_vloxei8_v_bf16mf2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv2bf16.p0.nxv2i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vloxei8_v_bf16mf2_mu(vbool32_t vm, vbfloat16mf2_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxei8_v_bf16mf2_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16m1_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv4bf16.p0.nxv4i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vloxei8_v_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxei8_v_bf16m1_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16m2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv8bf16.p0.nxv8i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vloxei8_v_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd, const __bf16 *rs1, vuint8m1_t rs2, size_t vl) { + return __riscv_vloxei8_v_bf16m2_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16m4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
@llvm.riscv.vloxei.mask.nxv16bf16.p0.nxv16i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m4_t test_vloxei8_v_bf16m4_mu(vbool4_t vm, vbfloat16m4_t vd, const __bf16 *rs1, vuint8m2_t rs2, size_t vl) { + return __riscv_vloxei8_v_bf16m4_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16m8_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv32bf16.p0.nxv32i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m8_t test_vloxei8_v_bf16m8_mu(vbool2_t vm, vbfloat16m8_t vd, const __bf16 *rs1, vuint8m4_t rs2, size_t vl) { + return __riscv_vloxei8_v_bf16m8_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg2ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg2ei32.c new file mode 100644 index 0000000000000..df494cfe1b233 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg2ei32.c @@ -0,0 +1,208 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16mf4x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// 
CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vloxseg2ei32_v_bf16mf4x2_tu(vbfloat16mf4x2_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg2ei32_v_bf16mf4x2_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16mf2x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vloxseg2ei32_v_bf16mf2x2_tu(vbfloat16mf2x2_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei32_v_bf16mf2x2_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16m1x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vloxseg2ei32_v_bf16m1x2_tu(vbfloat16m1x2_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg2ei32_v_bf16m1x2_tu(vd, rs1, rs2, 
vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16m2x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i32.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vloxseg2ei32_v_bf16m2x2_tu(vbfloat16m2x2_t vd, const __bf16 *rs1, vuint32m4_t rs2, size_t vl) { + return __riscv_vloxseg2ei32_v_bf16m2x2_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16m4x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i32.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m4x2_t test_vloxseg2ei32_v_bf16m4x2_tu(vbfloat16m4x2_t vd, const __bf16 *rs1, vuint32m8_t rs2, size_t vl) { + return __riscv_vloxseg2ei32_v_bf16m4x2_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16mf4x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) 
@llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vloxseg2ei32_v_bf16mf4x2_tum(vbool64_t vm, vbfloat16mf4x2_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg2ei32_v_bf16mf4x2_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16mf2x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vloxseg2ei32_v_bf16mf2x2_tum(vbool32_t vm, vbfloat16mf2x2_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei32_v_bf16mf2x2_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16m1x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vloxseg2ei32_v_bf16m1x2_tum(vbool16_t vm, vbfloat16m1x2_t vd, const __bf16 *rs1, 
vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg2ei32_v_bf16m1x2_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16m2x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vloxseg2ei32_v_bf16m2x2_tum(vbool8_t vm, vbfloat16m2x2_t vd, const __bf16 *rs1, vuint32m4_t rs2, size_t vl) { + return __riscv_vloxseg2ei32_v_bf16m2x2_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16m4x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i32.nxv16i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m4x2_t test_vloxseg2ei32_v_bf16m4x2_tum(vbool4_t vm, vbfloat16m4x2_t vd, const __bf16 *rs1, vuint32m8_t rs2, size_t vl) { + return __riscv_vloxseg2ei32_v_bf16m4x2_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16mf4x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) 
#[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vloxseg2ei32_v_bf16mf4x2_tumu(vbool64_t vm, vbfloat16mf4x2_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg2ei32_v_bf16mf4x2_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16mf2x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vloxseg2ei32_v_bf16mf2x2_tumu(vbool32_t vm, vbfloat16mf2x2_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei32_v_bf16mf2x2_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16m1x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", 
, 2) [[TMP0]] +// +vbfloat16m1x2_t test_vloxseg2ei32_v_bf16m1x2_tumu(vbool16_t vm, vbfloat16m1x2_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg2ei32_v_bf16m1x2_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16m2x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vloxseg2ei32_v_bf16m2x2_tumu(vbool8_t vm, vbfloat16m2x2_t vd, const __bf16 *rs1, vuint32m4_t rs2, size_t vl) { + return __riscv_vloxseg2ei32_v_bf16m2x2_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16m4x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i32.nxv16i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m4x2_t test_vloxseg2ei32_v_bf16m4x2_tumu(vbool4_t vm, vbfloat16m4x2_t vd, const __bf16 *rs1, vuint32m8_t rs2, size_t vl) { + return __riscv_vloxseg2ei32_v_bf16m4x2_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16mf4x2_mu( +// 
CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vloxseg2ei32_v_bf16mf4x2_mu(vbool64_t vm, vbfloat16mf4x2_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg2ei32_v_bf16mf4x2_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16mf2x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vloxseg2ei32_v_bf16mf2x2_mu(vbool32_t vm, vbfloat16mf2x2_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei32_v_bf16mf2x2_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16m1x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) 
@llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vloxseg2ei32_v_bf16m1x2_mu(vbool16_t vm, vbfloat16m1x2_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg2ei32_v_bf16m1x2_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16m2x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vloxseg2ei32_v_bf16m2x2_mu(vbool8_t vm, vbfloat16m2x2_t vd, const __bf16 *rs1, vuint32m4_t rs2, size_t vl) { + return __riscv_vloxseg2ei32_v_bf16m2x2_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16m4x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i32.nxv16i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m4x2_t test_vloxseg2ei32_v_bf16m4x2_mu(vbool4_t vm, vbfloat16m4x2_t vd, const __bf16 *rs1, vuint32m8_t 
rs2, size_t vl) { + return __riscv_vloxseg2ei32_v_bf16m4x2_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg2ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg2ei64.c new file mode 100644 index 0000000000000..1d7d48a3305c9 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg2ei64.c @@ -0,0 +1,168 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei64_v_bf16mf4x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vloxseg2ei64_v_bf16mf4x2_tu(vbfloat16mf4x2_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei64_v_bf16mf4x2_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei64_v_bf16mf2x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) 
@llvm.riscv.vloxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vloxseg2ei64_v_bf16mf2x2_tu(vbfloat16mf2x2_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg2ei64_v_bf16mf2x2_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei64_v_bf16m1x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vloxseg2ei64_v_bf16m1x2_tu(vbfloat16m1x2_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg2ei64_v_bf16m1x2_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei64_v_bf16m2x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i64.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vloxseg2ei64_v_bf16m2x2_tu(vbfloat16m2x2_t vd, const __bf16 *rs1, vuint64m8_t rs2, size_t vl) { + return __riscv_vloxseg2ei64_v_bf16m2x2_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 
2) @test_vloxseg2ei64_v_bf16mf4x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vloxseg2ei64_v_bf16mf4x2_tum(vbool64_t vm, vbfloat16mf4x2_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei64_v_bf16mf4x2_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei64_v_bf16mf2x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vloxseg2ei64_v_bf16mf2x2_tum(vbool32_t vm, vbfloat16mf2x2_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg2ei64_v_bf16mf2x2_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei64_v_bf16m1x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) 
@llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vloxseg2ei64_v_bf16m1x2_tum(vbool16_t vm, vbfloat16m1x2_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg2ei64_v_bf16m1x2_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei64_v_bf16m2x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vloxseg2ei64_v_bf16m2x2_tum(vbool8_t vm, vbfloat16m2x2_t vd, const __bf16 *rs1, vuint64m8_t rs2, size_t vl) { + return __riscv_vloxseg2ei64_v_bf16m2x2_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei64_v_bf16mf4x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vloxseg2ei64_v_bf16mf4x2_tumu(vbool64_t vm, vbfloat16mf4x2_t vd, const __bf16 *rs1, 
vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei64_v_bf16mf4x2_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei64_v_bf16mf2x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vloxseg2ei64_v_bf16mf2x2_tumu(vbool32_t vm, vbfloat16mf2x2_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg2ei64_v_bf16mf2x2_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei64_v_bf16m1x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vloxseg2ei64_v_bf16m1x2_tumu(vbool16_t vm, vbfloat16m1x2_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg2ei64_v_bf16m1x2_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei64_v_bf16m2x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef 
[[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vloxseg2ei64_v_bf16m2x2_tumu(vbool8_t vm, vbfloat16m2x2_t vd, const __bf16 *rs1, vuint64m8_t rs2, size_t vl) { + return __riscv_vloxseg2ei64_v_bf16m2x2_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei64_v_bf16mf4x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vloxseg2ei64_v_bf16mf4x2_mu(vbool64_t vm, vbfloat16mf4x2_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei64_v_bf16mf4x2_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei64_v_bf16mf2x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", 
, 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vloxseg2ei64_v_bf16mf2x2_mu(vbool32_t vm, vbfloat16mf2x2_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg2ei64_v_bf16mf2x2_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei64_v_bf16m1x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vloxseg2ei64_v_bf16m1x2_mu(vbool16_t vm, vbfloat16m1x2_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg2ei64_v_bf16m1x2_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei64_v_bf16m2x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vloxseg2ei64_v_bf16m2x2_mu(vbool8_t vm, vbfloat16m2x2_t vd, const __bf16 *rs1, vuint64m8_t rs2, size_t vl) { + return __riscv_vloxseg2ei64_v_bf16m2x2_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg2ei8.c 
b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg2ei8.c new file mode 100644 index 0000000000000..44070dd3888c7 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg2ei8.c @@ -0,0 +1,208 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16mf4x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vloxseg2ei8_v_bf16mf4x2_tu(vbfloat16mf4x2_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg2ei8_v_bf16mf4x2_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16mf2x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) 
[[TMP0]] +// +vbfloat16mf2x2_t test_vloxseg2ei8_v_bf16mf2x2_tu(vbfloat16mf2x2_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg2ei8_v_bf16mf2x2_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16m1x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vloxseg2ei8_v_bf16m1x2_tu(vbfloat16m1x2_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg2ei8_v_bf16m1x2_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16m2x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i8.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vloxseg2ei8_v_bf16m2x2_tu(vbfloat16m2x2_t vd, const __bf16 *rs1, vuint8m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei8_v_bf16m2x2_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16m4x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// 
CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i8.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m4x2_t test_vloxseg2ei8_v_bf16m4x2_tu(vbfloat16m4x2_t vd, const __bf16 *rs1, vuint8m2_t rs2, size_t vl) { + return __riscv_vloxseg2ei8_v_bf16m4x2_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16mf4x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vloxseg2ei8_v_bf16mf4x2_tum(vbool64_t vm, vbfloat16mf4x2_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg2ei8_v_bf16mf4x2_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16mf2x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vloxseg2ei8_v_bf16mf2x2_tum(vbool32_t vm, vbfloat16mf2x2_t vd, const 
__bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg2ei8_v_bf16mf2x2_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16m1x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vloxseg2ei8_v_bf16m1x2_tum(vbool16_t vm, vbfloat16m1x2_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg2ei8_v_bf16m1x2_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16m2x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vloxseg2ei8_v_bf16m2x2_tum(vbool8_t vm, vbfloat16m2x2_t vd, const __bf16 *rs1, vuint8m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei8_v_bf16m2x2_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16m4x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) 
#[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i8.nxv16i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m4x2_t test_vloxseg2ei8_v_bf16m4x2_tum(vbool4_t vm, vbfloat16m4x2_t vd, const __bf16 *rs1, vuint8m2_t rs2, size_t vl) { + return __riscv_vloxseg2ei8_v_bf16m4x2_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16mf4x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vloxseg2ei8_v_bf16mf4x2_tumu(vbool64_t vm, vbfloat16mf4x2_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg2ei8_v_bf16mf4x2_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16mf2x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] 
+// +vbfloat16mf2x2_t test_vloxseg2ei8_v_bf16mf2x2_tumu(vbool32_t vm, vbfloat16mf2x2_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg2ei8_v_bf16mf2x2_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16m1x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vloxseg2ei8_v_bf16m1x2_tumu(vbool16_t vm, vbfloat16m1x2_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg2ei8_v_bf16m1x2_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16m2x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vloxseg2ei8_v_bf16m2x2_tumu(vbool8_t vm, vbfloat16m2x2_t vd, const __bf16 *rs1, vuint8m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei8_v_bf16m2x2_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16m4x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], 
target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i8.nxv16i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m4x2_t test_vloxseg2ei8_v_bf16m4x2_tumu(vbool4_t vm, vbfloat16m4x2_t vd, const __bf16 *rs1, vuint8m2_t rs2, size_t vl) { + return __riscv_vloxseg2ei8_v_bf16m4x2_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16mf4x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vloxseg2ei8_v_bf16mf4x2_mu(vbool64_t vm, vbfloat16mf4x2_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg2ei8_v_bf16mf4x2_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16mf2x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], 
[[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vloxseg2ei8_v_bf16mf2x2_mu(vbool32_t vm, vbfloat16mf2x2_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg2ei8_v_bf16mf2x2_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16m1x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vloxseg2ei8_v_bf16m1x2_mu(vbool16_t vm, vbfloat16m1x2_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg2ei8_v_bf16m1x2_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16m2x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vloxseg2ei8_v_bf16m2x2_mu(vbool8_t vm, vbfloat16m2x2_t vd, const __bf16 *rs1, vuint8m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei8_v_bf16m2x2_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local 
target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16m4x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i8.nxv16i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m4x2_t test_vloxseg2ei8_v_bf16m4x2_mu(vbool4_t vm, vbfloat16m4x2_t vd, const __bf16 *rs1, vuint8m2_t rs2, size_t vl) { + return __riscv_vloxseg2ei8_v_bf16m4x2_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg3ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg3ei32.c new file mode 100644 index 0000000000000..86b6692126e89 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg3ei32.c @@ -0,0 +1,168 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei32_v_bf16mf4x3_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) 
@llvm.riscv.vloxseg3.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vloxseg3ei32_v_bf16mf4x3_tu(vbfloat16mf4x3_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg3ei32_v_bf16mf4x3_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei32_v_bf16mf2x3_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vloxseg3ei32_v_bf16mf2x3_tu(vbfloat16mf2x3_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg3ei32_v_bf16mf2x3_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei32_v_bf16m1x3_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vloxseg3ei32_v_bf16m1x3_tu(vbfloat16m1x3_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg3ei32_v_bf16m1x3_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local 
target("riscv.vector.tuple", , 3) @test_vloxseg3ei32_v_bf16m2x3_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i32.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vloxseg3ei32_v_bf16m2x3_tu(vbfloat16m2x3_t vd, const __bf16 *rs1, vuint32m4_t rs2, size_t vl) { + return __riscv_vloxseg3ei32_v_bf16m2x3_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei32_v_bf16mf4x3_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vloxseg3ei32_v_bf16mf4x3_tum(vbool64_t vm, vbfloat16mf4x3_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg3ei32_v_bf16mf4x3_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei32_v_bf16mf2x3_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) 
@llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vloxseg3ei32_v_bf16mf2x3_tum(vbool32_t vm, vbfloat16mf2x3_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg3ei32_v_bf16mf2x3_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei32_v_bf16m1x3_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vloxseg3ei32_v_bf16m1x3_tum(vbool16_t vm, vbfloat16m1x3_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg3ei32_v_bf16m1x3_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei32_v_bf16m2x3_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vloxseg3ei32_v_bf16m2x3_tum(vbool8_t vm, vbfloat16m2x3_t vd, const __bf16 *rs1, 
vuint32m4_t rs2, size_t vl) { + return __riscv_vloxseg3ei32_v_bf16m2x3_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei32_v_bf16mf4x3_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vloxseg3ei32_v_bf16mf4x3_tumu(vbool64_t vm, vbfloat16mf4x3_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg3ei32_v_bf16mf4x3_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei32_v_bf16mf2x3_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vloxseg3ei32_v_bf16mf2x3_tumu(vbool32_t vm, vbfloat16mf2x3_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg3ei32_v_bf16mf2x3_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei32_v_bf16m1x3_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef 
[[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vloxseg3ei32_v_bf16m1x3_tumu(vbool16_t vm, vbfloat16m1x3_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg3ei32_v_bf16m1x3_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei32_v_bf16m2x3_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vloxseg3ei32_v_bf16m2x3_tumu(vbool8_t vm, vbfloat16m2x3_t vd, const __bf16 *rs1, vuint32m4_t rs2, size_t vl) { + return __riscv_vloxseg3ei32_v_bf16m2x3_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei32_v_bf16mf4x3_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret 
target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vloxseg3ei32_v_bf16mf4x3_mu(vbool64_t vm, vbfloat16mf4x3_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg3ei32_v_bf16mf4x3_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei32_v_bf16mf2x3_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vloxseg3ei32_v_bf16mf2x3_mu(vbool32_t vm, vbfloat16mf2x3_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg3ei32_v_bf16mf2x3_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei32_v_bf16m1x3_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vloxseg3ei32_v_bf16m1x3_mu(vbool16_t vm, vbfloat16m1x3_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg3ei32_v_bf16m1x3_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei32_v_bf16m2x3_mu( 
+// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vloxseg3ei32_v_bf16m2x3_mu(vbool8_t vm, vbfloat16m2x3_t vd, const __bf16 *rs1, vuint32m4_t rs2, size_t vl) { + return __riscv_vloxseg3ei32_v_bf16m2x3_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg3ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg3ei64.c new file mode 100644 index 0000000000000..960df2840fadd --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg3ei64.c @@ -0,0 +1,168 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei64_v_bf16mf4x3_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 
4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vloxseg3ei64_v_bf16mf4x3_tu(vbfloat16mf4x3_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg3ei64_v_bf16mf4x3_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei64_v_bf16mf2x3_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vloxseg3ei64_v_bf16mf2x3_tu(vbfloat16mf2x3_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg3ei64_v_bf16mf2x3_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei64_v_bf16m1x3_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vloxseg3ei64_v_bf16m1x3_tu(vbfloat16m1x3_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg3ei64_v_bf16m1x3_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei64_v_bf16m2x3_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], 
i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i64.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vloxseg3ei64_v_bf16m2x3_tu(vbfloat16m2x3_t vd, const __bf16 *rs1, vuint64m8_t rs2, size_t vl) { + return __riscv_vloxseg3ei64_v_bf16m2x3_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei64_v_bf16mf4x3_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vloxseg3ei64_v_bf16mf4x3_tum(vbool64_t vm, vbfloat16mf4x3_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg3ei64_v_bf16mf4x3_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei64_v_bf16mf2x3_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// 
+vbfloat16mf2x3_t test_vloxseg3ei64_v_bf16mf2x3_tum(vbool32_t vm, vbfloat16mf2x3_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg3ei64_v_bf16mf2x3_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei64_v_bf16m1x3_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vloxseg3ei64_v_bf16m1x3_tum(vbool16_t vm, vbfloat16m1x3_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg3ei64_v_bf16m1x3_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei64_v_bf16m2x3_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vloxseg3ei64_v_bf16m2x3_tum(vbool8_t vm, vbfloat16m2x3_t vd, const __bf16 *rs1, vuint64m8_t rs2, size_t vl) { + return __riscv_vloxseg3ei64_v_bf16m2x3_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei64_v_bf16mf4x3_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], 
target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vloxseg3ei64_v_bf16mf4x3_tumu(vbool64_t vm, vbfloat16mf4x3_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg3ei64_v_bf16mf4x3_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei64_v_bf16mf2x3_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vloxseg3ei64_v_bf16mf2x3_tumu(vbool32_t vm, vbfloat16mf2x3_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg3ei64_v_bf16mf2x3_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei64_v_bf16m1x3_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 3) [[VD]], 
ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vloxseg3ei64_v_bf16m1x3_tumu(vbool16_t vm, vbfloat16m1x3_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg3ei64_v_bf16m1x3_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei64_v_bf16m2x3_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vloxseg3ei64_v_bf16m2x3_tumu(vbool8_t vm, vbfloat16m2x3_t vd, const __bf16 *rs1, vuint64m8_t rs2, size_t vl) { + return __riscv_vloxseg3ei64_v_bf16m2x3_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei64_v_bf16mf4x3_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vloxseg3ei64_v_bf16mf4x3_mu(vbool64_t vm, vbfloat16mf4x3_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg3ei64_v_bf16mf4x3_mu(vm, vd, rs1, rs2, vl); +} + +// 
CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei64_v_bf16mf2x3_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vloxseg3ei64_v_bf16mf2x3_mu(vbool32_t vm, vbfloat16mf2x3_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg3ei64_v_bf16mf2x3_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei64_v_bf16m1x3_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vloxseg3ei64_v_bf16m1x3_mu(vbool16_t vm, vbfloat16m1x3_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg3ei64_v_bf16m1x3_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei64_v_bf16m2x3_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) 
@llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vloxseg3ei64_v_bf16m2x3_mu(vbool8_t vm, vbfloat16m2x3_t vd, const __bf16 *rs1, vuint64m8_t rs2, size_t vl) { + return __riscv_vloxseg3ei64_v_bf16m2x3_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg3ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg3ei8.c new file mode 100644 index 0000000000000..00d0958458b0f --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg3ei8.c @@ -0,0 +1,168 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei8_v_bf16mf4x3_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vloxseg3ei8_v_bf16mf4x3_tu(vbfloat16mf4x3_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg3ei8_v_bf16mf4x3_tu(vd, rs1, rs2, vl); +} + 
+// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei8_v_bf16mf2x3_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vloxseg3ei8_v_bf16mf2x3_tu(vbfloat16mf2x3_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg3ei8_v_bf16mf2x3_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei8_v_bf16m1x3_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vloxseg3ei8_v_bf16m1x3_tu(vbfloat16m1x3_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg3ei8_v_bf16m1x3_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei8_v_bf16m2x3_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i8.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], i64 
[[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vloxseg3ei8_v_bf16m2x3_tu(vbfloat16m2x3_t vd, const __bf16 *rs1, vuint8m1_t rs2, size_t vl) { + return __riscv_vloxseg3ei8_v_bf16m2x3_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei8_v_bf16mf4x3_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vloxseg3ei8_v_bf16mf4x3_tum(vbool64_t vm, vbfloat16mf4x3_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg3ei8_v_bf16mf4x3_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei8_v_bf16mf2x3_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vloxseg3ei8_v_bf16mf2x3_tum(vbool32_t vm, vbfloat16mf2x3_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg3ei8_v_bf16mf2x3_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) 
@test_vloxseg3ei8_v_bf16m1x3_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vloxseg3ei8_v_bf16m1x3_tum(vbool16_t vm, vbfloat16m1x3_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg3ei8_v_bf16m1x3_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei8_v_bf16m2x3_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vloxseg3ei8_v_bf16m2x3_tum(vbool8_t vm, vbfloat16m2x3_t vd, const __bf16 *rs1, vuint8m1_t rs2, size_t vl) { + return __riscv_vloxseg3ei8_v_bf16m2x3_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei8_v_bf16mf4x3_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) 
@llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vloxseg3ei8_v_bf16mf4x3_tumu(vbool64_t vm, vbfloat16mf4x3_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg3ei8_v_bf16mf4x3_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei8_v_bf16mf2x3_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vloxseg3ei8_v_bf16mf2x3_tumu(vbool32_t vm, vbfloat16mf2x3_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg3ei8_v_bf16mf2x3_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei8_v_bf16m1x3_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vloxseg3ei8_v_bf16m1x3_tumu(vbool16_t vm, vbfloat16m1x3_t vd, const __bf16 *rs1, 
vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg3ei8_v_bf16m1x3_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei8_v_bf16m2x3_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vloxseg3ei8_v_bf16m2x3_tumu(vbool8_t vm, vbfloat16m2x3_t vd, const __bf16 *rs1, vuint8m1_t rs2, size_t vl) { + return __riscv_vloxseg3ei8_v_bf16m2x3_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei8_v_bf16mf4x3_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vloxseg3ei8_v_bf16mf4x3_mu(vbool64_t vm, vbfloat16mf4x3_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg3ei8_v_bf16mf4x3_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei8_v_bf16mf2x3_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { 
+// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vloxseg3ei8_v_bf16mf2x3_mu(vbool32_t vm, vbfloat16mf2x3_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg3ei8_v_bf16mf2x3_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei8_v_bf16m1x3_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vloxseg3ei8_v_bf16m1x3_mu(vbool16_t vm, vbfloat16m1x3_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg3ei8_v_bf16m1x3_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei8_v_bf16m2x3_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t 
test_vloxseg3ei8_v_bf16m2x3_mu(vbool8_t vm, vbfloat16m2x3_t vd, const __bf16 *rs1, vuint8m1_t rs2, size_t vl) { + return __riscv_vloxseg3ei8_v_bf16m2x3_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg4ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg4ei32.c new file mode 100644 index 0000000000000..0ad8e794bc4bc --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg4ei32.c @@ -0,0 +1,168 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei32_v_bf16mf4x4_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vloxseg4ei32_v_bf16mf4x4_tu(vbfloat16mf4x4_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg4ei32_v_bf16mf4x4_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei32_v_bf16mf2x4_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// 
CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vloxseg4ei32_v_bf16mf2x4_tu(vbfloat16mf2x4_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei32_v_bf16mf2x4_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei32_v_bf16m1x4_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vloxseg4ei32_v_bf16m1x4_tu(vbfloat16m1x4_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg4ei32_v_bf16m1x4_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei32_v_bf16m2x4_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i32.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vloxseg4ei32_v_bf16m2x4_tu(vbfloat16m2x4_t vd, const __bf16 *rs1, vuint32m4_t rs2, size_t vl) { + return 
__riscv_vloxseg4ei32_v_bf16m2x4_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei32_v_bf16mf4x4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vloxseg4ei32_v_bf16mf4x4_tum(vbool64_t vm, vbfloat16mf4x4_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg4ei32_v_bf16mf4x4_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei32_v_bf16mf2x4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vloxseg4ei32_v_bf16mf2x4_tum(vbool32_t vm, vbfloat16mf2x4_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei32_v_bf16mf2x4_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei32_v_bf16m1x4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: 
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vloxseg4ei32_v_bf16m1x4_tum(vbool16_t vm, vbfloat16m1x4_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg4ei32_v_bf16m1x4_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei32_v_bf16m2x4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vloxseg4ei32_v_bf16m2x4_tum(vbool8_t vm, vbfloat16m2x4_t vd, const __bf16 *rs1, vuint32m4_t rs2, size_t vl) { + return __riscv_vloxseg4ei32_v_bf16m2x4_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei32_v_bf16mf4x4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t 
test_vloxseg4ei32_v_bf16mf4x4_tumu(vbool64_t vm, vbfloat16mf4x4_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg4ei32_v_bf16mf4x4_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei32_v_bf16mf2x4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vloxseg4ei32_v_bf16mf2x4_tumu(vbool32_t vm, vbfloat16mf2x4_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei32_v_bf16mf2x4_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei32_v_bf16m1x4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vloxseg4ei32_v_bf16m1x4_tumu(vbool16_t vm, vbfloat16m1x4_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg4ei32_v_bf16m1x4_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei32_v_bf16m2x4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], 
target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vloxseg4ei32_v_bf16m2x4_tumu(vbool8_t vm, vbfloat16m2x4_t vd, const __bf16 *rs1, vuint32m4_t rs2, size_t vl) { + return __riscv_vloxseg4ei32_v_bf16m2x4_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei32_v_bf16mf4x4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vloxseg4ei32_v_bf16mf4x4_mu(vbool64_t vm, vbfloat16mf4x4_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg4ei32_v_bf16mf4x4_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei32_v_bf16mf2x4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], 
[[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vloxseg4ei32_v_bf16mf2x4_mu(vbool32_t vm, vbfloat16mf2x4_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei32_v_bf16mf2x4_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei32_v_bf16m1x4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vloxseg4ei32_v_bf16m1x4_mu(vbool16_t vm, vbfloat16m1x4_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg4ei32_v_bf16m1x4_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei32_v_bf16m2x4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vloxseg4ei32_v_bf16m2x4_mu(vbool8_t vm, vbfloat16m2x4_t vd, const __bf16 *rs1, vuint32m4_t rs2, size_t vl) { + return __riscv_vloxseg4ei32_v_bf16m2x4_mu(vm, vd, rs1, rs2, vl); +} diff --git 
a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg4ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg4ei64.c new file mode 100644 index 0000000000000..337031e119f7d --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg4ei64.c @@ -0,0 +1,168 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei64_v_bf16mf4x4_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vloxseg4ei64_v_bf16mf4x4_tu(vbfloat16mf4x4_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei64_v_bf16mf4x4_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei64_v_bf16mf2x4_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 4) 
[[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vloxseg4ei64_v_bf16mf2x4_tu(vbfloat16mf2x4_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg4ei64_v_bf16mf2x4_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei64_v_bf16m1x4_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vloxseg4ei64_v_bf16m1x4_tu(vbfloat16m1x4_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg4ei64_v_bf16m1x4_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei64_v_bf16m2x4_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i64.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vloxseg4ei64_v_bf16m2x4_tu(vbfloat16m2x4_t vd, const __bf16 *rs1, vuint64m8_t rs2, size_t vl) { + return __riscv_vloxseg4ei64_v_bf16m2x4_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei64_v_bf16mf4x4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], 
target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vloxseg4ei64_v_bf16mf4x4_tum(vbool64_t vm, vbfloat16mf4x4_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei64_v_bf16mf4x4_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei64_v_bf16mf2x4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vloxseg4ei64_v_bf16mf2x4_tum(vbool32_t vm, vbfloat16mf2x4_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg4ei64_v_bf16mf2x4_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei64_v_bf16m1x4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr 
[[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vloxseg4ei64_v_bf16m1x4_tum(vbool16_t vm, vbfloat16m1x4_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg4ei64_v_bf16m1x4_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei64_v_bf16m2x4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vloxseg4ei64_v_bf16m2x4_tum(vbool8_t vm, vbfloat16m2x4_t vd, const __bf16 *rs1, vuint64m8_t rs2, size_t vl) { + return __riscv_vloxseg4ei64_v_bf16m2x4_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei64_v_bf16mf4x4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vloxseg4ei64_v_bf16mf4x4_tumu(vbool64_t vm, vbfloat16mf4x4_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei64_v_bf16mf4x4_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: 
define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei64_v_bf16mf2x4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vloxseg4ei64_v_bf16mf2x4_tumu(vbool32_t vm, vbfloat16mf2x4_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg4ei64_v_bf16mf2x4_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei64_v_bf16m1x4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vloxseg4ei64_v_bf16m1x4_tumu(vbool16_t vm, vbfloat16m1x4_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg4ei64_v_bf16m1x4_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei64_v_bf16m2x4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) 
@llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vloxseg4ei64_v_bf16m2x4_tumu(vbool8_t vm, vbfloat16m2x4_t vd, const __bf16 *rs1, vuint64m8_t rs2, size_t vl) { + return __riscv_vloxseg4ei64_v_bf16m2x4_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei64_v_bf16mf4x4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vloxseg4ei64_v_bf16mf4x4_mu(vbool64_t vm, vbfloat16mf4x4_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei64_v_bf16mf4x4_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei64_v_bf16mf2x4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vloxseg4ei64_v_bf16mf2x4_mu(vbool32_t vm, vbfloat16mf2x4_t vd, const __bf16 *rs1, 
vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg4ei64_v_bf16mf2x4_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei64_v_bf16m1x4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vloxseg4ei64_v_bf16m1x4_mu(vbool16_t vm, vbfloat16m1x4_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg4ei64_v_bf16m1x4_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei64_v_bf16m2x4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vloxseg4ei64_v_bf16m2x4_mu(vbool8_t vm, vbfloat16m2x4_t vd, const __bf16 *rs1, vuint64m8_t rs2, size_t vl) { + return __riscv_vloxseg4ei64_v_bf16m2x4_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg4ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg4ei8.c new file mode 100644 index 
0000000000000..f2d9383676af1 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg4ei8.c @@ -0,0 +1,168 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei8_v_bf16mf4x4_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vloxseg4ei8_v_bf16mf4x4_tu(vbfloat16mf4x4_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg4ei8_v_bf16mf4x4_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei8_v_bf16mf2x4_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vloxseg4ei8_v_bf16mf2x4_tu(vbfloat16mf2x4_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) 
{ + return __riscv_vloxseg4ei8_v_bf16mf2x4_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei8_v_bf16m1x4_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vloxseg4ei8_v_bf16m1x4_tu(vbfloat16m1x4_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg4ei8_v_bf16m1x4_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei8_v_bf16m2x4_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i8.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vloxseg4ei8_v_bf16m2x4_tu(vbfloat16m2x4_t vd, const __bf16 *rs1, vuint8m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei8_v_bf16m2x4_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei8_v_bf16mf4x4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) 
@llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vloxseg4ei8_v_bf16mf4x4_tum(vbool64_t vm, vbfloat16mf4x4_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg4ei8_v_bf16mf4x4_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei8_v_bf16mf2x4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vloxseg4ei8_v_bf16mf2x4_tum(vbool32_t vm, vbfloat16mf2x4_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg4ei8_v_bf16mf2x4_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei8_v_bf16m1x4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vloxseg4ei8_v_bf16m1x4_tum(vbool16_t vm, vbfloat16m1x4_t vd, const __bf16 *rs1, vuint8mf2_t 
rs2, size_t vl) { + return __riscv_vloxseg4ei8_v_bf16m1x4_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei8_v_bf16m2x4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vloxseg4ei8_v_bf16m2x4_tum(vbool8_t vm, vbfloat16m2x4_t vd, const __bf16 *rs1, vuint8m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei8_v_bf16m2x4_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei8_v_bf16mf4x4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vloxseg4ei8_v_bf16mf4x4_tumu(vbool64_t vm, vbfloat16mf4x4_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg4ei8_v_bf16mf4x4_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei8_v_bf16mf2x4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// 
CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vloxseg4ei8_v_bf16mf2x4_tumu(vbool32_t vm, vbfloat16mf2x4_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg4ei8_v_bf16mf2x4_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei8_v_bf16m1x4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vloxseg4ei8_v_bf16m1x4_tumu(vbool16_t vm, vbfloat16m1x4_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg4ei8_v_bf16m1x4_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei8_v_bf16m2x4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// 
+vbfloat16m2x4_t test_vloxseg4ei8_v_bf16m2x4_tumu(vbool8_t vm, vbfloat16m2x4_t vd, const __bf16 *rs1, vuint8m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei8_v_bf16m2x4_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei8_v_bf16mf4x4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vloxseg4ei8_v_bf16mf4x4_mu(vbool64_t vm, vbfloat16mf4x4_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg4ei8_v_bf16mf4x4_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei8_v_bf16mf2x4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vloxseg4ei8_v_bf16mf2x4_mu(vbool32_t vm, vbfloat16mf2x4_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg4ei8_v_bf16mf2x4_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei8_v_bf16m1x4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], 
target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vloxseg4ei8_v_bf16m1x4_mu(vbool16_t vm, vbfloat16m1x4_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg4ei8_v_bf16m1x4_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei8_v_bf16m2x4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vloxseg4ei8_v_bf16m2x4_mu(vbool8_t vm, vbfloat16m2x4_t vd, const __bf16 *rs1, vuint8m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei8_v_bf16m2x4_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg5ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg5ei32.c new file mode 100644 index 0000000000000..8a43698cc7b8d --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg5ei32.c @@ -0,0 +1,128 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// 
REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei32_v_bf16mf4x5_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vloxseg5ei32_v_bf16mf4x5_tu(vbfloat16mf4x5_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg5ei32_v_bf16mf4x5_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei32_v_bf16mf2x5_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vloxseg5ei32_v_bf16mf2x5_tu(vbfloat16mf2x5_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg5ei32_v_bf16mf2x5_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei32_v_bf16m1x5_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef 
[[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vloxseg5ei32_v_bf16m1x5_tu(vbfloat16m1x5_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg5ei32_v_bf16m1x5_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei32_v_bf16mf4x5_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vloxseg5ei32_v_bf16mf4x5_tum(vbool64_t vm, vbfloat16mf4x5_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg5ei32_v_bf16mf4x5_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei32_v_bf16mf2x5_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) 
[[TMP0]] +// +vbfloat16mf2x5_t test_vloxseg5ei32_v_bf16mf2x5_tum(vbool32_t vm, vbfloat16mf2x5_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg5ei32_v_bf16mf2x5_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei32_v_bf16m1x5_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vloxseg5ei32_v_bf16m1x5_tum(vbool16_t vm, vbfloat16m1x5_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg5ei32_v_bf16m1x5_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei32_v_bf16mf4x5_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vloxseg5ei32_v_bf16mf4x5_tumu(vbool64_t vm, vbfloat16mf4x5_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg5ei32_v_bf16mf4x5_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei32_v_bf16mf2x5_tumu( +// 
CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vloxseg5ei32_v_bf16mf2x5_tumu(vbool32_t vm, vbfloat16mf2x5_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg5ei32_v_bf16mf2x5_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei32_v_bf16m1x5_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vloxseg5ei32_v_bf16m1x5_tumu(vbool16_t vm, vbfloat16m1x5_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg5ei32_v_bf16m1x5_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei32_v_bf16mf4x5_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) 
@llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vloxseg5ei32_v_bf16mf4x5_mu(vbool64_t vm, vbfloat16mf4x5_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg5ei32_v_bf16mf4x5_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei32_v_bf16mf2x5_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vloxseg5ei32_v_bf16mf2x5_mu(vbool32_t vm, vbfloat16mf2x5_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg5ei32_v_bf16mf2x5_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei32_v_bf16m1x5_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vloxseg5ei32_v_bf16m1x5_mu(vbool16_t vm, vbfloat16m1x5_t vd, const __bf16 *rs1, 
vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg5ei32_v_bf16m1x5_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg5ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg5ei64.c new file mode 100644 index 0000000000000..90bd04ecaf510 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg5ei64.c @@ -0,0 +1,128 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei64_v_bf16mf4x5_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vloxseg5ei64_v_bf16mf4x5_tu(vbfloat16mf4x5_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg5ei64_v_bf16mf4x5_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei64_v_bf16mf2x5_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vloxseg5ei64_v_bf16mf2x5_tu(vbfloat16mf2x5_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg5ei64_v_bf16mf2x5_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei64_v_bf16m1x5_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vloxseg5ei64_v_bf16m1x5_tu(vbfloat16m1x5_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg5ei64_v_bf16m1x5_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei64_v_bf16mf4x5_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vloxseg5ei64_v_bf16mf4x5_tum(vbool64_t vm, vbfloat16mf4x5_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return 
__riscv_vloxseg5ei64_v_bf16mf4x5_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei64_v_bf16mf2x5_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vloxseg5ei64_v_bf16mf2x5_tum(vbool32_t vm, vbfloat16mf2x5_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg5ei64_v_bf16mf2x5_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei64_v_bf16m1x5_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vloxseg5ei64_v_bf16m1x5_tum(vbool16_t vm, vbfloat16m1x5_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg5ei64_v_bf16m1x5_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei64_v_bf16mf4x5_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: 
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vloxseg5ei64_v_bf16mf4x5_tumu(vbool64_t vm, vbfloat16mf4x5_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg5ei64_v_bf16mf4x5_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei64_v_bf16mf2x5_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vloxseg5ei64_v_bf16mf2x5_tumu(vbool32_t vm, vbfloat16mf2x5_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg5ei64_v_bf16mf2x5_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei64_v_bf16m1x5_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t 
test_vloxseg5ei64_v_bf16m1x5_tumu(vbool16_t vm, vbfloat16m1x5_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg5ei64_v_bf16m1x5_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei64_v_bf16mf4x5_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vloxseg5ei64_v_bf16mf4x5_mu(vbool64_t vm, vbfloat16mf4x5_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg5ei64_v_bf16mf4x5_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei64_v_bf16mf2x5_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vloxseg5ei64_v_bf16mf2x5_mu(vbool32_t vm, vbfloat16mf2x5_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg5ei64_v_bf16mf2x5_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei64_v_bf16m1x5_mu( +// CHECK-RV64-SAME: [[VM:%.*]], 
target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vloxseg5ei64_v_bf16m1x5_mu(vbool16_t vm, vbfloat16m1x5_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg5ei64_v_bf16m1x5_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg5ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg5ei8.c new file mode 100644 index 0000000000000..bd25294cbade4 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg5ei8.c @@ -0,0 +1,128 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei8_v_bf16mf4x5_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret 
target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vloxseg5ei8_v_bf16mf4x5_tu(vbfloat16mf4x5_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg5ei8_v_bf16mf4x5_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei8_v_bf16mf2x5_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vloxseg5ei8_v_bf16mf2x5_tu(vbfloat16mf2x5_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg5ei8_v_bf16mf2x5_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei8_v_bf16m1x5_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vloxseg5ei8_v_bf16m1x5_tu(vbfloat16m1x5_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg5ei8_v_bf16m1x5_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei8_v_bf16mf4x5_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) 
#[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vloxseg5ei8_v_bf16mf4x5_tum(vbool64_t vm, vbfloat16mf4x5_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg5ei8_v_bf16mf4x5_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei8_v_bf16mf2x5_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vloxseg5ei8_v_bf16mf2x5_tum(vbool32_t vm, vbfloat16mf2x5_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg5ei8_v_bf16mf2x5_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei8_v_bf16m1x5_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] 
+// +vbfloat16m1x5_t test_vloxseg5ei8_v_bf16m1x5_tum(vbool16_t vm, vbfloat16m1x5_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg5ei8_v_bf16m1x5_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei8_v_bf16mf4x5_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vloxseg5ei8_v_bf16mf4x5_tumu(vbool64_t vm, vbfloat16mf4x5_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg5ei8_v_bf16mf4x5_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei8_v_bf16mf2x5_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vloxseg5ei8_v_bf16mf2x5_tumu(vbool32_t vm, vbfloat16mf2x5_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg5ei8_v_bf16mf2x5_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei8_v_bf16m1x5_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], 
target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vloxseg5ei8_v_bf16m1x5_tumu(vbool16_t vm, vbfloat16m1x5_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg5ei8_v_bf16m1x5_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei8_v_bf16mf4x5_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vloxseg5ei8_v_bf16mf4x5_mu(vbool64_t vm, vbfloat16mf4x5_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg5ei8_v_bf16mf4x5_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei8_v_bf16mf2x5_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], 
[[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vloxseg5ei8_v_bf16mf2x5_mu(vbool32_t vm, vbfloat16mf2x5_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg5ei8_v_bf16mf2x5_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei8_v_bf16m1x5_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vloxseg5ei8_v_bf16m1x5_mu(vbool16_t vm, vbfloat16m1x5_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg5ei8_v_bf16m1x5_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg6ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg6ei32.c new file mode 100644 index 0000000000000..017317d5d681a --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg6ei32.c @@ -0,0 +1,128 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) 
@test_vloxseg6ei32_v_bf16mf4x6_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vloxseg6ei32_v_bf16mf4x6_tu(vbfloat16mf4x6_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg6ei32_v_bf16mf4x6_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei32_v_bf16mf2x6_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vloxseg6ei32_v_bf16mf2x6_tu(vbfloat16mf2x6_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg6ei32_v_bf16mf2x6_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei32_v_bf16m1x6_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret 
target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vloxseg6ei32_v_bf16m1x6_tu(vbfloat16m1x6_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg6ei32_v_bf16m1x6_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei32_v_bf16mf4x6_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vloxseg6ei32_v_bf16mf4x6_tum(vbool64_t vm, vbfloat16mf4x6_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg6ei32_v_bf16mf4x6_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei32_v_bf16mf2x6_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vloxseg6ei32_v_bf16mf2x6_tum(vbool32_t vm, vbfloat16mf2x6_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg6ei32_v_bf16mf2x6_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei32_v_bf16m1x6_tum( +// 
CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vloxseg6ei32_v_bf16m1x6_tum(vbool16_t vm, vbfloat16m1x6_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg6ei32_v_bf16m1x6_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei32_v_bf16mf4x6_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vloxseg6ei32_v_bf16mf4x6_tumu(vbool64_t vm, vbfloat16mf4x6_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg6ei32_v_bf16mf4x6_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei32_v_bf16mf2x6_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) 
@llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vloxseg6ei32_v_bf16mf2x6_tumu(vbool32_t vm, vbfloat16mf2x6_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg6ei32_v_bf16mf2x6_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei32_v_bf16m1x6_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vloxseg6ei32_v_bf16m1x6_tumu(vbool16_t vm, vbfloat16m1x6_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg6ei32_v_bf16m1x6_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei32_v_bf16mf4x6_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vloxseg6ei32_v_bf16mf4x6_mu(vbool64_t vm, vbfloat16mf4x6_t vd, const __bf16 *rs1, 
vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg6ei32_v_bf16mf4x6_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei32_v_bf16mf2x6_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vloxseg6ei32_v_bf16mf2x6_mu(vbool32_t vm, vbfloat16mf2x6_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg6ei32_v_bf16mf2x6_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei32_v_bf16m1x6_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vloxseg6ei32_v_bf16m1x6_mu(vbool16_t vm, vbfloat16m1x6_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg6ei32_v_bf16m1x6_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg6ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg6ei64.c new file mode 100644 index 
0000000000000..b835ec1bbcbf6 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg6ei64.c @@ -0,0 +1,128 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei64_v_bf16mf4x6_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vloxseg6ei64_v_bf16mf4x6_tu(vbfloat16mf4x6_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg6ei64_v_bf16mf4x6_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei64_v_bf16mf2x6_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vloxseg6ei64_v_bf16mf2x6_tu(vbfloat16mf2x6_t vd, const __bf16 *rs1, vuint64m2_t rs2, 
size_t vl) { + return __riscv_vloxseg6ei64_v_bf16mf2x6_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei64_v_bf16m1x6_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vloxseg6ei64_v_bf16m1x6_tu(vbfloat16m1x6_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg6ei64_v_bf16m1x6_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei64_v_bf16mf4x6_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vloxseg6ei64_v_bf16mf4x6_tum(vbool64_t vm, vbfloat16mf4x6_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg6ei64_v_bf16mf4x6_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei64_v_bf16mf2x6_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vloxseg6ei64_v_bf16mf2x6_tum(vbool32_t vm, vbfloat16mf2x6_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg6ei64_v_bf16mf2x6_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei64_v_bf16m1x6_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vloxseg6ei64_v_bf16m1x6_tum(vbool16_t vm, vbfloat16m1x6_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg6ei64_v_bf16m1x6_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei64_v_bf16mf4x6_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vloxseg6ei64_v_bf16mf4x6_tumu(vbool64_t vm, 
vbfloat16mf4x6_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg6ei64_v_bf16mf4x6_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei64_v_bf16mf2x6_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vloxseg6ei64_v_bf16mf2x6_tumu(vbool32_t vm, vbfloat16mf2x6_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg6ei64_v_bf16mf2x6_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei64_v_bf16m1x6_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vloxseg6ei64_v_bf16m1x6_tumu(vbool16_t vm, vbfloat16m1x6_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg6ei64_v_bf16m1x6_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei64_v_bf16mf4x6_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef 
[[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vloxseg6ei64_v_bf16mf4x6_mu(vbool64_t vm, vbfloat16mf4x6_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg6ei64_v_bf16mf4x6_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei64_v_bf16mf2x6_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vloxseg6ei64_v_bf16mf2x6_mu(vbool32_t vm, vbfloat16mf2x6_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg6ei64_v_bf16mf2x6_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei64_v_bf16m1x6_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// 
CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vloxseg6ei64_v_bf16m1x6_mu(vbool16_t vm, vbfloat16m1x6_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg6ei64_v_bf16m1x6_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg6ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg6ei8.c new file mode 100644 index 0000000000000..7f12dd202bed6 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg6ei8.c @@ -0,0 +1,128 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei8_v_bf16mf4x6_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vloxseg6ei8_v_bf16mf4x6_tu(vbfloat16mf4x6_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg6ei8_v_bf16mf4x6_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei8_v_bf16mf2x6_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr 
noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vloxseg6ei8_v_bf16mf2x6_tu(vbfloat16mf2x6_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg6ei8_v_bf16mf2x6_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei8_v_bf16m1x6_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vloxseg6ei8_v_bf16m1x6_tu(vbfloat16m1x6_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg6ei8_v_bf16m1x6_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei8_v_bf16mf4x6_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t 
test_vloxseg6ei8_v_bf16mf4x6_tum(vbool64_t vm, vbfloat16mf4x6_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg6ei8_v_bf16mf4x6_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei8_v_bf16mf2x6_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vloxseg6ei8_v_bf16mf2x6_tum(vbool32_t vm, vbfloat16mf2x6_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg6ei8_v_bf16mf2x6_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei8_v_bf16m1x6_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vloxseg6ei8_v_bf16m1x6_tum(vbool16_t vm, vbfloat16m1x6_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg6ei8_v_bf16m1x6_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei8_v_bf16mf4x6_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 
6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vloxseg6ei8_v_bf16mf4x6_tumu(vbool64_t vm, vbfloat16mf4x6_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg6ei8_v_bf16mf4x6_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei8_v_bf16mf2x6_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vloxseg6ei8_v_bf16mf2x6_tumu(vbool32_t vm, vbfloat16mf2x6_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg6ei8_v_bf16mf2x6_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei8_v_bf16m1x6_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], 
i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vloxseg6ei8_v_bf16m1x6_tumu(vbool16_t vm, vbfloat16m1x6_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg6ei8_v_bf16m1x6_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei8_v_bf16mf4x6_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vloxseg6ei8_v_bf16mf4x6_mu(vbool64_t vm, vbfloat16mf4x6_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg6ei8_v_bf16mf4x6_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei8_v_bf16mf2x6_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vloxseg6ei8_v_bf16mf2x6_mu(vbool32_t vm, vbfloat16mf2x6_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg6ei8_v_bf16mf2x6_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) 
@test_vloxseg6ei8_v_bf16m1x6_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vloxseg6ei8_v_bf16m1x6_mu(vbool16_t vm, vbfloat16m1x6_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg6ei8_v_bf16m1x6_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg7ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg7ei32.c new file mode 100644 index 0000000000000..6478fcf7ab914 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg7ei32.c @@ -0,0 +1,128 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei32_v_bf16mf4x7_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr 
[[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vloxseg7ei32_v_bf16mf4x7_tu(vbfloat16mf4x7_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg7ei32_v_bf16mf4x7_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei32_v_bf16mf2x7_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vloxseg7ei32_v_bf16mf2x7_tu(vbfloat16mf2x7_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg7ei32_v_bf16mf2x7_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei32_v_bf16m1x7_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vloxseg7ei32_v_bf16m1x7_tu(vbfloat16m1x7_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg7ei32_v_bf16m1x7_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei32_v_bf16mf4x7_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) 
[[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vloxseg7ei32_v_bf16mf4x7_tum(vbool64_t vm, vbfloat16mf4x7_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg7ei32_v_bf16mf4x7_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei32_v_bf16mf2x7_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vloxseg7ei32_v_bf16mf2x7_tum(vbool32_t vm, vbfloat16mf2x7_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg7ei32_v_bf16mf2x7_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei32_v_bf16m1x7_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], 
i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vloxseg7ei32_v_bf16m1x7_tum(vbool16_t vm, vbfloat16m1x7_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg7ei32_v_bf16m1x7_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei32_v_bf16mf4x7_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vloxseg7ei32_v_bf16mf4x7_tumu(vbool64_t vm, vbfloat16mf4x7_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg7ei32_v_bf16mf4x7_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei32_v_bf16mf2x7_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vloxseg7ei32_v_bf16mf2x7_tumu(vbool32_t vm, vbfloat16mf2x7_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg7ei32_v_bf16mf2x7_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local 
target("riscv.vector.tuple", , 7) @test_vloxseg7ei32_v_bf16m1x7_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vloxseg7ei32_v_bf16m1x7_tumu(vbool16_t vm, vbfloat16m1x7_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg7ei32_v_bf16m1x7_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei32_v_bf16mf4x7_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vloxseg7ei32_v_bf16mf4x7_mu(vbool64_t vm, vbfloat16mf4x7_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg7ei32_v_bf16mf4x7_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei32_v_bf16mf2x7_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) 
@llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vloxseg7ei32_v_bf16mf2x7_mu(vbool32_t vm, vbfloat16mf2x7_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg7ei32_v_bf16mf2x7_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei32_v_bf16m1x7_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vloxseg7ei32_v_bf16m1x7_mu(vbool16_t vm, vbfloat16m1x7_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg7ei32_v_bf16m1x7_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg7ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg7ei64.c new file mode 100644 index 0000000000000..986045a6b85b8 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg7ei64.c @@ -0,0 +1,128 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// 
RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei64_v_bf16mf4x7_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vloxseg7ei64_v_bf16mf4x7_tu(vbfloat16mf4x7_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg7ei64_v_bf16mf4x7_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei64_v_bf16mf2x7_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vloxseg7ei64_v_bf16mf2x7_tu(vbfloat16mf2x7_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg7ei64_v_bf16mf2x7_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei64_v_bf16m1x7_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) 
@llvm.riscv.vloxseg7.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vloxseg7ei64_v_bf16m1x7_tu(vbfloat16m1x7_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg7ei64_v_bf16m1x7_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei64_v_bf16mf4x7_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vloxseg7ei64_v_bf16mf4x7_tum(vbool64_t vm, vbfloat16mf4x7_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg7ei64_v_bf16mf4x7_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei64_v_bf16mf2x7_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vloxseg7ei64_v_bf16mf2x7_tum(vbool32_t vm, vbfloat16mf2x7_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return 
__riscv_vloxseg7ei64_v_bf16mf2x7_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei64_v_bf16m1x7_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vloxseg7ei64_v_bf16m1x7_tum(vbool16_t vm, vbfloat16m1x7_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg7ei64_v_bf16m1x7_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei64_v_bf16mf4x7_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vloxseg7ei64_v_bf16mf4x7_tumu(vbool64_t vm, vbfloat16mf4x7_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg7ei64_v_bf16mf4x7_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei64_v_bf16mf2x7_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: 
entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vloxseg7ei64_v_bf16mf2x7_tumu(vbool32_t vm, vbfloat16mf2x7_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg7ei64_v_bf16mf2x7_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei64_v_bf16m1x7_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vloxseg7ei64_v_bf16m1x7_tumu(vbool16_t vm, vbfloat16m1x7_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg7ei64_v_bf16m1x7_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei64_v_bf16mf4x7_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t 
test_vloxseg7ei64_v_bf16mf4x7_mu(vbool64_t vm, vbfloat16mf4x7_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg7ei64_v_bf16mf4x7_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei64_v_bf16mf2x7_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vloxseg7ei64_v_bf16mf2x7_mu(vbool32_t vm, vbfloat16mf2x7_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg7ei64_v_bf16mf2x7_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei64_v_bf16m1x7_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vloxseg7ei64_v_bf16m1x7_mu(vbool16_t vm, vbfloat16m1x7_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg7ei64_v_bf16m1x7_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg7ei8.c 
b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg7ei8.c new file mode 100644 index 0000000000000..bfe80e670df0b --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg7ei8.c @@ -0,0 +1,128 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei8_v_bf16mf4x7_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vloxseg7ei8_v_bf16mf4x7_tu(vbfloat16mf4x7_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg7ei8_v_bf16mf4x7_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei8_v_bf16mf2x7_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) 
[[TMP0]] +// +vbfloat16mf2x7_t test_vloxseg7ei8_v_bf16mf2x7_tu(vbfloat16mf2x7_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg7ei8_v_bf16mf2x7_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei8_v_bf16m1x7_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vloxseg7ei8_v_bf16m1x7_tu(vbfloat16m1x7_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg7ei8_v_bf16m1x7_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei8_v_bf16mf4x7_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vloxseg7ei8_v_bf16mf4x7_tum(vbool64_t vm, vbfloat16mf4x7_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg7ei8_v_bf16mf4x7_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei8_v_bf16mf2x7_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], 
[[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vloxseg7ei8_v_bf16mf2x7_tum(vbool32_t vm, vbfloat16mf2x7_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg7ei8_v_bf16mf2x7_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei8_v_bf16m1x7_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vloxseg7ei8_v_bf16m1x7_tum(vbool16_t vm, vbfloat16m1x7_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg7ei8_v_bf16m1x7_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei8_v_bf16mf4x7_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret 
target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vloxseg7ei8_v_bf16mf4x7_tumu(vbool64_t vm, vbfloat16mf4x7_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg7ei8_v_bf16mf4x7_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei8_v_bf16mf2x7_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vloxseg7ei8_v_bf16mf2x7_tumu(vbool32_t vm, vbfloat16mf2x7_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg7ei8_v_bf16mf2x7_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei8_v_bf16m1x7_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vloxseg7ei8_v_bf16m1x7_tumu(vbool16_t vm, vbfloat16m1x7_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg7ei8_v_bf16m1x7_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) 
@test_vloxseg7ei8_v_bf16mf4x7_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vloxseg7ei8_v_bf16mf4x7_mu(vbool64_t vm, vbfloat16mf4x7_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg7ei8_v_bf16mf4x7_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei8_v_bf16mf2x7_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vloxseg7ei8_v_bf16mf2x7_mu(vbool32_t vm, vbfloat16mf2x7_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg7ei8_v_bf16mf2x7_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei8_v_bf16m1x7_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) 
@llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vloxseg7ei8_v_bf16m1x7_mu(vbool16_t vm, vbfloat16m1x7_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg7ei8_v_bf16m1x7_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg8ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg8ei32.c new file mode 100644 index 0000000000000..c5679ae3ca327 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg8ei32.c @@ -0,0 +1,128 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei32_v_bf16mf4x8_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vloxseg8ei32_v_bf16mf4x8_tu(vbfloat16mf4x8_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg8ei32_v_bf16mf4x8_tu(vd, rs1, rs2, vl); 
+} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei32_v_bf16mf2x8_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vloxseg8ei32_v_bf16mf2x8_tu(vbfloat16mf2x8_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg8ei32_v_bf16mf2x8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei32_v_bf16m1x8_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vloxseg8ei32_v_bf16m1x8_tu(vbfloat16m1x8_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg8ei32_v_bf16m1x8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei32_v_bf16mf4x8_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) 
@llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vloxseg8ei32_v_bf16mf4x8_tum(vbool64_t vm, vbfloat16mf4x8_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg8ei32_v_bf16mf4x8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei32_v_bf16mf2x8_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vloxseg8ei32_v_bf16mf2x8_tum(vbool32_t vm, vbfloat16mf2x8_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg8ei32_v_bf16mf2x8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei32_v_bf16m1x8_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vloxseg8ei32_v_bf16m1x8_tum(vbool16_t vm, vbfloat16m1x8_t vd, const __bf16 *rs1, 
vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg8ei32_v_bf16m1x8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei32_v_bf16mf4x8_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vloxseg8ei32_v_bf16mf4x8_tumu(vbool64_t vm, vbfloat16mf4x8_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg8ei32_v_bf16mf4x8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei32_v_bf16mf2x8_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vloxseg8ei32_v_bf16mf2x8_tumu(vbool32_t vm, vbfloat16mf2x8_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg8ei32_v_bf16mf2x8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei32_v_bf16m1x8_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef 
[[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vloxseg8ei32_v_bf16m1x8_tumu(vbool16_t vm, vbfloat16m1x8_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg8ei32_v_bf16m1x8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei32_v_bf16mf4x8_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vloxseg8ei32_v_bf16mf4x8_mu(vbool64_t vm, vbfloat16mf4x8_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg8ei32_v_bf16mf4x8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei32_v_bf16mf2x8_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret 
target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vloxseg8ei32_v_bf16mf2x8_mu(vbool32_t vm, vbfloat16mf2x8_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg8ei32_v_bf16mf2x8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei32_v_bf16m1x8_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vloxseg8ei32_v_bf16m1x8_mu(vbool16_t vm, vbfloat16m1x8_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg8ei32_v_bf16m1x8_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg8ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg8ei64.c new file mode 100644 index 0000000000000..3e87a24394fd0 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg8ei64.c @@ -0,0 +1,128 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei64_v_bf16mf4x8_tu( +// CHECK-RV64-SAME: 
target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vloxseg8ei64_v_bf16mf4x8_tu(vbfloat16mf4x8_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg8ei64_v_bf16mf4x8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei64_v_bf16mf2x8_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vloxseg8ei64_v_bf16mf2x8_tu(vbfloat16mf2x8_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg8ei64_v_bf16mf2x8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei64_v_bf16m1x8_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t 
test_vloxseg8ei64_v_bf16m1x8_tu(vbfloat16m1x8_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg8ei64_v_bf16m1x8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei64_v_bf16mf4x8_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vloxseg8ei64_v_bf16mf4x8_tum(vbool64_t vm, vbfloat16mf4x8_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg8ei64_v_bf16mf4x8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei64_v_bf16mf2x8_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vloxseg8ei64_v_bf16mf2x8_tum(vbool32_t vm, vbfloat16mf2x8_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg8ei64_v_bf16mf2x8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei64_v_bf16m1x8_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) 
[[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vloxseg8ei64_v_bf16m1x8_tum(vbool16_t vm, vbfloat16m1x8_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg8ei64_v_bf16m1x8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei64_v_bf16mf4x8_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vloxseg8ei64_v_bf16mf4x8_tumu(vbool64_t vm, vbfloat16mf4x8_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg8ei64_v_bf16mf4x8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei64_v_bf16mf2x8_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], 
i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vloxseg8ei64_v_bf16mf2x8_tumu(vbool32_t vm, vbfloat16mf2x8_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg8ei64_v_bf16mf2x8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei64_v_bf16m1x8_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vloxseg8ei64_v_bf16m1x8_tumu(vbool16_t vm, vbfloat16m1x8_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg8ei64_v_bf16m1x8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei64_v_bf16mf4x8_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vloxseg8ei64_v_bf16mf4x8_mu(vbool64_t vm, vbfloat16mf4x8_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg8ei64_v_bf16mf4x8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local 
target("riscv.vector.tuple", , 8) @test_vloxseg8ei64_v_bf16mf2x8_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vloxseg8ei64_v_bf16mf2x8_mu(vbool32_t vm, vbfloat16mf2x8_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg8ei64_v_bf16mf2x8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei64_v_bf16m1x8_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vloxseg8ei64_v_bf16m1x8_mu(vbool16_t vm, vbfloat16m1x8_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg8ei64_v_bf16m1x8_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg8ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg8ei8.c new file mode 100644 index 0000000000000..b4c20694b3599 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vloxseg8ei8.c @@ -0,0 +1,128 
@@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei8_v_bf16mf4x8_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vloxseg8ei8_v_bf16mf4x8_tu(vbfloat16mf4x8_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg8ei8_v_bf16mf4x8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei8_v_bf16mf2x8_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vloxseg8ei8_v_bf16mf2x8_tu(vbfloat16mf2x8_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg8ei8_v_bf16mf2x8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) 
@test_vloxseg8ei8_v_bf16m1x8_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vloxseg8ei8_v_bf16m1x8_tu(vbfloat16m1x8_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg8ei8_v_bf16m1x8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei8_v_bf16mf4x8_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vloxseg8ei8_v_bf16mf4x8_tum(vbool64_t vm, vbfloat16mf4x8_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg8ei8_v_bf16mf4x8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei8_v_bf16mf2x8_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], 
[[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vloxseg8ei8_v_bf16mf2x8_tum(vbool32_t vm, vbfloat16mf2x8_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg8ei8_v_bf16mf2x8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei8_v_bf16m1x8_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vloxseg8ei8_v_bf16m1x8_tum(vbool16_t vm, vbfloat16m1x8_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg8ei8_v_bf16m1x8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei8_v_bf16mf4x8_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vloxseg8ei8_v_bf16mf4x8_tumu(vbool64_t vm, vbfloat16mf4x8_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg8ei8_v_bf16mf4x8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local 
target("riscv.vector.tuple", , 8) @test_vloxseg8ei8_v_bf16mf2x8_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vloxseg8ei8_v_bf16mf2x8_tumu(vbool32_t vm, vbfloat16mf2x8_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg8ei8_v_bf16mf2x8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei8_v_bf16m1x8_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vloxseg8ei8_v_bf16m1x8_tumu(vbool16_t vm, vbfloat16m1x8_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg8ei8_v_bf16m1x8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei8_v_bf16mf4x8_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) 
@llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vloxseg8ei8_v_bf16mf4x8_mu(vbool64_t vm, vbfloat16mf4x8_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg8ei8_v_bf16mf4x8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei8_v_bf16mf2x8_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vloxseg8ei8_v_bf16mf2x8_mu(vbool32_t vm, vbfloat16mf2x8_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg8ei8_v_bf16mf2x8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei8_v_bf16m1x8_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vloxseg8ei8_v_bf16m1x8_mu(vbool16_t vm, vbfloat16m1x8_t vd, const __bf16 *rs1, vuint8mf2_t rs2, 
size_t vl) { + return __riscv_vloxseg8ei8_v_bf16m1x8_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxei32.c new file mode 100644 index 0000000000000..85d344f63301b --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxei32.c @@ -0,0 +1,208 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16mf4_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv1bf16.p0.nxv1i32.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vluxei32_v_bf16mf4_tu(vbfloat16mf4_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxei32_v_bf16mf4_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16mf2_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv2bf16.p0.nxv2i32.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vluxei32_v_bf16mf2_tu(vbfloat16mf2_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vluxei32_v_bf16mf2_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local 
@test_vluxei32_v_bf16m1_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv4bf16.p0.nxv4i32.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vluxei32_v_bf16m1_tu(vbfloat16m1_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vluxei32_v_bf16m1_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16m2_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv8bf16.p0.nxv8i32.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vluxei32_v_bf16m2_tu(vbfloat16m2_t vd, const __bf16 *rs1, vuint32m4_t rs2, size_t vl) { + return __riscv_vluxei32_v_bf16m2_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16m4_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv16bf16.p0.nxv16i32.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m4_t test_vluxei32_v_bf16m4_tu(vbfloat16m4_t vd, const __bf16 *rs1, vuint32m8_t rs2, size_t vl) { + return __riscv_vluxei32_v_bf16m4_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16mf4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv1bf16.p0.nxv1i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t 
test_vluxei32_v_bf16mf4_tum(vbool64_t vm, vbfloat16mf4_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxei32_v_bf16mf4_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16mf2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv2bf16.p0.nxv2i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vluxei32_v_bf16mf2_tum(vbool32_t vm, vbfloat16mf2_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vluxei32_v_bf16mf2_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16m1_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv4bf16.p0.nxv4i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vluxei32_v_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vluxei32_v_bf16m1_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16m2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv8bf16.p0.nxv8i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vluxei32_v_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd, const __bf16 *rs1, vuint32m4_t rs2, size_t vl) { + return __riscv_vluxei32_v_bf16m2_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local 
@test_vluxei32_v_bf16m4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv16bf16.p0.nxv16i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m4_t test_vluxei32_v_bf16m4_tum(vbool4_t vm, vbfloat16m4_t vd, const __bf16 *rs1, vuint32m8_t rs2, size_t vl) { + return __riscv_vluxei32_v_bf16m4_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16mf4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv1bf16.p0.nxv1i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vluxei32_v_bf16mf4_tumu(vbool64_t vm, vbfloat16mf4_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxei32_v_bf16mf4_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16mf2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv2bf16.p0.nxv2i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vluxei32_v_bf16mf2_tumu(vbool32_t vm, vbfloat16mf2_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vluxei32_v_bf16mf2_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16m1_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: 
[[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv4bf16.p0.nxv4i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vluxei32_v_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vluxei32_v_bf16m1_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16m2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv8bf16.p0.nxv8i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vluxei32_v_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t vd, const __bf16 *rs1, vuint32m4_t rs2, size_t vl) { + return __riscv_vluxei32_v_bf16m2_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16m4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv16bf16.p0.nxv16i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m4_t test_vluxei32_v_bf16m4_tumu(vbool4_t vm, vbfloat16m4_t vd, const __bf16 *rs1, vuint32m8_t rs2, size_t vl) { + return __riscv_vluxei32_v_bf16m4_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16mf4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv1bf16.p0.nxv1i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vluxei32_v_bf16mf4_mu(vbool64_t 
vm, vbfloat16mf4_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxei32_v_bf16mf4_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16mf2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv2bf16.p0.nxv2i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vluxei32_v_bf16mf2_mu(vbool32_t vm, vbfloat16mf2_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vluxei32_v_bf16mf2_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16m1_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv4bf16.p0.nxv4i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vluxei32_v_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vluxei32_v_bf16m1_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16m2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv8bf16.p0.nxv8i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vluxei32_v_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd, const __bf16 *rs1, vuint32m4_t rs2, size_t vl) { + return __riscv_vluxei32_v_bf16m2_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16m4_mu( +// CHECK-RV64-SAME: 
[[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv16bf16.p0.nxv16i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m4_t test_vluxei32_v_bf16m4_mu(vbool4_t vm, vbfloat16m4_t vd, const __bf16 *rs1, vuint32m8_t rs2, size_t vl) { + return __riscv_vluxei32_v_bf16m4_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxei64.c new file mode 100644 index 0000000000000..7d5914e241fad --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxei64.c @@ -0,0 +1,168 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local @test_vluxei64_v_bf16mf4_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv1bf16.p0.nxv1i64.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vluxei64_v_bf16mf4_tu(vbfloat16mf4_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vluxei64_v_bf16mf4_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei64_v_bf16mf2_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { 
+// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv2bf16.p0.nxv2i64.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vluxei64_v_bf16mf2_tu(vbfloat16mf2_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vluxei64_v_bf16mf2_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei64_v_bf16m1_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv4bf16.p0.nxv4i64.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vluxei64_v_bf16m1_tu(vbfloat16m1_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vluxei64_v_bf16m1_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei64_v_bf16m2_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv8bf16.p0.nxv8i64.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vluxei64_v_bf16m2_tu(vbfloat16m2_t vd, const __bf16 *rs1, vuint64m8_t rs2, size_t vl) { + return __riscv_vluxei64_v_bf16m2_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei64_v_bf16mf4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv1bf16.p0.nxv1i64.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vluxei64_v_bf16mf4_tum(vbool64_t vm, vbfloat16mf4_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return 
__riscv_vluxei64_v_bf16mf4_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei64_v_bf16mf2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv2bf16.p0.nxv2i64.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vluxei64_v_bf16mf2_tum(vbool32_t vm, vbfloat16mf2_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vluxei64_v_bf16mf2_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei64_v_bf16m1_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv4bf16.p0.nxv4i64.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vluxei64_v_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vluxei64_v_bf16m1_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei64_v_bf16m2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv8bf16.p0.nxv8i64.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vluxei64_v_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd, const __bf16 *rs1, vuint64m8_t rs2, size_t vl) { + return __riscv_vluxei64_v_bf16m2_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei64_v_bf16mf4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 
noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv1bf16.p0.nxv1i64.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vluxei64_v_bf16mf4_tumu(vbool64_t vm, vbfloat16mf4_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vluxei64_v_bf16mf4_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei64_v_bf16mf2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv2bf16.p0.nxv2i64.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vluxei64_v_bf16mf2_tumu(vbool32_t vm, vbfloat16mf2_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vluxei64_v_bf16mf2_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei64_v_bf16m1_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv4bf16.p0.nxv4i64.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vluxei64_v_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vluxei64_v_bf16m1_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei64_v_bf16m2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv8bf16.p0.nxv8i64.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) 
+// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vluxei64_v_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t vd, const __bf16 *rs1, vuint64m8_t rs2, size_t vl) { + return __riscv_vluxei64_v_bf16m2_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei64_v_bf16mf4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv1bf16.p0.nxv1i64.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vluxei64_v_bf16mf4_mu(vbool64_t vm, vbfloat16mf4_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vluxei64_v_bf16mf4_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei64_v_bf16mf2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv2bf16.p0.nxv2i64.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vluxei64_v_bf16mf2_mu(vbool32_t vm, vbfloat16mf2_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vluxei64_v_bf16mf2_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei64_v_bf16m1_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv4bf16.p0.nxv4i64.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vluxei64_v_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vluxei64_v_bf16m1_mu(vm, vd, rs1, rs2, vl); 
+} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei64_v_bf16m2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv8bf16.p0.nxv8i64.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vluxei64_v_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd, const __bf16 *rs1, vuint64m8_t rs2, size_t vl) { + return __riscv_vluxei64_v_bf16m2_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxei8.c new file mode 100644 index 0000000000000..118843e2adf35 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxei8.c @@ -0,0 +1,248 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16mf4_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv1bf16.p0.nxv1i8.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vluxei8_v_bf16mf4_tu(vbfloat16mf4_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxei8_v_bf16mf4_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16mf2_tu( +// CHECK-RV64-SAME: 
[[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv2bf16.p0.nxv2i8.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vluxei8_v_bf16mf2_tu(vbfloat16mf2_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxei8_v_bf16mf2_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16m1_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv4bf16.p0.nxv4i8.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vluxei8_v_bf16m1_tu(vbfloat16m1_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxei8_v_bf16m1_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16m2_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv8bf16.p0.nxv8i8.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vluxei8_v_bf16m2_tu(vbfloat16m2_t vd, const __bf16 *rs1, vuint8m1_t rs2, size_t vl) { + return __riscv_vluxei8_v_bf16m2_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16m4_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv16bf16.p0.nxv16i8.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m4_t test_vluxei8_v_bf16m4_tu(vbfloat16m4_t vd, const __bf16 *rs1, vuint8m2_t rs2, size_t vl) { + 
return __riscv_vluxei8_v_bf16m4_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16m8_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv32bf16.p0.nxv32i8.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m8_t test_vluxei8_v_bf16m8_tu(vbfloat16m8_t vd, const __bf16 *rs1, vuint8m4_t rs2, size_t vl) { + return __riscv_vluxei8_v_bf16m8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16mf4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv1bf16.p0.nxv1i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vluxei8_v_bf16mf4_tum(vbool64_t vm, vbfloat16mf4_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxei8_v_bf16mf4_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16mf2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv2bf16.p0.nxv2i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vluxei8_v_bf16mf2_tum(vbool32_t vm, vbfloat16mf2_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxei8_v_bf16mf2_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16m1_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// 
CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv4bf16.p0.nxv4i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vluxei8_v_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxei8_v_bf16m1_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16m2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv8bf16.p0.nxv8i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vluxei8_v_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd, const __bf16 *rs1, vuint8m1_t rs2, size_t vl) { + return __riscv_vluxei8_v_bf16m2_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16m4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv16bf16.p0.nxv16i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m4_t test_vluxei8_v_bf16m4_tum(vbool4_t vm, vbfloat16m4_t vd, const __bf16 *rs1, vuint8m2_t rs2, size_t vl) { + return __riscv_vluxei8_v_bf16m4_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16m8_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv32bf16.p0.nxv32i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m8_t test_vluxei8_v_bf16m8_tum(vbool2_t vm, 
vbfloat16m8_t vd, const __bf16 *rs1, vuint8m4_t rs2, size_t vl) { + return __riscv_vluxei8_v_bf16m8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16mf4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv1bf16.p0.nxv1i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vluxei8_v_bf16mf4_tumu(vbool64_t vm, vbfloat16mf4_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxei8_v_bf16mf4_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16mf2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv2bf16.p0.nxv2i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vluxei8_v_bf16mf2_tumu(vbool32_t vm, vbfloat16mf2_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxei8_v_bf16mf2_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16m1_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv4bf16.p0.nxv4i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vluxei8_v_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxei8_v_bf16m1_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16m2_tumu( +// 
CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv8bf16.p0.nxv8i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vluxei8_v_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t vd, const __bf16 *rs1, vuint8m1_t rs2, size_t vl) { + return __riscv_vluxei8_v_bf16m2_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16m4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv16bf16.p0.nxv16i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m4_t test_vluxei8_v_bf16m4_tumu(vbool4_t vm, vbfloat16m4_t vd, const __bf16 *rs1, vuint8m2_t rs2, size_t vl) { + return __riscv_vluxei8_v_bf16m4_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16m8_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv32bf16.p0.nxv32i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m8_t test_vluxei8_v_bf16m8_tumu(vbool2_t vm, vbfloat16m8_t vd, const __bf16 *rs1, vuint8m4_t rs2, size_t vl) { + return __riscv_vluxei8_v_bf16m8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16mf4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv1bf16.p0.nxv1i8.i64( 
[[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vluxei8_v_bf16mf4_mu(vbool64_t vm, vbfloat16mf4_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxei8_v_bf16mf4_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16mf2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv2bf16.p0.nxv2i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vluxei8_v_bf16mf2_mu(vbool32_t vm, vbfloat16mf2_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxei8_v_bf16mf2_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16m1_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv4bf16.p0.nxv4i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vluxei8_v_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxei8_v_bf16m1_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16m2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv8bf16.p0.nxv8i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vluxei8_v_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd, const __bf16 *rs1, vuint8m1_t rs2, size_t vl) { + return 
__riscv_vluxei8_v_bf16m2_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16m4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv16bf16.p0.nxv16i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m4_t test_vluxei8_v_bf16m4_mu(vbool4_t vm, vbfloat16m4_t vd, const __bf16 *rs1, vuint8m2_t rs2, size_t vl) { + return __riscv_vluxei8_v_bf16m4_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16m8_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv32bf16.p0.nxv32i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m8_t test_vluxei8_v_bf16m8_mu(vbool2_t vm, vbfloat16m8_t vd, const __bf16 *rs1, vuint8m4_t rs2, size_t vl) { + return __riscv_vluxei8_v_bf16m8_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg2ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg2ei32.c new file mode 100644 index 0000000000000..3428217434ec2 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg2ei32.c @@ -0,0 +1,208 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + 
+#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16mf4x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vluxseg2ei32_v_bf16mf4x2_tu(vbfloat16mf4x2_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg2ei32_v_bf16mf4x2_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16mf2x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vluxseg2ei32_v_bf16mf2x2_tu(vbfloat16mf2x2_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei32_v_bf16mf2x2_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16m1x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 2) 
[[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vluxseg2ei32_v_bf16m1x2_tu(vbfloat16m1x2_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei32_v_bf16m1x2_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16m2x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i32.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vluxseg2ei32_v_bf16m2x2_tu(vbfloat16m2x2_t vd, const __bf16 *rs1, vuint32m4_t rs2, size_t vl) { + return __riscv_vluxseg2ei32_v_bf16m2x2_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16m4x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i32.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m4x2_t test_vluxseg2ei32_v_bf16m4x2_tu(vbfloat16m4x2_t vd, const __bf16 *rs1, vuint32m8_t rs2, size_t vl) { + return __riscv_vluxseg2ei32_v_bf16m4x2_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16mf4x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", 
, 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vluxseg2ei32_v_bf16mf4x2_tum(vbool64_t vm, vbfloat16mf4x2_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg2ei32_v_bf16mf4x2_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16mf2x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vluxseg2ei32_v_bf16mf2x2_tum(vbool32_t vm, vbfloat16mf2x2_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei32_v_bf16mf2x2_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16m1x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 
[[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vluxseg2ei32_v_bf16m1x2_tum(vbool16_t vm, vbfloat16m1x2_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei32_v_bf16m1x2_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16m2x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vluxseg2ei32_v_bf16m2x2_tum(vbool8_t vm, vbfloat16m2x2_t vd, const __bf16 *rs1, vuint32m4_t rs2, size_t vl) { + return __riscv_vluxseg2ei32_v_bf16m2x2_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16m4x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i32.nxv16i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m4x2_t test_vluxseg2ei32_v_bf16m4x2_tum(vbool4_t vm, vbfloat16m4x2_t vd, const __bf16 *rs1, vuint32m8_t rs2, size_t vl) { + return __riscv_vluxseg2ei32_v_bf16m4x2_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local 
target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16mf4x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vluxseg2ei32_v_bf16mf4x2_tumu(vbool64_t vm, vbfloat16mf4x2_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg2ei32_v_bf16mf4x2_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16mf2x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vluxseg2ei32_v_bf16mf2x2_tumu(vbool32_t vm, vbfloat16mf2x2_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei32_v_bf16mf2x2_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16m1x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) 
@llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vluxseg2ei32_v_bf16m1x2_tumu(vbool16_t vm, vbfloat16m1x2_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei32_v_bf16m1x2_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16m2x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vluxseg2ei32_v_bf16m2x2_tumu(vbool8_t vm, vbfloat16m2x2_t vd, const __bf16 *rs1, vuint32m4_t rs2, size_t vl) { + return __riscv_vluxseg2ei32_v_bf16m2x2_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16m4x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i32.nxv16i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m4x2_t test_vluxseg2ei32_v_bf16m4x2_tumu(vbool4_t vm, vbfloat16m4x2_t vd, const __bf16 *rs1, 
vuint32m8_t rs2, size_t vl) { + return __riscv_vluxseg2ei32_v_bf16m4x2_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16mf4x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vluxseg2ei32_v_bf16mf4x2_mu(vbool64_t vm, vbfloat16mf4x2_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg2ei32_v_bf16mf4x2_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16mf2x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vluxseg2ei32_v_bf16mf2x2_mu(vbool32_t vm, vbfloat16mf2x2_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei32_v_bf16mf2x2_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16m1x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) 
#[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vluxseg2ei32_v_bf16m1x2_mu(vbool16_t vm, vbfloat16m1x2_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei32_v_bf16m1x2_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16m2x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vluxseg2ei32_v_bf16m2x2_mu(vbool8_t vm, vbfloat16m2x2_t vd, const __bf16 *rs1, vuint32m4_t rs2, size_t vl) { + return __riscv_vluxseg2ei32_v_bf16m2x2_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16m4x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i32.nxv16i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// 
+vbfloat16m4x2_t test_vluxseg2ei32_v_bf16m4x2_mu(vbool4_t vm, vbfloat16m4x2_t vd, const __bf16 *rs1, vuint32m8_t rs2, size_t vl) { + return __riscv_vluxseg2ei32_v_bf16m4x2_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg2ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg2ei64.c new file mode 100644 index 0000000000000..487ae96284022 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg2ei64.c @@ -0,0 +1,168 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei64_v_bf16mf4x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vluxseg2ei64_v_bf16mf4x2_tu(vbfloat16mf4x2_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei64_v_bf16mf4x2_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei64_v_bf16mf2x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) 
#[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vluxseg2ei64_v_bf16mf2x2_tu(vbfloat16mf2x2_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei64_v_bf16mf2x2_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei64_v_bf16m1x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vluxseg2ei64_v_bf16m1x2_tu(vbfloat16m1x2_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg2ei64_v_bf16m1x2_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei64_v_bf16m2x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i64.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vluxseg2ei64_v_bf16m2x2_tu(vbfloat16m2x2_t vd, const __bf16 *rs1, vuint64m8_t rs2, size_t vl) { + return 
__riscv_vluxseg2ei64_v_bf16m2x2_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei64_v_bf16mf4x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vluxseg2ei64_v_bf16mf4x2_tum(vbool64_t vm, vbfloat16mf4x2_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei64_v_bf16mf4x2_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei64_v_bf16mf2x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vluxseg2ei64_v_bf16mf2x2_tum(vbool32_t vm, vbfloat16mf2x2_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei64_v_bf16mf2x2_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei64_v_bf16m1x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: 
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vluxseg2ei64_v_bf16m1x2_tum(vbool16_t vm, vbfloat16m1x2_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg2ei64_v_bf16m1x2_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei64_v_bf16m2x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vluxseg2ei64_v_bf16m2x2_tum(vbool8_t vm, vbfloat16m2x2_t vd, const __bf16 *rs1, vuint64m8_t rs2, size_t vl) { + return __riscv_vluxseg2ei64_v_bf16m2x2_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei64_v_bf16mf4x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t 
test_vluxseg2ei64_v_bf16mf4x2_tumu(vbool64_t vm, vbfloat16mf4x2_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei64_v_bf16mf4x2_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei64_v_bf16mf2x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vluxseg2ei64_v_bf16mf2x2_tumu(vbool32_t vm, vbfloat16mf2x2_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei64_v_bf16mf2x2_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei64_v_bf16m1x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vluxseg2ei64_v_bf16m1x2_tumu(vbool16_t vm, vbfloat16m1x2_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg2ei64_v_bf16m1x2_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei64_v_bf16m2x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], 
target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vluxseg2ei64_v_bf16m2x2_tumu(vbool8_t vm, vbfloat16m2x2_t vd, const __bf16 *rs1, vuint64m8_t rs2, size_t vl) { + return __riscv_vluxseg2ei64_v_bf16m2x2_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei64_v_bf16mf4x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vluxseg2ei64_v_bf16mf4x2_mu(vbool64_t vm, vbfloat16mf4x2_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei64_v_bf16mf4x2_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei64_v_bf16mf2x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], 
[[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vluxseg2ei64_v_bf16mf2x2_mu(vbool32_t vm, vbfloat16mf2x2_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei64_v_bf16mf2x2_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei64_v_bf16m1x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vluxseg2ei64_v_bf16m1x2_mu(vbool16_t vm, vbfloat16m1x2_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg2ei64_v_bf16m1x2_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei64_v_bf16m2x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vluxseg2ei64_v_bf16m2x2_mu(vbool8_t vm, vbfloat16m2x2_t vd, const __bf16 *rs1, vuint64m8_t rs2, size_t vl) { + return __riscv_vluxseg2ei64_v_bf16m2x2_mu(vm, vd, rs1, rs2, vl); +} diff --git 
a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg2ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg2ei8.c new file mode 100644 index 0000000000000..d228b2b6db49d --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg2ei8.c @@ -0,0 +1,208 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16mf4x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vluxseg2ei8_v_bf16mf4x2_tu(vbfloat16mf4x2_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg2ei8_v_bf16mf4x2_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16mf2x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 2) [[VD]], 
ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vluxseg2ei8_v_bf16mf2x2_tu(vbfloat16mf2x2_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg2ei8_v_bf16mf2x2_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16m1x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vluxseg2ei8_v_bf16m1x2_tu(vbfloat16m1x2_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg2ei8_v_bf16m1x2_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16m2x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i8.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vluxseg2ei8_v_bf16m2x2_tu(vbfloat16m2x2_t vd, const __bf16 *rs1, vuint8m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei8_v_bf16m2x2_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16m4x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef 
[[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i8.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m4x2_t test_vluxseg2ei8_v_bf16m4x2_tu(vbfloat16m4x2_t vd, const __bf16 *rs1, vuint8m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei8_v_bf16m4x2_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16mf4x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vluxseg2ei8_v_bf16mf4x2_tum(vbool64_t vm, vbfloat16mf4x2_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg2ei8_v_bf16mf4x2_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16mf2x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] 
+// +vbfloat16mf2x2_t test_vluxseg2ei8_v_bf16mf2x2_tum(vbool32_t vm, vbfloat16mf2x2_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg2ei8_v_bf16mf2x2_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16m1x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vluxseg2ei8_v_bf16m1x2_tum(vbool16_t vm, vbfloat16m1x2_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg2ei8_v_bf16m1x2_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16m2x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vluxseg2ei8_v_bf16m2x2_tum(vbool8_t vm, vbfloat16m2x2_t vd, const __bf16 *rs1, vuint8m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei8_v_bf16m2x2_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16m4x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], 
target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i8.nxv16i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m4x2_t test_vluxseg2ei8_v_bf16m4x2_tum(vbool4_t vm, vbfloat16m4x2_t vd, const __bf16 *rs1, vuint8m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei8_v_bf16m4x2_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16mf4x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vluxseg2ei8_v_bf16mf4x2_tumu(vbool64_t vm, vbfloat16mf4x2_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg2ei8_v_bf16mf4x2_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16mf2x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], 
[[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vluxseg2ei8_v_bf16mf2x2_tumu(vbool32_t vm, vbfloat16mf2x2_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg2ei8_v_bf16mf2x2_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16m1x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vluxseg2ei8_v_bf16m1x2_tumu(vbool16_t vm, vbfloat16m1x2_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg2ei8_v_bf16m1x2_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16m2x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vluxseg2ei8_v_bf16m2x2_tumu(vbool8_t vm, vbfloat16m2x2_t vd, const __bf16 *rs1, vuint8m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei8_v_bf16m2x2_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local 
target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16m4x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i8.nxv16i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m4x2_t test_vluxseg2ei8_v_bf16m4x2_tumu(vbool4_t vm, vbfloat16m4x2_t vd, const __bf16 *rs1, vuint8m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei8_v_bf16m4x2_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16mf4x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vluxseg2ei8_v_bf16mf4x2_mu(vbool64_t vm, vbfloat16mf4x2_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg2ei8_v_bf16mf4x2_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16mf2x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) 
@llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vluxseg2ei8_v_bf16mf2x2_mu(vbool32_t vm, vbfloat16mf2x2_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg2ei8_v_bf16mf2x2_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16m1x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vluxseg2ei8_v_bf16m1x2_mu(vbool16_t vm, vbfloat16m1x2_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg2ei8_v_bf16m1x2_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16m2x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vluxseg2ei8_v_bf16m2x2_mu(vbool8_t vm, vbfloat16m2x2_t vd, const __bf16 *rs1, vuint8m1_t rs2, size_t 
vl) { + return __riscv_vluxseg2ei8_v_bf16m2x2_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16m4x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i8.nxv16i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m4x2_t test_vluxseg2ei8_v_bf16m4x2_mu(vbool4_t vm, vbfloat16m4x2_t vd, const __bf16 *rs1, vuint8m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei8_v_bf16m4x2_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg3ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg3ei32.c new file mode 100644 index 0000000000000..ff211e99a5f4e --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg3ei32.c @@ -0,0 +1,168 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei32_v_bf16mf4x3_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vluxseg3ei32_v_bf16mf4x3_tu(vbfloat16mf4x3_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg3ei32_v_bf16mf4x3_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei32_v_bf16mf2x3_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vluxseg3ei32_v_bf16mf2x3_tu(vbfloat16mf2x3_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei32_v_bf16mf2x3_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei32_v_bf16m1x3_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vluxseg3ei32_v_bf16m1x3_tu(vbfloat16m1x3_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg3ei32_v_bf16m1x3_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define 
dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei32_v_bf16m2x3_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i32.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vluxseg3ei32_v_bf16m2x3_tu(vbfloat16m2x3_t vd, const __bf16 *rs1, vuint32m4_t rs2, size_t vl) { + return __riscv_vluxseg3ei32_v_bf16m2x3_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei32_v_bf16mf4x3_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vluxseg3ei32_v_bf16mf4x3_tum(vbool64_t vm, vbfloat16mf4x3_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg3ei32_v_bf16mf4x3_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei32_v_bf16mf2x3_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) 
@llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vluxseg3ei32_v_bf16mf2x3_tum(vbool32_t vm, vbfloat16mf2x3_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei32_v_bf16mf2x3_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei32_v_bf16m1x3_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vluxseg3ei32_v_bf16m1x3_tum(vbool16_t vm, vbfloat16m1x3_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg3ei32_v_bf16m1x3_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei32_v_bf16m2x3_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vluxseg3ei32_v_bf16m2x3_tum(vbool8_t vm, vbfloat16m2x3_t vd, const __bf16 *rs1, 
vuint32m4_t rs2, size_t vl) { + return __riscv_vluxseg3ei32_v_bf16m2x3_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei32_v_bf16mf4x3_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vluxseg3ei32_v_bf16mf4x3_tumu(vbool64_t vm, vbfloat16mf4x3_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg3ei32_v_bf16mf4x3_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei32_v_bf16mf2x3_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vluxseg3ei32_v_bf16mf2x3_tumu(vbool32_t vm, vbfloat16mf2x3_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei32_v_bf16mf2x3_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei32_v_bf16m1x3_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef 
[[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vluxseg3ei32_v_bf16m1x3_tumu(vbool16_t vm, vbfloat16m1x3_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg3ei32_v_bf16m1x3_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei32_v_bf16m2x3_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vluxseg3ei32_v_bf16m2x3_tumu(vbool8_t vm, vbfloat16m2x3_t vd, const __bf16 *rs1, vuint32m4_t rs2, size_t vl) { + return __riscv_vluxseg3ei32_v_bf16m2x3_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei32_v_bf16mf4x3_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret 
target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vluxseg3ei32_v_bf16mf4x3_mu(vbool64_t vm, vbfloat16mf4x3_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg3ei32_v_bf16mf4x3_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei32_v_bf16mf2x3_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vluxseg3ei32_v_bf16mf2x3_mu(vbool32_t vm, vbfloat16mf2x3_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei32_v_bf16mf2x3_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei32_v_bf16m1x3_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vluxseg3ei32_v_bf16m1x3_mu(vbool16_t vm, vbfloat16m1x3_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg3ei32_v_bf16m1x3_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei32_v_bf16m2x3_mu( 
+// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vluxseg3ei32_v_bf16m2x3_mu(vbool8_t vm, vbfloat16m2x3_t vd, const __bf16 *rs1, vuint32m4_t rs2, size_t vl) { + return __riscv_vluxseg3ei32_v_bf16m2x3_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg3ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg3ei64.c new file mode 100644 index 0000000000000..ae79962b81b46 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg3ei64.c @@ -0,0 +1,168 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei64_v_bf16mf4x3_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 
4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vluxseg3ei64_v_bf16mf4x3_tu(vbfloat16mf4x3_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei64_v_bf16mf4x3_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei64_v_bf16mf2x3_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vluxseg3ei64_v_bf16mf2x3_tu(vbfloat16mf2x3_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg3ei64_v_bf16mf2x3_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei64_v_bf16m1x3_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vluxseg3ei64_v_bf16m1x3_tu(vbfloat16m1x3_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg3ei64_v_bf16m1x3_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei64_v_bf16m2x3_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], 
i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i64.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vluxseg3ei64_v_bf16m2x3_tu(vbfloat16m2x3_t vd, const __bf16 *rs1, vuint64m8_t rs2, size_t vl) { + return __riscv_vluxseg3ei64_v_bf16m2x3_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei64_v_bf16mf4x3_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vluxseg3ei64_v_bf16mf4x3_tum(vbool64_t vm, vbfloat16mf4x3_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei64_v_bf16mf4x3_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei64_v_bf16mf2x3_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// 
+vbfloat16mf2x3_t test_vluxseg3ei64_v_bf16mf2x3_tum(vbool32_t vm, vbfloat16mf2x3_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg3ei64_v_bf16mf2x3_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei64_v_bf16m1x3_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vluxseg3ei64_v_bf16m1x3_tum(vbool16_t vm, vbfloat16m1x3_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg3ei64_v_bf16m1x3_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei64_v_bf16m2x3_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vluxseg3ei64_v_bf16m2x3_tum(vbool8_t vm, vbfloat16m2x3_t vd, const __bf16 *rs1, vuint64m8_t rs2, size_t vl) { + return __riscv_vluxseg3ei64_v_bf16m2x3_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei64_v_bf16mf4x3_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], 
target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vluxseg3ei64_v_bf16mf4x3_tumu(vbool64_t vm, vbfloat16mf4x3_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei64_v_bf16mf4x3_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei64_v_bf16mf2x3_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vluxseg3ei64_v_bf16mf2x3_tumu(vbool32_t vm, vbfloat16mf2x3_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg3ei64_v_bf16mf2x3_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei64_v_bf16m1x3_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 3) [[VD]], 
ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vluxseg3ei64_v_bf16m1x3_tumu(vbool16_t vm, vbfloat16m1x3_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg3ei64_v_bf16m1x3_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei64_v_bf16m2x3_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vluxseg3ei64_v_bf16m2x3_tumu(vbool8_t vm, vbfloat16m2x3_t vd, const __bf16 *rs1, vuint64m8_t rs2, size_t vl) { + return __riscv_vluxseg3ei64_v_bf16m2x3_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei64_v_bf16mf4x3_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vluxseg3ei64_v_bf16mf4x3_mu(vbool64_t vm, vbfloat16mf4x3_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei64_v_bf16mf4x3_mu(vm, vd, rs1, rs2, vl); +} + +// 
CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei64_v_bf16mf2x3_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vluxseg3ei64_v_bf16mf2x3_mu(vbool32_t vm, vbfloat16mf2x3_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg3ei64_v_bf16mf2x3_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei64_v_bf16m1x3_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vluxseg3ei64_v_bf16m1x3_mu(vbool16_t vm, vbfloat16m1x3_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg3ei64_v_bf16m1x3_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei64_v_bf16m2x3_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) 
@llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vluxseg3ei64_v_bf16m2x3_mu(vbool8_t vm, vbfloat16m2x3_t vd, const __bf16 *rs1, vuint64m8_t rs2, size_t vl) { + return __riscv_vluxseg3ei64_v_bf16m2x3_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg3ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg3ei8.c new file mode 100644 index 0000000000000..18c7af2663099 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg3ei8.c @@ -0,0 +1,168 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei8_v_bf16mf4x3_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vluxseg3ei8_v_bf16mf4x3_tu(vbfloat16mf4x3_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg3ei8_v_bf16mf4x3_tu(vd, rs1, rs2, vl); +} + 
+// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei8_v_bf16mf2x3_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vluxseg3ei8_v_bf16mf2x3_tu(vbfloat16mf2x3_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg3ei8_v_bf16mf2x3_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei8_v_bf16m1x3_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vluxseg3ei8_v_bf16m1x3_tu(vbfloat16m1x3_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg3ei8_v_bf16m1x3_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei8_v_bf16m2x3_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i8.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], i64 
[[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vluxseg3ei8_v_bf16m2x3_tu(vbfloat16m2x3_t vd, const __bf16 *rs1, vuint8m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei8_v_bf16m2x3_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei8_v_bf16mf4x3_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vluxseg3ei8_v_bf16mf4x3_tum(vbool64_t vm, vbfloat16mf4x3_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg3ei8_v_bf16mf4x3_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei8_v_bf16mf2x3_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vluxseg3ei8_v_bf16mf2x3_tum(vbool32_t vm, vbfloat16mf2x3_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg3ei8_v_bf16mf2x3_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) 
@test_vluxseg3ei8_v_bf16m1x3_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vluxseg3ei8_v_bf16m1x3_tum(vbool16_t vm, vbfloat16m1x3_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg3ei8_v_bf16m1x3_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei8_v_bf16m2x3_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vluxseg3ei8_v_bf16m2x3_tum(vbool8_t vm, vbfloat16m2x3_t vd, const __bf16 *rs1, vuint8m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei8_v_bf16m2x3_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei8_v_bf16mf4x3_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) 
@llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vluxseg3ei8_v_bf16mf4x3_tumu(vbool64_t vm, vbfloat16mf4x3_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg3ei8_v_bf16mf4x3_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei8_v_bf16mf2x3_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vluxseg3ei8_v_bf16mf2x3_tumu(vbool32_t vm, vbfloat16mf2x3_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg3ei8_v_bf16mf2x3_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei8_v_bf16m1x3_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vluxseg3ei8_v_bf16m1x3_tumu(vbool16_t vm, vbfloat16m1x3_t vd, const __bf16 *rs1, 
vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg3ei8_v_bf16m1x3_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei8_v_bf16m2x3_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vluxseg3ei8_v_bf16m2x3_tumu(vbool8_t vm, vbfloat16m2x3_t vd, const __bf16 *rs1, vuint8m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei8_v_bf16m2x3_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei8_v_bf16mf4x3_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vluxseg3ei8_v_bf16mf4x3_mu(vbool64_t vm, vbfloat16mf4x3_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg3ei8_v_bf16mf4x3_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei8_v_bf16mf2x3_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { 
+// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vluxseg3ei8_v_bf16mf2x3_mu(vbool32_t vm, vbfloat16mf2x3_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg3ei8_v_bf16mf2x3_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei8_v_bf16m1x3_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vluxseg3ei8_v_bf16m1x3_mu(vbool16_t vm, vbfloat16m1x3_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg3ei8_v_bf16m1x3_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei8_v_bf16m2x3_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t 
test_vluxseg3ei8_v_bf16m2x3_mu(vbool8_t vm, vbfloat16m2x3_t vd, const __bf16 *rs1, vuint8m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei8_v_bf16m2x3_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg4ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg4ei32.c new file mode 100644 index 0000000000000..d0c9adf52942c --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg4ei32.c @@ -0,0 +1,168 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei32_v_bf16mf4x4_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vluxseg4ei32_v_bf16mf4x4_tu(vbfloat16mf4x4_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg4ei32_v_bf16mf4x4_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei32_v_bf16mf2x4_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// 
CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vluxseg4ei32_v_bf16mf2x4_tu(vbfloat16mf2x4_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg4ei32_v_bf16mf2x4_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei32_v_bf16m1x4_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vluxseg4ei32_v_bf16m1x4_tu(vbfloat16m1x4_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg4ei32_v_bf16m1x4_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei32_v_bf16m2x4_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i32.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vluxseg4ei32_v_bf16m2x4_tu(vbfloat16m2x4_t vd, const __bf16 *rs1, vuint32m4_t rs2, size_t vl) { + return 
__riscv_vluxseg4ei32_v_bf16m2x4_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei32_v_bf16mf4x4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vluxseg4ei32_v_bf16mf4x4_tum(vbool64_t vm, vbfloat16mf4x4_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg4ei32_v_bf16mf4x4_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei32_v_bf16mf2x4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vluxseg4ei32_v_bf16mf2x4_tum(vbool32_t vm, vbfloat16mf2x4_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg4ei32_v_bf16mf2x4_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei32_v_bf16m1x4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: 
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vluxseg4ei32_v_bf16m1x4_tum(vbool16_t vm, vbfloat16m1x4_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg4ei32_v_bf16m1x4_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei32_v_bf16m2x4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vluxseg4ei32_v_bf16m2x4_tum(vbool8_t vm, vbfloat16m2x4_t vd, const __bf16 *rs1, vuint32m4_t rs2, size_t vl) { + return __riscv_vluxseg4ei32_v_bf16m2x4_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei32_v_bf16mf4x4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t 
test_vluxseg4ei32_v_bf16mf4x4_tumu(vbool64_t vm, vbfloat16mf4x4_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg4ei32_v_bf16mf4x4_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei32_v_bf16mf2x4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vluxseg4ei32_v_bf16mf2x4_tumu(vbool32_t vm, vbfloat16mf2x4_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg4ei32_v_bf16mf2x4_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei32_v_bf16m1x4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vluxseg4ei32_v_bf16m1x4_tumu(vbool16_t vm, vbfloat16m1x4_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg4ei32_v_bf16m1x4_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei32_v_bf16m2x4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], 
target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vluxseg4ei32_v_bf16m2x4_tumu(vbool8_t vm, vbfloat16m2x4_t vd, const __bf16 *rs1, vuint32m4_t rs2, size_t vl) { + return __riscv_vluxseg4ei32_v_bf16m2x4_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei32_v_bf16mf4x4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vluxseg4ei32_v_bf16mf4x4_mu(vbool64_t vm, vbfloat16mf4x4_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg4ei32_v_bf16mf4x4_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei32_v_bf16mf2x4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], 
[[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vluxseg4ei32_v_bf16mf2x4_mu(vbool32_t vm, vbfloat16mf2x4_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg4ei32_v_bf16mf2x4_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei32_v_bf16m1x4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vluxseg4ei32_v_bf16m1x4_mu(vbool16_t vm, vbfloat16m1x4_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg4ei32_v_bf16m1x4_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei32_v_bf16m2x4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vluxseg4ei32_v_bf16m2x4_mu(vbool8_t vm, vbfloat16m2x4_t vd, const __bf16 *rs1, vuint32m4_t rs2, size_t vl) { + return __riscv_vluxseg4ei32_v_bf16m2x4_mu(vm, vd, rs1, rs2, vl); +} diff --git 
a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg4ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg4ei64.c new file mode 100644 index 0000000000000..b68db5fefe5e8 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg4ei64.c @@ -0,0 +1,168 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei64_v_bf16mf4x4_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vluxseg4ei64_v_bf16mf4x4_tu(vbfloat16mf4x4_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg4ei64_v_bf16mf4x4_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei64_v_bf16mf2x4_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 4) 
[[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vluxseg4ei64_v_bf16mf2x4_tu(vbfloat16mf2x4_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg4ei64_v_bf16mf2x4_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei64_v_bf16m1x4_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vluxseg4ei64_v_bf16m1x4_tu(vbfloat16m1x4_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg4ei64_v_bf16m1x4_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei64_v_bf16m2x4_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i64.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vluxseg4ei64_v_bf16m2x4_tu(vbfloat16m2x4_t vd, const __bf16 *rs1, vuint64m8_t rs2, size_t vl) { + return __riscv_vluxseg4ei64_v_bf16m2x4_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei64_v_bf16mf4x4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], 
target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vluxseg4ei64_v_bf16mf4x4_tum(vbool64_t vm, vbfloat16mf4x4_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg4ei64_v_bf16mf4x4_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei64_v_bf16mf2x4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vluxseg4ei64_v_bf16mf2x4_tum(vbool32_t vm, vbfloat16mf2x4_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg4ei64_v_bf16mf2x4_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei64_v_bf16m1x4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr 
[[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vluxseg4ei64_v_bf16m1x4_tum(vbool16_t vm, vbfloat16m1x4_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg4ei64_v_bf16m1x4_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei64_v_bf16m2x4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vluxseg4ei64_v_bf16m2x4_tum(vbool8_t vm, vbfloat16m2x4_t vd, const __bf16 *rs1, vuint64m8_t rs2, size_t vl) { + return __riscv_vluxseg4ei64_v_bf16m2x4_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei64_v_bf16mf4x4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vluxseg4ei64_v_bf16mf4x4_tumu(vbool64_t vm, vbfloat16mf4x4_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg4ei64_v_bf16mf4x4_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: 
define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei64_v_bf16mf2x4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vluxseg4ei64_v_bf16mf2x4_tumu(vbool32_t vm, vbfloat16mf2x4_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg4ei64_v_bf16mf2x4_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei64_v_bf16m1x4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vluxseg4ei64_v_bf16m1x4_tumu(vbool16_t vm, vbfloat16m1x4_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg4ei64_v_bf16m1x4_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei64_v_bf16m2x4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) 
@llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vluxseg4ei64_v_bf16m2x4_tumu(vbool8_t vm, vbfloat16m2x4_t vd, const __bf16 *rs1, vuint64m8_t rs2, size_t vl) { + return __riscv_vluxseg4ei64_v_bf16m2x4_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei64_v_bf16mf4x4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vluxseg4ei64_v_bf16mf4x4_mu(vbool64_t vm, vbfloat16mf4x4_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg4ei64_v_bf16mf4x4_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei64_v_bf16mf2x4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vluxseg4ei64_v_bf16mf2x4_mu(vbool32_t vm, vbfloat16mf2x4_t vd, const __bf16 *rs1, 
vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg4ei64_v_bf16mf2x4_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei64_v_bf16m1x4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vluxseg4ei64_v_bf16m1x4_mu(vbool16_t vm, vbfloat16m1x4_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg4ei64_v_bf16m1x4_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei64_v_bf16m2x4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vluxseg4ei64_v_bf16m2x4_mu(vbool8_t vm, vbfloat16m2x4_t vd, const __bf16 *rs1, vuint64m8_t rs2, size_t vl) { + return __riscv_vluxseg4ei64_v_bf16m2x4_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg4ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg4ei8.c new file mode 100644 index 
0000000000000..1cca5289ab74b --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg4ei8.c @@ -0,0 +1,168 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei8_v_bf16mf4x4_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vluxseg4ei8_v_bf16mf4x4_tu(vbfloat16mf4x4_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg4ei8_v_bf16mf4x4_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei8_v_bf16mf2x4_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vluxseg4ei8_v_bf16mf2x4_tu(vbfloat16mf2x4_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) 
{ + return __riscv_vluxseg4ei8_v_bf16mf2x4_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei8_v_bf16m1x4_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vluxseg4ei8_v_bf16m1x4_tu(vbfloat16m1x4_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg4ei8_v_bf16m1x4_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei8_v_bf16m2x4_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i8.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vluxseg4ei8_v_bf16m2x4_tu(vbfloat16m2x4_t vd, const __bf16 *rs1, vuint8m1_t rs2, size_t vl) { + return __riscv_vluxseg4ei8_v_bf16m2x4_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei8_v_bf16mf4x4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) 
@llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vluxseg4ei8_v_bf16mf4x4_tum(vbool64_t vm, vbfloat16mf4x4_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg4ei8_v_bf16mf4x4_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei8_v_bf16mf2x4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vluxseg4ei8_v_bf16mf2x4_tum(vbool32_t vm, vbfloat16mf2x4_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg4ei8_v_bf16mf2x4_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei8_v_bf16m1x4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vluxseg4ei8_v_bf16m1x4_tum(vbool16_t vm, vbfloat16m1x4_t vd, const __bf16 *rs1, vuint8mf2_t 
rs2, size_t vl) { + return __riscv_vluxseg4ei8_v_bf16m1x4_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei8_v_bf16m2x4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vluxseg4ei8_v_bf16m2x4_tum(vbool8_t vm, vbfloat16m2x4_t vd, const __bf16 *rs1, vuint8m1_t rs2, size_t vl) { + return __riscv_vluxseg4ei8_v_bf16m2x4_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei8_v_bf16mf4x4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vluxseg4ei8_v_bf16mf4x4_tumu(vbool64_t vm, vbfloat16mf4x4_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg4ei8_v_bf16mf4x4_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei8_v_bf16mf2x4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// 
CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vluxseg4ei8_v_bf16mf2x4_tumu(vbool32_t vm, vbfloat16mf2x4_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg4ei8_v_bf16mf2x4_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei8_v_bf16m1x4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vluxseg4ei8_v_bf16m1x4_tumu(vbool16_t vm, vbfloat16m1x4_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg4ei8_v_bf16m1x4_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei8_v_bf16m2x4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// 
+vbfloat16m2x4_t test_vluxseg4ei8_v_bf16m2x4_tumu(vbool8_t vm, vbfloat16m2x4_t vd, const __bf16 *rs1, vuint8m1_t rs2, size_t vl) { + return __riscv_vluxseg4ei8_v_bf16m2x4_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei8_v_bf16mf4x4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vluxseg4ei8_v_bf16mf4x4_mu(vbool64_t vm, vbfloat16mf4x4_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg4ei8_v_bf16mf4x4_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei8_v_bf16mf2x4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vluxseg4ei8_v_bf16mf2x4_mu(vbool32_t vm, vbfloat16mf2x4_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg4ei8_v_bf16mf2x4_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei8_v_bf16m1x4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], 
target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vluxseg4ei8_v_bf16m1x4_mu(vbool16_t vm, vbfloat16m1x4_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg4ei8_v_bf16m1x4_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei8_v_bf16m2x4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vluxseg4ei8_v_bf16m2x4_mu(vbool8_t vm, vbfloat16m2x4_t vd, const __bf16 *rs1, vuint8m1_t rs2, size_t vl) { + return __riscv_vluxseg4ei8_v_bf16m2x4_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg5ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg5ei32.c new file mode 100644 index 0000000000000..3e1d4e325c4a8 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg5ei32.c @@ -0,0 +1,128 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// 
REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei32_v_bf16mf4x5_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vluxseg5ei32_v_bf16mf4x5_tu(vbfloat16mf4x5_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg5ei32_v_bf16mf4x5_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei32_v_bf16mf2x5_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vluxseg5ei32_v_bf16mf2x5_tu(vbfloat16mf2x5_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg5ei32_v_bf16mf2x5_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei32_v_bf16m1x5_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef 
[[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vluxseg5ei32_v_bf16m1x5_tu(vbfloat16m1x5_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg5ei32_v_bf16m1x5_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei32_v_bf16mf4x5_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vluxseg5ei32_v_bf16mf4x5_tum(vbool64_t vm, vbfloat16mf4x5_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg5ei32_v_bf16mf4x5_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei32_v_bf16mf2x5_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) 
[[TMP0]] +// +vbfloat16mf2x5_t test_vluxseg5ei32_v_bf16mf2x5_tum(vbool32_t vm, vbfloat16mf2x5_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg5ei32_v_bf16mf2x5_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei32_v_bf16m1x5_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vluxseg5ei32_v_bf16m1x5_tum(vbool16_t vm, vbfloat16m1x5_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg5ei32_v_bf16m1x5_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei32_v_bf16mf4x5_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vluxseg5ei32_v_bf16mf4x5_tumu(vbool64_t vm, vbfloat16mf4x5_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg5ei32_v_bf16mf4x5_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei32_v_bf16mf2x5_tumu( +// 
CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vluxseg5ei32_v_bf16mf2x5_tumu(vbool32_t vm, vbfloat16mf2x5_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg5ei32_v_bf16mf2x5_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei32_v_bf16m1x5_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vluxseg5ei32_v_bf16m1x5_tumu(vbool16_t vm, vbfloat16m1x5_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg5ei32_v_bf16m1x5_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei32_v_bf16mf4x5_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) 
@llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vluxseg5ei32_v_bf16mf4x5_mu(vbool64_t vm, vbfloat16mf4x5_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg5ei32_v_bf16mf4x5_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei32_v_bf16mf2x5_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vluxseg5ei32_v_bf16mf2x5_mu(vbool32_t vm, vbfloat16mf2x5_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg5ei32_v_bf16mf2x5_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei32_v_bf16m1x5_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vluxseg5ei32_v_bf16m1x5_mu(vbool16_t vm, vbfloat16m1x5_t vd, const __bf16 *rs1, 
vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg5ei32_v_bf16m1x5_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg5ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg5ei64.c new file mode 100644 index 0000000000000..cc8c4dfc2b057 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg5ei64.c @@ -0,0 +1,128 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei64_v_bf16mf4x5_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vluxseg5ei64_v_bf16mf4x5_tu(vbfloat16mf4x5_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg5ei64_v_bf16mf4x5_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei64_v_bf16mf2x5_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vluxseg5ei64_v_bf16mf2x5_tu(vbfloat16mf2x5_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg5ei64_v_bf16mf2x5_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei64_v_bf16m1x5_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vluxseg5ei64_v_bf16m1x5_tu(vbfloat16m1x5_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg5ei64_v_bf16m1x5_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei64_v_bf16mf4x5_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vluxseg5ei64_v_bf16mf4x5_tum(vbool64_t vm, vbfloat16mf4x5_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return 
__riscv_vluxseg5ei64_v_bf16mf4x5_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei64_v_bf16mf2x5_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vluxseg5ei64_v_bf16mf2x5_tum(vbool32_t vm, vbfloat16mf2x5_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg5ei64_v_bf16mf2x5_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei64_v_bf16m1x5_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vluxseg5ei64_v_bf16m1x5_tum(vbool16_t vm, vbfloat16m1x5_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg5ei64_v_bf16m1x5_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei64_v_bf16mf4x5_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: 
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vluxseg5ei64_v_bf16mf4x5_tumu(vbool64_t vm, vbfloat16mf4x5_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg5ei64_v_bf16mf4x5_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei64_v_bf16mf2x5_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vluxseg5ei64_v_bf16mf2x5_tumu(vbool32_t vm, vbfloat16mf2x5_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg5ei64_v_bf16mf2x5_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei64_v_bf16m1x5_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t 
test_vluxseg5ei64_v_bf16m1x5_tumu(vbool16_t vm, vbfloat16m1x5_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg5ei64_v_bf16m1x5_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei64_v_bf16mf4x5_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vluxseg5ei64_v_bf16mf4x5_mu(vbool64_t vm, vbfloat16mf4x5_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg5ei64_v_bf16mf4x5_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei64_v_bf16mf2x5_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vluxseg5ei64_v_bf16mf2x5_mu(vbool32_t vm, vbfloat16mf2x5_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg5ei64_v_bf16mf2x5_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei64_v_bf16m1x5_mu( +// CHECK-RV64-SAME: [[VM:%.*]], 
target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vluxseg5ei64_v_bf16m1x5_mu(vbool16_t vm, vbfloat16m1x5_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg5ei64_v_bf16m1x5_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg5ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg5ei8.c new file mode 100644 index 0000000000000..779368d55e95a --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg5ei8.c @@ -0,0 +1,128 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei8_v_bf16mf4x5_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret 
target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vluxseg5ei8_v_bf16mf4x5_tu(vbfloat16mf4x5_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg5ei8_v_bf16mf4x5_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei8_v_bf16mf2x5_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vluxseg5ei8_v_bf16mf2x5_tu(vbfloat16mf2x5_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg5ei8_v_bf16mf2x5_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei8_v_bf16m1x5_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vluxseg5ei8_v_bf16m1x5_tu(vbfloat16m1x5_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg5ei8_v_bf16m1x5_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei8_v_bf16mf4x5_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) 
#[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vluxseg5ei8_v_bf16mf4x5_tum(vbool64_t vm, vbfloat16mf4x5_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg5ei8_v_bf16mf4x5_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei8_v_bf16mf2x5_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vluxseg5ei8_v_bf16mf2x5_tum(vbool32_t vm, vbfloat16mf2x5_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg5ei8_v_bf16mf2x5_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei8_v_bf16m1x5_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] 
+// +vbfloat16m1x5_t test_vluxseg5ei8_v_bf16m1x5_tum(vbool16_t vm, vbfloat16m1x5_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg5ei8_v_bf16m1x5_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei8_v_bf16mf4x5_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vluxseg5ei8_v_bf16mf4x5_tumu(vbool64_t vm, vbfloat16mf4x5_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg5ei8_v_bf16mf4x5_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei8_v_bf16mf2x5_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vluxseg5ei8_v_bf16mf2x5_tumu(vbool32_t vm, vbfloat16mf2x5_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg5ei8_v_bf16mf2x5_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei8_v_bf16m1x5_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], 
target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vluxseg5ei8_v_bf16m1x5_tumu(vbool16_t vm, vbfloat16m1x5_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg5ei8_v_bf16m1x5_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei8_v_bf16mf4x5_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vluxseg5ei8_v_bf16mf4x5_mu(vbool64_t vm, vbfloat16mf4x5_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg5ei8_v_bf16mf4x5_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei8_v_bf16mf2x5_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], 
[[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vluxseg5ei8_v_bf16mf2x5_mu(vbool32_t vm, vbfloat16mf2x5_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg5ei8_v_bf16mf2x5_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei8_v_bf16m1x5_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vluxseg5ei8_v_bf16m1x5_mu(vbool16_t vm, vbfloat16m1x5_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg5ei8_v_bf16m1x5_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg6ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg6ei32.c new file mode 100644 index 0000000000000..dec5b0af5eab1 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg6ei32.c @@ -0,0 +1,128 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) 
@test_vluxseg6ei32_v_bf16mf4x6_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vluxseg6ei32_v_bf16mf4x6_tu(vbfloat16mf4x6_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg6ei32_v_bf16mf4x6_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei32_v_bf16mf2x6_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vluxseg6ei32_v_bf16mf2x6_tu(vbfloat16mf2x6_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg6ei32_v_bf16mf2x6_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei32_v_bf16m1x6_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret 
target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vluxseg6ei32_v_bf16m1x6_tu(vbfloat16m1x6_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg6ei32_v_bf16m1x6_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei32_v_bf16mf4x6_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vluxseg6ei32_v_bf16mf4x6_tum(vbool64_t vm, vbfloat16mf4x6_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg6ei32_v_bf16mf4x6_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei32_v_bf16mf2x6_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vluxseg6ei32_v_bf16mf2x6_tum(vbool32_t vm, vbfloat16mf2x6_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg6ei32_v_bf16mf2x6_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei32_v_bf16m1x6_tum( +// 
CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vluxseg6ei32_v_bf16m1x6_tum(vbool16_t vm, vbfloat16m1x6_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg6ei32_v_bf16m1x6_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei32_v_bf16mf4x6_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vluxseg6ei32_v_bf16mf4x6_tumu(vbool64_t vm, vbfloat16mf4x6_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg6ei32_v_bf16mf4x6_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei32_v_bf16mf2x6_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) 
@llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vluxseg6ei32_v_bf16mf2x6_tumu(vbool32_t vm, vbfloat16mf2x6_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg6ei32_v_bf16mf2x6_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei32_v_bf16m1x6_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vluxseg6ei32_v_bf16m1x6_tumu(vbool16_t vm, vbfloat16m1x6_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg6ei32_v_bf16m1x6_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei32_v_bf16mf4x6_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vluxseg6ei32_v_bf16mf4x6_mu(vbool64_t vm, vbfloat16mf4x6_t vd, const __bf16 *rs1, 
vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg6ei32_v_bf16mf4x6_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei32_v_bf16mf2x6_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vluxseg6ei32_v_bf16mf2x6_mu(vbool32_t vm, vbfloat16mf2x6_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg6ei32_v_bf16mf2x6_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei32_v_bf16m1x6_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vluxseg6ei32_v_bf16m1x6_mu(vbool16_t vm, vbfloat16m1x6_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg6ei32_v_bf16m1x6_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg6ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg6ei64.c new file mode 100644 index 
0000000000000..463f026e4d897 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg6ei64.c @@ -0,0 +1,128 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei64_v_bf16mf4x6_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vluxseg6ei64_v_bf16mf4x6_tu(vbfloat16mf4x6_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg6ei64_v_bf16mf4x6_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei64_v_bf16mf2x6_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vluxseg6ei64_v_bf16mf2x6_tu(vbfloat16mf2x6_t vd, const __bf16 *rs1, vuint64m2_t rs2, 
size_t vl) { + return __riscv_vluxseg6ei64_v_bf16mf2x6_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei64_v_bf16m1x6_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vluxseg6ei64_v_bf16m1x6_tu(vbfloat16m1x6_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg6ei64_v_bf16m1x6_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei64_v_bf16mf4x6_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vluxseg6ei64_v_bf16mf4x6_tum(vbool64_t vm, vbfloat16mf4x6_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg6ei64_v_bf16mf4x6_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei64_v_bf16mf2x6_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vluxseg6ei64_v_bf16mf2x6_tum(vbool32_t vm, vbfloat16mf2x6_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg6ei64_v_bf16mf2x6_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei64_v_bf16m1x6_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vluxseg6ei64_v_bf16m1x6_tum(vbool16_t vm, vbfloat16m1x6_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg6ei64_v_bf16m1x6_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei64_v_bf16mf4x6_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vluxseg6ei64_v_bf16mf4x6_tumu(vbool64_t vm, 
vbfloat16mf4x6_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg6ei64_v_bf16mf4x6_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei64_v_bf16mf2x6_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vluxseg6ei64_v_bf16mf2x6_tumu(vbool32_t vm, vbfloat16mf2x6_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg6ei64_v_bf16mf2x6_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei64_v_bf16m1x6_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vluxseg6ei64_v_bf16m1x6_tumu(vbool16_t vm, vbfloat16m1x6_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg6ei64_v_bf16m1x6_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei64_v_bf16mf4x6_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef 
[[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vluxseg6ei64_v_bf16mf4x6_mu(vbool64_t vm, vbfloat16mf4x6_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg6ei64_v_bf16mf4x6_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei64_v_bf16mf2x6_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vluxseg6ei64_v_bf16mf2x6_mu(vbool32_t vm, vbfloat16mf2x6_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg6ei64_v_bf16mf2x6_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei64_v_bf16m1x6_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// 
CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vluxseg6ei64_v_bf16m1x6_mu(vbool16_t vm, vbfloat16m1x6_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg6ei64_v_bf16m1x6_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg6ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg6ei8.c new file mode 100644 index 0000000000000..88a89bd3c1480 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg6ei8.c @@ -0,0 +1,128 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei8_v_bf16mf4x6_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vluxseg6ei8_v_bf16mf4x6_tu(vbfloat16mf4x6_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg6ei8_v_bf16mf4x6_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei8_v_bf16mf2x6_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr 
noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vluxseg6ei8_v_bf16mf2x6_tu(vbfloat16mf2x6_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg6ei8_v_bf16mf2x6_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei8_v_bf16m1x6_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vluxseg6ei8_v_bf16m1x6_tu(vbfloat16m1x6_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg6ei8_v_bf16m1x6_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei8_v_bf16mf4x6_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t 
test_vluxseg6ei8_v_bf16mf4x6_tum(vbool64_t vm, vbfloat16mf4x6_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg6ei8_v_bf16mf4x6_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei8_v_bf16mf2x6_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vluxseg6ei8_v_bf16mf2x6_tum(vbool32_t vm, vbfloat16mf2x6_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg6ei8_v_bf16mf2x6_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei8_v_bf16m1x6_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vluxseg6ei8_v_bf16m1x6_tum(vbool16_t vm, vbfloat16m1x6_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg6ei8_v_bf16m1x6_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei8_v_bf16mf4x6_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 
6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vluxseg6ei8_v_bf16mf4x6_tumu(vbool64_t vm, vbfloat16mf4x6_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg6ei8_v_bf16mf4x6_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei8_v_bf16mf2x6_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vluxseg6ei8_v_bf16mf2x6_tumu(vbool32_t vm, vbfloat16mf2x6_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg6ei8_v_bf16mf2x6_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei8_v_bf16m1x6_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], 
i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vluxseg6ei8_v_bf16m1x6_tumu(vbool16_t vm, vbfloat16m1x6_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg6ei8_v_bf16m1x6_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei8_v_bf16mf4x6_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vluxseg6ei8_v_bf16mf4x6_mu(vbool64_t vm, vbfloat16mf4x6_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg6ei8_v_bf16mf4x6_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei8_v_bf16mf2x6_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vluxseg6ei8_v_bf16mf2x6_mu(vbool32_t vm, vbfloat16mf2x6_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg6ei8_v_bf16mf2x6_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) 
@test_vluxseg6ei8_v_bf16m1x6_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vluxseg6ei8_v_bf16m1x6_mu(vbool16_t vm, vbfloat16m1x6_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg6ei8_v_bf16m1x6_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg7ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg7ei32.c new file mode 100644 index 0000000000000..f14c2bd126226 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg7ei32.c @@ -0,0 +1,128 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei32_v_bf16mf4x7_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr 
[[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vluxseg7ei32_v_bf16mf4x7_tu(vbfloat16mf4x7_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg7ei32_v_bf16mf4x7_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei32_v_bf16mf2x7_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vluxseg7ei32_v_bf16mf2x7_tu(vbfloat16mf2x7_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg7ei32_v_bf16mf2x7_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei32_v_bf16m1x7_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vluxseg7ei32_v_bf16m1x7_tu(vbfloat16m1x7_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg7ei32_v_bf16m1x7_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei32_v_bf16mf4x7_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) 
[[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vluxseg7ei32_v_bf16mf4x7_tum(vbool64_t vm, vbfloat16mf4x7_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg7ei32_v_bf16mf4x7_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei32_v_bf16mf2x7_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vluxseg7ei32_v_bf16mf2x7_tum(vbool32_t vm, vbfloat16mf2x7_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg7ei32_v_bf16mf2x7_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei32_v_bf16m1x7_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], 
i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vluxseg7ei32_v_bf16m1x7_tum(vbool16_t vm, vbfloat16m1x7_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg7ei32_v_bf16m1x7_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei32_v_bf16mf4x7_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vluxseg7ei32_v_bf16mf4x7_tumu(vbool64_t vm, vbfloat16mf4x7_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg7ei32_v_bf16mf4x7_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei32_v_bf16mf2x7_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vluxseg7ei32_v_bf16mf2x7_tumu(vbool32_t vm, vbfloat16mf2x7_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg7ei32_v_bf16mf2x7_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local 
target("riscv.vector.tuple", , 7) @test_vluxseg7ei32_v_bf16m1x7_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vluxseg7ei32_v_bf16m1x7_tumu(vbool16_t vm, vbfloat16m1x7_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg7ei32_v_bf16m1x7_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei32_v_bf16mf4x7_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vluxseg7ei32_v_bf16mf4x7_mu(vbool64_t vm, vbfloat16mf4x7_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg7ei32_v_bf16mf4x7_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei32_v_bf16mf2x7_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) 
@llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vluxseg7ei32_v_bf16mf2x7_mu(vbool32_t vm, vbfloat16mf2x7_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg7ei32_v_bf16mf2x7_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei32_v_bf16m1x7_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vluxseg7ei32_v_bf16m1x7_mu(vbool16_t vm, vbfloat16m1x7_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg7ei32_v_bf16m1x7_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg7ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg7ei64.c new file mode 100644 index 0000000000000..c2cb327af0535 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg7ei64.c @@ -0,0 +1,128 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// 
RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei64_v_bf16mf4x7_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vluxseg7ei64_v_bf16mf4x7_tu(vbfloat16mf4x7_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg7ei64_v_bf16mf4x7_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei64_v_bf16mf2x7_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vluxseg7ei64_v_bf16mf2x7_tu(vbfloat16mf2x7_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg7ei64_v_bf16mf2x7_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei64_v_bf16m1x7_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) 
@llvm.riscv.vluxseg7.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vluxseg7ei64_v_bf16m1x7_tu(vbfloat16m1x7_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg7ei64_v_bf16m1x7_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei64_v_bf16mf4x7_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vluxseg7ei64_v_bf16mf4x7_tum(vbool64_t vm, vbfloat16mf4x7_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg7ei64_v_bf16mf4x7_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei64_v_bf16mf2x7_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vluxseg7ei64_v_bf16mf2x7_tum(vbool32_t vm, vbfloat16mf2x7_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return 
__riscv_vluxseg7ei64_v_bf16mf2x7_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei64_v_bf16m1x7_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vluxseg7ei64_v_bf16m1x7_tum(vbool16_t vm, vbfloat16m1x7_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg7ei64_v_bf16m1x7_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei64_v_bf16mf4x7_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vluxseg7ei64_v_bf16mf4x7_tumu(vbool64_t vm, vbfloat16mf4x7_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg7ei64_v_bf16mf4x7_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei64_v_bf16mf2x7_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: 
entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vluxseg7ei64_v_bf16mf2x7_tumu(vbool32_t vm, vbfloat16mf2x7_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg7ei64_v_bf16mf2x7_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei64_v_bf16m1x7_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vluxseg7ei64_v_bf16m1x7_tumu(vbool16_t vm, vbfloat16m1x7_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg7ei64_v_bf16m1x7_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei64_v_bf16mf4x7_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t 
test_vluxseg7ei64_v_bf16mf4x7_mu(vbool64_t vm, vbfloat16mf4x7_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg7ei64_v_bf16mf4x7_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei64_v_bf16mf2x7_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vluxseg7ei64_v_bf16mf2x7_mu(vbool32_t vm, vbfloat16mf2x7_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg7ei64_v_bf16mf2x7_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei64_v_bf16m1x7_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vluxseg7ei64_v_bf16m1x7_mu(vbool16_t vm, vbfloat16m1x7_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg7ei64_v_bf16m1x7_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg7ei8.c 
b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg7ei8.c new file mode 100644 index 0000000000000..93b0a5539ff59 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg7ei8.c @@ -0,0 +1,128 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei8_v_bf16mf4x7_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vluxseg7ei8_v_bf16mf4x7_tu(vbfloat16mf4x7_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg7ei8_v_bf16mf4x7_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei8_v_bf16mf2x7_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) 
[[TMP0]] +// +vbfloat16mf2x7_t test_vluxseg7ei8_v_bf16mf2x7_tu(vbfloat16mf2x7_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg7ei8_v_bf16mf2x7_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei8_v_bf16m1x7_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vluxseg7ei8_v_bf16m1x7_tu(vbfloat16m1x7_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg7ei8_v_bf16m1x7_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei8_v_bf16mf4x7_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vluxseg7ei8_v_bf16mf4x7_tum(vbool64_t vm, vbfloat16mf4x7_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg7ei8_v_bf16mf4x7_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei8_v_bf16mf2x7_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], 
[[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vluxseg7ei8_v_bf16mf2x7_tum(vbool32_t vm, vbfloat16mf2x7_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg7ei8_v_bf16mf2x7_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei8_v_bf16m1x7_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vluxseg7ei8_v_bf16m1x7_tum(vbool16_t vm, vbfloat16m1x7_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg7ei8_v_bf16m1x7_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei8_v_bf16mf4x7_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret 
target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vluxseg7ei8_v_bf16mf4x7_tumu(vbool64_t vm, vbfloat16mf4x7_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg7ei8_v_bf16mf4x7_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei8_v_bf16mf2x7_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vluxseg7ei8_v_bf16mf2x7_tumu(vbool32_t vm, vbfloat16mf2x7_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg7ei8_v_bf16mf2x7_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei8_v_bf16m1x7_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vluxseg7ei8_v_bf16m1x7_tumu(vbool16_t vm, vbfloat16m1x7_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg7ei8_v_bf16m1x7_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) 
@test_vluxseg7ei8_v_bf16mf4x7_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vluxseg7ei8_v_bf16mf4x7_mu(vbool64_t vm, vbfloat16mf4x7_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg7ei8_v_bf16mf4x7_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei8_v_bf16mf2x7_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vluxseg7ei8_v_bf16mf2x7_mu(vbool32_t vm, vbfloat16mf2x7_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg7ei8_v_bf16mf2x7_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei8_v_bf16m1x7_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) 
@llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vluxseg7ei8_v_bf16m1x7_mu(vbool16_t vm, vbfloat16m1x7_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg7ei8_v_bf16m1x7_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg8ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg8ei32.c new file mode 100644 index 0000000000000..b0e1656a27139 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg8ei32.c @@ -0,0 +1,128 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei32_v_bf16mf4x8_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vluxseg8ei32_v_bf16mf4x8_tu(vbfloat16mf4x8_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg8ei32_v_bf16mf4x8_tu(vd, rs1, rs2, vl); 
+} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei32_v_bf16mf2x8_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vluxseg8ei32_v_bf16mf2x8_tu(vbfloat16mf2x8_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg8ei32_v_bf16mf2x8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei32_v_bf16m1x8_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vluxseg8ei32_v_bf16m1x8_tu(vbfloat16m1x8_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg8ei32_v_bf16m1x8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei32_v_bf16mf4x8_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) 
@llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vluxseg8ei32_v_bf16mf4x8_tum(vbool64_t vm, vbfloat16mf4x8_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg8ei32_v_bf16mf4x8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei32_v_bf16mf2x8_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vluxseg8ei32_v_bf16mf2x8_tum(vbool32_t vm, vbfloat16mf2x8_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg8ei32_v_bf16mf2x8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei32_v_bf16m1x8_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vluxseg8ei32_v_bf16m1x8_tum(vbool16_t vm, vbfloat16m1x8_t vd, const __bf16 *rs1, 
vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg8ei32_v_bf16m1x8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei32_v_bf16mf4x8_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vluxseg8ei32_v_bf16mf4x8_tumu(vbool64_t vm, vbfloat16mf4x8_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg8ei32_v_bf16mf4x8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei32_v_bf16mf2x8_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vluxseg8ei32_v_bf16mf2x8_tumu(vbool32_t vm, vbfloat16mf2x8_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg8ei32_v_bf16mf2x8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei32_v_bf16m1x8_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef 
[[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vluxseg8ei32_v_bf16m1x8_tumu(vbool16_t vm, vbfloat16m1x8_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg8ei32_v_bf16m1x8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei32_v_bf16mf4x8_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vluxseg8ei32_v_bf16mf4x8_mu(vbool64_t vm, vbfloat16mf4x8_t vd, const __bf16 *rs1, vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg8ei32_v_bf16mf4x8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei32_v_bf16mf2x8_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret 
target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vluxseg8ei32_v_bf16mf2x8_mu(vbool32_t vm, vbfloat16mf2x8_t vd, const __bf16 *rs1, vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg8ei32_v_bf16mf2x8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei32_v_bf16m1x8_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vluxseg8ei32_v_bf16m1x8_mu(vbool16_t vm, vbfloat16m1x8_t vd, const __bf16 *rs1, vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg8ei32_v_bf16m1x8_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg8ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg8ei64.c new file mode 100644 index 0000000000000..9820438b36135 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg8ei64.c @@ -0,0 +1,128 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei64_v_bf16mf4x8_tu( +// CHECK-RV64-SAME: 
target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vluxseg8ei64_v_bf16mf4x8_tu(vbfloat16mf4x8_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg8ei64_v_bf16mf4x8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei64_v_bf16mf2x8_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vluxseg8ei64_v_bf16mf2x8_tu(vbfloat16mf2x8_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg8ei64_v_bf16mf2x8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei64_v_bf16m1x8_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t 
test_vluxseg8ei64_v_bf16m1x8_tu(vbfloat16m1x8_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg8ei64_v_bf16m1x8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei64_v_bf16mf4x8_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vluxseg8ei64_v_bf16mf4x8_tum(vbool64_t vm, vbfloat16mf4x8_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg8ei64_v_bf16mf4x8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei64_v_bf16mf2x8_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vluxseg8ei64_v_bf16mf2x8_tum(vbool32_t vm, vbfloat16mf2x8_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg8ei64_v_bf16mf2x8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei64_v_bf16m1x8_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) 
[[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vluxseg8ei64_v_bf16m1x8_tum(vbool16_t vm, vbfloat16m1x8_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg8ei64_v_bf16m1x8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei64_v_bf16mf4x8_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vluxseg8ei64_v_bf16mf4x8_tumu(vbool64_t vm, vbfloat16mf4x8_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg8ei64_v_bf16mf4x8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei64_v_bf16mf2x8_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], 
i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vluxseg8ei64_v_bf16mf2x8_tumu(vbool32_t vm, vbfloat16mf2x8_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg8ei64_v_bf16mf2x8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei64_v_bf16m1x8_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vluxseg8ei64_v_bf16m1x8_tumu(vbool16_t vm, vbfloat16m1x8_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg8ei64_v_bf16m1x8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei64_v_bf16mf4x8_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vluxseg8ei64_v_bf16mf4x8_mu(vbool64_t vm, vbfloat16mf4x8_t vd, const __bf16 *rs1, vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg8ei64_v_bf16mf4x8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local 
target("riscv.vector.tuple", , 8) @test_vluxseg8ei64_v_bf16mf2x8_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vluxseg8ei64_v_bf16mf2x8_mu(vbool32_t vm, vbfloat16mf2x8_t vd, const __bf16 *rs1, vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg8ei64_v_bf16mf2x8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei64_v_bf16m1x8_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vluxseg8ei64_v_bf16m1x8_mu(vbool16_t vm, vbfloat16m1x8_t vd, const __bf16 *rs1, vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg8ei64_v_bf16m1x8_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg8ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg8ei8.c new file mode 100644 index 0000000000000..5290e29b6bb05 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vluxseg8ei8.c @@ -0,0 +1,128 
@@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei8_v_bf16mf4x8_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vluxseg8ei8_v_bf16mf4x8_tu(vbfloat16mf4x8_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg8ei8_v_bf16mf4x8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei8_v_bf16mf2x8_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vluxseg8ei8_v_bf16mf2x8_tu(vbfloat16mf2x8_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg8ei8_v_bf16mf2x8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) 
@test_vluxseg8ei8_v_bf16m1x8_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vluxseg8ei8_v_bf16m1x8_tu(vbfloat16m1x8_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg8ei8_v_bf16m1x8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei8_v_bf16mf4x8_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vluxseg8ei8_v_bf16mf4x8_tum(vbool64_t vm, vbfloat16mf4x8_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg8ei8_v_bf16mf4x8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei8_v_bf16mf2x8_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], 
[[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vluxseg8ei8_v_bf16mf2x8_tum(vbool32_t vm, vbfloat16mf2x8_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg8ei8_v_bf16mf2x8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei8_v_bf16m1x8_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vluxseg8ei8_v_bf16m1x8_tum(vbool16_t vm, vbfloat16m1x8_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg8ei8_v_bf16m1x8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei8_v_bf16mf4x8_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vluxseg8ei8_v_bf16mf4x8_tumu(vbool64_t vm, vbfloat16mf4x8_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg8ei8_v_bf16mf4x8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local 
target("riscv.vector.tuple", , 8) @test_vluxseg8ei8_v_bf16mf2x8_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vluxseg8ei8_v_bf16mf2x8_tumu(vbool32_t vm, vbfloat16mf2x8_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg8ei8_v_bf16mf2x8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei8_v_bf16m1x8_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vluxseg8ei8_v_bf16m1x8_tumu(vbool16_t vm, vbfloat16m1x8_t vd, const __bf16 *rs1, vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg8ei8_v_bf16m1x8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei8_v_bf16mf4x8_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) 
@llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vluxseg8ei8_v_bf16mf4x8_mu(vbool64_t vm, vbfloat16mf4x8_t vd, const __bf16 *rs1, vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg8ei8_v_bf16mf4x8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei8_v_bf16mf2x8_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vluxseg8ei8_v_bf16mf2x8_mu(vbool32_t vm, vbfloat16mf2x8_t vd, const __bf16 *rs1, vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg8ei8_v_bf16mf2x8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei8_v_bf16m1x8_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vluxseg8ei8_v_bf16m1x8_mu(vbool16_t vm, vbfloat16m1x8_t vd, const __bf16 *rs1, vuint8mf2_t rs2, 
size_t vl) { + return __riscv_vluxseg8ei8_v_bf16m1x8_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxei32.c new file mode 100644 index 0000000000000..82ea42b1b5b87 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxei32.c @@ -0,0 +1,243 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16mf4_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv1bf16.p0.nxv1i32.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vloxei32_v_bf16mf4_tu(vbfloat16mf4_t vd, const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16mf2_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv2bf16.p0.nxv2i32.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vloxei32_v_bf16mf2_tu(vbfloat16mf2_t vd, const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16m1_tu( +// 
CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv4bf16.p0.nxv4i32.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vloxei32_v_bf16m1_tu(vbfloat16m1_t vd, const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vloxei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16m2_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv8bf16.p0.nxv8i32.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vloxei32_v_bf16m2_tu(vbfloat16m2_t vd, const __bf16 *rs1, + vuint32m4_t rs2, size_t vl) { + return __riscv_vloxei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16m4_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv16bf16.p0.nxv16i32.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m4_t test_vloxei32_v_bf16m4_tu(vbfloat16m4_t vd, const __bf16 *rs1, + vuint32m8_t rs2, size_t vl) { + return __riscv_vloxei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16mf4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv1bf16.p0.nxv1i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vloxei32_v_bf16mf4_tum(vbool64_t vm, vbfloat16mf4_t 
vd, + const __bf16 *rs1, vuint32mf2_t rs2, + size_t vl) { + return __riscv_vloxei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16mf2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv2bf16.p0.nxv2i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vloxei32_v_bf16mf2_tum(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, vuint32m1_t rs2, + size_t vl) { + return __riscv_vloxei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16m1_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv4bf16.p0.nxv4i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vloxei32_v_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint32m2_t rs2, + size_t vl) { + return __riscv_vloxei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16m2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv8bf16.p0.nxv8i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vloxei32_v_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint32m4_t rs2, + size_t vl) { + return __riscv_vloxei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16m4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr 
noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv16bf16.p0.nxv16i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m4_t test_vloxei32_v_bf16m4_tum(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint32m8_t rs2, + size_t vl) { + return __riscv_vloxei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16mf4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv1bf16.p0.nxv1i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vloxei32_v_bf16mf4_tumu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, vuint32mf2_t rs2, + size_t vl) { + return __riscv_vloxei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16mf2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv2bf16.p0.nxv2i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vloxei32_v_bf16mf2_tumu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, vuint32m1_t rs2, + size_t vl) { + return __riscv_vloxei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16m1_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv4bf16.p0.nxv4i32.i64( [[VD]], ptr [[RS1]], [[RS2]], 
[[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vloxei32_v_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint32m2_t rs2, + size_t vl) { + return __riscv_vloxei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16m2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv8bf16.p0.nxv8i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vloxei32_v_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint32m4_t rs2, + size_t vl) { + return __riscv_vloxei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16m4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv16bf16.p0.nxv16i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m4_t test_vloxei32_v_bf16m4_tumu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint32m8_t rs2, + size_t vl) { + return __riscv_vloxei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16mf4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv1bf16.p0.nxv1i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vloxei32_v_bf16mf4_mu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, vuint32mf2_t rs2, + size_t vl) { + return __riscv_vloxei32_mu(vm, vd, 
rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16mf2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv2bf16.p0.nxv2i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vloxei32_v_bf16mf2_mu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, vuint32m1_t rs2, + size_t vl) { + return __riscv_vloxei32_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16m1_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv4bf16.p0.nxv4i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vloxei32_v_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint32m2_t rs2, + size_t vl) { + return __riscv_vloxei32_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16m2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv8bf16.p0.nxv8i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vloxei32_v_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint32m4_t rs2, + size_t vl) { + return __riscv_vloxei32_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei32_v_bf16m4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// 
CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv16bf16.p0.nxv16i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m4_t test_vloxei32_v_bf16m4_mu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint32m8_t rs2, + size_t vl) { + return __riscv_vloxei32_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxei64.c new file mode 100644 index 0000000000000..ec6ee7d626b52 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxei64.c @@ -0,0 +1,196 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local @test_vloxei64_v_bf16mf4_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv1bf16.p0.nxv1i64.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vloxei64_v_bf16mf4_tu(vbfloat16mf4_t vd, const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei64_v_bf16mf2_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv2bf16.p0.nxv2i64.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) 
+// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vloxei64_v_bf16mf2_tu(vbfloat16mf2_t vd, const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vloxei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei64_v_bf16m1_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv4bf16.p0.nxv4i64.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vloxei64_v_bf16m1_tu(vbfloat16m1_t vd, const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vloxei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei64_v_bf16m2_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv8bf16.p0.nxv8i64.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vloxei64_v_bf16m2_tu(vbfloat16m2_t vd, const __bf16 *rs1, + vuint64m8_t rs2, size_t vl) { + return __riscv_vloxei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei64_v_bf16mf4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv1bf16.p0.nxv1i64.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vloxei64_v_bf16mf4_tum(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, vuint64m1_t rs2, + size_t vl) { + return __riscv_vloxei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei64_v_bf16mf2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], 
[[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv2bf16.p0.nxv2i64.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vloxei64_v_bf16mf2_tum(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, vuint64m2_t rs2, + size_t vl) { + return __riscv_vloxei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei64_v_bf16m1_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv4bf16.p0.nxv4i64.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vloxei64_v_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint64m4_t rs2, + size_t vl) { + return __riscv_vloxei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei64_v_bf16m2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv8bf16.p0.nxv8i64.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vloxei64_v_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint64m8_t rs2, + size_t vl) { + return __riscv_vloxei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei64_v_bf16mf4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv1bf16.p0.nxv1i64.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// 
CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vloxei64_v_bf16mf4_tumu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, vuint64m1_t rs2, + size_t vl) { + return __riscv_vloxei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei64_v_bf16mf2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv2bf16.p0.nxv2i64.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vloxei64_v_bf16mf2_tumu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, vuint64m2_t rs2, + size_t vl) { + return __riscv_vloxei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei64_v_bf16m1_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv4bf16.p0.nxv4i64.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vloxei64_v_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint64m4_t rs2, + size_t vl) { + return __riscv_vloxei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei64_v_bf16m2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv8bf16.p0.nxv8i64.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vloxei64_v_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint64m8_t rs2, + size_t vl) { + return __riscv_vloxei64_tumu(vm, vd, rs1, rs2, vl); +} + +// 
CHECK-RV64-LABEL: define dso_local @test_vloxei64_v_bf16mf4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv1bf16.p0.nxv1i64.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vloxei64_v_bf16mf4_mu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, vuint64m1_t rs2, + size_t vl) { + return __riscv_vloxei64_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei64_v_bf16mf2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv2bf16.p0.nxv2i64.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vloxei64_v_bf16mf2_mu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, vuint64m2_t rs2, + size_t vl) { + return __riscv_vloxei64_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei64_v_bf16m1_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv4bf16.p0.nxv4i64.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vloxei64_v_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint64m4_t rs2, + size_t vl) { + return __riscv_vloxei64_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei64_v_bf16m2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: 
[[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv8bf16.p0.nxv8i64.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vloxei64_v_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint64m8_t rs2, + size_t vl) { + return __riscv_vloxei64_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxei8.c new file mode 100644 index 0000000000000..93a6c28e4a79c --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxei8.c @@ -0,0 +1,290 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16mf4_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv1bf16.p0.nxv1i8.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vloxei8_v_bf16mf4_tu(vbfloat16mf4_t vd, const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16mf2_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv2bf16.p0.nxv2i8.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret 
[[TMP0]] +// +vbfloat16mf2_t test_vloxei8_v_bf16mf2_tu(vbfloat16mf2_t vd, const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16m1_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv4bf16.p0.nxv4i8.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vloxei8_v_bf16m1_tu(vbfloat16m1_t vd, const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16m2_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv8bf16.p0.nxv8i8.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vloxei8_v_bf16m2_tu(vbfloat16m2_t vd, const __bf16 *rs1, + vuint8m1_t rs2, size_t vl) { + return __riscv_vloxei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16m4_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.nxv16bf16.p0.nxv16i8.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m4_t test_vloxei8_v_bf16m4_tu(vbfloat16m4_t vd, const __bf16 *rs1, + vuint8m2_t rs2, size_t vl) { + return __riscv_vloxei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16m8_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = 
call @llvm.riscv.vloxei.nxv32bf16.p0.nxv32i8.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m8_t test_vloxei8_v_bf16m8_tu(vbfloat16m8_t vd, const __bf16 *rs1, + vuint8m4_t rs2, size_t vl) { + return __riscv_vloxei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16mf4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv1bf16.p0.nxv1i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vloxei8_v_bf16mf4_tum(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, vuint8mf8_t rs2, + size_t vl) { + return __riscv_vloxei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16mf2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv2bf16.p0.nxv2i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vloxei8_v_bf16mf2_tum(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, vuint8mf4_t rs2, + size_t vl) { + return __riscv_vloxei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16m1_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv4bf16.p0.nxv4i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vloxei8_v_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint8mf2_t rs2, + size_t vl) { + return 
__riscv_vloxei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16m2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv8bf16.p0.nxv8i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vloxei8_v_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint8m1_t rs2, + size_t vl) { + return __riscv_vloxei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16m4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv16bf16.p0.nxv16i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m4_t test_vloxei8_v_bf16m4_tum(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint8m2_t rs2, + size_t vl) { + return __riscv_vloxei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16m8_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv32bf16.p0.nxv32i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m8_t test_vloxei8_v_bf16m8_tum(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, vuint8m4_t rs2, + size_t vl) { + return __riscv_vloxei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16mf4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// 
CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv1bf16.p0.nxv1i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vloxei8_v_bf16mf4_tumu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, vuint8mf8_t rs2, + size_t vl) { + return __riscv_vloxei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16mf2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv2bf16.p0.nxv2i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vloxei8_v_bf16mf2_tumu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, vuint8mf4_t rs2, + size_t vl) { + return __riscv_vloxei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16m1_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv4bf16.p0.nxv4i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vloxei8_v_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint8mf2_t rs2, + size_t vl) { + return __riscv_vloxei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16m2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv8bf16.p0.nxv8i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t 
test_vloxei8_v_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint8m1_t rs2, + size_t vl) { + return __riscv_vloxei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16m4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv16bf16.p0.nxv16i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m4_t test_vloxei8_v_bf16m4_tumu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint8m2_t rs2, + size_t vl) { + return __riscv_vloxei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16m8_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv32bf16.p0.nxv32i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m8_t test_vloxei8_v_bf16m8_tumu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, vuint8m4_t rs2, + size_t vl) { + return __riscv_vloxei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16mf4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv1bf16.p0.nxv1i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vloxei8_v_bf16mf4_mu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, vuint8mf8_t rs2, + size_t vl) { + return __riscv_vloxei8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16mf2_mu( +// 
CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv2bf16.p0.nxv2i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vloxei8_v_bf16mf2_mu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, vuint8mf4_t rs2, + size_t vl) { + return __riscv_vloxei8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16m1_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv4bf16.p0.nxv4i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vloxei8_v_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint8mf2_t rs2, + size_t vl) { + return __riscv_vloxei8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16m2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv8bf16.p0.nxv8i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vloxei8_v_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint8m1_t rs2, + size_t vl) { + return __riscv_vloxei8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16m4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv16bf16.p0.nxv16i8.i64( [[VD]], ptr [[RS1]], 
[[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m4_t test_vloxei8_v_bf16m4_mu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint8m2_t rs2, + size_t vl) { + return __riscv_vloxei8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vloxei8_v_bf16m8_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vloxei.mask.nxv32bf16.p0.nxv32i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m8_t test_vloxei8_v_bf16m8_mu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, vuint8m4_t rs2, + size_t vl) { + return __riscv_vloxei8_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg2ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg2ei32.c new file mode 100644 index 0000000000000..2dc68cf30319c --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg2ei32.c @@ -0,0 +1,264 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16mf4x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) 
@llvm.riscv.vloxseg2.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vloxseg2ei32_v_bf16mf4x2_tu(vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg2ei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16mf2x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vloxseg2ei32_v_bf16mf2x2_tu(vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16m1x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vloxseg2ei32_v_bf16m1x2_tu(vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg2ei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) 
@test_vloxseg2ei32_v_bf16m2x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i32.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vloxseg2ei32_v_bf16m2x2_tu(vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint32m4_t rs2, size_t vl) { + return __riscv_vloxseg2ei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16m4x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i32.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m4x2_t test_vloxseg2ei32_v_bf16m4x2_tu(vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint32m8_t rs2, size_t vl) { + return __riscv_vloxseg2ei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16mf4x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: 
ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vloxseg2ei32_v_bf16mf4x2_tum(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, + size_t vl) { + return __riscv_vloxseg2ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16mf2x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vloxseg2ei32_v_bf16mf2x2_tum(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16m1x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vloxseg2ei32_v_bf16m1x2_tum(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg2ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16m2x2_tum( 
+// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vloxseg2ei32_v_bf16m2x2_tum(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint32m4_t rs2, size_t vl) { + return __riscv_vloxseg2ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16m4x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i32.nxv16i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m4x2_t test_vloxseg2ei32_v_bf16m4x2_tum(vbool4_t vm, + vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint32m8_t rs2, size_t vl) { + return __riscv_vloxseg2ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16mf4x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 
2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vloxseg2ei32_v_bf16mf4x2_tumu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, + size_t vl) { + return __riscv_vloxseg2ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16mf2x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vloxseg2ei32_v_bf16mf2x2_tumu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, + size_t vl) { + return __riscv_vloxseg2ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16m1x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vloxseg2ei32_v_bf16m1x2_tumu(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg2ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// 
CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16m2x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vloxseg2ei32_v_bf16m2x2_tumu(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint32m4_t rs2, size_t vl) { + return __riscv_vloxseg2ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16m4x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i32.nxv16i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m4x2_t test_vloxseg2ei32_v_bf16m4x2_tumu(vbool4_t vm, + vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint32m8_t rs2, size_t vl) { + return __riscv_vloxseg2ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16mf4x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) 
@llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vloxseg2ei32_v_bf16mf4x2_mu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg2ei32_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16mf2x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vloxseg2ei32_v_bf16mf2x2_mu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei32_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16m1x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vloxseg2ei32_v_bf16m1x2_mu(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint32m2_t 
rs2, size_t vl) { + return __riscv_vloxseg2ei32_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16m2x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vloxseg2ei32_v_bf16m2x2_mu(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint32m4_t rs2, size_t vl) { + return __riscv_vloxseg2ei32_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei32_v_bf16m4x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i32.nxv16i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m4x2_t test_vloxseg2ei32_v_bf16m4x2_mu(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint32m8_t rs2, size_t vl) { + return __riscv_vloxseg2ei32_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg2ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg2ei64.c new file mode 100644 index 0000000000000..aebef33ace64a --- /dev/null +++ 
b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg2ei64.c @@ -0,0 +1,213 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei64_v_bf16mf4x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vloxseg2ei64_v_bf16mf4x2_tu(vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei64_v_bf16mf2x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vloxseg2ei64_v_bf16mf2x2_tu(vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg2ei64_tu(vd, rs1, 
rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei64_v_bf16m1x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vloxseg2ei64_v_bf16m1x2_tu(vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg2ei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei64_v_bf16m2x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i64.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vloxseg2ei64_v_bf16m2x2_tu(vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint64m8_t rs2, size_t vl) { + return __riscv_vloxseg2ei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei64_v_bf16mf4x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 2) 
[[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vloxseg2ei64_v_bf16mf4x2_tum(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei64_v_bf16mf2x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vloxseg2ei64_v_bf16mf2x2_tum(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg2ei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei64_v_bf16m1x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vloxseg2ei64_v_bf16m1x2_tum(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg2ei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: 
define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei64_v_bf16m2x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vloxseg2ei64_v_bf16m2x2_tum(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint64m8_t rs2, size_t vl) { + return __riscv_vloxseg2ei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei64_v_bf16mf4x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vloxseg2ei64_v_bf16mf4x2_tumu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, + size_t vl) { + return __riscv_vloxseg2ei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei64_v_bf16mf2x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) 
@llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vloxseg2ei64_v_bf16mf2x2_tumu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, + size_t vl) { + return __riscv_vloxseg2ei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei64_v_bf16m1x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vloxseg2ei64_v_bf16m1x2_tumu(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg2ei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei64_v_bf16m2x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vloxseg2ei64_v_bf16m2x2_tumu(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, + 
vuint64m8_t rs2, size_t vl) { + return __riscv_vloxseg2ei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei64_v_bf16mf4x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vloxseg2ei64_v_bf16mf4x2_mu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei64_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei64_v_bf16mf2x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vloxseg2ei64_v_bf16mf2x2_mu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg2ei64_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei64_v_bf16m1x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// 
CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vloxseg2ei64_v_bf16m1x2_mu(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg2ei64_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei64_v_bf16m2x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vloxseg2ei64_v_bf16m2x2_mu(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint64m8_t rs2, size_t vl) { + return __riscv_vloxseg2ei64_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg2ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg2ei8.c new file mode 100644 index 0000000000000..fc11aef5bf5e0 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg2ei8.c @@ -0,0 +1,258 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin 
-disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16mf4x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vloxseg2ei8_v_bf16mf4x2_tu(vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg2ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16mf2x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vloxseg2ei8_v_bf16mf2x2_tu(vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg2ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16m1x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 
2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vloxseg2ei8_v_bf16m1x2_tu(vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg2ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16m2x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i8.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vloxseg2ei8_v_bf16m2x2_tu(vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint8m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16m4x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i8.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m4x2_t test_vloxseg2ei8_v_bf16m4x2_tu(vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint8m2_t rs2, size_t vl) { + return __riscv_vloxseg2ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) 
@test_vloxseg2ei8_v_bf16mf4x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vloxseg2ei8_v_bf16mf4x2_tum(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg2ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16mf2x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vloxseg2ei8_v_bf16mf2x2_tum(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg2ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16m1x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) 
@llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vloxseg2ei8_v_bf16m1x2_tum(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg2ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16m2x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vloxseg2ei8_v_bf16m2x2_tum(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint8m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16m4x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i8.nxv16i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m4x2_t test_vloxseg2ei8_v_bf16m4x2_tum(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint8m2_t rs2, size_t vl) 
{ + return __riscv_vloxseg2ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16mf4x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vloxseg2ei8_v_bf16mf4x2_tumu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg2ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16mf2x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vloxseg2ei8_v_bf16mf2x2_tumu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg2ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16m1x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// 
CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vloxseg2ei8_v_bf16m1x2_tumu(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg2ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16m2x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vloxseg2ei8_v_bf16m2x2_tumu(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint8m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16m4x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i8.nxv16i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m4x2_t 
test_vloxseg2ei8_v_bf16m4x2_tumu(vbool4_t vm, + vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint8m2_t rs2, size_t vl) { + return __riscv_vloxseg2ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16mf4x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vloxseg2ei8_v_bf16mf4x2_mu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg2ei8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16mf2x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vloxseg2ei8_v_bf16mf2x2_mu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg2ei8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16m1x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr 
noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vloxseg2ei8_v_bf16m1x2_mu(vbool16_t vm, vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg2ei8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16m2x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vloxseg2ei8_v_bf16m2x2_mu(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint8m1_t rs2, size_t vl) { + return __riscv_vloxseg2ei8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vloxseg2ei8_v_bf16m4x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i8.nxv16i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret 
target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m4x2_t test_vloxseg2ei8_v_bf16m4x2_mu(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint8m2_t rs2, size_t vl) { + return __riscv_vloxseg2ei8_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg3ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg3ei32.c new file mode 100644 index 0000000000000..ac1ac4aba264e --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg3ei32.c @@ -0,0 +1,214 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei32_v_bf16mf4x3_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vloxseg3ei32_v_bf16mf4x3_tu(vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg3ei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei32_v_bf16mf2x3_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 
noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vloxseg3ei32_v_bf16mf2x3_tu(vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg3ei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei32_v_bf16m1x3_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vloxseg3ei32_v_bf16m1x3_tu(vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg3ei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei32_v_bf16m2x3_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i32.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vloxseg3ei32_v_bf16m2x3_tu(vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint32m4_t rs2, size_t vl) { + return 
__riscv_vloxseg3ei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei32_v_bf16mf4x3_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vloxseg3ei32_v_bf16mf4x3_tum(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, + size_t vl) { + return __riscv_vloxseg3ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei32_v_bf16mf2x3_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vloxseg3ei32_v_bf16mf2x3_tum(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg3ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei32_v_bf16m1x3_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: 
[[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vloxseg3ei32_v_bf16m1x3_tum(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg3ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei32_v_bf16m2x3_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vloxseg3ei32_v_bf16m2x3_tum(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint32m4_t rs2, size_t vl) { + return __riscv_vloxseg3ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei32_v_bf16mf4x3_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vloxseg3ei32_v_bf16mf4x3_tumu(vbool64_t vm, 
+ vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, + size_t vl) { + return __riscv_vloxseg3ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei32_v_bf16mf2x3_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vloxseg3ei32_v_bf16mf2x3_tumu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, + size_t vl) { + return __riscv_vloxseg3ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei32_v_bf16m1x3_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vloxseg3ei32_v_bf16m1x3_tumu(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg3ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei32_v_bf16m2x3_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], 
[[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vloxseg3ei32_v_bf16m2x3_tumu(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint32m4_t rs2, size_t vl) { + return __riscv_vloxseg3ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei32_v_bf16mf4x3_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vloxseg3ei32_v_bf16mf4x3_mu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg3ei32_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei32_v_bf16mf2x3_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret 
target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vloxseg3ei32_v_bf16mf2x3_mu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg3ei32_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei32_v_bf16m1x3_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vloxseg3ei32_v_bf16m1x3_mu(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg3ei32_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei32_v_bf16m2x3_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vloxseg3ei32_v_bf16m2x3_mu(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint32m4_t rs2, size_t vl) { + return __riscv_vloxseg3ei32_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg3ei64.c 
b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg3ei64.c new file mode 100644 index 0000000000000..86519aafc1d35 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg3ei64.c @@ -0,0 +1,213 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei64_v_bf16mf4x3_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vloxseg3ei64_v_bf16mf4x3_tu(vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg3ei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei64_v_bf16mf2x3_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] 
+// +vbfloat16mf2x3_t test_vloxseg3ei64_v_bf16mf2x3_tu(vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg3ei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei64_v_bf16m1x3_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vloxseg3ei64_v_bf16m1x3_tu(vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg3ei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei64_v_bf16m2x3_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i64.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vloxseg3ei64_v_bf16m2x3_tu(vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint64m8_t rs2, size_t vl) { + return __riscv_vloxseg3ei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei64_v_bf16mf4x3_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// 
CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vloxseg3ei64_v_bf16mf4x3_tum(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg3ei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei64_v_bf16mf2x3_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vloxseg3ei64_v_bf16mf2x3_tum(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg3ei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei64_v_bf16m1x3_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t 
test_vloxseg3ei64_v_bf16m1x3_tum(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg3ei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei64_v_bf16m2x3_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vloxseg3ei64_v_bf16m2x3_tum(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint64m8_t rs2, size_t vl) { + return __riscv_vloxseg3ei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei64_v_bf16mf4x3_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vloxseg3ei64_v_bf16mf4x3_tumu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, + size_t vl) { + return __riscv_vloxseg3ei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei64_v_bf16mf2x3_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) 
[[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vloxseg3ei64_v_bf16mf2x3_tumu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, + size_t vl) { + return __riscv_vloxseg3ei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei64_v_bf16m1x3_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vloxseg3ei64_v_bf16m1x3_tumu(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg3ei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei64_v_bf16m2x3_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, 
i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vloxseg3ei64_v_bf16m2x3_tumu(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint64m8_t rs2, size_t vl) { + return __riscv_vloxseg3ei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei64_v_bf16mf4x3_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vloxseg3ei64_v_bf16mf4x3_mu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg3ei64_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei64_v_bf16mf2x3_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vloxseg3ei64_v_bf16mf2x3_mu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg3ei64_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) 
@test_vloxseg3ei64_v_bf16m1x3_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vloxseg3ei64_v_bf16m1x3_mu(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg3ei64_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei64_v_bf16m2x3_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vloxseg3ei64_v_bf16m2x3_mu(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint64m8_t rs2, size_t vl) { + return __riscv_vloxseg3ei64_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg3ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg3ei8.c new file mode 100644 index 0000000000000..92ea80dfd2f45 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg3ei8.c @@ -0,0 +1,209 @@ +// NOTE: Assertions have been autogenerated by 
utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei8_v_bf16mf4x3_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vloxseg3ei8_v_bf16mf4x3_tu(vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg3ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei8_v_bf16mf2x3_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vloxseg3ei8_v_bf16mf2x3_tu(vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg3ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei8_v_bf16m1x3_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 
3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vloxseg3ei8_v_bf16m1x3_tu(vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg3ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei8_v_bf16m2x3_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i8.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vloxseg3ei8_v_bf16m2x3_tu(vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint8m1_t rs2, size_t vl) { + return __riscv_vloxseg3ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei8_v_bf16mf4x3_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t 
test_vloxseg3ei8_v_bf16mf4x3_tum(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg3ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei8_v_bf16mf2x3_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vloxseg3ei8_v_bf16mf2x3_tum(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg3ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei8_v_bf16m1x3_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vloxseg3ei8_v_bf16m1x3_tum(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg3ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei8_v_bf16m2x3_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr 
noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vloxseg3ei8_v_bf16m2x3_tum(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint8m1_t rs2, size_t vl) { + return __riscv_vloxseg3ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei8_v_bf16mf4x3_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vloxseg3ei8_v_bf16mf4x3_tumu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg3ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei8_v_bf16mf2x3_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret 
target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vloxseg3ei8_v_bf16mf2x3_tumu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg3ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei8_v_bf16m1x3_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vloxseg3ei8_v_bf16m1x3_tumu(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg3ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei8_v_bf16m2x3_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vloxseg3ei8_v_bf16m2x3_tumu(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint8m1_t rs2, size_t vl) { + return __riscv_vloxseg3ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei8_v_bf16mf4x3_mu( +// 
CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vloxseg3ei8_v_bf16mf4x3_mu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg3ei8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei8_v_bf16mf2x3_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vloxseg3ei8_v_bf16mf2x3_mu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg3ei8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei8_v_bf16m1x3_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr 
[[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vloxseg3ei8_v_bf16m1x3_mu(vbool16_t vm, vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg3ei8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vloxseg3ei8_v_bf16m2x3_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vloxseg3ei8_v_bf16m2x3_mu(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint8m1_t rs2, size_t vl) { + return __riscv_vloxseg3ei8_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg4ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg4ei32.c new file mode 100644 index 0000000000000..7cb17187f7013 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg4ei32.c @@ -0,0 +1,214 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) 
@test_vloxseg4ei32_v_bf16mf4x4_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vloxseg4ei32_v_bf16mf4x4_tu(vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg4ei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei32_v_bf16mf2x4_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vloxseg4ei32_v_bf16mf2x4_tu(vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei32_v_bf16m1x4_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret 
target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vloxseg4ei32_v_bf16m1x4_tu(vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg4ei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei32_v_bf16m2x4_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i32.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vloxseg4ei32_v_bf16m2x4_tu(vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint32m4_t rs2, size_t vl) { + return __riscv_vloxseg4ei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei32_v_bf16mf4x4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vloxseg4ei32_v_bf16mf4x4_tum(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, + size_t vl) { + return __riscv_vloxseg4ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei32_v_bf16mf2x4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef 
[[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vloxseg4ei32_v_bf16mf2x4_tum(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei32_v_bf16m1x4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vloxseg4ei32_v_bf16m1x4_tum(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg4ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei32_v_bf16m2x4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret 
target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vloxseg4ei32_v_bf16m2x4_tum(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint32m4_t rs2, size_t vl) { + return __riscv_vloxseg4ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei32_v_bf16mf4x4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vloxseg4ei32_v_bf16mf4x4_tumu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, + size_t vl) { + return __riscv_vloxseg4ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei32_v_bf16mf2x4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vloxseg4ei32_v_bf16mf2x4_tumu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, + size_t vl) { + return __riscv_vloxseg4ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) 
@test_vloxseg4ei32_v_bf16m1x4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vloxseg4ei32_v_bf16m1x4_tumu(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg4ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei32_v_bf16m2x4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vloxseg4ei32_v_bf16m2x4_tumu(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint32m4_t rs2, size_t vl) { + return __riscv_vloxseg4ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei32_v_bf16mf4x4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) 
@llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vloxseg4ei32_v_bf16mf4x4_mu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg4ei32_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei32_v_bf16mf2x4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vloxseg4ei32_v_bf16mf2x4_mu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei32_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei32_v_bf16m1x4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vloxseg4ei32_v_bf16m1x4_mu(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint32m2_t 
rs2, size_t vl) { + return __riscv_vloxseg4ei32_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei32_v_bf16m2x4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vloxseg4ei32_v_bf16m2x4_mu(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint32m4_t rs2, size_t vl) { + return __riscv_vloxseg4ei32_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg4ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg4ei64.c new file mode 100644 index 0000000000000..054d04ff16da1 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg4ei64.c @@ -0,0 +1,213 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei64_v_bf16mf4x4_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vloxseg4ei64_v_bf16mf4x4_tu(vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei64_v_bf16mf2x4_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vloxseg4ei64_v_bf16mf2x4_tu(vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg4ei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei64_v_bf16m1x4_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vloxseg4ei64_v_bf16m1x4_tu(vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg4ei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local 
target("riscv.vector.tuple", , 4) @test_vloxseg4ei64_v_bf16m2x4_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i64.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vloxseg4ei64_v_bf16m2x4_tu(vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint64m8_t rs2, size_t vl) { + return __riscv_vloxseg4ei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei64_v_bf16mf4x4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vloxseg4ei64_v_bf16mf4x4_tum(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei64_v_bf16mf2x4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) 
@llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vloxseg4ei64_v_bf16mf2x4_tum(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg4ei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei64_v_bf16m1x4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vloxseg4ei64_v_bf16m1x4_tum(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg4ei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei64_v_bf16m2x4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vloxseg4ei64_v_bf16m2x4_tum(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint64m8_t 
rs2, size_t vl) { + return __riscv_vloxseg4ei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei64_v_bf16mf4x4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vloxseg4ei64_v_bf16mf4x4_tumu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, + size_t vl) { + return __riscv_vloxseg4ei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei64_v_bf16mf2x4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vloxseg4ei64_v_bf16mf2x4_tumu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, + size_t vl) { + return __riscv_vloxseg4ei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei64_v_bf16m1x4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// 
CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vloxseg4ei64_v_bf16m1x4_tumu(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg4ei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei64_v_bf16m2x4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vloxseg4ei64_v_bf16m2x4_tumu(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint64m8_t rs2, size_t vl) { + return __riscv_vloxseg4ei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei64_v_bf16mf4x4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t 
test_vloxseg4ei64_v_bf16mf4x4_mu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei64_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei64_v_bf16mf2x4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vloxseg4ei64_v_bf16mf2x4_mu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg4ei64_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei64_v_bf16m1x4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vloxseg4ei64_v_bf16m1x4_mu(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg4ei64_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei64_v_bf16m2x4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr 
noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vloxseg4ei64_v_bf16m2x4_mu(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint64m8_t rs2, size_t vl) { + return __riscv_vloxseg4ei64_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg4ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg4ei8.c new file mode 100644 index 0000000000000..f1571233cdd9e --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg4ei8.c @@ -0,0 +1,209 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei8_v_bf16mf4x4_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t 
test_vloxseg4ei8_v_bf16mf4x4_tu(vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg4ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei8_v_bf16mf2x4_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vloxseg4ei8_v_bf16mf2x4_tu(vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg4ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei8_v_bf16m1x4_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vloxseg4ei8_v_bf16m1x4_tu(vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg4ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei8_v_bf16m2x4_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i8.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vloxseg4ei8_v_bf16m2x4_tu(vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint8m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei8_v_bf16mf4x4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vloxseg4ei8_v_bf16mf4x4_tum(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg4ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei8_v_bf16mf2x4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vloxseg4ei8_v_bf16mf2x4_tum(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + 
return __riscv_vloxseg4ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei8_v_bf16m1x4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vloxseg4ei8_v_bf16m1x4_tum(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg4ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei8_v_bf16m2x4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vloxseg4ei8_v_bf16m2x4_tum(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint8m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei8_v_bf16mf4x4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: 
[[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vloxseg4ei8_v_bf16mf4x4_tumu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg4ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei8_v_bf16mf2x4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vloxseg4ei8_v_bf16mf2x4_tumu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg4ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei8_v_bf16m1x4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vloxseg4ei8_v_bf16m1x4_tumu(vbool16_t vm, + 
vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg4ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei8_v_bf16m2x4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vloxseg4ei8_v_bf16m2x4_tumu(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint8m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei8_v_bf16mf4x4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vloxseg4ei8_v_bf16mf4x4_mu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg4ei8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei8_v_bf16mf2x4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef 
[[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vloxseg4ei8_v_bf16mf2x4_mu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg4ei8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei8_v_bf16m1x4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vloxseg4ei8_v_bf16m1x4_mu(vbool16_t vm, vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg4ei8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vloxseg4ei8_v_bf16m2x4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// 
+vbfloat16m2x4_t test_vloxseg4ei8_v_bf16m2x4_mu(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint8m1_t rs2, size_t vl) { + return __riscv_vloxseg4ei8_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg5ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg5ei32.c new file mode 100644 index 0000000000000..3f57c002fe4c2 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg5ei32.c @@ -0,0 +1,164 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei32_v_bf16mf4x5_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vloxseg5ei32_v_bf16mf4x5_tu(vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg5ei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei32_v_bf16mf2x5_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: 
entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vloxseg5ei32_v_bf16mf2x5_tu(vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg5ei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei32_v_bf16m1x5_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vloxseg5ei32_v_bf16m1x5_tu(vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg5ei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei32_v_bf16mf4x5_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vloxseg5ei32_v_bf16mf4x5_tum(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, + size_t vl) { + 
return __riscv_vloxseg5ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei32_v_bf16mf2x5_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vloxseg5ei32_v_bf16mf2x5_tum(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg5ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei32_v_bf16m1x5_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vloxseg5ei32_v_bf16m1x5_tum(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg5ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei32_v_bf16mf4x5_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// 
CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vloxseg5ei32_v_bf16mf4x5_tumu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, + size_t vl) { + return __riscv_vloxseg5ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei32_v_bf16mf2x5_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vloxseg5ei32_v_bf16mf2x5_tumu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, + size_t vl) { + return __riscv_vloxseg5ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei32_v_bf16m1x5_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t 
test_vloxseg5ei32_v_bf16m1x5_tumu(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg5ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei32_v_bf16mf4x5_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vloxseg5ei32_v_bf16mf4x5_mu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg5ei32_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei32_v_bf16mf2x5_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vloxseg5ei32_v_bf16mf2x5_mu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg5ei32_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei32_v_bf16m1x5_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) 
[[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vloxseg5ei32_v_bf16m1x5_mu(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg5ei32_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg5ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg5ei64.c new file mode 100644 index 0000000000000..fb9850cb0bff9 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg5ei64.c @@ -0,0 +1,163 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei64_v_bf16mf4x5_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// 
+vbfloat16mf4x5_t test_vloxseg5ei64_v_bf16mf4x5_tu(vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg5ei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei64_v_bf16mf2x5_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vloxseg5ei64_v_bf16mf2x5_tu(vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg5ei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei64_v_bf16m1x5_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vloxseg5ei64_v_bf16m1x5_tu(vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg5ei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei64_v_bf16mf4x5_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// 
CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vloxseg5ei64_v_bf16mf4x5_tum(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg5ei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei64_v_bf16mf2x5_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vloxseg5ei64_v_bf16mf2x5_tum(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg5ei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei64_v_bf16m1x5_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t 
test_vloxseg5ei64_v_bf16m1x5_tum(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg5ei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei64_v_bf16mf4x5_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vloxseg5ei64_v_bf16mf4x5_tumu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, + size_t vl) { + return __riscv_vloxseg5ei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei64_v_bf16mf2x5_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vloxseg5ei64_v_bf16mf2x5_tumu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, + size_t vl) { + return __riscv_vloxseg5ei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei64_v_bf16m1x5_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], 
target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vloxseg5ei64_v_bf16m1x5_tumu(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg5ei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei64_v_bf16mf4x5_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vloxseg5ei64_v_bf16mf4x5_mu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg5ei64_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei64_v_bf16mf2x5_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], 
[[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vloxseg5ei64_v_bf16mf2x5_mu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg5ei64_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei64_v_bf16m1x5_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vloxseg5ei64_v_bf16m1x5_mu(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg5ei64_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg5ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg5ei8.c new file mode 100644 index 0000000000000..06f3e8ffbf712 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg5ei8.c @@ -0,0 +1,160 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei8_v_bf16mf4x5_tu( +// 
CHECK-RV64-SAME: target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vloxseg5ei8_v_bf16mf4x5_tu(vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg5ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei8_v_bf16mf2x5_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vloxseg5ei8_v_bf16mf2x5_tu(vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg5ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei8_v_bf16m1x5_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t 
test_vloxseg5ei8_v_bf16m1x5_tu(vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg5ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei8_v_bf16mf4x5_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vloxseg5ei8_v_bf16mf4x5_tum(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg5ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei8_v_bf16mf2x5_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vloxseg5ei8_v_bf16mf2x5_tum(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg5ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei8_v_bf16m1x5_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], 
[[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vloxseg5ei8_v_bf16m1x5_tum(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg5ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei8_v_bf16mf4x5_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vloxseg5ei8_v_bf16mf4x5_tumu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg5ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei8_v_bf16mf2x5_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret 
target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vloxseg5ei8_v_bf16mf2x5_tumu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg5ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei8_v_bf16m1x5_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vloxseg5ei8_v_bf16m1x5_tumu(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg5ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei8_v_bf16mf4x5_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vloxseg5ei8_v_bf16mf4x5_mu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg5ei8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei8_v_bf16mf2x5_mu( +// 
CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vloxseg5ei8_v_bf16mf2x5_mu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg5ei8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vloxseg5ei8_v_bf16m1x5_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vloxseg5ei8_v_bf16m1x5_mu(vbool16_t vm, vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg5ei8_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg6ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg6ei32.c new file mode 100644 index 0000000000000..94e44d09e8313 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg6ei32.c @@ -0,0 +1,164 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 
4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei32_v_bf16mf4x6_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vloxseg6ei32_v_bf16mf4x6_tu(vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg6ei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei32_v_bf16mf2x6_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vloxseg6ei32_v_bf16mf2x6_tu(vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg6ei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei32_v_bf16m1x6_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], 
[[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vloxseg6ei32_v_bf16m1x6_tu(vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg6ei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei32_v_bf16mf4x6_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vloxseg6ei32_v_bf16mf4x6_tum(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, + size_t vl) { + return __riscv_vloxseg6ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei32_v_bf16mf2x6_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// 
+vbfloat16mf2x6_t test_vloxseg6ei32_v_bf16mf2x6_tum(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg6ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei32_v_bf16m1x6_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vloxseg6ei32_v_bf16m1x6_tum(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg6ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei32_v_bf16mf4x6_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vloxseg6ei32_v_bf16mf4x6_tumu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, + size_t vl) { + return __riscv_vloxseg6ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei32_v_bf16mf2x6_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], 
target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vloxseg6ei32_v_bf16mf2x6_tumu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, + size_t vl) { + return __riscv_vloxseg6ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei32_v_bf16m1x6_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vloxseg6ei32_v_bf16m1x6_tumu(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg6ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei32_v_bf16mf4x6_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], 
[[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vloxseg6ei32_v_bf16mf4x6_mu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg6ei32_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei32_v_bf16mf2x6_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vloxseg6ei32_v_bf16mf2x6_mu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg6ei32_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei32_v_bf16m1x6_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vloxseg6ei32_v_bf16m1x6_mu(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg6ei32_mu(vm, vd, rs1, rs2, vl); +} diff --git 
a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg6ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg6ei64.c new file mode 100644 index 0000000000000..2981b18c491d4 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg6ei64.c @@ -0,0 +1,163 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei64_v_bf16mf4x6_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vloxseg6ei64_v_bf16mf4x6_tu(vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg6ei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei64_v_bf16mf2x6_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], 
[[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vloxseg6ei64_v_bf16mf2x6_tu(vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg6ei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei64_v_bf16m1x6_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vloxseg6ei64_v_bf16m1x6_tu(vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg6ei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei64_v_bf16mf4x6_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vloxseg6ei64_v_bf16mf4x6_tum(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg6ei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei64_v_bf16mf2x6_tum( +// CHECK-RV64-SAME: [[VM:%.*]], 
target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vloxseg6ei64_v_bf16mf2x6_tum(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg6ei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei64_v_bf16m1x6_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vloxseg6ei64_v_bf16m1x6_tum(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg6ei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei64_v_bf16mf4x6_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], 
[[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vloxseg6ei64_v_bf16mf4x6_tumu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, + size_t vl) { + return __riscv_vloxseg6ei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei64_v_bf16mf2x6_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vloxseg6ei64_v_bf16mf2x6_tumu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, + size_t vl) { + return __riscv_vloxseg6ei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei64_v_bf16m1x6_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vloxseg6ei64_v_bf16m1x6_tumu(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg6ei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local 
target("riscv.vector.tuple", , 6) @test_vloxseg6ei64_v_bf16mf4x6_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vloxseg6ei64_v_bf16mf4x6_mu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg6ei64_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei64_v_bf16mf2x6_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vloxseg6ei64_v_bf16mf2x6_mu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg6ei64_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei64_v_bf16m1x6_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) 
@llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vloxseg6ei64_v_bf16m1x6_mu(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg6ei64_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg6ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg6ei8.c new file mode 100644 index 0000000000000..23fa390aef0dd --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg6ei8.c @@ -0,0 +1,160 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei8_v_bf16mf4x6_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vloxseg6ei8_v_bf16mf4x6_tu(vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg6ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: 
define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei8_v_bf16mf2x6_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vloxseg6ei8_v_bf16mf2x6_tu(vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg6ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei8_v_bf16m1x6_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vloxseg6ei8_v_bf16m1x6_tu(vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg6ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei8_v_bf16mf4x6_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 
[[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vloxseg6ei8_v_bf16mf4x6_tum(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg6ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei8_v_bf16mf2x6_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vloxseg6ei8_v_bf16mf2x6_tum(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg6ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei8_v_bf16m1x6_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vloxseg6ei8_v_bf16m1x6_tum(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg6ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) 
@test_vloxseg6ei8_v_bf16mf4x6_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vloxseg6ei8_v_bf16mf4x6_tumu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg6ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei8_v_bf16mf2x6_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vloxseg6ei8_v_bf16mf2x6_tumu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg6ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei8_v_bf16m1x6_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) 
@llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vloxseg6ei8_v_bf16m1x6_tumu(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg6ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei8_v_bf16mf4x6_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vloxseg6ei8_v_bf16mf4x6_mu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg6ei8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei8_v_bf16mf2x6_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vloxseg6ei8_v_bf16mf2x6_mu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, 
size_t vl) { + return __riscv_vloxseg6ei8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vloxseg6ei8_v_bf16m1x6_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vloxseg6ei8_v_bf16m1x6_mu(vbool16_t vm, vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg6ei8_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg7ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg7ei32.c new file mode 100644 index 0000000000000..f3293d0398585 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg7ei32.c @@ -0,0 +1,164 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei32_v_bf16mf4x7_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vloxseg7ei32_v_bf16mf4x7_tu(vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg7ei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei32_v_bf16mf2x7_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vloxseg7ei32_v_bf16mf2x7_tu(vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg7ei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei32_v_bf16m1x7_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vloxseg7ei32_v_bf16m1x7_tu(vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg7ei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local 
target("riscv.vector.tuple", , 7) @test_vloxseg7ei32_v_bf16mf4x7_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vloxseg7ei32_v_bf16mf4x7_tum(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, + size_t vl) { + return __riscv_vloxseg7ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei32_v_bf16mf2x7_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vloxseg7ei32_v_bf16mf2x7_tum(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg7ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei32_v_bf16m1x7_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) 
@llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vloxseg7ei32_v_bf16m1x7_tum(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg7ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei32_v_bf16mf4x7_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vloxseg7ei32_v_bf16mf4x7_tumu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, + size_t vl) { + return __riscv_vloxseg7ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei32_v_bf16mf2x7_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vloxseg7ei32_v_bf16mf2x7_tumu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, 
+ vuint32m1_t rs2, + size_t vl) { + return __riscv_vloxseg7ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei32_v_bf16m1x7_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vloxseg7ei32_v_bf16m1x7_tumu(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg7ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei32_v_bf16mf4x7_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vloxseg7ei32_v_bf16mf4x7_mu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg7ei32_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei32_v_bf16mf2x7_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// 
CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vloxseg7ei32_v_bf16mf2x7_mu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg7ei32_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei32_v_bf16m1x7_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vloxseg7ei32_v_bf16m1x7_mu(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg7ei32_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg7ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg7ei64.c new file mode 100644 index 0000000000000..10209cc2192ed --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg7ei64.c @@ -0,0 +1,163 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin 
-disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei64_v_bf16mf4x7_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vloxseg7ei64_v_bf16mf4x7_tu(vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg7ei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei64_v_bf16mf2x7_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vloxseg7ei64_v_bf16mf2x7_tu(vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg7ei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei64_v_bf16m1x7_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vloxseg7ei64_v_bf16m1x7_tu(vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg7ei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei64_v_bf16mf4x7_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vloxseg7ei64_v_bf16mf4x7_tum(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg7ei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei64_v_bf16mf2x7_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vloxseg7ei64_v_bf16mf2x7_tum(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, 
size_t vl) { + return __riscv_vloxseg7ei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei64_v_bf16m1x7_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vloxseg7ei64_v_bf16m1x7_tum(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg7ei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei64_v_bf16mf4x7_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vloxseg7ei64_v_bf16mf4x7_tumu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, + size_t vl) { + return __riscv_vloxseg7ei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei64_v_bf16mf2x7_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: 
entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vloxseg7ei64_v_bf16mf2x7_tumu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, + size_t vl) { + return __riscv_vloxseg7ei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei64_v_bf16m1x7_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vloxseg7ei64_v_bf16m1x7_tumu(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg7ei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei64_v_bf16mf4x7_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t 
test_vloxseg7ei64_v_bf16mf4x7_mu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg7ei64_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei64_v_bf16mf2x7_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vloxseg7ei64_v_bf16mf2x7_mu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg7ei64_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei64_v_bf16m1x7_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vloxseg7ei64_v_bf16m1x7_mu(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg7ei64_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg7ei8.c 
b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg7ei8.c new file mode 100644 index 0000000000000..8f03ff1603085 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg7ei8.c @@ -0,0 +1,160 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei8_v_bf16mf4x7_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vloxseg7ei8_v_bf16mf4x7_tu(vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg7ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei8_v_bf16mf2x7_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// 
+vbfloat16mf2x7_t test_vloxseg7ei8_v_bf16mf2x7_tu(vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg7ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei8_v_bf16m1x7_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vloxseg7ei8_v_bf16m1x7_tu(vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg7ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei8_v_bf16mf4x7_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vloxseg7ei8_v_bf16mf4x7_tum(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg7ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei8_v_bf16mf2x7_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) 
#[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vloxseg7ei8_v_bf16mf2x7_tum(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg7ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei8_v_bf16m1x7_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vloxseg7ei8_v_bf16m1x7_tum(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg7ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei8_v_bf16mf4x7_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// 
+vbfloat16mf4x7_t test_vloxseg7ei8_v_bf16mf4x7_tumu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg7ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei8_v_bf16mf2x7_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vloxseg7ei8_v_bf16mf2x7_tumu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg7ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei8_v_bf16m1x7_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vloxseg7ei8_v_bf16m1x7_tumu(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg7ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei8_v_bf16mf4x7_mu( +// CHECK-RV64-SAME: [[VM:%.*]], 
target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vloxseg7ei8_v_bf16mf4x7_mu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg7ei8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei8_v_bf16mf2x7_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vloxseg7ei8_v_bf16mf2x7_mu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg7ei8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vloxseg7ei8_v_bf16m1x7_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 
[[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vloxseg7ei8_v_bf16m1x7_mu(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg7ei8_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg8ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg8ei32.c new file mode 100644 index 0000000000000..a8c0d4909af09 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg8ei32.c @@ -0,0 +1,164 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei32_v_bf16mf4x8_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vloxseg8ei32_v_bf16mf4x8_tu(vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg8ei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei32_v_bf16mf2x8_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 8) [[VD:%.*]], 
ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vloxseg8ei32_v_bf16mf2x8_tu(vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg8ei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei32_v_bf16m1x8_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vloxseg8ei32_v_bf16m1x8_tu(vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg8ei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei32_v_bf16mf4x8_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t 
test_vloxseg8ei32_v_bf16mf4x8_tum(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, + size_t vl) { + return __riscv_vloxseg8ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei32_v_bf16mf2x8_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vloxseg8ei32_v_bf16mf2x8_tum(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg8ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei32_v_bf16m1x8_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vloxseg8ei32_v_bf16m1x8_tum(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg8ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei32_v_bf16mf4x8_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) 
[[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vloxseg8ei32_v_bf16mf4x8_tumu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, + size_t vl) { + return __riscv_vloxseg8ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei32_v_bf16mf2x8_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vloxseg8ei32_v_bf16mf2x8_tumu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, + size_t vl) { + return __riscv_vloxseg8ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei32_v_bf16m1x8_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], 
i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vloxseg8ei32_v_bf16m1x8_tumu(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg8ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei32_v_bf16mf4x8_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vloxseg8ei32_v_bf16mf4x8_mu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vloxseg8ei32_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei32_v_bf16mf2x8_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vloxseg8ei32_v_bf16mf2x8_mu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vloxseg8ei32_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) 
@test_vloxseg8ei32_v_bf16m1x8_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vloxseg8ei32_v_bf16m1x8_mu(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vloxseg8ei32_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg8ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg8ei64.c new file mode 100644 index 0000000000000..08c8fdc56c9cb --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg8ei64.c @@ -0,0 +1,163 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei64_v_bf16mf4x8_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], 
i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vloxseg8ei64_v_bf16mf4x8_tu(vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg8ei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei64_v_bf16mf2x8_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vloxseg8ei64_v_bf16mf2x8_tu(vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg8ei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei64_v_bf16m1x8_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vloxseg8ei64_v_bf16m1x8_tu(vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg8ei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei64_v_bf16mf4x8_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], 
[[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vloxseg8ei64_v_bf16mf4x8_tum(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg8ei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei64_v_bf16mf2x8_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vloxseg8ei64_v_bf16mf2x8_tum(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg8ei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei64_v_bf16m1x8_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret 
target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vloxseg8ei64_v_bf16m1x8_tum(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg8ei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei64_v_bf16mf4x8_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vloxseg8ei64_v_bf16mf4x8_tumu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, + size_t vl) { + return __riscv_vloxseg8ei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei64_v_bf16mf2x8_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vloxseg8ei64_v_bf16mf2x8_tumu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, + size_t vl) { + return __riscv_vloxseg8ei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) 
@test_vloxseg8ei64_v_bf16m1x8_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vloxseg8ei64_v_bf16m1x8_tumu(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg8ei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei64_v_bf16mf4x8_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vloxseg8ei64_v_bf16mf4x8_mu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vloxseg8ei64_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei64_v_bf16mf2x8_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) 
@llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vloxseg8ei64_v_bf16mf2x8_mu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vloxseg8ei64_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei64_v_bf16m1x8_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vloxseg8ei64_v_bf16m1x8_mu(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vloxseg8ei64_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg8ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg8ei8.c new file mode 100644 index 0000000000000..247dcfb578294 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vloxseg8ei8.c @@ -0,0 +1,160 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck 
--check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei8_v_bf16mf4x8_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vloxseg8ei8_v_bf16mf4x8_tu(vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg8ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei8_v_bf16mf2x8_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vloxseg8ei8_v_bf16mf2x8_tu(vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg8ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei8_v_bf16m1x8_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i8.i64(target("riscv.vector.tuple", 
, 8) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vloxseg8ei8_v_bf16m1x8_tu(vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg8ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei8_v_bf16mf4x8_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vloxseg8ei8_v_bf16mf4x8_tum(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg8ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei8_v_bf16mf2x8_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vloxseg8ei8_v_bf16mf2x8_tum(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg8ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", 
, 8) @test_vloxseg8ei8_v_bf16m1x8_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vloxseg8ei8_v_bf16m1x8_tum(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg8ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei8_v_bf16mf4x8_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vloxseg8ei8_v_bf16mf4x8_tumu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vloxseg8ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei8_v_bf16mf2x8_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) 
@llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vloxseg8ei8_v_bf16mf2x8_tumu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg8ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei8_v_bf16m1x8_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vloxseg8ei8_v_bf16m1x8_tumu(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg8ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei8_v_bf16mf4x8_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vloxseg8ei8_v_bf16mf4x8_mu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint8mf8_t 
rs2, size_t vl) { + return __riscv_vloxseg8ei8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei8_v_bf16mf2x8_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vloxseg8ei8_v_bf16mf2x8_mu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vloxseg8ei8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vloxseg8ei8_v_bf16m1x8_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vloxseg8ei8_v_bf16m1x8_mu(vbool16_t vm, vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vloxseg8ei8_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxei32.c new file mode 100644 index 0000000000000..eef643f85e038 --- /dev/null +++ 
b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxei32.c @@ -0,0 +1,243 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16mf4_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv1bf16.p0.nxv1i32.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vluxei32_v_bf16mf4_tu(vbfloat16mf4_t vd, const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16mf2_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv2bf16.p0.nxv2i32.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vluxei32_v_bf16mf2_tu(vbfloat16mf2_t vd, const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16m1_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv4bf16.p0.nxv4i32.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vluxei32_v_bf16m1_tu(vbfloat16m1_t vd, const 
__bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vluxei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16m2_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv8bf16.p0.nxv8i32.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vluxei32_v_bf16m2_tu(vbfloat16m2_t vd, const __bf16 *rs1, + vuint32m4_t rs2, size_t vl) { + return __riscv_vluxei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16m4_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv16bf16.p0.nxv16i32.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m4_t test_vluxei32_v_bf16m4_tu(vbfloat16m4_t vd, const __bf16 *rs1, + vuint32m8_t rs2, size_t vl) { + return __riscv_vluxei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16mf4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv1bf16.p0.nxv1i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vluxei32_v_bf16mf4_tum(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, vuint32mf2_t rs2, + size_t vl) { + return __riscv_vluxei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16mf2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: 
[[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv2bf16.p0.nxv2i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vluxei32_v_bf16mf2_tum(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, vuint32m1_t rs2, + size_t vl) { + return __riscv_vluxei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16m1_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv4bf16.p0.nxv4i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vluxei32_v_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint32m2_t rs2, + size_t vl) { + return __riscv_vluxei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16m2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv8bf16.p0.nxv8i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vluxei32_v_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint32m4_t rs2, + size_t vl) { + return __riscv_vluxei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16m4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv16bf16.p0.nxv16i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m4_t test_vluxei32_v_bf16m4_tum(vbool4_t vm, vbfloat16m4_t vd, 
+ const __bf16 *rs1, vuint32m8_t rs2, + size_t vl) { + return __riscv_vluxei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16mf4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv1bf16.p0.nxv1i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vluxei32_v_bf16mf4_tumu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, vuint32mf2_t rs2, + size_t vl) { + return __riscv_vluxei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16mf2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv2bf16.p0.nxv2i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vluxei32_v_bf16mf2_tumu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, vuint32m1_t rs2, + size_t vl) { + return __riscv_vluxei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16m1_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv4bf16.p0.nxv4i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vluxei32_v_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint32m2_t rs2, + size_t vl) { + return __riscv_vluxei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16m2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], 
[[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv8bf16.p0.nxv8i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vluxei32_v_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint32m4_t rs2, + size_t vl) { + return __riscv_vluxei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16m4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv16bf16.p0.nxv16i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m4_t test_vluxei32_v_bf16m4_tumu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint32m8_t rs2, + size_t vl) { + return __riscv_vluxei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16mf4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv1bf16.p0.nxv1i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vluxei32_v_bf16mf4_mu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, vuint32mf2_t rs2, + size_t vl) { + return __riscv_vluxei32_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16mf2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv2bf16.p0.nxv2i32.i64( [[VD]], ptr [[RS1]], [[RS2]], 
[[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vluxei32_v_bf16mf2_mu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, vuint32m1_t rs2, + size_t vl) { + return __riscv_vluxei32_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16m1_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv4bf16.p0.nxv4i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vluxei32_v_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint32m2_t rs2, + size_t vl) { + return __riscv_vluxei32_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16m2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv8bf16.p0.nxv8i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vluxei32_v_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint32m4_t rs2, + size_t vl) { + return __riscv_vluxei32_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei32_v_bf16m4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv16bf16.p0.nxv16i32.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m4_t test_vluxei32_v_bf16m4_mu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint32m8_t rs2, + size_t vl) { + return __riscv_vluxei32_mu(vm, vd, rs1, rs2, vl); +} 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxei64.c new file mode 100644 index 0000000000000..08eac74a0163b --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxei64.c @@ -0,0 +1,196 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local @test_vluxei64_v_bf16mf4_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv1bf16.p0.nxv1i64.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vluxei64_v_bf16mf4_tu(vbfloat16mf4_t vd, const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vluxei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei64_v_bf16mf2_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv2bf16.p0.nxv2i64.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vluxei64_v_bf16mf2_tu(vbfloat16mf2_t vd, const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vluxei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei64_v_bf16m1_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef 
[[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv4bf16.p0.nxv4i64.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vluxei64_v_bf16m1_tu(vbfloat16m1_t vd, const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vluxei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei64_v_bf16m2_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv8bf16.p0.nxv8i64.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vluxei64_v_bf16m2_tu(vbfloat16m2_t vd, const __bf16 *rs1, + vuint64m8_t rs2, size_t vl) { + return __riscv_vluxei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei64_v_bf16mf4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv1bf16.p0.nxv1i64.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vluxei64_v_bf16mf4_tum(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, vuint64m1_t rs2, + size_t vl) { + return __riscv_vluxei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei64_v_bf16mf2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv2bf16.p0.nxv2i64.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vluxei64_v_bf16mf2_tum(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 
*rs1, vuint64m2_t rs2, + size_t vl) { + return __riscv_vluxei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei64_v_bf16m1_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv4bf16.p0.nxv4i64.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vluxei64_v_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint64m4_t rs2, + size_t vl) { + return __riscv_vluxei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei64_v_bf16m2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv8bf16.p0.nxv8i64.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vluxei64_v_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint64m8_t rs2, + size_t vl) { + return __riscv_vluxei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei64_v_bf16mf4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv1bf16.p0.nxv1i64.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vluxei64_v_bf16mf4_tumu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, vuint64m1_t rs2, + size_t vl) { + return __riscv_vluxei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei64_v_bf16mf2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], 
[[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv2bf16.p0.nxv2i64.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vluxei64_v_bf16mf2_tumu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, vuint64m2_t rs2, + size_t vl) { + return __riscv_vluxei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei64_v_bf16m1_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv4bf16.p0.nxv4i64.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vluxei64_v_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint64m4_t rs2, + size_t vl) { + return __riscv_vluxei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei64_v_bf16m2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv8bf16.p0.nxv8i64.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vluxei64_v_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint64m8_t rs2, + size_t vl) { + return __riscv_vluxei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei64_v_bf16mf4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv1bf16.p0.nxv1i64.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// 
CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vluxei64_v_bf16mf4_mu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, vuint64m1_t rs2, + size_t vl) { + return __riscv_vluxei64_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei64_v_bf16mf2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv2bf16.p0.nxv2i64.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vluxei64_v_bf16mf2_mu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, vuint64m2_t rs2, + size_t vl) { + return __riscv_vluxei64_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei64_v_bf16m1_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv4bf16.p0.nxv4i64.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vluxei64_v_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint64m4_t rs2, + size_t vl) { + return __riscv_vluxei64_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei64_v_bf16m2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv8bf16.p0.nxv8i64.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vluxei64_v_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint64m8_t rs2, + size_t vl) { + return __riscv_vluxei64_mu(vm, vd, rs1, rs2, vl); +} diff --git 
a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxei8.c new file mode 100644 index 0000000000000..88c00a6c86423 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxei8.c @@ -0,0 +1,290 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16mf4_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv1bf16.p0.nxv1i8.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vluxei8_v_bf16mf4_tu(vbfloat16mf4_t vd, const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16mf2_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv2bf16.p0.nxv2i8.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vluxei8_v_bf16mf2_tu(vbfloat16mf2_t vd, const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16m1_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// 
CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv4bf16.p0.nxv4i8.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vluxei8_v_bf16m1_tu(vbfloat16m1_t vd, const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16m2_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv8bf16.p0.nxv8i8.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vluxei8_v_bf16m2_tu(vbfloat16m2_t vd, const __bf16 *rs1, + vuint8m1_t rs2, size_t vl) { + return __riscv_vluxei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16m4_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv16bf16.p0.nxv16i8.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m4_t test_vluxei8_v_bf16m4_tu(vbfloat16m4_t vd, const __bf16 *rs1, + vuint8m2_t rs2, size_t vl) { + return __riscv_vluxei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16m8_tu( +// CHECK-RV64-SAME: [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.nxv32bf16.p0.nxv32i8.i64( [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m8_t test_vluxei8_v_bf16m8_tu(vbfloat16m8_t vd, const __bf16 *rs1, + vuint8m4_t rs2, size_t vl) { + return __riscv_vluxei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16mf4_tum( 
+// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv1bf16.p0.nxv1i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vluxei8_v_bf16mf4_tum(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, vuint8mf8_t rs2, + size_t vl) { + return __riscv_vluxei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16mf2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv2bf16.p0.nxv2i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vluxei8_v_bf16mf2_tum(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, vuint8mf4_t rs2, + size_t vl) { + return __riscv_vluxei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16m1_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv4bf16.p0.nxv4i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vluxei8_v_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint8mf2_t rs2, + size_t vl) { + return __riscv_vluxei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16m2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv8bf16.p0.nxv8i8.i64( [[VD]], 
ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vluxei8_v_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint8m1_t rs2, + size_t vl) { + return __riscv_vluxei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16m4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv16bf16.p0.nxv16i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m4_t test_vluxei8_v_bf16m4_tum(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint8m2_t rs2, + size_t vl) { + return __riscv_vluxei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16m8_tum( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv32bf16.p0.nxv32i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m8_t test_vluxei8_v_bf16m8_tum(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, vuint8m4_t rs2, + size_t vl) { + return __riscv_vluxei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16mf4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv1bf16.p0.nxv1i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vluxei8_v_bf16mf4_tumu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, vuint8mf8_t rs2, + size_t vl) { + return __riscv_vluxei8_tumu(vm, 
vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16mf2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv2bf16.p0.nxv2i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vluxei8_v_bf16mf2_tumu(vbool32_t vm, vbfloat16mf2_t vd, + const __bf16 *rs1, vuint8mf4_t rs2, + size_t vl) { + return __riscv_vluxei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16m1_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv4bf16.p0.nxv4i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vluxei8_v_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint8mf2_t rs2, + size_t vl) { + return __riscv_vluxei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16m2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv8bf16.p0.nxv8i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vluxei8_v_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint8m1_t rs2, + size_t vl) { + return __riscv_vluxei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16m4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: 
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv16bf16.p0.nxv16i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m4_t test_vluxei8_v_bf16m4_tumu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint8m2_t rs2, + size_t vl) { + return __riscv_vluxei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16m8_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv32bf16.p0.nxv32i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m8_t test_vluxei8_v_bf16m8_tumu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, vuint8m4_t rs2, + size_t vl) { + return __riscv_vluxei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16mf4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv1bf16.p0.nxv1i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf4_t test_vluxei8_v_bf16mf4_mu(vbool64_t vm, vbfloat16mf4_t vd, + const __bf16 *rs1, vuint8mf8_t rs2, + size_t vl) { + return __riscv_vluxei8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16mf2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv2bf16.p0.nxv2i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16mf2_t test_vluxei8_v_bf16mf2_mu(vbool32_t vm, 
vbfloat16mf2_t vd, + const __bf16 *rs1, vuint8mf4_t rs2, + size_t vl) { + return __riscv_vluxei8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16m1_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv4bf16.p0.nxv4i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m1_t test_vluxei8_v_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, + const __bf16 *rs1, vuint8mf2_t rs2, + size_t vl) { + return __riscv_vluxei8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16m2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv8bf16.p0.nxv8i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m2_t test_vluxei8_v_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd, + const __bf16 *rs1, vuint8m1_t rs2, + size_t vl) { + return __riscv_vluxei8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16m4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv16bf16.p0.nxv16i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m4_t test_vluxei8_v_bf16m4_mu(vbool4_t vm, vbfloat16m4_t vd, + const __bf16 *rs1, vuint8m2_t rs2, + size_t vl) { + return __riscv_vluxei8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local @test_vluxei8_v_bf16m8_mu( +// CHECK-RV64-SAME: [[VM:%.*]], [[VD:%.*]], ptr noundef [[RS1:%.*]], 
[[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vluxei.mask.nxv32bf16.p0.nxv32i8.i64( [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vbfloat16m8_t test_vluxei8_v_bf16m8_mu(vbool2_t vm, vbfloat16m8_t vd, + const __bf16 *rs1, vuint8m4_t rs2, + size_t vl) { + return __riscv_vluxei8_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg2ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg2ei32.c new file mode 100644 index 0000000000000..a6d5aa949b4d8 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg2ei32.c @@ -0,0 +1,264 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16mf4x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vluxseg2ei32_v_bf16mf4x2_tu(vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg2ei32_tu(vd, rs1, rs2, vl); +} + +// 
CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16mf2x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vluxseg2ei32_v_bf16mf2x2_tu(vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16m1x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vluxseg2ei32_v_bf16m1x2_tu(vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16m2x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i32.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], 
i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vluxseg2ei32_v_bf16m2x2_tu(vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint32m4_t rs2, size_t vl) { + return __riscv_vluxseg2ei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16m4x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i32.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m4x2_t test_vluxseg2ei32_v_bf16m4x2_tu(vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint32m8_t rs2, size_t vl) { + return __riscv_vluxseg2ei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16mf4x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vluxseg2ei32_v_bf16mf4x2_tum(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, + size_t vl) { + return __riscv_vluxseg2ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16mf2x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], 
target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vluxseg2ei32_v_bf16mf2x2_tum(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16m1x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vluxseg2ei32_v_bf16m1x2_tum(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16m2x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], 
[[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vluxseg2ei32_v_bf16m2x2_tum(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint32m4_t rs2, size_t vl) { + return __riscv_vluxseg2ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16m4x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i32.nxv16i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m4x2_t test_vluxseg2ei32_v_bf16m4x2_tum(vbool4_t vm, + vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint32m8_t rs2, size_t vl) { + return __riscv_vluxseg2ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16mf4x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vluxseg2ei32_v_bf16mf4x2_tumu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, + size_t vl) { + return __riscv_vluxseg2ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local 
target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16mf2x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vluxseg2ei32_v_bf16mf2x2_tumu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, + size_t vl) { + return __riscv_vluxseg2ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16m1x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vluxseg2ei32_v_bf16m1x2_tumu(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16m2x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) 
@llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vluxseg2ei32_v_bf16m2x2_tumu(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint32m4_t rs2, size_t vl) { + return __riscv_vluxseg2ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16m4x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i32.nxv16i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m4x2_t test_vluxseg2ei32_v_bf16m4x2_tumu(vbool4_t vm, + vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint32m8_t rs2, size_t vl) { + return __riscv_vluxseg2ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16mf4x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vluxseg2ei32_v_bf16mf4x2_mu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + 
vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg2ei32_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16mf2x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vluxseg2ei32_v_bf16mf2x2_mu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei32_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16m1x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vluxseg2ei32_v_bf16m1x2_mu(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei32_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16m2x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: 
entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vluxseg2ei32_v_bf16m2x2_mu(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint32m4_t rs2, size_t vl) { + return __riscv_vluxseg2ei32_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei32_v_bf16m4x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i32.nxv16i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m4x2_t test_vluxseg2ei32_v_bf16m4x2_mu(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint32m8_t rs2, size_t vl) { + return __riscv_vluxseg2ei32_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg2ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg2ei64.c new file mode 100644 index 0000000000000..f19c96521a7a6 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg2ei64.c @@ -0,0 +1,213 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: 
-emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei64_v_bf16mf4x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vluxseg2ei64_v_bf16mf4x2_tu(vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei64_v_bf16mf2x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vluxseg2ei64_v_bf16mf2x2_tu(vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei64_v_bf16m1x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) 
@llvm.riscv.vluxseg2.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vluxseg2ei64_v_bf16m1x2_tu(vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg2ei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei64_v_bf16m2x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i64.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vluxseg2ei64_v_bf16m2x2_tu(vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint64m8_t rs2, size_t vl) { + return __riscv_vluxseg2ei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei64_v_bf16mf4x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vluxseg2ei64_v_bf16mf4x2_tum(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define 
dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei64_v_bf16mf2x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vluxseg2ei64_v_bf16mf2x2_tum(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei64_v_bf16m1x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vluxseg2ei64_v_bf16m1x2_tum(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg2ei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei64_v_bf16m2x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) 
@llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vluxseg2ei64_v_bf16m2x2_tum(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint64m8_t rs2, size_t vl) { + return __riscv_vluxseg2ei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei64_v_bf16mf4x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vluxseg2ei64_v_bf16mf4x2_tumu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, + size_t vl) { + return __riscv_vluxseg2ei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei64_v_bf16mf2x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vluxseg2ei64_v_bf16mf2x2_tumu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + 
vuint64m2_t rs2, + size_t vl) { + return __riscv_vluxseg2ei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei64_v_bf16m1x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vluxseg2ei64_v_bf16m1x2_tumu(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg2ei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei64_v_bf16m2x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vluxseg2ei64_v_bf16m2x2_tumu(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint64m8_t rs2, size_t vl) { + return __riscv_vluxseg2ei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei64_v_bf16mf4x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// 
CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vluxseg2ei64_v_bf16mf4x2_mu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei64_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei64_v_bf16mf2x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vluxseg2ei64_v_bf16mf2x2_mu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei64_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei64_v_bf16m1x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t 
test_vluxseg2ei64_v_bf16m1x2_mu(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg2ei64_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei64_v_bf16m2x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vluxseg2ei64_v_bf16m2x2_mu(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint64m8_t rs2, size_t vl) { + return __riscv_vluxseg2ei64_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg2ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg2ei8.c new file mode 100644 index 0000000000000..07e820458e709 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg2ei8.c @@ -0,0 +1,258 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16mf4x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) 
#[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vluxseg2ei8_v_bf16mf4x2_tu(vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg2ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16mf2x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vluxseg2ei8_v_bf16mf2x2_tu(vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg2ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16m1x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vluxseg2ei8_v_bf16m1x2_tu(vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return 
__riscv_vluxseg2ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16m2x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i8.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vluxseg2ei8_v_bf16m2x2_tu(vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint8m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16m4x2_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i8.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m4x2_t test_vluxseg2ei8_v_bf16m4x2_tu(vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint8m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16mf4x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) 
@llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vluxseg2ei8_v_bf16mf4x2_tum(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg2ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16mf2x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vluxseg2ei8_v_bf16mf2x2_tum(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg2ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16m1x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vluxseg2ei8_v_bf16m1x2_tum(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, 
size_t vl) { + return __riscv_vluxseg2ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16m2x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vluxseg2ei8_v_bf16m2x2_tum(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint8m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16m4x2_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i8.nxv16i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m4x2_t test_vluxseg2ei8_v_bf16m4x2_tum(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint8m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16mf4x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// 
CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vluxseg2ei8_v_bf16mf4x2_tumu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg2ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16mf2x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vluxseg2ei8_v_bf16mf2x2_tumu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg2ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16m1x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t 
test_vluxseg2ei8_v_bf16m1x2_tumu(vbool16_t vm, + vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg2ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16m2x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vluxseg2ei8_v_bf16m2x2_tumu(vbool8_t vm, + vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint8m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16m4x2_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i8.nxv16i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m4x2_t test_vluxseg2ei8_v_bf16m4x2_tumu(vbool4_t vm, + vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint8m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16mf4x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr 
noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv2i8_2t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf4x2_t test_vluxseg2ei8_v_bf16mf4x2_mu(vbool64_t vm, + vbfloat16mf4x2_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg2ei8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16mf2x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16mf2x2_t test_vluxseg2ei8_v_bf16mf2x2_mu(vbool32_t vm, + vbfloat16mf2x2_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg2ei8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16m1x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv8i8_2t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret 
target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m1x2_t test_vluxseg2ei8_v_bf16m1x2_mu(vbool16_t vm, vbfloat16m1x2_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg2ei8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16m2x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv16i8_2t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m2x2_t test_vluxseg2ei8_v_bf16m2x2_mu(vbool8_t vm, vbfloat16m2x2_t vd, + const __bf16 *rs1, + vuint8m1_t rs2, size_t vl) { + return __riscv_vluxseg2ei8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 2) @test_vluxseg2ei8_v_bf16m4x2_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 2) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv32i8_2t.p0.nxv16i8.nxv16i1.i64(target("riscv.vector.tuple", , 2) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 2) [[TMP0]] +// +vbfloat16m4x2_t test_vluxseg2ei8_v_bf16m4x2_mu(vbool4_t vm, vbfloat16m4x2_t vd, + const __bf16 *rs1, + vuint8m2_t rs2, size_t vl) { + return __riscv_vluxseg2ei8_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg3ei32.c 
b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg3ei32.c new file mode 100644 index 0000000000000..05bece790e068 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg3ei32.c @@ -0,0 +1,214 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei32_v_bf16mf4x3_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vluxseg3ei32_v_bf16mf4x3_tu(vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg3ei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei32_v_bf16mf2x3_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] 
+// +vbfloat16mf2x3_t test_vluxseg3ei32_v_bf16mf2x3_tu(vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei32_v_bf16m1x3_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vluxseg3ei32_v_bf16m1x3_tu(vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg3ei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei32_v_bf16m2x3_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i32.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vluxseg3ei32_v_bf16m2x3_tu(vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint32m4_t rs2, size_t vl) { + return __riscv_vluxseg3ei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei32_v_bf16mf4x3_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// 
CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vluxseg3ei32_v_bf16mf4x3_tum(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, + size_t vl) { + return __riscv_vluxseg3ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei32_v_bf16mf2x3_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vluxseg3ei32_v_bf16mf2x3_tum(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei32_v_bf16m1x3_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t 
test_vluxseg3ei32_v_bf16m1x3_tum(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg3ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei32_v_bf16m2x3_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vluxseg3ei32_v_bf16m2x3_tum(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint32m4_t rs2, size_t vl) { + return __riscv_vluxseg3ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei32_v_bf16mf4x3_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vluxseg3ei32_v_bf16mf4x3_tumu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, + size_t vl) { + return __riscv_vluxseg3ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei32_v_bf16mf2x3_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) 
[[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vluxseg3ei32_v_bf16mf2x3_tumu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, + size_t vl) { + return __riscv_vluxseg3ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei32_v_bf16m1x3_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vluxseg3ei32_v_bf16m1x3_tumu(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg3ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei32_v_bf16m2x3_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, 
i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vluxseg3ei32_v_bf16m2x3_tumu(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint32m4_t rs2, size_t vl) { + return __riscv_vluxseg3ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei32_v_bf16mf4x3_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vluxseg3ei32_v_bf16mf4x3_mu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg3ei32_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei32_v_bf16mf2x3_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vluxseg3ei32_v_bf16mf2x3_mu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei32_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) 
@test_vluxseg3ei32_v_bf16m1x3_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vluxseg3ei32_v_bf16m1x3_mu(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg3ei32_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei32_v_bf16m2x3_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vluxseg3ei32_v_bf16m2x3_mu(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint32m4_t rs2, size_t vl) { + return __riscv_vluxseg3ei32_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg3ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg3ei64.c new file mode 100644 index 0000000000000..16898e7e27471 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg3ei64.c @@ -0,0 +1,213 @@ +// NOTE: Assertions have been autogenerated by 
utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei64_v_bf16mf4x3_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vluxseg3ei64_v_bf16mf4x3_tu(vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei64_v_bf16mf2x3_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vluxseg3ei64_v_bf16mf2x3_tu(vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg3ei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei64_v_bf16m1x3_tu( +// CHECK-RV64-SAME: 
target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vluxseg3ei64_v_bf16m1x3_tu(vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg3ei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei64_v_bf16m2x3_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i64.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vluxseg3ei64_v_bf16m2x3_tu(vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint64m8_t rs2, size_t vl) { + return __riscv_vluxseg3ei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei64_v_bf16mf4x3_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// 
+vbfloat16mf4x3_t test_vluxseg3ei64_v_bf16mf4x3_tum(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei64_v_bf16mf2x3_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vluxseg3ei64_v_bf16mf2x3_tum(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg3ei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei64_v_bf16m1x3_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vluxseg3ei64_v_bf16m1x3_tum(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg3ei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei64_v_bf16m2x3_tum( +// CHECK-RV64-SAME: [[VM:%.*]], 
target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vluxseg3ei64_v_bf16m2x3_tum(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint64m8_t rs2, size_t vl) { + return __riscv_vluxseg3ei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei64_v_bf16mf4x3_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vluxseg3ei64_v_bf16mf4x3_tumu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, + size_t vl) { + return __riscv_vluxseg3ei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei64_v_bf16mf2x3_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], 
[[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vluxseg3ei64_v_bf16mf2x3_tumu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, + size_t vl) { + return __riscv_vluxseg3ei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei64_v_bf16m1x3_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vluxseg3ei64_v_bf16m1x3_tumu(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg3ei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei64_v_bf16m2x3_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vluxseg3ei64_v_bf16m2x3_tumu(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint64m8_t rs2, size_t vl) { + return __riscv_vluxseg3ei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local 
target("riscv.vector.tuple", , 3) @test_vluxseg3ei64_v_bf16mf4x3_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vluxseg3ei64_v_bf16mf4x3_mu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei64_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei64_v_bf16mf2x3_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vluxseg3ei64_v_bf16mf2x3_mu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg3ei64_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei64_v_bf16m1x3_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) 
@llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vluxseg3ei64_v_bf16m1x3_mu(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg3ei64_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei64_v_bf16m2x3_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vluxseg3ei64_v_bf16m2x3_mu(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint64m8_t rs2, size_t vl) { + return __riscv_vluxseg3ei64_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg3ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg3ei8.c new file mode 100644 index 0000000000000..b48554b75eadc --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg3ei8.c @@ -0,0 +1,209 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck 
--check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei8_v_bf16mf4x3_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vluxseg3ei8_v_bf16mf4x3_tu(vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg3ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei8_v_bf16mf2x3_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vluxseg3ei8_v_bf16mf2x3_tu(vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg3ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei8_v_bf16m1x3_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i8.i64(target("riscv.vector.tuple", 
, 3) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vluxseg3ei8_v_bf16m1x3_tu(vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg3ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei8_v_bf16m2x3_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i8.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vluxseg3ei8_v_bf16m2x3_tu(vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint8m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei8_v_bf16mf4x3_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vluxseg3ei8_v_bf16mf4x3_tum(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg3ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei8_v_bf16mf2x3_tum( +// CHECK-RV64-SAME: 
[[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vluxseg3ei8_v_bf16mf2x3_tum(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg3ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei8_v_bf16m1x3_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vluxseg3ei8_v_bf16m1x3_tum(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg3ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei8_v_bf16m2x3_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], 
[[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vluxseg3ei8_v_bf16m2x3_tum(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint8m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei8_v_bf16mf4x3_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vluxseg3ei8_v_bf16mf4x3_tumu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg3ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei8_v_bf16mf2x3_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vluxseg3ei8_v_bf16mf2x3_tumu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg3ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local 
target("riscv.vector.tuple", , 3) @test_vluxseg3ei8_v_bf16m1x3_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vluxseg3ei8_v_bf16m1x3_tumu(vbool16_t vm, + vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg3ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei8_v_bf16m2x3_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vluxseg3ei8_v_bf16m2x3_tumu(vbool8_t vm, + vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint8m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei8_v_bf16mf4x3_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) 
@llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv2i8_3t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf4x3_t test_vluxseg3ei8_v_bf16mf4x3_mu(vbool64_t vm, + vbfloat16mf4x3_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg3ei8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei8_v_bf16mf2x3_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16mf2x3_t test_vluxseg3ei8_v_bf16mf2x3_mu(vbool32_t vm, + vbfloat16mf2x3_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg3ei8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei8_v_bf16m1x3_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv8i8_3t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m1x3_t test_vluxseg3ei8_v_bf16m1x3_mu(vbool16_t vm, vbfloat16m1x3_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t 
vl) { + return __riscv_vluxseg3ei8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 3) @test_vluxseg3ei8_v_bf16m2x3_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 3) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv16i8_3t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 3) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 3) [[TMP0]] +// +vbfloat16m2x3_t test_vluxseg3ei8_v_bf16m2x3_mu(vbool8_t vm, vbfloat16m2x3_t vd, + const __bf16 *rs1, + vuint8m1_t rs2, size_t vl) { + return __riscv_vluxseg3ei8_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg4ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg4ei32.c new file mode 100644 index 0000000000000..dfd314f8f23ed --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg4ei32.c @@ -0,0 +1,214 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei32_v_bf16mf4x4_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 
4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vluxseg4ei32_v_bf16mf4x4_tu(vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg4ei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei32_v_bf16mf2x4_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vluxseg4ei32_v_bf16mf2x4_tu(vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg4ei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei32_v_bf16m1x4_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vluxseg4ei32_v_bf16m1x4_tu(vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg4ei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) 
@test_vluxseg4ei32_v_bf16m2x4_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i32.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vluxseg4ei32_v_bf16m2x4_tu(vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint32m4_t rs2, size_t vl) { + return __riscv_vluxseg4ei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei32_v_bf16mf4x4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vluxseg4ei32_v_bf16mf4x4_tum(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, + size_t vl) { + return __riscv_vluxseg4ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei32_v_bf16mf2x4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], 
[[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vluxseg4ei32_v_bf16mf2x4_tum(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg4ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei32_v_bf16m1x4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vluxseg4ei32_v_bf16m1x4_tum(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg4ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei32_v_bf16m2x4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vluxseg4ei32_v_bf16m2x4_tum(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint32m4_t rs2, size_t vl) { + return __riscv_vluxseg4ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local 
target("riscv.vector.tuple", , 4) @test_vluxseg4ei32_v_bf16mf4x4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vluxseg4ei32_v_bf16mf4x4_tumu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, + size_t vl) { + return __riscv_vluxseg4ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei32_v_bf16mf2x4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vluxseg4ei32_v_bf16mf2x4_tumu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, + size_t vl) { + return __riscv_vluxseg4ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei32_v_bf16m1x4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) 
@llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vluxseg4ei32_v_bf16m1x4_tumu(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg4ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei32_v_bf16m2x4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vluxseg4ei32_v_bf16m2x4_tumu(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint32m4_t rs2, size_t vl) { + return __riscv_vluxseg4ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei32_v_bf16mf4x4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vluxseg4ei32_v_bf16mf4x4_mu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + 
vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg4ei32_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei32_v_bf16mf2x4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vluxseg4ei32_v_bf16mf2x4_mu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg4ei32_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei32_v_bf16m1x4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vluxseg4ei32_v_bf16m1x4_mu(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg4ei32_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei32_v_bf16m2x4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: 
entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i32.nxv8i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vluxseg4ei32_v_bf16m2x4_mu(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint32m4_t rs2, size_t vl) { + return __riscv_vluxseg4ei32_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg4ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg4ei64.c new file mode 100644 index 0000000000000..a8f779af29cd8 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg4ei64.c @@ -0,0 +1,213 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei64_v_bf16mf4x4_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vluxseg4ei64_v_bf16mf4x4_tu(vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, size_t 
vl) { + return __riscv_vluxseg4ei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei64_v_bf16mf2x4_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vluxseg4ei64_v_bf16mf2x4_tu(vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg4ei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei64_v_bf16m1x4_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vluxseg4ei64_v_bf16m1x4_tu(vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg4ei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei64_v_bf16m2x4_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) 
@llvm.riscv.vluxseg4.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i64.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vluxseg4ei64_v_bf16m2x4_tu(vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint64m8_t rs2, size_t vl) { + return __riscv_vluxseg4ei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei64_v_bf16mf4x4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vluxseg4ei64_v_bf16mf4x4_tum(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg4ei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei64_v_bf16mf2x4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vluxseg4ei64_v_bf16mf2x4_tum(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return 
__riscv_vluxseg4ei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei64_v_bf16m1x4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vluxseg4ei64_v_bf16m1x4_tum(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg4ei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei64_v_bf16m2x4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vluxseg4ei64_v_bf16m2x4_tum(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint64m8_t rs2, size_t vl) { + return __riscv_vluxseg4ei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei64_v_bf16mf4x4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: 
[[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vluxseg4ei64_v_bf16mf4x4_tumu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, + size_t vl) { + return __riscv_vluxseg4ei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei64_v_bf16mf2x4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vluxseg4ei64_v_bf16mf2x4_tumu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, + size_t vl) { + return __riscv_vluxseg4ei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei64_v_bf16m1x4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t 
test_vluxseg4ei64_v_bf16m1x4_tumu(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg4ei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei64_v_bf16m2x4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vluxseg4ei64_v_bf16m2x4_tumu(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint64m8_t rs2, size_t vl) { + return __riscv_vluxseg4ei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei64_v_bf16mf4x4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vluxseg4ei64_v_bf16mf4x4_mu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg4ei64_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei64_v_bf16mf2x4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) 
[[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vluxseg4ei64_v_bf16mf2x4_mu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg4ei64_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei64_v_bf16m1x4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vluxseg4ei64_v_bf16m1x4_mu(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg4ei64_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei64_v_bf16m2x4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i64.nxv8i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// 
CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vluxseg4ei64_v_bf16m2x4_mu(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint64m8_t rs2, size_t vl) { + return __riscv_vluxseg4ei64_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg4ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg4ei8.c new file mode 100644 index 0000000000000..b3fc409391c6a --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg4ei8.c @@ -0,0 +1,209 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei8_v_bf16mf4x4_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vluxseg4ei8_v_bf16mf4x4_tu(vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg4ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei8_v_bf16mf2x4_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], 
[[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vluxseg4ei8_v_bf16mf2x4_tu(vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg4ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei8_v_bf16m1x4_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vluxseg4ei8_v_bf16m1x4_tu(vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg4ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei8_v_bf16m2x4_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i8.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vluxseg4ei8_v_bf16m2x4_tu(vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint8m1_t rs2, size_t vl) { + 
return __riscv_vluxseg4ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei8_v_bf16mf4x4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vluxseg4ei8_v_bf16mf4x4_tum(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg4ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei8_v_bf16mf2x4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vluxseg4ei8_v_bf16mf2x4_tum(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg4ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei8_v_bf16m1x4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: 
[[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vluxseg4ei8_v_bf16m1x4_tum(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg4ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei8_v_bf16m2x4_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vluxseg4ei8_v_bf16m2x4_tum(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint8m1_t rs2, size_t vl) { + return __riscv_vluxseg4ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei8_v_bf16mf4x4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vluxseg4ei8_v_bf16mf4x4_tumu(vbool64_t vm, + 
vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg4ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei8_v_bf16mf2x4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf2x4_t test_vluxseg4ei8_v_bf16mf2x4_tumu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg4ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei8_v_bf16m1x4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vluxseg4ei8_v_bf16m1x4_tumu(vbool16_t vm, + vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg4ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei8_v_bf16m2x4_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 
noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vluxseg4ei8_v_bf16m2x4_tumu(vbool8_t vm, + vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint8m1_t rs2, size_t vl) { + return __riscv_vluxseg4ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei8_v_bf16mf4x4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv2i8_4t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16mf4x4_t test_vluxseg4ei8_v_bf16mf4x4_mu(vbool64_t vm, + vbfloat16mf4x4_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg4ei8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei8_v_bf16mf2x4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) 
[[TMP0]] +// +vbfloat16mf2x4_t test_vluxseg4ei8_v_bf16mf2x4_mu(vbool32_t vm, + vbfloat16mf2x4_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg4ei8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei8_v_bf16m1x4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv8i8_4t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m1x4_t test_vluxseg4ei8_v_bf16m1x4_mu(vbool16_t vm, vbfloat16m1x4_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg4ei8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 4) @test_vluxseg4ei8_v_bf16m2x4_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 4) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv8i8.nxv8i1.i64(target("riscv.vector.tuple", , 4) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 4) [[TMP0]] +// +vbfloat16m2x4_t test_vluxseg4ei8_v_bf16m2x4_mu(vbool8_t vm, vbfloat16m2x4_t vd, + const __bf16 *rs1, + vuint8m1_t rs2, size_t vl) { + return __riscv_vluxseg4ei8_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg5ei32.c 
b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg5ei32.c new file mode 100644 index 0000000000000..f40d2b8e162e9 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg5ei32.c @@ -0,0 +1,164 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei32_v_bf16mf4x5_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vluxseg5ei32_v_bf16mf4x5_tu(vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg5ei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei32_v_bf16mf2x5_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] 
+// +vbfloat16mf2x5_t test_vluxseg5ei32_v_bf16mf2x5_tu(vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg5ei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei32_v_bf16m1x5_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vluxseg5ei32_v_bf16m1x5_tu(vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg5ei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei32_v_bf16mf4x5_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vluxseg5ei32_v_bf16mf4x5_tum(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, + size_t vl) { + return __riscv_vluxseg5ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei32_v_bf16mf2x5_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef 
[[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vluxseg5ei32_v_bf16mf2x5_tum(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg5ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei32_v_bf16m1x5_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vluxseg5ei32_v_bf16m1x5_tum(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg5ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei32_v_bf16mf4x5_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) 
[[TMP0]] +// +vbfloat16mf4x5_t test_vluxseg5ei32_v_bf16mf4x5_tumu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, + size_t vl) { + return __riscv_vluxseg5ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei32_v_bf16mf2x5_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vluxseg5ei32_v_bf16mf2x5_tumu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, + size_t vl) { + return __riscv_vluxseg5ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei32_v_bf16m1x5_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vluxseg5ei32_v_bf16m1x5_tumu(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg5ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei32_v_bf16mf4x5_mu( +// CHECK-RV64-SAME: 
[[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vluxseg5ei32_v_bf16mf4x5_mu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg5ei32_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei32_v_bf16mf2x5_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vluxseg5ei32_v_bf16mf2x5_mu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg5ei32_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei32_v_bf16m1x5_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], 
[[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vluxseg5ei32_v_bf16m1x5_mu(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg5ei32_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg5ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg5ei64.c new file mode 100644 index 0000000000000..da3efe94fafbf --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg5ei64.c @@ -0,0 +1,163 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei64_v_bf16mf4x5_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vluxseg5ei64_v_bf16mf4x5_tu(vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg5ei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei64_v_bf16mf2x5_tu( +// CHECK-RV64-SAME: 
target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vluxseg5ei64_v_bf16mf2x5_tu(vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg5ei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei64_v_bf16m1x5_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vluxseg5ei64_v_bf16m1x5_tu(vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg5ei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei64_v_bf16mf4x5_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// 
+vbfloat16mf4x5_t test_vluxseg5ei64_v_bf16mf4x5_tum(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg5ei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei64_v_bf16mf2x5_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vluxseg5ei64_v_bf16mf2x5_tum(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg5ei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei64_v_bf16m1x5_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vluxseg5ei64_v_bf16m1x5_tum(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg5ei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei64_v_bf16mf4x5_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], 
target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vluxseg5ei64_v_bf16mf4x5_tumu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, + size_t vl) { + return __riscv_vluxseg5ei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei64_v_bf16mf2x5_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vluxseg5ei64_v_bf16mf2x5_tumu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, + size_t vl) { + return __riscv_vluxseg5ei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei64_v_bf16m1x5_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr 
[[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vluxseg5ei64_v_bf16m1x5_tumu(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg5ei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei64_v_bf16mf4x5_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vluxseg5ei64_v_bf16mf4x5_mu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg5ei64_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei64_v_bf16mf2x5_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vluxseg5ei64_v_bf16mf2x5_mu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg5ei64_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local 
target("riscv.vector.tuple", , 5) @test_vluxseg5ei64_v_bf16m1x5_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vluxseg5ei64_v_bf16m1x5_mu(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg5ei64_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg5ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg5ei8.c new file mode 100644 index 0000000000000..422a271e583aa --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg5ei8.c @@ -0,0 +1,160 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei8_v_bf16mf4x5_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 5) 
[[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vluxseg5ei8_v_bf16mf4x5_tu(vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg5ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei8_v_bf16mf2x5_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vluxseg5ei8_v_bf16mf2x5_tu(vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg5ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei8_v_bf16m1x5_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vluxseg5ei8_v_bf16m1x5_tu(vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg5ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei8_v_bf16mf4x5_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef 
[[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vluxseg5ei8_v_bf16mf4x5_tum(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg5ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei8_v_bf16mf2x5_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vluxseg5ei8_v_bf16mf2x5_tum(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg5ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei8_v_bf16m1x5_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret 
target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vluxseg5ei8_v_bf16m1x5_tum(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg5ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei8_v_bf16mf4x5_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vluxseg5ei8_v_bf16mf4x5_tumu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg5ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei8_v_bf16mf2x5_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vluxseg5ei8_v_bf16mf2x5_tumu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg5ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei8_v_bf16m1x5_tumu( +// 
CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vluxseg5ei8_v_bf16m1x5_tumu(vbool16_t vm, + vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg5ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei8_v_bf16mf4x5_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv2i8_5t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf4x5_t test_vluxseg5ei8_v_bf16mf4x5_mu(vbool64_t vm, + vbfloat16mf4x5_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg5ei8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei8_v_bf16mf2x5_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr 
[[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16mf2x5_t test_vluxseg5ei8_v_bf16mf2x5_mu(vbool32_t vm, + vbfloat16mf2x5_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg5ei8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 5) @test_vluxseg5ei8_v_bf16m1x5_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 5) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv8i8_5t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 5) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 5) [[TMP0]] +// +vbfloat16m1x5_t test_vluxseg5ei8_v_bf16m1x5_mu(vbool16_t vm, vbfloat16m1x5_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg5ei8_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg6ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg6ei32.c new file mode 100644 index 0000000000000..ecdd9cc8ff315 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg6ei32.c @@ -0,0 +1,164 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) 
@test_vluxseg6ei32_v_bf16mf4x6_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vluxseg6ei32_v_bf16mf4x6_tu(vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg6ei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei32_v_bf16mf2x6_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vluxseg6ei32_v_bf16mf2x6_tu(vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg6ei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei32_v_bf16m1x6_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret 
target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vluxseg6ei32_v_bf16m1x6_tu(vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg6ei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei32_v_bf16mf4x6_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vluxseg6ei32_v_bf16mf4x6_tum(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, + size_t vl) { + return __riscv_vluxseg6ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei32_v_bf16mf2x6_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vluxseg6ei32_v_bf16mf2x6_tum(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg6ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei32_v_bf16m1x6_tum( +// CHECK-RV64-SAME: 
[[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vluxseg6ei32_v_bf16m1x6_tum(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg6ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei32_v_bf16mf4x6_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vluxseg6ei32_v_bf16mf4x6_tumu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, + size_t vl) { + return __riscv_vluxseg6ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei32_v_bf16mf2x6_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr 
[[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vluxseg6ei32_v_bf16mf2x6_tumu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, + size_t vl) { + return __riscv_vluxseg6ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei32_v_bf16m1x6_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vluxseg6ei32_v_bf16m1x6_tumu(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg6ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei32_v_bf16mf4x6_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vluxseg6ei32_v_bf16mf4x6_mu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg6ei32_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define 
dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei32_v_bf16mf2x6_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vluxseg6ei32_v_bf16mf2x6_mu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg6ei32_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei32_v_bf16m1x6_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vluxseg6ei32_v_bf16m1x6_mu(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg6ei32_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg6ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg6ei64.c new file mode 100644 index 0000000000000..d428a81cfddc8 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg6ei64.c @@ -0,0 +1,163 @@ +// 
NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei64_v_bf16mf4x6_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vluxseg6ei64_v_bf16mf4x6_tu(vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg6ei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei64_v_bf16mf2x6_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vluxseg6ei64_v_bf16mf2x6_tu(vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg6ei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei64_v_bf16m1x6_tu( 
+// CHECK-RV64-SAME: target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vluxseg6ei64_v_bf16m1x6_tu(vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg6ei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei64_v_bf16mf4x6_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vluxseg6ei64_v_bf16mf4x6_tum(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg6ei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei64_v_bf16mf2x6_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, 
i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vluxseg6ei64_v_bf16mf2x6_tum(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg6ei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei64_v_bf16m1x6_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vluxseg6ei64_v_bf16m1x6_tum(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg6ei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei64_v_bf16mf4x6_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vluxseg6ei64_v_bf16mf4x6_tumu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, + size_t vl) { + return __riscv_vluxseg6ei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) 
@test_vluxseg6ei64_v_bf16mf2x6_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vluxseg6ei64_v_bf16mf2x6_tumu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, + size_t vl) { + return __riscv_vluxseg6ei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei64_v_bf16m1x6_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vluxseg6ei64_v_bf16m1x6_tumu(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg6ei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei64_v_bf16mf4x6_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) 
@llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vluxseg6ei64_v_bf16mf4x6_mu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg6ei64_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei64_v_bf16mf2x6_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vluxseg6ei64_v_bf16mf2x6_mu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg6ei64_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei64_v_bf16m1x6_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vluxseg6ei64_v_bf16m1x6_mu(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint64m4_t 
rs2, size_t vl) { + return __riscv_vluxseg6ei64_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg6ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg6ei8.c new file mode 100644 index 0000000000000..cb38825634e43 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg6ei8.c @@ -0,0 +1,160 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei8_v_bf16mf4x6_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vluxseg6ei8_v_bf16mf4x6_tu(vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg6ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei8_v_bf16mf2x6_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) 
@llvm.riscv.vluxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vluxseg6ei8_v_bf16mf2x6_tu(vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg6ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei8_v_bf16m1x6_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vluxseg6ei8_v_bf16m1x6_tu(vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg6ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei8_v_bf16mf4x6_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vluxseg6ei8_v_bf16mf4x6_tum(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg6ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define 
dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei8_v_bf16mf2x6_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vluxseg6ei8_v_bf16mf2x6_tum(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg6ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei8_v_bf16m1x6_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vluxseg6ei8_v_bf16m1x6_tum(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg6ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei8_v_bf16mf4x6_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) 
@llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vluxseg6ei8_v_bf16mf4x6_tumu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg6ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei8_v_bf16mf2x6_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vluxseg6ei8_v_bf16mf2x6_tumu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg6ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei8_v_bf16m1x6_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vluxseg6ei8_v_bf16m1x6_tumu(vbool16_t vm, + vbfloat16m1x6_t vd, + const __bf16 *rs1, + 
vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg6ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei8_v_bf16mf4x6_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv2i8_6t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf4x6_t test_vluxseg6ei8_v_bf16mf4x6_mu(vbool64_t vm, + vbfloat16mf4x6_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg6ei8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei8_v_bf16mf2x6_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16mf2x6_t test_vluxseg6ei8_v_bf16mf2x6_mu(vbool32_t vm, + vbfloat16mf2x6_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg6ei8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 6) @test_vluxseg6ei8_v_bf16m1x6_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 6) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: 
entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv8i8_6t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 6) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 6) [[TMP0]] +// +vbfloat16m1x6_t test_vluxseg6ei8_v_bf16m1x6_mu(vbool16_t vm, vbfloat16m1x6_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg6ei8_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg7ei32.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg7ei32.c new file mode 100644 index 0000000000000..c446be91583f5 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg7ei32.c @@ -0,0 +1,164 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei32_v_bf16mf4x7_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vluxseg7ei32_v_bf16mf4x7_tu(vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t 
vl) { + return __riscv_vluxseg7ei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei32_v_bf16mf2x7_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vluxseg7ei32_v_bf16mf2x7_tu(vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg7ei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei32_v_bf16m1x7_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vluxseg7ei32_v_bf16m1x7_tu(vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg7ei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei32_v_bf16mf4x7_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) 
@llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vluxseg7ei32_v_bf16mf4x7_tum(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, + size_t vl) { + return __riscv_vluxseg7ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei32_v_bf16mf2x7_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vluxseg7ei32_v_bf16mf2x7_tum(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg7ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei32_v_bf16m1x7_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vluxseg7ei32_v_bf16m1x7_tum(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + 
vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg7ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei32_v_bf16mf4x7_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vluxseg7ei32_v_bf16mf4x7_tumu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, + size_t vl) { + return __riscv_vluxseg7ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei32_v_bf16mf2x7_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vluxseg7ei32_v_bf16mf2x7_tumu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, + size_t vl) { + return __riscv_vluxseg7ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei32_v_bf16m1x7_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] 
{ +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vluxseg7ei32_v_bf16m1x7_tumu(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg7ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei32_v_bf16mf4x7_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vluxseg7ei32_v_bf16mf4x7_mu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg7ei32_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei32_v_bf16mf2x7_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// 
+vbfloat16mf2x7_t test_vluxseg7ei32_v_bf16mf2x7_mu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg7ei32_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei32_v_bf16m1x7_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vluxseg7ei32_v_bf16m1x7_mu(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg7ei32_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg7ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg7ei64.c new file mode 100644 index 0000000000000..06f159dab8630 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg7ei64.c @@ -0,0 +1,163 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei64_v_bf16mf4x7_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], 
i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vluxseg7ei64_v_bf16mf4x7_tu(vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg7ei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei64_v_bf16mf2x7_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vluxseg7ei64_v_bf16mf2x7_tu(vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg7ei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei64_v_bf16m1x7_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vluxseg7ei64_v_bf16m1x7_tu(vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) 
{ + return __riscv_vluxseg7ei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei64_v_bf16mf4x7_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vluxseg7ei64_v_bf16mf4x7_tum(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg7ei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei64_v_bf16mf2x7_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vluxseg7ei64_v_bf16mf2x7_tum(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg7ei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei64_v_bf16m1x7_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// 
CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vluxseg7ei64_v_bf16m1x7_tum(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg7ei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei64_v_bf16mf4x7_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vluxseg7ei64_v_bf16mf4x7_tumu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, + size_t vl) { + return __riscv_vluxseg7ei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei64_v_bf16mf2x7_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t 
test_vluxseg7ei64_v_bf16mf2x7_tumu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, + size_t vl) { + return __riscv_vluxseg7ei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei64_v_bf16m1x7_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vluxseg7ei64_v_bf16m1x7_tumu(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg7ei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei64_v_bf16mf4x7_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vluxseg7ei64_v_bf16mf4x7_mu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg7ei64_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei64_v_bf16mf2x7_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) 
[[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vluxseg7ei64_v_bf16mf2x7_mu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg7ei64_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei64_v_bf16m1x7_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vluxseg7ei64_v_bf16m1x7_mu(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg7ei64_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg7ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg7ei8.c new file mode 100644 index 0000000000000..8f91c7d642553 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg7ei8.c @@ -0,0 +1,160 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: 
%clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei8_v_bf16mf4x7_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vluxseg7ei8_v_bf16mf4x7_tu(vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg7ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei8_v_bf16mf2x7_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vluxseg7ei8_v_bf16mf2x7_tu(vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg7ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei8_v_bf16m1x7_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// 
CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vluxseg7ei8_v_bf16m1x7_tu(vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg7ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei8_v_bf16mf4x7_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vluxseg7ei8_v_bf16mf4x7_tum(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg7ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei8_v_bf16mf2x7_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vluxseg7ei8_v_bf16mf2x7_tum(vbool32_t vm, + 
vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg7ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei8_v_bf16m1x7_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vluxseg7ei8_v_bf16m1x7_tum(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg7ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei8_v_bf16mf4x7_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf4x7_t test_vluxseg7ei8_v_bf16mf4x7_tumu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg7ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei8_v_bf16mf2x7_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef 
[[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vluxseg7ei8_v_bf16mf2x7_tumu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg7ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei8_v_bf16m1x7_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vluxseg7ei8_v_bf16m1x7_tumu(vbool16_t vm, + vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg7ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei8_v_bf16mf4x7_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv2i8_7t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] 
+// +vbfloat16mf4x7_t test_vluxseg7ei8_v_bf16mf4x7_mu(vbool64_t vm, + vbfloat16mf4x7_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg7ei8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei8_v_bf16mf2x7_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16mf2x7_t test_vluxseg7ei8_v_bf16mf2x7_mu(vbool32_t vm, + vbfloat16mf2x7_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg7ei8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 7) @test_vluxseg7ei8_v_bf16m1x7_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 7) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv8i8_7t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 7) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 7) [[TMP0]] +// +vbfloat16m1x7_t test_vluxseg7ei8_v_bf16m1x7_mu(vbool16_t vm, vbfloat16m1x7_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg7ei8_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg8ei32.c 
b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg8ei32.c new file mode 100644 index 0000000000000..0fef431e84c08 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg8ei32.c @@ -0,0 +1,164 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei32_v_bf16mf4x8_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i32.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vluxseg8ei32_v_bf16mf4x8_tu(vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg8ei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei32_v_bf16mf2x8_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i32.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] 
+// +vbfloat16mf2x8_t test_vluxseg8ei32_v_bf16mf2x8_tu(vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg8ei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei32_v_bf16m1x8_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i32.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vluxseg8ei32_v_bf16m1x8_tu(vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg8ei32_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei32_v_bf16mf4x8_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vluxseg8ei32_v_bf16mf4x8_tum(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, + size_t vl) { + return __riscv_vluxseg8ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei32_v_bf16mf2x8_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef 
[[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vluxseg8ei32_v_bf16mf2x8_tum(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg8ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei32_v_bf16m1x8_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vluxseg8ei32_v_bf16m1x8_tum(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg8ei32_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei32_v_bf16mf4x8_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) 
[[TMP0]] +// +vbfloat16mf4x8_t test_vluxseg8ei32_v_bf16mf4x8_tumu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, + size_t vl) { + return __riscv_vluxseg8ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei32_v_bf16mf2x8_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vluxseg8ei32_v_bf16mf2x8_tumu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, + size_t vl) { + return __riscv_vluxseg8ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei32_v_bf16m1x8_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vluxseg8ei32_v_bf16m1x8_tumu(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg8ei32_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei32_v_bf16mf4x8_mu( +// CHECK-RV64-SAME: 
[[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i32.nxv1i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vluxseg8ei32_v_bf16mf4x8_mu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint32mf2_t rs2, size_t vl) { + return __riscv_vluxseg8ei32_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei32_v_bf16mf2x8_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i32.nxv2i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vluxseg8ei32_v_bf16mf2x8_mu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint32m1_t rs2, size_t vl) { + return __riscv_vluxseg8ei32_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei32_v_bf16m1x8_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i32.nxv4i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], 
[[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vluxseg8ei32_v_bf16m1x8_mu(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint32m2_t rs2, size_t vl) { + return __riscv_vluxseg8ei32_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg8ei64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg8ei64.c new file mode 100644 index 0000000000000..5e941b53727de --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg8ei64.c @@ -0,0 +1,163 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei64_v_bf16mf4x8_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i64.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vluxseg8ei64_v_bf16mf4x8_tu(vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg8ei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei64_v_bf16mf2x8_tu( +// CHECK-RV64-SAME: 
target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i64.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vluxseg8ei64_v_bf16mf2x8_tu(vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg8ei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei64_v_bf16m1x8_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i64.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vluxseg8ei64_v_bf16m1x8_tu(vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg8ei64_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei64_v_bf16mf4x8_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// 
+vbfloat16mf4x8_t test_vluxseg8ei64_v_bf16mf4x8_tum(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg8ei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei64_v_bf16mf2x8_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vluxseg8ei64_v_bf16mf2x8_tum(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg8ei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei64_v_bf16m1x8_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vluxseg8ei64_v_bf16m1x8_tum(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg8ei64_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei64_v_bf16mf4x8_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], 
target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vluxseg8ei64_v_bf16mf4x8_tumu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, + size_t vl) { + return __riscv_vluxseg8ei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei64_v_bf16mf2x8_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vluxseg8ei64_v_bf16mf2x8_tumu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, + size_t vl) { + return __riscv_vluxseg8ei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei64_v_bf16m1x8_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr 
[[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vluxseg8ei64_v_bf16m1x8_tumu(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg8ei64_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei64_v_bf16mf4x8_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i64.nxv1i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vluxseg8ei64_v_bf16mf4x8_mu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint64m1_t rs2, size_t vl) { + return __riscv_vluxseg8ei64_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei64_v_bf16mf2x8_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i64.nxv2i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vluxseg8ei64_v_bf16mf2x8_mu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint64m2_t rs2, size_t vl) { + return __riscv_vluxseg8ei64_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local 
target("riscv.vector.tuple", , 8) @test_vluxseg8ei64_v_bf16m1x8_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i64.nxv4i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vluxseg8ei64_v_bf16m1x8_mu(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint64m4_t rs2, size_t vl) { + return __riscv_vluxseg8ei64_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg8ei8.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg8ei8.c new file mode 100644 index 0000000000000..49cb2aa8921de --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vluxseg8ei8.c @@ -0,0 +1,160 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \ +// RUN: -target-feature +zvfbfmin -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei8_v_bf16mf4x8_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i8.i64(target("riscv.vector.tuple", , 8) 
[[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vluxseg8ei8_v_bf16mf4x8_tu(vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg8ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei8_v_bf16mf2x8_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i8.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vluxseg8ei8_v_bf16mf2x8_tu(vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg8ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei8_v_bf16m1x8_tu( +// CHECK-RV64-SAME: target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i8.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], i64 [[VL]], i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vluxseg8ei8_v_bf16m1x8_tu(vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg8ei8_tu(vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei8_v_bf16mf4x8_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef 
[[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vluxseg8ei8_v_bf16mf4x8_tum(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg8ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei8_v_bf16mf2x8_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vluxseg8ei8_v_bf16mf2x8_tum(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg8ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei8_v_bf16m1x8_tum( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 2, i64 4) +// CHECK-RV64-NEXT: ret 
target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vluxseg8ei8_v_bf16m1x8_tum(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg8ei8_tum(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei8_v_bf16mf4x8_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vluxseg8ei8_v_bf16mf4x8_tumu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg8ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei8_v_bf16mf2x8_tumu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vluxseg8ei8_v_bf16mf2x8_tumu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg8ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei8_v_bf16m1x8_tumu( +// 
CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 0, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vluxseg8ei8_v_bf16m1x8_tumu(vbool16_t vm, + vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg8ei8_tumu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei8_v_bf16mf4x8_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv2i8_8t.p0.nxv1i8.nxv1i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf4x8_t test_vluxseg8ei8_v_bf16mf4x8_mu(vbool64_t vm, + vbfloat16mf4x8_t vd, + const __bf16 *rs1, + vuint8mf8_t rs2, size_t vl) { + return __riscv_vluxseg8ei8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei8_v_bf16mf2x8_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv2i8.nxv2i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr 
[[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16mf2x8_t test_vluxseg8ei8_v_bf16mf2x8_mu(vbool32_t vm, + vbfloat16mf2x8_t vd, + const __bf16 *rs1, + vuint8mf4_t rs2, size_t vl) { + return __riscv_vluxseg8ei8_mu(vm, vd, rs1, rs2, vl); +} + +// CHECK-RV64-LABEL: define dso_local target("riscv.vector.tuple", , 8) @test_vluxseg8ei8_v_bf16m1x8_mu( +// CHECK-RV64-SAME: [[VM:%.*]], target("riscv.vector.tuple", , 8) [[VD:%.*]], ptr noundef [[RS1:%.*]], [[RS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv8i8_8t.p0.nxv4i8.nxv4i1.i64(target("riscv.vector.tuple", , 8) [[VD]], ptr [[RS1]], [[RS2]], [[VM]], i64 [[VL]], i64 1, i64 4) +// CHECK-RV64-NEXT: ret target("riscv.vector.tuple", , 8) [[TMP0]] +// +vbfloat16m1x8_t test_vluxseg8ei8_v_bf16m1x8_mu(vbool16_t vm, vbfloat16m1x8_t vd, + const __bf16 *rs1, + vuint8mf2_t rs2, size_t vl) { + return __riscv_vluxseg8ei8_mu(vm, vd, rs1, rs2, vl); +} diff --git a/clang/test/CodeGen/enum3.c b/clang/test/CodeGen/enum3.c new file mode 100644 index 0000000000000..6878a0bbb94d0 --- /dev/null +++ b/clang/test/CodeGen/enum3.c @@ -0,0 +1,26 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -Wno-error=underlying-atomic-qualifier-ignored -std=c23 %s -emit-llvm -o - | FileCheck %s + +// Ensure that an "atomic" underlying type has no actual atomic semantics +// because the qualifier is stripped. 
+ +enum E : _Atomic(int) { + Foo +}; + +// CHECK-LABEL: define {{.*}} void @test( +// CHECK-SAME: i32 noundef [[E:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[E_ADDR:%.*]] = alloca i32 +// CHECK-NEXT: [[X:%.*]] = alloca i32 +// CHECK-NEXT: store i32 [[E]], ptr [[E_ADDR]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[E_ADDR]] +// CHECK-NEXT: store i32 [[TMP0]], ptr [[X]] +// CHECK-NEXT: store i32 0, ptr [[E_ADDR]] +// CHECK-NEXT: ret void +// +void test(enum E e) { + int x = e; + e = Foo; +} + diff --git a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl index bbb55b7e14941..bfbed79dc7f16 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl @@ -209,9 +209,9 @@ kernel void test_target_features_kernel(global int *i) { // NOCPU-NEXT: [[BLOCK_CAPTURED19:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 5 // NOCPU-NEXT: [[TMP17:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8 // NOCPU-NEXT: store i64 [[TMP17]], ptr [[BLOCK_CAPTURED19]], align 8 -// NOCPU-NEXT: [[TMP18:%.*]] = getelementptr [1 x i64], ptr [[BLOCK_SIZES_ASCAST]], i32 0, i32 0 -// NOCPU-NEXT: store i64 100, ptr [[TMP18]], align 8 -// NOCPU-NEXT: [[TMP19:%.*]] = call i32 @__enqueue_kernel_varargs(ptr addrspace(1) [[TMP12]], i32 [[TMP13]], ptr addrspace(5) [[VARTMP11]], ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_3_kernel.runtime.handle to ptr), ptr [[BLOCK12_ASCAST]], i32 1, ptr [[TMP18]]) +// NOCPU-NEXT: [[TMP18:%.*]] = getelementptr [1 x i64], ptr addrspace(5) [[BLOCK_SIZES]], i32 0, i32 0 +// NOCPU-NEXT: store i64 100, ptr addrspace(5) [[TMP18]], align 8 +// NOCPU-NEXT: [[TMP19:%.*]] = call i32 @__enqueue_kernel_varargs(ptr addrspace(1) [[TMP12]], i32 [[TMP13]], ptr addrspace(5) [[VARTMP11]], ptr addrspacecast (ptr addrspace(1) 
@__test_block_invoke_3_kernel.runtime.handle to ptr), ptr [[BLOCK12_ASCAST]], i32 1, ptr addrspace(5) [[TMP18]]) // NOCPU-NEXT: [[BLOCK_SIZE22:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 0 // NOCPU-NEXT: store i32 32, ptr [[BLOCK_SIZE22]], align 8 // NOCPU-NEXT: [[BLOCK_ALIGN23:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 1 @@ -587,9 +587,9 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-NEXT: [[TMP17:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]] // GFX900-NEXT: store i64 [[TMP17]], ptr [[BLOCK_CAPTURED19]], align 8, !tbaa [[TBAA3]] // GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[BLOCK_SIZES]]) #[[ATTR9]] -// GFX900-NEXT: [[TMP18:%.*]] = getelementptr [1 x i64], ptr [[BLOCK_SIZES_ASCAST]], i32 0, i32 0 -// GFX900-NEXT: store i64 100, ptr [[TMP18]], align 8 -// GFX900-NEXT: [[TMP19:%.*]] = call i32 @__enqueue_kernel_varargs(ptr addrspace(1) [[TMP12]], i32 [[TMP13]], ptr addrspace(5) [[VARTMP11]], ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_3_kernel.runtime.handle to ptr), ptr [[BLOCK12_ASCAST]], i32 1, ptr [[TMP18]]) +// GFX900-NEXT: [[TMP18:%.*]] = getelementptr [1 x i64], ptr addrspace(5) [[BLOCK_SIZES]], i32 0, i32 0 +// GFX900-NEXT: store i64 100, ptr addrspace(5) [[TMP18]], align 8 +// GFX900-NEXT: [[TMP19:%.*]] = call i32 @__enqueue_kernel_varargs(ptr addrspace(1) [[TMP12]], i32 [[TMP13]], ptr addrspace(5) [[VARTMP11]], ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_3_kernel.runtime.handle to ptr), ptr [[BLOCK12_ASCAST]], i32 1, ptr addrspace(5) [[TMP18]]) // GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[BLOCK_SIZES]]) #[[ATTR9]] // GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[BLOCK20]]) #[[ATTR9]] // GFX900-NEXT: [[BLOCK_SIZE22:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, 
i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 0 diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl index 421099d3876e3..a1b91d0cc38dc 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl @@ -24,6 +24,24 @@ void test_s_monitor_sleep() { __builtin_amdgcn_s_monitor_sleep(10); } +// CHECK-LABEL: @test_s_wait_asynccnt( +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @llvm.amdgcn.s.wait.asynccnt(i16 0) +// CHECK-NEXT: ret void +// +void test_s_wait_asynccnt() { + __builtin_amdgcn_s_wait_asynccnt(0); +} + +// CHECK-LABEL: @test_s_wait_tensorcnt( +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @llvm.amdgcn.s.wait.tensorcnt(i16 0) +// CHECK-NEXT: ret void +// +void test_s_wait_tensorcnt() { + __builtin_amdgcn_s_wait_tensorcnt(0); +} + // CHECK-LABEL: @test_cvt_f16_fp8( // CHECK-NEXT: entry: // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) diff --git a/clang/test/Driver/frame-pointer-elim.c b/clang/test/Driver/frame-pointer-elim.c index 0dd7eb0c738db..f00940bd7613d 100644 --- a/clang/test/Driver/frame-pointer-elim.c +++ b/clang/test/Driver/frame-pointer-elim.c @@ -44,8 +44,8 @@ // RUN: FileCheck --check-prefix=KEEP-NONE %s // -pg -fomit-frame-pointer => error. 
-// RUN: not %clang -### -S -fomit-frame-pointer -pg %s 2>&1 | FileCheck -check-prefix=CHECK-NO-MIX-OMIT-FP-PG %s -// RUN: %clang -### -S -fomit-frame-pointer -fno-omit-frame-pointer -pg %s 2>&1 | FileCheck -check-prefix=CHECK-MIX-NO-OMIT-FP-PG %s +// RUN: not %clang -### --target=i386-linux -S -fomit-frame-pointer -pg %s 2>&1 | FileCheck -check-prefix=CHECK-NO-MIX-OMIT-FP-PG %s +// RUN: %clang -### --target=i386-linux -S -fomit-frame-pointer -fno-omit-frame-pointer -pg %s 2>&1 | FileCheck -check-prefix=CHECK-MIX-NO-OMIT-FP-PG %s // CHECK-NO-MIX-OMIT-FP-PG: '-fomit-frame-pointer' not allowed with '-pg' // CHECK-MIX-NO-OMIT-FP-PG-NOT: '-fomit-frame-pointer' not allowed with '-pg' diff --git a/clang/test/Driver/openmp-offload-gpu.c b/clang/test/Driver/openmp-offload-gpu.c index 62e7c9588ce66..77f4cfb5f3a43 100644 --- a/clang/test/Driver/openmp-offload-gpu.c +++ b/clang/test/Driver/openmp-offload-gpu.c @@ -395,3 +395,12 @@ // RUN: --offload-arch=sm_52 -foffload-lto=thin -nogpulib -nogpuinc %s 2>&1 \ // RUN: | FileCheck --check-prefix=THINLTO-SM52 %s // THINLTO-SM52: --device-compiler=nvptx64-nvidia-cuda=-flto=thin + +// +// Check the requested architecture is passed if provided. 
+// +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp \ +// RUN: --offload-arch=gfx906 -foffload-lto=thin -nogpulib -nogpuinc %s 2>&1 \ +// RUN: | FileCheck --check-prefix=SHOULD-EXTRACT %s +// +// SHOULD-EXTRACT: clang-linker-wrapper{{.*}}"--should-extract=gfx906" diff --git a/clang/test/OpenMP/parallel_ast_print.cpp b/clang/test/OpenMP/parallel_ast_print.cpp index 948baaff30d89..15439ea31215a 100644 --- a/clang/test/OpenMP/parallel_ast_print.cpp +++ b/clang/test/OpenMP/parallel_ast_print.cpp @@ -173,13 +173,13 @@ T tmain(T argc, T *argv) { foo(); #endif #ifdef OMP60 -#pragma omp parallel default(none), private(argc,b) firstprivate(argv) shared (d) if (parallel:argc > 0) num_threads(strict: C) copyin(S::TS, thrp) proc_bind(primary) reduction(+:c, arr1[argc]) reduction(max:e, arr[:C][0:10]) +#pragma omp parallel default(none), private(argc,b) firstprivate(argv) shared (d) if (parallel:argc > 0) num_threads(strict: C) copyin(S::TS, thrp) proc_bind(primary) reduction(+:c, arr1[argc]) reduction(max:e, arr[:C][0:10]) message("msg") severity(fatal) foo(); #endif #pragma omp parallel if (C) num_threads(s) proc_bind(close) reduction(^:e, f, arr[0:C][:argc]) reduction(default, && : g) reduction(task,+:argc) foo(); #ifdef OMP60 -#pragma omp parallel if (C) num_threads(strict: s) proc_bind(close) reduction(^:e, f, arr[0:C][:argc]) reduction(default, && : g) reduction(task,+:argc) +#pragma omp parallel if (C) num_threads(strict: s) proc_bind(close) reduction(^:e, f, arr[0:C][:argc]) reduction(default, && : g) reduction(task,+:argc) message("msg") severity(warning) foo(); #endif return 0; @@ -196,11 +196,11 @@ T tmain(T argc, T *argv) { // CHECK-NEXT: foo() // OMP51-NEXT: #pragma omp parallel default(none) private(argc,b) firstprivate(argv) shared(d) if(parallel: argc > 0) num_threads(C) copyin(S::TS,thrp) proc_bind(primary) reduction(+: c,arr1[argc]) reduction(max: e,arr[:C][0:10]) // OMP51-NEXT: foo() -// OMP60-NEXT: #pragma omp parallel default(none) 
private(argc,b) firstprivate(argv) shared(d) if(parallel: argc > 0) num_threads(strict: C) copyin(S::TS,thrp) proc_bind(primary) reduction(+: c,arr1[argc]) reduction(max: e,arr[:C][0:10]) +// OMP60-NEXT: #pragma omp parallel default(none) private(argc,b) firstprivate(argv) shared(d) if(parallel: argc > 0) num_threads(strict: C) copyin(S::TS,thrp) proc_bind(primary) reduction(+: c,arr1[argc]) reduction(max: e,arr[:C][0:10]) message("msg") severity(fatal) // OMP60-NEXT: foo() // CHECK-NEXT: #pragma omp parallel if(C) num_threads(s) proc_bind(close) reduction(^: e,f,arr[0:C][:argc]) reduction(default, &&: g) reduction(task, +: argc) // CHECK-NEXT: foo() -// OMP60-NEXT: #pragma omp parallel if(C) num_threads(strict: s) proc_bind(close) reduction(^: e,f,arr[0:C][:argc]) reduction(default, &&: g) reduction(task, +: argc) +// OMP60-NEXT: #pragma omp parallel if(C) num_threads(strict: s) proc_bind(close) reduction(^: e,f,arr[0:C][:argc]) reduction(default, &&: g) reduction(task, +: argc) message("msg") severity(warning) // OMP60-NEXT: foo() // CHECK: template<> int tmain(int argc, int *argv) { // CHECK-NEXT: int b = argc, c, d, e, f, g; @@ -213,11 +213,11 @@ T tmain(T argc, T *argv) { // CHECK-NEXT: foo() // OMP51-NEXT: #pragma omp parallel default(none) private(argc,b) firstprivate(argv) shared(d) if(parallel: argc > 0) num_threads(5) copyin(S::TS,thrp) proc_bind(primary) reduction(+: c,arr1[argc]) reduction(max: e,arr[:5][0:10]) // OMP51-NEXT: foo() -// OMP60-NEXT: #pragma omp parallel default(none) private(argc,b) firstprivate(argv) shared(d) if(parallel: argc > 0) num_threads(strict: 5) copyin(S::TS,thrp) proc_bind(primary) reduction(+: c,arr1[argc]) reduction(max: e,arr[:5][0:10]) +// OMP60-NEXT: #pragma omp parallel default(none) private(argc,b) firstprivate(argv) shared(d) if(parallel: argc > 0) num_threads(strict: 5) copyin(S::TS,thrp) proc_bind(primary) reduction(+: c,arr1[argc]) reduction(max: e,arr[:5][0:10]) message("msg") severity(fatal) // OMP60-NEXT: foo() 
// CHECK-NEXT: #pragma omp parallel if(5) num_threads(s) proc_bind(close) reduction(^: e,f,arr[0:5][:argc]) reduction(default, &&: g) reduction(task, +: argc) // CHECK-NEXT: foo() -// OMP60-NEXT: #pragma omp parallel if(5) num_threads(strict: s) proc_bind(close) reduction(^: e,f,arr[0:5][:argc]) reduction(default, &&: g) reduction(task, +: argc) +// OMP60-NEXT: #pragma omp parallel if(5) num_threads(strict: s) proc_bind(close) reduction(^: e,f,arr[0:5][:argc]) reduction(default, &&: g) reduction(task, +: argc) message("msg") severity(warning) // OMP60-NEXT: foo() // CHECK: template<> long tmain(long argc, long *argv) { // CHECK-NEXT: long b = argc, c, d, e, f, g; @@ -230,11 +230,11 @@ T tmain(T argc, T *argv) { // CHECK-NEXT: foo() // OMP51-NEXT: #pragma omp parallel default(none) private(argc,b) firstprivate(argv) shared(d) if(parallel: argc > 0) num_threads(1) copyin(S::TS,thrp) proc_bind(primary) reduction(+: c,arr1[argc]) reduction(max: e,arr[:1][0:10]) // OMP51-NEXT: foo() -// OMP60-NEXT: #pragma omp parallel default(none) private(argc,b) firstprivate(argv) shared(d) if(parallel: argc > 0) num_threads(strict: 1) copyin(S::TS,thrp) proc_bind(primary) reduction(+: c,arr1[argc]) reduction(max: e,arr[:1][0:10]) +// OMP60-NEXT: #pragma omp parallel default(none) private(argc,b) firstprivate(argv) shared(d) if(parallel: argc > 0) num_threads(strict: 1) copyin(S::TS,thrp) proc_bind(primary) reduction(+: c,arr1[argc]) reduction(max: e,arr[:1][0:10]) message("msg") severity(fatal) // OMP60-NEXT: foo() // CHECK-NEXT: #pragma omp parallel if(1) num_threads(s) proc_bind(close) reduction(^: e,f,arr[0:1][:argc]) reduction(default, &&: g) reduction(task, +: argc) // CHECK-NEXT: foo() -// OMP60-NEXT: #pragma omp parallel if(1) num_threads(strict: s) proc_bind(close) reduction(^: e,f,arr[0:1][:argc]) reduction(default, &&: g) reduction(task, +: argc) +// OMP60-NEXT: #pragma omp parallel if(1) num_threads(strict: s) proc_bind(close) reduction(^: e,f,arr[0:1][:argc]) 
reduction(default, &&: g) reduction(task, +: argc) message("msg") severity(warning) // OMP60-NEXT: foo() enum Enum { }; @@ -256,8 +256,8 @@ int main (int argc, char **argv) { foo(); // CHECK-NEXT: foo(); #ifdef OMP60 -#pragma omp parallel default(none), private(argc,b) firstprivate(argv) if (parallel: argc > 0) num_threads(strict: ee) copyin(a) proc_bind(spread) reduction(| : c, d, arr1[argc]) reduction(* : e, arr[:10][0:argc]) allocate(e) -// OMP60-NEXT: #pragma omp parallel default(none) private(argc,b) firstprivate(argv) if(parallel: argc > 0) num_threads(strict: ee) copyin(a) proc_bind(spread) reduction(|: c,d,arr1[argc]) reduction(*: e,arr[:10][0:argc]) allocate(e) +#pragma omp parallel default(none), private(argc,b) firstprivate(argv) if (parallel: argc > 0) num_threads(strict: ee) copyin(a) proc_bind(spread) reduction(| : c, d, arr1[argc]) reduction(* : e, arr[:10][0:argc]) allocate(e) message("msg") severity(fatal) +// OMP60-NEXT: #pragma omp parallel default(none) private(argc,b) firstprivate(argv) if(parallel: argc > 0) num_threads(strict: ee) copyin(a) proc_bind(spread) reduction(|: c,d,arr1[argc]) reduction(*: e,arr[:10][0:argc]) allocate(e) message("msg") severity(fatal) foo(); // OMP60-NEXT: foo(); #endif @@ -266,8 +266,8 @@ int main (int argc, char **argv) { foo(); // CHECK-NEXT: foo() #ifdef OMP60 -#pragma omp parallel allocate(e) if (b) num_threads(strict: c) proc_bind(close) reduction(^:e, f) reduction(&& : g, arr[0:argc][:10]) -// OMP60-NEXT: #pragma omp parallel allocate(e) if(b) num_threads(strict: c) proc_bind(close) reduction(^: e,f) reduction(&&: g,arr[0:argc][:10]) +#pragma omp parallel allocate(e) if (b) num_threads(strict: c) proc_bind(close) reduction(^:e, f) reduction(&& : g, arr[0:argc][:10]) message("msg") severity(warning) +// OMP60-NEXT: #pragma omp parallel allocate(e) if(b) num_threads(strict: c) proc_bind(close) reduction(^: e,f) reduction(&&: g,arr[0:argc][:10]) message("msg") severity(warning) foo(); // OMP60-NEXT: foo() #endif 
diff --git a/clang/test/OpenMP/parallel_message_messages.cpp b/clang/test/OpenMP/parallel_message_messages.cpp new file mode 100644 index 0000000000000..470fadc032280 --- /dev/null +++ b/clang/test/OpenMP/parallel_message_messages.cpp @@ -0,0 +1,89 @@ +// RUN: %clang_cc1 -verify=expected -fopenmp -fopenmp-version=60 -ferror-limit 100 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected -fopenmp-simd -fopenmp-version=60 -ferror-limit 100 %s -Wuninitialized + +void foo() {} + +template +T tmain(T argc, S **argv) { + // Correct usage + #pragma omp parallel message("correct message") + + // Missing parentheses + #pragma omp parallel message // expected-error {{expected '(' after 'message'}} + + // Empty parentheses + #pragma omp parallel message() // expected-error {{expected expression}} + + // Non-string literal + #pragma omp parallel message(123) // expected-warning {{expected string literal in 'clause message' - ignoring}} + #pragma omp parallel message(argc) // expected-warning {{expected string literal in 'clause message' - ignoring}} + #pragma omp parallel message(argv[0]) // expected-warning {{expected string literal in 'clause message' - ignoring}} + + // Multiple arguments + #pragma omp parallel message("msg1", "msg2") // expected-error {{expected ')'}} expected-note {{to match this '('}} + + // Unterminated string + // expected-error@+1 {{expected expression}} expected-error@+1 {{expected ')'}} expected-warning@+1 {{missing terminating '"' character}} expected-note@+1 {{to match this '('}} + #pragma omp parallel message("unterminated + + // Unterminated clause + // expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}} + #pragma omp parallel message("msg" + + // Extra tokens after clause + #pragma omp parallel message("msg") extra // expected-warning {{extra tokens at the end of '#pragma omp parallel' are ignored}} + + // Multiple message clauses + #pragma omp parallel message("msg1") message("msg2") // expected-error {{directive 
'#pragma omp parallel' cannot contain more than one 'message' clause}} + + // Message clause with other clauses (should be valid, but test for interaction) + #pragma omp parallel message("msg") num_threads(2) + + // Message clause with invalid clause + #pragma omp parallel message("msg") invalid_clause // expected-warning {{extra tokens at the end of '#pragma omp parallel' are ignored}} + + // Message clause with missing string and other clause + #pragma omp parallel message() num_threads(2) // expected-error {{expected expression}} + + // Message clause with macro that is not a string + #define NOT_A_STRING 123 + #pragma omp parallel message(NOT_A_STRING) // expected-warning {{expected string literal in 'clause message' - ignoring}} + + // Message clause with template parameter that is not a string + #pragma omp parallel message(N) // expected-warning {{expected string literal in 'clause message' - ignoring}} + + // Message clause with macro that is a string + #define A_STRING "macro string" + #pragma omp parallel message(A_STRING) + + // Message clause with concatenated string literals + #pragma omp parallel message("hello" " world") + + // Message clause with wide string literal + #pragma omp parallel message(L"wide string") + + // Message clause with UTF-8 string literal + #pragma omp parallel message(u8"utf8 string") + + // Message clause with raw string literal + #pragma omp parallel message(R"(raw string)") + + foo(); + + return argc; +} + +int main(int argc, char **argv) { + // Correct usage + #pragma omp parallel message("main correct") + + // Invalid: missing string + #pragma omp parallel message() // expected-error {{expression}} + + // Invalid: non-string + #pragma omp parallel message(argc) // expected-warning {{expected string literal in 'clause message' - ignoring}} + + foo(); + + return tmain(argc, argv); +} diff --git a/clang/test/OpenMP/parallel_severity_messages.cpp b/clang/test/OpenMP/parallel_severity_messages.cpp new file mode 100644 index 
0000000000000..b1cff762d9bd8 --- /dev/null +++ b/clang/test/OpenMP/parallel_severity_messages.cpp @@ -0,0 +1,70 @@ +// RUN: %clang_cc1 -verify=expected -fopenmp -fopenmp-version=60 -ferror-limit 100 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected -fopenmp-simd -fopenmp-version=60 -ferror-limit 100 %s -Wuninitialized + +void foo() {} + +template +T tmain(T argc, S **argv) { + // Correct usages + #pragma omp parallel severity(fatal) + #pragma omp parallel severity(warning) + + // Missing parentheses + #pragma omp parallel severity // expected-error {{expected '(' after 'severity'}} + + // Empty parentheses + #pragma omp parallel severity() // expected-error {{expected 'fatal' or 'warning' in OpenMP clause 'severity'}} + + // Invalid value + #pragma omp parallel severity(error) // expected-error {{expected 'fatal' or 'warning' in OpenMP clause 'severity'}} + #pragma omp parallel severity(unknown) // expected-error {{expected 'fatal' or 'warning' in OpenMP clause 'severity'}} + + // Multiple arguments + #pragma omp parallel severity(fatal, warning) // expected-error {{expected ')'}} expected-note {{to match this '('}} + + // Unterminated clause + // expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}} + #pragma omp parallel severity(fatal + + // Extra tokens after clause + #pragma omp parallel severity(fatal) extra // expected-warning {{extra tokens at the end of '#pragma omp parallel' are ignored}} + + // Multiple severity clauses + #pragma omp parallel severity(fatal) severity(warning) // expected-error {{directive '#pragma omp parallel' cannot contain more than one 'severity' clause}} + + // Severity clause with other clauses (should be valid) + #pragma omp parallel severity(warning) num_threads(2) + + // Severity clause with invalid clause + #pragma omp parallel severity(fatal) invalid_clause // expected-warning {{extra tokens at the end of '#pragma omp parallel' are ignored}} + + // Severity clause with macro that is not a valid value 
+ #define NOT_A_SEVERITY 123 + #pragma omp parallel severity(NOT_A_SEVERITY) // expected-error {{expected 'fatal' or 'warning' in OpenMP clause 'severity'}} + + // Severity clause with macro that is a valid value + #define FATAL fatal + #pragma omp parallel severity(FATAL) + + // Severity clause with template parameter that is not a valid value + #pragma omp parallel severity(N) // expected-error {{expected 'fatal' or 'warning' in OpenMP clause 'severity'}} + + foo(); + + return argc; +} + +int main(int argc, char **argv) { + // Correct usage + #pragma omp parallel severity(fatal) + + // Invalid: missing value + #pragma omp parallel severity() // expected-error {{expected 'fatal' or 'warning' in OpenMP clause 'severity'}} + + // Invalid: non-keyword + #pragma omp parallel severity(argc) // expected-error {{expected 'fatal' or 'warning' in OpenMP clause 'severity'}} + + foo(); + + return tmain(argc, argv); +} diff --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c index 86d51820ae5b5..e82d825704439 100644 --- a/clang/test/Preprocessor/predefined-arch-macros.c +++ b/clang/test/Preprocessor/predefined-arch-macros.c @@ -2556,25 +2556,25 @@ // RUN: %clang -march=sierraforest -m32 -E -dM %s -o - 2>&1 \ // RUN: --target=i386 \ -// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M32,CHECK_SRF_M32 +// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M32,CHECK_SRF_M32,CHECK_KL_M32 // RUN: %clang -march=grandridge -m32 -E -dM %s -o - 2>&1 \ // RUN: --target=i386 \ -// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M32,CHECK_SRF_M32 +// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M32,CHECK_SRF_M32,CHECK_KL_M32 // RUN: %clang -march=arrowlake -m32 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ -// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ARL_M32 +// RUN: | FileCheck -match-full-lines %s 
-check-prefixes=CHECK_ARL_M32,CHECK_KL_M32 // RUN: %clang -march=arrowlake-s -m32 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ -// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M32,CHECK_ARLS_M32 +// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M32,CHECK_ARLS_M32,CHECK_KL_M32 // RUN: %clang -march=lunarlake -m32 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ -// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M32,CHECK_ARLS_M32 +// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M32,CHECK_ARLS_M32,CHECK_KL_M32 // RUN: %clang -march=pantherlake -m32 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ -// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M32,CHECK_ARLS_M32,CHECK_PTL_M32 +// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M32,CHECK_ARLS_M32,CHECK_PTL_M32,CHECK_NKL_M32 // RUN: %clang -march=clearwaterforest -m32 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ -// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_SRF_M32,CHECK_ARLS_M32,CHECK_PTL_M32,CHECK_CWF_M32 +// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_SRF_M32,CHECK_ARLS_M32,CHECK_PTL_M32,CHECK_CWF_M32,CHECK_NKL_M32 // CHECK_ARL_M32: #define __ADX__ 1 // CHECK_ARL_M32: #define __AES__ 1 // CHECK_ARL_M32: #define __AVX2__ 1 @@ -2601,7 +2601,8 @@ // CHECK_ARL_M32: #define __GFNI__ 1 // CHECK_ARL_M32: #define __HRESET__ 1 // CHECK_ARL_M32: #define __INVPCID__ 1 -// CHECK_ARL_M32: #define __KL__ 1 +// CHECK_KL_M32: #define __KL__ 1 +// CHECK_NKL_M32-NOT: __KL__ // CHECK_ARL_M32: #define __LZCNT__ 1 // CHECK_ARL_M32: #define __MMX__ 1 // CHECK_ARL_M32: #define __MOVBE__ 1 @@ -2645,7 +2646,8 @@ // CHECK_ARL_M32: #define __VAES__ 1 // CHECK_ARL_M32: #define __VPCLMULQDQ__ 1 // CHECK_ARL_M32: #define __WAITPKG__ 1 -// CHECK_ARL_M32: #define __WIDEKL__ 1 +// CHECK_KL_M32: #define __WIDEKL__ 1 +// CHECK_NKL_M32-NOT: __WIDEKL__ // 
CHECK_ARL_M32: #define __XSAVEC__ 1 // CHECK_ARL_M32: #define __XSAVEOPT__ 1 // CHECK_ARL_M32: #define __XSAVES__ 1 @@ -2659,25 +2661,25 @@ // RUN: %clang -march=sierraforest -m64 -E -dM %s -o - 2>&1 \ // RUN: --target=i386 \ -// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M64,CHECK_SRF_M64 +// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M64,CHECK_SRF_M64,CHECK_KL_M64 // RUN: %clang -march=grandridge -m64 -E -dM %s -o - 2>&1 \ // RUN: --target=i386 \ -// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M64,CHECK_SRF_M64 +// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M64,CHECK_SRF_M64,CHECK_KL_M64 // RUN: %clang -march=arrowlake -m64 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ -// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ARL_M64 +// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M64,CHECK_KL_M64 // RUN: %clang -march=arrowlake-s -m64 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ -// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M64,CHECK_ARLS_M64 +// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M64,CHECK_ARLS_M64,CHECK_KL_M64 // RUN: %clang -march=lunarlake -m64 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ -// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M64,CHECK_ARLS_M64 +// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M64,CHECK_ARLS_M64,CHECK_KL_M64 // RUN: %clang -march=pantherlake -m64 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ -// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M64,CHECK_ARLS_M64,CHECK_PTL_M64 +// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M64,CHECK_ARLS_M64,CHECK_PTL_M64,CHECK_NKL_M64 // RUN: %clang -march=clearwaterforest -m64 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ -// RUN: | FileCheck -match-full-lines %s 
-check-prefixes=CHECK_ARL_M64,CHECK_SRF_M64,CHECK_ARLS_M64,CHECK_PTL_M64,CHECK_CWF_M64 +// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M64,CHECK_SRF_M64,CHECK_ARLS_M64,CHECK_PTL_M64,CHECK_CWF_M64,CHECK_NKL_M64 // CHECK_ARL_M64: #define __ADX__ 1 // CHECK_ARL_M64: #define __AES__ 1 // CHECK_ARL_M64: #define __AVX2__ 1 @@ -2704,7 +2706,8 @@ // CHECK_ARL_M64: #define __GFNI__ 1 // CHECK_ARL_M64: #define __HRESET__ 1 // CHECK_ARL_M64: #define __INVPCID__ 1 -// CHECK_ARL_M64: #define __KL__ 1 +// CHECK_KL_M64: #define __KL__ 1 +// CHECK_NKL_M64-NOT: __KL__ // CHECK_ARL_M64: #define __LZCNT__ 1 // CHECK_ARL_M64: #define __MMX__ 1 // CHECK_ARL_M64: #define __MOVBE__ 1 @@ -2749,7 +2752,8 @@ // CHECK_ARL_M64: #define __VAES__ 1 // CHECK_ARL_M64: #define __VPCLMULQDQ__ 1 // CHECK_ARL_M64: #define __WAITPKG__ 1 -// CHECK_ARL_M64: #define __WIDEKL__ 1 +// CHECK_KL_M64: #define __WIDEKL__ 1 +// CHECK_NKL_M64-NOT: __WIDEKL__ // CHECK_ARL_M64: #define __XSAVEC__ 1 // CHECK_ARL_M64: #define __XSAVEOPT__ 1 // CHECK_ARL_M64: #define __XSAVES__ 1 diff --git a/clang/test/Sema/aarch64-sve-intrinsics/acle_sve_bf16.cpp b/clang/test/Sema/aarch64-sve-intrinsics/acle_sve_bf16.cpp new file mode 100644 index 0000000000000..e7d1aa045edf9 --- /dev/null +++ b/clang/test/Sema/aarch64-sve-intrinsics/acle_sve_bf16.cpp @@ -0,0 +1,71 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -verify -verify-ignore-unexpected=error,note -emit-llvm -o - %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -verify -verify-ignore-unexpected=error,note -emit-llvm -o - %s +// REQUIRES: aarch64-registered-target + +#include + +#if defined __ARM_FEATURE_SME +#define MODE_ATTR __arm_streaming +#else +#define MODE_ATTR +#endif + +__attribute__((target("bf16"))) +void test_bf16(svbool_t pg, svfloat32_t svf32, svbfloat16_t svbf16, bfloat16_t bf16) MODE_ATTR +{ + svbfdot_f32(svf32, svbf16, svbf16); + svbfdot_n_f32(svf32, svbf16, bf16); + 
svbfdot_lane_f32(svf32, svbf16, svbf16, 0); + + svbfmlalb_f32(svf32, svbf16, svbf16); + svbfmlalb_n_f32(svf32, svbf16, bf16); + svbfmlalb_lane_f32(svf32, svbf16, svbf16, 0); + + svbfmlalt_f32(svf32, svbf16, svbf16); + svbfmlalt_n_f32(svf32, svbf16, bf16); + svbfmlalt_lane_f32(svf32, svbf16, svbf16, 0); + + svcvt_bf16_f32_m(svbf16, pg, svf32); + svcvt_bf16_f32_x(pg, svf32); + svcvt_bf16_f32_z(pg, svf32); + + svcvtnt_bf16_f32_m(svbf16, pg, svf32); + svcvtnt_bf16_f32_x(svbf16, pg, svf32); +} + +void test_no_bf16(svbool_t pg, svfloat32_t svf32, svbfloat16_t svbf16, bfloat16_t bf16) MODE_ATTR +{ + // expected-error@+1 {{'svbfdot_f32' needs target feature (sve,bf16)|(sme,bf16)}} + svbfdot_f32(svf32, svbf16, svbf16); + // expected-error@+1 {{'svbfdot_n_f32' needs target feature (sve,bf16)|(sme,bf16)}} + svbfdot_n_f32(svf32, svbf16, bf16); + // expected-error@+1 {{'svbfdot_lane_f32' needs target feature (sve,bf16)|(sme,bf16)}} + svbfdot_lane_f32(svf32, svbf16, svbf16, 0); + + // expected-error@+1 {{'svbfmlalb_f32' needs target feature (sve,bf16)|(sme,bf16)}} + svbfmlalb_f32(svf32, svbf16, svbf16); + // expected-error@+1 {{'svbfmlalb_n_f32' needs target feature (sve,bf16)|(sme,bf16)}} + svbfmlalb_n_f32(svf32, svbf16, bf16); + // expected-error@+1 {{'svbfmlalb_lane_f32' needs target feature (sve,bf16)|(sme,bf16)}} + svbfmlalb_lane_f32(svf32, svbf16, svbf16, 0); + + // expected-error@+1 {{'svbfmlalt_f32' needs target feature (sve,bf16)|(sme,bf16)}} + svbfmlalt_f32(svf32, svbf16, svbf16); + // expected-error@+1 {{'svbfmlalt_n_f32' needs target feature (sve,bf16)|(sme,bf16)}} + svbfmlalt_n_f32(svf32, svbf16, bf16); + // expected-error@+1 {{'svbfmlalt_lane_f32' needs target feature (sve,bf16)|(sme,bf16)}} + svbfmlalt_lane_f32(svf32, svbf16, svbf16, 0); + + // expected-error@+1 {{'svcvt_bf16_f32_m' needs target feature (sve,bf16)|(sme,bf16)}} + svcvt_bf16_f32_m(svbf16, pg, svf32); + // expected-error@+1 {{'svcvt_bf16_f32_x' needs target feature (sve,bf16)|(sme,bf16)}} + 
svcvt_bf16_f32_x(pg, svf32); + // expected-error@+1 {{'svcvt_bf16_f32_z' needs target feature (sve,bf16)|(sme,bf16)}} + svcvt_bf16_f32_z(pg, svf32); + + // expected-error@+1 {{'svcvtnt_bf16_f32_m' needs target feature (sve,bf16)|(sme,bf16)}} + svcvtnt_bf16_f32_m(svbf16, pg, svf32); + // NOTE: svcvtnt_bf16_f32_x is a macro that expands to svcvtnt_bf16_f32_m. + // expected-error@+1 {{'svcvtnt_bf16_f32_m' needs target feature (sve,bf16)|(sme,bf16)}} + svcvtnt_bf16_f32_x(svbf16, pg, svf32); +} diff --git a/clang/test/Sema/aarch64-sve-intrinsics/acle_sve_bf16_non_streaming_only.cpp b/clang/test/Sema/aarch64-sve-intrinsics/acle_sve_bf16_non_streaming_only.cpp new file mode 100644 index 0000000000000..1960130fa2145 --- /dev/null +++ b/clang/test/Sema/aarch64-sve-intrinsics/acle_sve_bf16_non_streaming_only.cpp @@ -0,0 +1,23 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -verify -verify-ignore-unexpected=error,note -emit-llvm -o - %s +// REQUIRES: aarch64-registered-target + +#include + +__attribute__((target("bf16"))) +void test_bf16(svfloat32_t svf32, svbfloat16_t svbf16) +{ + svbfmmla_f32(svf32, svbf16, svbf16); +} + +void test_no_bf16(svfloat32_t svf32, svbfloat16_t svbf16) +{ + // expected-error@+1 {{'svbfmmla_f32' needs target feature sve,bf16}} + svbfmmla_f32(svf32, svbf16, svbf16); +} + +__attribute__((target("sme,bf16"))) +void test_bf16_streaming(svfloat32_t svf32, svbfloat16_t svbf16) __arm_streaming +{ + // expected-error@+1 {{builtin can only be called from a non-streaming function}} + svbfmmla_f32(svf32, svbf16, svbf16); +} diff --git a/clang/test/Sema/aarch64-sve-intrinsics/acle_sve_bfloat.cpp b/clang/test/Sema/aarch64-sve-intrinsics/acle_sve_bfloat.cpp deleted file mode 100644 index fcdd0516ed5a9..0000000000000 --- a/clang/test/Sema/aarch64-sve-intrinsics/acle_sve_bfloat.cpp +++ /dev/null @@ -1,87 +0,0 @@ -// REQUIRES: aarch64-registered-target - -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -verify 
-verify-ignore-unexpected=error,note -emit-llvm -o - %s - -#include - -void test_bfloat(svbool_t pg, uint64_t u64, int64_t i64, const bfloat16_t *const_bf16_ptr, bfloat16_t *bf16_ptr, svbfloat16_t bf16, svbfloat16x2_t bf16x2, svbfloat16x3_t bf16x3, svbfloat16x4_t bf16x4) -{ - // expected-error@+1 {{'svcreate2_bf16' needs target feature (sve,bf16)|(sme,bf16)}} - svcreate2_bf16(bf16, bf16); - // expected-error@+1 {{'svcreate3_bf16' needs target feature (sve,bf16)|(sme,bf16)}} - svcreate3_bf16(bf16, bf16, bf16); - // expected-error@+1 {{'svcreate4_bf16' needs target feature (sve,bf16)|(sme,bf16)}} - svcreate4_bf16(bf16, bf16, bf16, bf16); - // expected-error@+1 {{'svget2_bf16' needs target feature (sve,bf16)|(sme,bf16)}} - svget2_bf16(bf16x2, 1); - // expected-error@+1 {{'svget3_bf16' needs target feature (sve,bf16)|(sme,bf16)}} - svget3_bf16(bf16x3, 1); - // expected-error@+1 {{'svget4_bf16' needs target feature (sve,bf16)|(sme,bf16)}} - svget4_bf16(bf16x4, 1); - // expected-error@+1 {{'svld1_bf16' needs target feature (sve,bf16)|(sme,bf16)}} - svld1_bf16(pg, const_bf16_ptr); - // expected-error@+1 {{'svld1_vnum_bf16' needs target feature (sve,bf16)|(sme,bf16)}} - svld1_vnum_bf16(pg, const_bf16_ptr, i64); - // expected-error@+1 {{'svld1rq_bf16' needs target feature (sve,bf16)|(sme,bf16)}} - svld1rq_bf16(pg, const_bf16_ptr); - // expected-error@+1 {{'svldff1_bf16' needs target feature sve,bf16}} - svldff1_bf16(pg, const_bf16_ptr); - // expected-error@+1 {{'svldff1_vnum_bf16' needs target feature sve,bf16}} - svldff1_vnum_bf16(pg, const_bf16_ptr, i64); - // expected-error@+1 {{'svldnf1_bf16' needs target feature sve,bf16}} - svldnf1_bf16(pg, const_bf16_ptr); - // expected-error@+1 {{'svldnf1_vnum_bf16' needs target feature sve,bf16}} - svldnf1_vnum_bf16(pg, const_bf16_ptr, i64); - // expected-error@+1 {{'svldnt1_bf16' needs target feature (sve,bf16)|(sme,bf16)}} - svldnt1_bf16(pg, const_bf16_ptr); - // expected-error@+1 {{'svldnt1_vnum_bf16' needs target feature 
(sve,bf16)|(sme,bf16)}} - svldnt1_vnum_bf16(pg, const_bf16_ptr, i64); - // expected-error@+1 {{'svrev_bf16' needs target feature (sve,bf16)|(sme,bf16)}} - svrev_bf16(bf16); - // expected-error@+1 {{'svset2_bf16' needs target feature (sve,bf16)|(sme,bf16)}} - svset2_bf16(bf16x2, 1, bf16); - // expected-error@+1 {{'svset3_bf16' needs target feature (sve,bf16)|(sme,bf16)}} - svset3_bf16(bf16x3, 1, bf16); - // expected-error@+1 {{'svset4_bf16' needs target feature (sve,bf16)|(sme,bf16)}} - svset4_bf16(bf16x4, 1, bf16); - // expected-error@+1 {{'svst1_bf16' needs target feature (sve,bf16)|(sme,bf16)}} - svst1_bf16(pg, bf16_ptr, bf16); - // expected-error@+1 {{'svst1_vnum_bf16' needs target feature (sve,bf16)|(sme,bf16)}} - svst1_vnum_bf16(pg, bf16_ptr, i64, bf16); - // expected-error@+1 {{'svstnt1_bf16' needs target feature (sve,bf16)|(sme,bf16)}} - svstnt1_bf16(pg, bf16_ptr, bf16); - // expected-error@+1 {{'svstnt1_vnum_bf16' needs target feature (sve,bf16)|(sme,bf16)}} - svstnt1_vnum_bf16(pg, bf16_ptr, i64, bf16); - // expected-error@+1 {{'svtrn1_bf16' needs target feature (sve,bf16)|(sme,bf16)}} - svtrn1_bf16(bf16, bf16); - // expected-error@+1 {{'svtrn1q_bf16' needs target feature sve,bf16}} - svtrn1q_bf16(bf16, bf16); - // expected-error@+1 {{'svtrn2_bf16' needs target feature (sve,bf16)|(sme,bf16)}} - svtrn2_bf16(bf16, bf16); - // expected-error@+1 {{'svtrn2q_bf16' needs target feature sve,bf16}} - svtrn2q_bf16(bf16, bf16); - // expected-error@+1 {{'svundef_bf16' needs target feature (sve,bf16)|(sme,bf16)}} - svundef_bf16(); - // expected-error@+1 {{'svundef2_bf16' needs target feature (sve,bf16)|(sme,bf16)}} - svundef2_bf16(); - // expected-error@+1 {{'svundef3_bf16' needs target feature (sve,bf16)|(sme,bf16)}} - svundef3_bf16(); - // expected-error@+1 {{'svundef4_bf16' needs target feature (sve,bf16)|(sme,bf16)}} - svundef4_bf16(); - // expected-error@+1 {{'svuzp1_bf16' needs target feature (sve,bf16)|(sme,bf16)}} - svuzp1_bf16(bf16, bf16); - // 
expected-error@+1 {{'svuzp1q_bf16' needs target feature sve,bf16}} - svuzp1q_bf16(bf16, bf16); - // expected-error@+1 {{'svuzp2_bf16' needs target feature (sve,bf16)|(sme,bf16)}} - svuzp2_bf16(bf16, bf16); - // expected-error@+1 {{'svuzp2q_bf16' needs target feature sve,bf16}} - svuzp2q_bf16(bf16, bf16); - // expected-error@+1 {{'svzip1_bf16' needs target feature (sve,bf16)|(sme,bf16)}} - svzip1_bf16(bf16, bf16); - // expected-error@+1 {{'svzip1q_bf16' needs target feature sve,bf16}} - svzip1q_bf16(bf16, bf16); - // expected-error@+1 {{'svzip2_bf16' needs target feature (sve,bf16)|(sme,bf16)}} - svzip2_bf16(bf16, bf16); - // expected-error@+1 {{'svzip2q_bf16' needs target feature sve,bf16}} - svzip2q_bf16(bf16, bf16); -} diff --git a/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_bfloat.cpp b/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_bfloat.cpp deleted file mode 100644 index 4a2f8238caf0e..0000000000000 --- a/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_bfloat.cpp +++ /dev/null @@ -1,29 +0,0 @@ -// REQUIRES: aarch64-registered-target - -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -verify -verify-ignore-unexpected=error,note -emit-llvm -o - %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -verify=overload -verify-ignore-unexpected=error,note -emit-llvm -o - %s - -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. 
-#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 -#else -#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 -#endif - -#include - -void test_bfloat(const bfloat16_t *const_bf16_ptr, svbfloat16_t bf16, svbfloat16x2_t bf16x2) -{ - // expected-error@+2 {{'svwhilerw_bf16' needs target feature (sve,sve2,bf16)|(sme,bf16)}} - // overload-error@+1 {{'svwhilerw' needs target feature (sve,sve2,bf16)|(sme,bf16)}} - SVE_ACLE_FUNC(svwhilerw,_bf16,,)(const_bf16_ptr, const_bf16_ptr); - // expected-error@+2 {{'svtbx_bf16' needs target feature (sve,sve2,bf16)|(sme,bf16)}} - // overload-error@+1 {{'svtbx' needs target feature (sve,sve2,bf16)|(sme,bf16)}} - SVE_ACLE_FUNC(svtbx,_bf16,,)(bf16, bf16, svundef_u16()); - // expected-error@+2 {{'svtbl2_bf16' needs target feature (sve,sve2,bf16)|(sme,bf16)}} - // overload-error@+1 {{'svtbl2' needs target feature (sve,sve2,bf16)|(sme,bf16)}} - SVE_ACLE_FUNC(svtbl2,_bf16,,)(bf16x2, svundef_u16()); - // expected-error@+2 {{'svwhilewr_bf16' needs target feature (sve,sve2,bf16)|(sme,bf16)}} - // overload-error@+1 {{'svwhilewr' needs target feature (sve,sve2,bf16)|(sme,bf16)}} - SVE_ACLE_FUNC(svwhilewr,_bf16,,)(const_bf16_ptr, const_bf16_ptr); -} diff --git a/clang/test/Sema/constant_builtins_vector.cpp b/clang/test/Sema/constant-builtins-vector.cpp similarity index 99% rename from clang/test/Sema/constant_builtins_vector.cpp rename to clang/test/Sema/constant-builtins-vector.cpp index f26dfb25d49b9..bde5c478b2b6f 100644 --- a/clang/test/Sema/constant_builtins_vector.cpp +++ b/clang/test/Sema/constant-builtins-vector.cpp @@ -27,6 +27,7 @@ typedef unsigned long long vector4ulong __attribute__((__vector_size__(32))); typedef unsigned int vector4uint __attribute__((__vector_size__(16))); typedef short vector4short __attribute__((__vector_size__(8))); typedef char vector4char __attribute__((__vector_size__(4))); +typedef unsigned char vector4uchar __attribute__((__vector_size__(4))); typedef BitInt8 vector4BitInt8 
__attribute__((__vector_size__(4))); typedef BitInt32 vector4BitInt32 __attribute__((__vector_size__(16))); typedef BitInt128 vector4BitInt128 __attribute__((__vector_size__(64))); @@ -848,6 +849,7 @@ static_assert(__builtin_elementwise_add_sat(~(1 << 31), 42) == ~(1 << 31)); static_assert(__builtin_elementwise_add_sat((1 << 31), -42) == (1 << 31)); static_assert(__builtin_elementwise_add_sat(~0U, 1U) == ~0U); static_assert(__builtin_bit_cast(unsigned, __builtin_elementwise_add_sat((vector4char){1, 2, 3, 4}, (vector4char){1, 2, 3, 4})) == (LITTLE_END ? 0x08060402 : 0x02040608)); +static_assert(__builtin_bit_cast(unsigned, __builtin_elementwise_add_sat((vector4uchar){1, 2, 3, 4}, (vector4uchar){0, 1, 2, 3})) == (LITTLE_END ? 0x07050301U : 0x01030507U)); static_assert(__builtin_bit_cast(unsigned long long, __builtin_elementwise_add_sat((vector4short){(short)0x8000, (short)0x8001, (short)0x8002, (short)0x8003}, (vector4short){-7, -8, -9, -10}) == (LITTLE_END ? 0x8000800080008000 : 0x8000800080008000))); static_assert(__builtin_elementwise_sub_sat(1, 2) == -1); @@ -856,4 +858,5 @@ static_assert(__builtin_elementwise_sub_sat(~(1 << 31), -42) == ~(1 << 31)); static_assert(__builtin_elementwise_sub_sat((1 << 31), 42) == (1 << 31)); static_assert(__builtin_elementwise_sub_sat(0U, 1U) == 0U); static_assert(__builtin_bit_cast(unsigned, __builtin_elementwise_sub_sat((vector4char){5, 4, 3, 2}, (vector4char){1, 1, 1, 1})) == (LITTLE_END ? 0x01020304 : 0x04030201)); +static_assert(__builtin_bit_cast(unsigned, __builtin_elementwise_sub_sat((vector4uchar){5, 4, 3, 2}, (vector4uchar){1, 1, 1, 1})) == (LITTLE_END ? 0x01020304U : 0x04030201U)); static_assert(__builtin_bit_cast(unsigned long long, __builtin_elementwise_sub_sat((vector4short){(short)0x8000, (short)0x8001, (short)0x8002, (short)0x8003}, (vector4short){7, 8, 9, 10}) == (LITTLE_END ? 
0x8000800080008000 : 0x8000800080008000))); diff --git a/clang/test/Sema/warn-lifetime-safety-dataflow.cpp b/clang/test/Sema/warn-lifetime-safety-dataflow.cpp new file mode 100644 index 0000000000000..64ecba8faac9e --- /dev/null +++ b/clang/test/Sema/warn-lifetime-safety-dataflow.cpp @@ -0,0 +1,192 @@ +// RUN: %clang_cc1 -mllvm -debug-only=LifetimeFacts -Wexperimental-lifetime-safety %s 2>&1 | FileCheck %s +// REQUIRES: asserts + +struct MyObj { + int id; + ~MyObj() {} // Non-trivial destructor +}; + +// Simple Local Variable Address and Return +// CHECK-LABEL: Function: return_local_addr +MyObj* return_local_addr() { + MyObj x {10}; + MyObj* p = &x; +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue (LoanID: [[L_X:[0-9]+]], OriginID: [[O_ADDR_X:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_ADDR_X]]) + return p; +// CHECK: AssignOrigin (DestID: [[O_RET_VAL:[0-9]+]], SrcID: [[O_P]]) +// CHECK: ReturnOfOrigin (OriginID: [[O_RET_VAL]]) +// CHECK: Expire (LoanID: [[L_X]]) +} + + +// Pointer Assignment and Return +// CHECK-LABEL: Function: assign_and_return_local_addr +// CHECK-NEXT: Block B{{[0-9]+}}: +MyObj* assign_and_return_local_addr() { + MyObj y{20}; + MyObj* ptr1 = &y; +// CHECK: Issue (LoanID: [[L_Y:[0-9]+]], OriginID: [[O_ADDR_Y:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_PTR1:[0-9]+]], SrcID: [[O_ADDR_Y]]) + MyObj* ptr2 = ptr1; +// CHECK: AssignOrigin (DestID: [[O_PTR1_RVAL:[0-9]+]], SrcID: [[O_PTR1]]) +// CHECK: AssignOrigin (DestID: [[O_PTR2:[0-9]+]], SrcID: [[O_PTR1_RVAL]]) + ptr2 = ptr1; +// CHECK: AssignOrigin (DestID: [[O_PTR1_RVAL_2:[0-9]+]], SrcID: [[O_PTR1]]) +// CHECK: AssignOrigin (DestID: [[O_PTR2]], SrcID: [[O_PTR1_RVAL_2]]) + ptr2 = ptr2; // Self assignment. 
+// CHECK: AssignOrigin (DestID: [[O_PTR2_RVAL:[0-9]+]], SrcID: [[O_PTR2]]) +// CHECK: AssignOrigin (DestID: [[O_PTR2]], SrcID: [[O_PTR2_RVAL]]) + return ptr2; +// CHECK: AssignOrigin (DestID: [[O_PTR2_RVAL_2:[0-9]+]], SrcID: [[O_PTR2]]) +// CHECK: ReturnOfOrigin (OriginID: [[O_PTR2_RVAL_2]]) +// CHECK: Expire (LoanID: [[L_Y]]) +} + + +// Return of Non-Pointer Type +// CHECK-LABEL: Function: return_int_val +// CHECK-NEXT: Block B{{[0-9]+}}: +int return_int_val() { + int x = 10; + return x; +} +// CHECK-NEXT: End of Block + + +// Loan Expiration (Automatic Variable, C++) +// CHECK-LABEL: Function: loan_expires_cpp +// CHECK-NEXT: Block B{{[0-9]+}}: +void loan_expires_cpp() { + MyObj obj{1}; + MyObj* pObj = &obj; +// CHECK: Issue (LoanID: [[L_OBJ:[0-9]+]], OriginID: [[O_ADDR_OBJ:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_POBJ:[0-9]+]], SrcID: [[O_ADDR_OBJ]]) +// CHECK: Expire (LoanID: [[L_OBJ]]) +} + + +// FIXME: No expire for Trivial Destructors +// CHECK-LABEL: Function: loan_expires_trivial +// CHECK-NEXT: Block B{{[0-9]+}}: +void loan_expires_trivial() { + int trivial_obj = 1; + int* pTrivialObj = &trivial_obj; +// CHECK: Issue (LoanID: [[L_TRIVIAL_OBJ:[0-9]+]], OriginID: [[O_ADDR_TRIVIAL_OBJ:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_PTOBJ:[0-9]+]], SrcID: [[O_ADDR_TRIVIAL_OBJ]]) +// CHECK-NOT: Expire (LoanID: [[L_TRIVIAL_OBJ]]) +// CHECK-NEXT: End of Block + // FIXME: Add check for Expire once trivial destructors are handled for expiration. 
+} + + +// CHECK-LABEL: Function: conditional +void conditional(bool condition) { + int a = 5; + int b = 10; + int* p = nullptr; + + if (condition) + p = &a; + // CHECK: Issue (LoanID: [[L_A:[0-9]+]], OriginID: [[O_ADDR_A:[0-9]+]]) + // CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_ADDR_A]]) + else + p = &b; + // CHECK: Issue (LoanID: [[L_B:[0-9]+]], OriginID: [[O_ADDR_B:[0-9]+]]) + // CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_B]]) + int *q = p; + // CHECK: AssignOrigin (DestID: [[O_P_RVAL:[0-9]+]], SrcID: [[O_P]]) + // CHECK: AssignOrigin (DestID: [[O_Q:[0-9]+]], SrcID: [[O_P_RVAL]]) +} + + +// CHECK-LABEL: Function: overwrite_origin +void overwrite_origin() { + MyObj s1; + MyObj s2; + MyObj* p = &s1; +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue (LoanID: [[L_S1:[0-9]+]], OriginID: [[O_ADDR_S1:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_ADDR_S1]]) + p = &s2; +// CHECK: Issue (LoanID: [[L_S2:[0-9]+]], OriginID: [[O_ADDR_S2:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_S2]]) +// CHECK: Expire (LoanID: [[L_S2]]) +// CHECK: Expire (LoanID: [[L_S1]]) +} + + +// CHECK-LABEL: Function: reassign_to_null +void reassign_to_null() { + MyObj s1; + MyObj* p = &s1; +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue (LoanID: [[L_S1:[0-9]+]], OriginID: [[O_ADDR_S1:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_ADDR_S1]]) + p = nullptr; +// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_NULLPTR:[0-9]+]]) +// CHECK: Expire (LoanID: [[L_S1]]) +} +// FIXME: Have a better representation for nullptr than just an empty origin. +// It should be a separate loan and origin kind. 
+ + +// CHECK-LABEL: Function: reassign_in_if +void reassign_in_if(bool condition) { + MyObj s1; + MyObj s2; + MyObj* p = &s1; +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue (LoanID: [[L_S1:[0-9]+]], OriginID: [[O_ADDR_S1:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_ADDR_S1]]) + if (condition) { + p = &s2; +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue (LoanID: [[L_S2:[0-9]+]], OriginID: [[O_ADDR_S2:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_S2]]) + } +// CHECK: Block B{{[0-9]+}}: +// CHECK: Expire (LoanID: [[L_S2]]) +// CHECK: Expire (LoanID: [[L_S1]]) +} + + +// CHECK-LABEL: Function: nested_scopes +void nested_scopes() { + MyObj* p = nullptr; +// CHECK: Block B{{[0-9]+}}: +// CHECK: AssignOrigin (DestID: [[O_NULLPTR_CAST:[0-9]+]], SrcID: [[O_NULLPTR:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_NULLPTR_CAST]]) + { + MyObj outer; + p = &outer; +// CHECK: Issue (LoanID: [[L_OUTER:[0-9]+]], OriginID: [[O_ADDR_OUTER:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_OUTER]]) + { + MyObj inner; + p = &inner; +// CHECK: Issue (LoanID: [[L_INNER:[0-9]+]], OriginID: [[O_ADDR_INNER:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_INNER]]) + } +// CHECK: Expire (LoanID: [[L_INNER]]) + } +// CHECK: Expire (LoanID: [[L_OUTER]]) +} + + +// CHECK-LABEL: Function: pointer_indirection +void pointer_indirection() { + int a; + int *p = &a; +// CHECK: Block B1: +// CHECK: Issue (LoanID: [[L_A:[0-9]+]], OriginID: [[O_ADDR_A:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_ADDR_A]]) + int **pp = &p; +// CHECK: Issue (LoanID: [[L_P:[0-9]+]], OriginID: [[O_ADDR_P:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_PP:[0-9]+]], SrcID: [[O_ADDR_P]]) + +// FIXME: The Origin for the RHS is broken + int *q = *pp; +// CHECK: AssignOrigin (DestID: [[O_Q:[0-9]+]], SrcID: {{[0-9]+}}) +} diff --git a/clang/test/SemaCXX/concept-crash-on-diagnostic.cpp 
b/clang/test/SemaCXX/concept-crash-on-diagnostic.cpp index b02930e145700..6bf2f441e83c4 100644 --- a/clang/test/SemaCXX/concept-crash-on-diagnostic.cpp +++ b/clang/test/SemaCXX/concept-crash-on-diagnostic.cpp @@ -60,3 +60,17 @@ concept atomicish = requires() { }; atomicish f(); // expected-error {{expected 'auto' or 'decltype(auto)' after concept name}} } // namespace GH138820 + +namespace GH138823 { + template void foo(); + template + concept ConceptA = requires { foo(); }; + // expected-error@-1 {{expression contains unexpanded parameter pack 'Ts'}} + + template + concept ConceptB = ConceptA; + + template void bar(Foo); + + void test() { bar(1); } +} diff --git a/clang/test/SemaCXX/cxx1z-constexpr-lambdas.cpp b/clang/test/SemaCXX/cxx1z-constexpr-lambdas.cpp index 33a6039459484..aa8d055e44971 100644 --- a/clang/test/SemaCXX/cxx1z-constexpr-lambdas.cpp +++ b/clang/test/SemaCXX/cxx1z-constexpr-lambdas.cpp @@ -3,6 +3,10 @@ // RUN: %clang_cc1 -std=c++1z -verify -fsyntax-only -fblocks -fdelayed-template-parsing %s -fcxx-exceptions // RUN: %clang_cc1 -std=c++14 -verify -fsyntax-only -fblocks %s -DCPP14_AND_EARLIER -fcxx-exceptions +// RUN: %clang_cc1 -std=c++1z -verify -fsyntax-only -fblocks %s -fcxx-exceptions -fexperimental-new-constant-interpreter +// RUN: %clang_cc1 -std=c++20 -verify -fsyntax-only -fblocks %s -fcxx-exceptions -fexperimental-new-constant-interpreter +// RUN: %clang_cc1 -std=c++1z -verify -fsyntax-only -fblocks -fdelayed-template-parsing %s -fcxx-exceptions -fexperimental-new-constant-interpreter +// RUN: %clang_cc1 -std=c++14 -verify -fsyntax-only -fblocks %s -DCPP14_AND_EARLIER -fcxx-exceptions -fexperimental-new-constant-interpreter namespace test_lambda_is_literal { #ifdef CPP14_AND_EARLIER diff --git a/clang/test/SemaCXX/cxx2b-deducing-this.cpp b/clang/test/SemaCXX/cxx2b-deducing-this.cpp index 3a3dc8855d827..6987d0c020457 100644 --- a/clang/test/SemaCXX/cxx2b-deducing-this.cpp +++ b/clang/test/SemaCXX/cxx2b-deducing-this.cpp @@ -1290,3 +1290,60 
@@ void f() { } + +namespace GH147121 { +struct X {}; +struct S1 { + bool operator==(this auto &&, const X &); // #S1-cand +}; +struct S2 { + bool operator==(this X, const auto &&); // #S2-cand +}; + +struct S3 { + S3& operator++(this X); // #S3-inc-cand + S3& operator++(this int); // #S3-inc-cand + int operator[](this X); // #S3-sub-cand + int operator[](this int); // #S3-sub-cand2 + void f(this X); // #S3-f-cand + void f(this int); // #S3-f-cand2 +}; + +int main() { + S1{} == S1{}; + // expected-error@-1 {{invalid operands to binary expression ('S1' and 'S1')}} + // expected-note@#S1-cand {{candidate function template not viable}} + // expected-note@#S1-cand {{candidate function (with reversed parameter order) template not viable}} + + + S1{} != S1{}; + // expected-error@-1 {{invalid operands to binary expression ('S1' and 'S1')}} + // expected-note@#S1-cand {{candidate function template not viable}} + // expected-note@#S1-cand {{candidate function (with reversed parameter order) template not viable}} + + + S2{} == S2{}; + // expected-error@-1 {{invalid operands to binary expression ('S2' and 'S2')}} + // expected-note@#S2-cand {{candidate function template not viable}} + // expected-note@#S2-cand {{candidate function (with reversed parameter order) template not viable}} + + + S2{} != S2{}; + // expected-error@-1 {{invalid operands to binary expression ('S2' and 'S2')}} + // expected-note@#S2-cand {{candidate function template not viable}} + // expected-note@#S2-cand {{candidate function (with reversed parameter order) template not viable}} + + S3 s3; + ++s3; + // expected-error@-1{{cannot increment value of type 'S3'}} + s3[]; + // expected-error@-1{{no viable overloaded operator[] for type 'S3'}} + // expected-note@#S3-sub-cand {{candidate function not viable: no known conversion from 'S3' to 'X' for object argument}} + // expected-note@#S3-sub-cand2 {{candidate function not viable: no known conversion from 'S3' to 'int' for object argument}} + + s3.f(); + // 
expected-error@-1{{no matching member function for call to 'f'}} + // expected-note@#S3-f-cand {{candidate function not viable: no known conversion from 'S3' to 'X' for object argument}} + // expected-note@#S3-f-cand2 {{candidate function not viable: no known conversion from 'S3' to 'int' for object argument}} +} +} diff --git a/clang/test/SemaCXX/enum-scoped.cpp b/clang/test/SemaCXX/enum-scoped.cpp index d7b7923430aff..0ce47274979d9 100644 --- a/clang/test/SemaCXX/enum-scoped.cpp +++ b/clang/test/SemaCXX/enum-scoped.cpp @@ -349,3 +349,18 @@ enum class B; A a; B b{a}; // expected-error {{cannot initialize}} } + +namespace GH147736 { +template +struct S { + enum OhBoy : Ty { // expected-error 2 {{'_Atomic' qualifier ignored; operations involving the enumeration type will be non-atomic}} + Unimportant + } e; +}; + +// Okay, was previously rejected. The underlying type is int. +S<_Atomic(int)> s; // expected-warning {{'_Atomic' is a C11 extension}} + // expected-note@-1 {{in instantiation of template class 'GH147736::S<_Atomic(int)>' requested here}} +static_assert(__is_same(__underlying_type(S<_Atomic(long long)>::OhBoy), long long), ""); // expected-warning {{'_Atomic' is a C11 extension}} + // expected-note@-1 {{in instantiation of template class 'GH147736::S<_Atomic(long long)>' requested here}} +} diff --git a/clang/test/SemaCXX/uninitialized-multiple-uses.cpp b/clang/test/SemaCXX/uninitialized-multiple-uses.cpp new file mode 100644 index 0000000000000..a6a4ad39d0be0 --- /dev/null +++ b/clang/test/SemaCXX/uninitialized-multiple-uses.cpp @@ -0,0 +1,25 @@ +// RUN: %clang_cc1 -fsyntax-only -Wuninitialized -verify %s + +void use_val(int); +void use_const_ref(const int &); + +// Test that the warning about self initialization is generated only once. 
+void test_self_init_1warning(bool a) { + int v = v; // expected-warning {{variable 'v' is uninitialized when used within its own initialization}} + if (a) + use_val(v); + else + use_const_ref(v); +} + +// Test that the diagnostic for using an uninitialized variable directly has a +// higher priority than using the same variable via a const reference. +void test_prioritize_use_over_const_ref(bool a) { + int v; // expected-note {{initialize the variable 'v' to silence this warning}} + if (a) // expected-warning {{variable 'v' is used uninitialized whenever 'if' condition is false}} + // expected-note@-1 {{remove the 'if' if its condition is always true}} + v = 2; + else + use_const_ref(v); + use_val(v); // expected-note {{uninitialized use occurs here}} +} diff --git a/clang/test/SemaCXX/warn-uninitialized-const-reference.cpp b/clang/test/SemaCXX/warn-uninitialized-const-reference.cpp index d24b561441d8f..7204d6525cef9 100644 --- a/clang/test/SemaCXX/warn-uninitialized-const-reference.cpp +++ b/clang/test/SemaCXX/warn-uninitialized-const-reference.cpp @@ -27,7 +27,7 @@ int const_use(const int i); void f(int a) { int i; const_ref_use(i); // expected-warning {{variable 'i' is uninitialized when passed as a const reference argument here}} - int j = j + const_ref_use(j); // expected-warning {{variable 'j' is uninitialized when used within its own initialization}} expected-warning {{variable 'j' is uninitialized when passed as a const reference argument here}} + int j = j + const_ref_use(j); // expected-warning {{variable 'j' is uninitialized when used within its own initialization}} A a1 = const_ref_use_A(a1); // expected-warning {{variable 'a1' is uninitialized when passed as a const reference argument here}} int k = const_use(k); // expected-warning {{variable 'k' is uninitialized when used within its own initialization}} A a2 = const_use_A(a2); // expected-warning {{variable 'a2' is uninitialized when used within its own initialization}} diff --git 
a/clang/test/SemaHLSL/BuiltIns/Buffers.hlsl b/clang/test/SemaHLSL/BuiltIns/Buffers.hlsl index 477a16a454a9c..d7c6876d3b9e3 100644 --- a/clang/test/SemaHLSL/BuiltIns/Buffers.hlsl +++ b/clang/test/SemaHLSL/BuiltIns/Buffers.hlsl @@ -111,4 +111,8 @@ Buffer BufferErr3; void main() { (void)Buff.__handle; // expected-error {{'__handle' is a private member of 'hlsl::Buffer>'}} // expected-note@* {{implicitly declared private here}} + + // expected-error@+2 {{cannot assign to return value because function 'operator[]' returns a const value}} + // expected-note@* {{function 'operator[]' which returns const-qualified type 'vector' declared here}} + Buff[0] = 0.0; } diff --git a/clang/test/SemaHLSL/BuiltIns/StructuredBuffers.hlsl b/clang/test/SemaHLSL/BuiltIns/StructuredBuffers.hlsl index bf541f4a07da7..fbd9288590adc 100644 --- a/clang/test/SemaHLSL/BuiltIns/StructuredBuffers.hlsl +++ b/clang/test/SemaHLSL/BuiltIns/StructuredBuffers.hlsl @@ -28,4 +28,8 @@ StructuredBuffer BufferErr4; void main() { (void)Buff.__handle; // expected-error {{'__handle' is a private member of 'hlsl::StructuredBuffer>'}} // expected-note@* {{implicitly declared private here}} + + // expected-error@+2 {{cannot assign to return value because function 'operator[]' returns a const value}} + // expected-note@* {{function 'operator[]' which returns const-qualified type 'vector' declared here}} + Buff[0] = 0.0; } diff --git a/clang/test/SemaHLSL/RootSignature-err.hlsl b/clang/test/SemaHLSL/RootSignature-err.hlsl index 118fc38daf3f2..04013974d28b9 100644 --- a/clang/test/SemaHLSL/RootSignature-err.hlsl +++ b/clang/test/SemaHLSL/RootSignature-err.hlsl @@ -34,3 +34,7 @@ void bad_root_signature_5() {} // expected-error@+1 {{expected ')' to denote end of parameters, or, another valid parameter of RootConstants}} [RootSignature(MultiLineRootSignature)] void bad_root_signature_6() {} + +// expected-error@+1 {{expected end of stream to denote end of parameters, or, another valid parameter of RootSignature}} 
+[RootSignature("RootFlags() RootConstants(b0, num32BitConstants = 1)")] +void bad_root_signature_7() {} diff --git a/clang/test/SemaHLSL/RootSignature.hlsl b/clang/test/SemaHLSL/RootSignature.hlsl new file mode 100644 index 0000000000000..810f81479caab --- /dev/null +++ b/clang/test/SemaHLSL/RootSignature.hlsl @@ -0,0 +1,13 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -fsyntax-only %s -verify + +// expected-no-diagnostics + +// Test that we have consistent behaviour for comma parsing. Namely: +// - a single trailing comma is allowed after any parameter +// - a trailing comma is not required + +[RootSignature("CBV(b0, flags = DATA_VOLATILE,), DescriptorTable(Sampler(s0,),),")] +void maximum_commas() {} + +[RootSignature("CBV(b0, flags = DATA_VOLATILE), DescriptorTable(Sampler(s0))")] +void minimal_commas() {} diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl index 7494c4f984353..9711b3bdded6b 100644 --- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl +++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl @@ -12,6 +12,14 @@ void test_s_monitor_sleep(short a) { __builtin_amdgcn_s_monitor_sleep(a); // expected-error {{'__builtin_amdgcn_s_monitor_sleep' must be a constant integer}} } +void test_s_wait_asynccnt(short a) { + __builtin_amdgcn_s_wait_asynccnt(a); // expected-error {{'__builtin_amdgcn_s_wait_asynccnt' must be a constant integer}} +} + +void test_s_wait_tensorcnt(short a) { + __builtin_amdgcn_s_wait_tensorcnt(a); // expected-error {{'__builtin_amdgcn_s_wait_tensorcnt' must be a constant integer}} +} + void test__builtin_amdgcn_cvt_f16_fp8(int a, int b) { __builtin_amdgcn_cvt_f16_fp8(a, b); // expected-error {{'__builtin_amdgcn_cvt_f16_fp8' must be a constant integer}} } diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index 
0f1fa8b329fd6..9d34b62da20f5 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -1302,6 +1302,9 @@ getDeviceInput(const ArgList &Args) { // after every regular input file so that libraries may be included out of // order. This follows 'ld.lld' semantics which are more lenient. bool Extracted = true; + llvm::DenseSet ShouldExtract; + for (auto &Arg : Args.getAllArgValues(OPT_should_extract)) + ShouldExtract.insert(Arg); while (Extracted) { Extracted = false; for (OffloadFile &Binary : ArchiveFilesToExtract) { @@ -1315,8 +1318,9 @@ getDeviceInput(const ArgList &Args) { CompatibleTargets.emplace_back(ID); for (const auto &[Index, ID] : llvm::enumerate(CompatibleTargets)) { - // Only extract an if we have an an object matching this target. - if (!InputFiles.count(ID)) + // Only extract an if we have an an object matching this target or it + // was specifically requested. + if (!InputFiles.count(ID) && !ShouldExtract.contains(ID.second)) continue; Expected ExtractOrErr = @@ -1330,7 +1334,7 @@ getDeviceInput(const ArgList &Args) { // Skip including the file if it is an archive that does not resolve // any symbols. - if (!Extracted) + if (!Extracted && !ShouldExtract.contains(ID.second)) continue; // If another target needs this binary it must be copied instead. 
diff --git a/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td b/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td index 17fb9db35fe39..fa73e02fd5178 100644 --- a/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td +++ b/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td @@ -59,6 +59,10 @@ def override_image : Joined<["--"], "override-image=">, Flags<[WrapperOnlyOption]>, MetaVarName<"">, HelpText<"Uses the provided file as if it were the output of the device link step">; +def should_extract : CommaJoined<["--"], "should-extract=">, + Flags<[WrapperOnlyOption]>, MetaVarName<"">, + HelpText<"Set of device architectures we should always extract if found.">; + // Flags passed to the device linker. def arch_EQ : Joined<["--"], "arch=">, Flags<[DeviceOnlyOption, HelpHidden]>, MetaVarName<"">, diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp index 8b590bd57e1a3..f10b73278381b 100644 --- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp +++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp @@ -94,6 +94,7 @@ static bool DeprecatedDriverCommand; static ResourceDirRecipeKind ResourceDirRecipe; static bool Verbose; static bool PrintTiming; +static bool EmitVisibleModules; static llvm::BumpPtrAllocator Alloc; static llvm::StringSaver Saver{Alloc}; static std::vector CommandLine; @@ -232,6 +233,8 @@ static void ParseArgs(int argc, char **argv) { PrintTiming = Args.hasArg(OPT_print_timing); + EmitVisibleModules = Args.hasArg(OPT_emit_visible_modules); + Verbose = Args.hasArg(OPT_verbose); RoundTripArgs = Args.hasArg(OPT_round_trip_args); @@ -380,6 +383,14 @@ static auto toJSONSorted(llvm::json::OStream &JOS, }; } +static auto toJSONSorted(llvm::json::OStream &JOS, std::vector V) { + llvm::sort(V); + return [&JOS, V = std::move(V)] { + for (const StringRef Entry : V) + JOS.value(Entry); + }; +} + // Thread safe. 
class FullDeps { public: @@ -396,6 +407,7 @@ class FullDeps { ID.NamedModule = std::move(TUDeps.ID.ModuleName); ID.NamedModuleDeps = std::move(TUDeps.NamedModuleDeps); ID.ClangModuleDeps = std::move(TUDeps.ClangModuleDeps); + ID.VisibleModules = std::move(TUDeps.VisibleModules); ID.DriverCommandLine = std::move(TUDeps.DriverCommandLine); ID.Commands = std::move(TUDeps.Commands); @@ -525,6 +537,9 @@ class FullDeps { JOS.attributeArray("file-deps", toJSONStrings(JOS, I.FileDeps)); JOS.attribute("input-file", StringRef(I.FileName)); + if (EmitVisibleModules) + JOS.attributeArray("visible-clang-modules", + toJSONSorted(JOS, I.VisibleModules)); }); } } else { @@ -545,6 +560,9 @@ class FullDeps { JOS.attributeArray("file-deps", toJSONStrings(JOS, I.FileDeps)); JOS.attribute("input-file", StringRef(I.FileName)); + if (EmitVisibleModules) + JOS.attributeArray("visible-clang-modules", + toJSONSorted(JOS, I.VisibleModules)); }); } }); @@ -596,6 +614,7 @@ class FullDeps { std::string NamedModule; std::vector NamedModuleDeps; std::vector ClangModuleDeps; + std::vector VisibleModules; std::vector DriverCommandLine; std::vector Commands; }; @@ -623,11 +642,12 @@ static bool handleTranslationUnitResult( return false; } -static bool handleModuleResult( - StringRef ModuleName, llvm::Expected &MaybeModuleGraph, - FullDeps &FD, size_t InputIndex, SharedStream &OS, SharedStream &Errs) { - if (!MaybeModuleGraph) { - llvm::handleAllErrors(MaybeModuleGraph.takeError(), +static bool handleModuleResult(StringRef ModuleName, + llvm::Expected &MaybeTUDeps, + FullDeps &FD, size_t InputIndex, + SharedStream &OS, SharedStream &Errs) { + if (!MaybeTUDeps) { + llvm::handleAllErrors(MaybeTUDeps.takeError(), [&ModuleName, &Errs](llvm::StringError &Err) { Errs.applyLocked([&](raw_ostream &OS) { OS << "Error while scanning dependencies for " @@ -637,7 +657,7 @@ static bool handleModuleResult( }); return true; } - FD.mergeDeps(std::move(*MaybeModuleGraph), InputIndex); + 
FD.mergeDeps(std::move(MaybeTUDeps->ModuleGraph), InputIndex); return false; } diff --git a/clang/tools/clang-scan-deps/Opts.td b/clang/tools/clang-scan-deps/Opts.td index 9cccbb3aaf0c8..03011f9ae1f75 100644 --- a/clang/tools/clang-scan-deps/Opts.td +++ b/clang/tools/clang-scan-deps/Opts.td @@ -37,6 +37,9 @@ defm resource_dir_recipe : Eq<"resource-dir-recipe", "How to produce missing '-r def print_timing : F<"print-timing", "Print timing information">; +def emit_visible_modules + : F<"emit-visible-modules", "emit visible modules in primary output">; + def verbose : F<"v", "Use verbose output">; def round_trip_args : F<"round-trip-args", "verify that command-line arguments are canonical by parsing and re-serializing">; diff --git a/clang/unittests/Format/FormatTestComments.cpp b/clang/unittests/Format/FormatTestComments.cpp index a16fbffb76270..88707551b7698 100644 --- a/clang/unittests/Format/FormatTestComments.cpp +++ b/clang/unittests/Format/FormatTestComments.cpp @@ -747,16 +747,14 @@ TEST_F(FormatTestComments, DontSplitLineCommentsWithEscapedNewlines) { " // AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\\\n" " // AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", getLLVMStyleWithColumns(50))); - // FIXME: One day we might want to implement adjustment of leading whitespace - // of the consecutive lines in this kind of comment: - EXPECT_EQ("double\n" - " a; // AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\\\n" - " // AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\\\n" - " // AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", - format("double a; // AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\\\n" - " // AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\\\n" - " // AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", - getLLVMStyleWithColumns(49))); + verifyFormat("double\n" + " a; // AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\\\n" + " // AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\\\n" + " // AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", + "double a; // AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\\\n" + " // 
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\\\n" + " // AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", + getLLVMStyleWithColumns(49)); } TEST_F(FormatTestComments, DontIntroduceMultilineComments) { diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index a1285e4bc9bf8..e281a4945a862 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -4126,6 +4126,13 @@ TEST_F(TokenAnnotatorTest, JsonCodeInRawString) { EXPECT_TOKEN(Tokens[6], tok::colon, TT_DictLiteral); } +TEST_F(TokenAnnotatorTest, LineCommentTrailingBackslash) { + auto Tokens = annotate("// a \\\n" + "// b"); + ASSERT_EQ(Tokens.size(), 3u) << Tokens; + EXPECT_TOKEN(Tokens[1], tok::comment, TT_LineComment); +} + } // namespace } // namespace format } // namespace clang diff --git a/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp b/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp index ff1697f1bbb9a..e82dcadebba3f 100644 --- a/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp +++ b/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp @@ -1238,4 +1238,166 @@ TEST_F(ParseHLSLRootSignatureTest, InvalidNonZeroFlagsTest) { ASSERT_TRUE(Consumer->isSatisfied()); } +TEST_F(ParseHLSLRootSignatureTest, InvalidRootElementMissingCommaTest) { + // This test will check that an error is produced when there is a missing + // comma between parameters + const llvm::StringLiteral Source = R"cc( + RootFlags() + RootConstants(num32BitConstants = 1, b0) + )cc"; + + auto Ctx = createMinimalASTContext(); + StringLiteral *Signature = wrapSource(Ctx, Source); + + TrivialModuleLoader ModLoader; + auto PP = createPP(Source, ModLoader); + + SmallVector Elements; + hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements, + Signature, *PP); + + // Test correct diagnostic produced + Consumer->setExpected(diag::err_hlsl_unexpected_end_of_params); + ASSERT_TRUE(Parser.parse()); + + 
ASSERT_TRUE(Consumer->isSatisfied()); +} + +TEST_F(ParseHLSLRootSignatureTest, InvalidDescriptorTableMissingCommaTest) { + // This test will check that an error is produced when there is a missing + // comma between parameters + const llvm::StringLiteral Source = R"cc( + DescriptorTable( + CBV(b0) + visibility = SHADER_VISIBILITY_ALL + ) + )cc"; + + auto Ctx = createMinimalASTContext(); + StringLiteral *Signature = wrapSource(Ctx, Source); + + TrivialModuleLoader ModLoader; + auto PP = createPP(Source, ModLoader); + + SmallVector Elements; + hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements, + Signature, *PP); + + // Test correct diagnostic produced + Consumer->setExpected(diag::err_hlsl_unexpected_end_of_params); + ASSERT_TRUE(Parser.parse()); + + ASSERT_TRUE(Consumer->isSatisfied()); +} + +TEST_F(ParseHLSLRootSignatureTest, InvalidRootConstantParamsCommaTest) { + // This test will check that an error is produced when there is a missing + // comma between parameters + const llvm::StringLiteral Source = R"cc( + RootConstants( + num32BitConstants = 1 + b0 + ) + )cc"; + + auto Ctx = createMinimalASTContext(); + StringLiteral *Signature = wrapSource(Ctx, Source); + + TrivialModuleLoader ModLoader; + auto PP = createPP(Source, ModLoader); + + SmallVector Elements; + hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements, + Signature, *PP); + + // Test correct diagnostic produced + Consumer->setExpected(diag::err_hlsl_unexpected_end_of_params); + ASSERT_TRUE(Parser.parse()); + + ASSERT_TRUE(Consumer->isSatisfied()); +} + +TEST_F(ParseHLSLRootSignatureTest, InvalidRootDescriptorParamsCommaTest) { + // This test will check that an error is produced when there is a missing + // comma between parameters + const llvm::StringLiteral Source = R"cc( + CBV( + b0 + flags = 0 + ) + )cc"; + + auto Ctx = createMinimalASTContext(); + StringLiteral *Signature = wrapSource(Ctx, Source); + + TrivialModuleLoader ModLoader; + auto PP = createPP(Source, 
ModLoader); + + SmallVector Elements; + hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements, + Signature, *PP); + + // Test correct diagnostic produced + Consumer->setExpected(diag::err_hlsl_unexpected_end_of_params); + ASSERT_TRUE(Parser.parse()); + + ASSERT_TRUE(Consumer->isSatisfied()); +} + +TEST_F(ParseHLSLRootSignatureTest, InvalidDescriptorClauseParamsCommaTest) { + // This test will check that an error is produced when there is a missing + // comma between parameters + const llvm::StringLiteral Source = R"cc( + DescriptorTable( + UAV( + u0 + flags = 0 + ) + ) + )cc"; + + auto Ctx = createMinimalASTContext(); + StringLiteral *Signature = wrapSource(Ctx, Source); + + TrivialModuleLoader ModLoader; + auto PP = createPP(Source, ModLoader); + + SmallVector Elements; + hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements, + Signature, *PP); + + // Test correct diagnostic produced + Consumer->setExpected(diag::err_hlsl_unexpected_end_of_params); + ASSERT_TRUE(Parser.parse()); + + ASSERT_TRUE(Consumer->isSatisfied()); +} + +TEST_F(ParseHLSLRootSignatureTest, InvalidStaticSamplerCommaTest) { + // This test will check that an error is produced when there is a missing + // comma between parameters + const llvm::StringLiteral Source = R"cc( + StaticSampler( + s0 + maxLOD = 3 + ) + )cc"; + + auto Ctx = createMinimalASTContext(); + StringLiteral *Signature = wrapSource(Ctx, Source); + + TrivialModuleLoader ModLoader; + auto PP = createPP(Source, ModLoader); + + SmallVector Elements; + hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements, + Signature, *PP); + + // Test correct diagnostic produced + Consumer->setExpected(diag::err_hlsl_unexpected_end_of_params); + ASSERT_TRUE(Parser.parse()); + + ASSERT_TRUE(Consumer->isSatisfied()); +} + } // anonymous namespace diff --git a/clang/unittests/Tooling/DependencyScanning/DependencyScanningFilesystemTest.cpp 
b/clang/unittests/Tooling/DependencyScanning/DependencyScanningFilesystemTest.cpp index b461d9109271c..023c02ddaa3e4 100644 --- a/clang/unittests/Tooling/DependencyScanning/DependencyScanningFilesystemTest.cpp +++ b/clang/unittests/Tooling/DependencyScanning/DependencyScanningFilesystemTest.cpp @@ -233,3 +233,34 @@ TEST(DependencyScanningFilesystem, DiagnoseCachedFileSizeChange) { ASSERT_EQ(SizeInfo->CachedSize, 0u); ASSERT_EQ(SizeInfo->ActualSize, 8u); } + +TEST(DependencyScanningFilesystem, DoNotDiagnoseDirSizeChange) { + llvm::SmallString<128> Dir; + ASSERT_FALSE(llvm::sys::fs::createUniqueDirectory("tmp", Dir)); + + llvm::IntrusiveRefCntPtr FS = + llvm::vfs::createPhysicalFileSystem(); + + DependencyScanningFilesystemSharedCache SharedCache; + DependencyScanningWorkerFilesystem DepFS(SharedCache, FS); + + // Trigger the file system cache. + ASSERT_EQ(DepFS.exists(Dir), true); + + // Add a file to the FS to change its size. + // It seems that directory sizes reported are not meaningful, + // and should not be used to check for size changes. + // This test is setup only to trigger a size change so that we + // know we are excluding directories from reporting. + llvm::SmallString<128> FilePath = Dir; + llvm::sys::path::append(FilePath, "file.h"); + { + std::error_code EC; + llvm::raw_fd_ostream TempFile(FilePath, EC); + ASSERT_FALSE(EC); + } + + // We do not report directory size changes. 
+ auto InvalidEntries = SharedCache.getOutOfDateEntries(*FS); + EXPECT_EQ(InvalidEntries.size(), 0u); +} diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp index 409f1c4f71834..d4fb56e6a39b7 100644 --- a/clang/utils/TableGen/NeonEmitter.cpp +++ b/clang/utils/TableGen/NeonEmitter.cpp @@ -2417,7 +2417,11 @@ void NeonEmitter::run(raw_ostream &OS) { OS << "#ifndef __ARM_NEON_H\n"; OS << "#define __ARM_NEON_H\n\n"; - OS << "#ifndef __ARM_FP\n"; + OS << "#if !defined(__arm__) && !defined(__aarch64__) && " + "!defined(__arm64ec__)\n"; + OS << "#error \" is intended only for ARM and AArch64 " + "targets\"\n"; + OS << "#elif !defined(__ARM_FP)\n"; OS << "#error \"NEON intrinsics not available with the soft-float ABI. " "Please use -mfloat-abi=softfp or -mfloat-abi=hard\"\n"; OS << "#else\n\n"; diff --git a/compiler-rt/cmake/Modules/AddCompilerRT.cmake b/compiler-rt/cmake/Modules/AddCompilerRT.cmake index fb2aee8e42ee2..9a0426ff29470 100644 --- a/compiler-rt/cmake/Modules/AddCompilerRT.cmake +++ b/compiler-rt/cmake/Modules/AddCompilerRT.cmake @@ -162,9 +162,7 @@ endmacro() # OBJECT_LIBS # PARENT_TARGET # ADDITIONAL_HEADERS
-# EXTENSIONS -# C_STANDARD -# CXX_STANDARD ) +# EXTENSIONS ) function(add_compiler_rt_runtime name type) if(NOT type MATCHES "^(OBJECT|STATIC|SHARED|MODULE)$") message(FATAL_ERROR @@ -173,7 +171,7 @@ function(add_compiler_rt_runtime name type) endif() cmake_parse_arguments(LIB "" - "PARENT_TARGET;C_STANDARD;CXX_STANDARD" + "PARENT_TARGET" "OS;ARCHS;SOURCES;CFLAGS;LINK_FLAGS;DEFS;DEPS;LINK_LIBS;OBJECT_LIBS;ADDITIONAL_HEADERS;EXTENSIONS" ${ARGN}) set(libnames) @@ -362,12 +360,6 @@ function(add_compiler_rt_runtime name type) set_target_link_flags(${libname} ${extra_link_flags_${libname}}) set_property(TARGET ${libname} APPEND PROPERTY COMPILE_DEFINITIONS ${LIB_DEFS}) - if(LIB_C_STANDARD) - set_property(TARGET ${libname} PROPERTY C_STANDARD ${LIB_C_STANDARD}) - endif() - if(LIB_CXX_STANDARD) - set_property(TARGET ${libname} PROPERTY CXX_STANDARD ${LIB_CXX_STANDARD}) - endif() set_target_output_directories(${libname} ${output_dir_${libname}}) install(TARGETS ${libname} ARCHIVE DESTINATION ${install_dir_${libname}} diff --git a/compiler-rt/cmake/builtin-config-ix.cmake b/compiler-rt/cmake/builtin-config-ix.cmake index b86bb1bca7cda..c62855835512d 100644 --- a/compiler-rt/cmake/builtin-config-ix.cmake +++ b/compiler-rt/cmake/builtin-config-ix.cmake @@ -26,7 +26,6 @@ builtin_check_c_compiler_flag("-Xclang -mcode-object-version=none" COMPILER_RT_H builtin_check_c_compiler_flag(-Wbuiltin-declaration-mismatch COMPILER_RT_HAS_WBUILTIN_DECLARATION_MISMATCH_FLAG) builtin_check_c_compiler_flag(/Zl COMPILER_RT_HAS_ZL_FLAG) builtin_check_c_compiler_flag(-fcf-protection=full COMPILER_RT_HAS_FCF_PROTECTION_FLAG) -builtin_check_c_compiler_flag(-nostdinc++ COMPILER_RT_HAS_NOSTDINCXX_FLAG) builtin_check_c_compiler_source(COMPILER_RT_HAS_ATOMIC_KEYWORD " diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt index 3ab92403d4168..5e832315f3666 100644 --- a/compiler-rt/lib/builtins/CMakeLists.txt +++ b/compiler-rt/lib/builtins/CMakeLists.txt @@ -6,7 
+6,7 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) cmake_minimum_required(VERSION 3.20.0) set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) - project(CompilerRTBuiltins C CXX ASM) + project(CompilerRTBuiltins C ASM) set(COMPILER_RT_STANDALONE_BUILD TRUE) set(COMPILER_RT_BUILTINS_STANDALONE_BUILD TRUE) @@ -64,8 +64,6 @@ include(CMakePushCheckState) option(COMPILER_RT_BUILTINS_HIDE_SYMBOLS "Do not export any symbols from the static library." ON) -include_directories(../../../third-party/siphash/include) - # TODO: Need to add a mechanism for logging errors when builtin source files are # added to a sub-directory and not this CMakeLists file. set(GENERIC_SOURCES @@ -591,7 +589,6 @@ set(aarch64_SOURCES ${GENERIC_TF_SOURCES} ${GENERIC_SOURCES} cpu_model/aarch64.c - aarch64/emupac.cpp aarch64/fp_mode.c ) @@ -839,7 +836,7 @@ else () append_list_if(COMPILER_RT_ENABLE_CET -fcf-protection=full BUILTIN_CFLAGS) endif() - append_list_if(COMPILER_RT_HAS_NOSTDINCXX_FLAG -nostdinc++ BUILTIN_CFLAGS) + append_list_if(COMPILER_RT_HAS_STD_C11_FLAG -std=c11 BUILTIN_CFLAGS) append_list_if(COMPILER_RT_HAS_WBUILTIN_DECLARATION_MISMATCH_FLAG -Werror=builtin-declaration-mismatch BUILTIN_CFLAGS) # Don't embed directives for picking any specific CRT @@ -961,8 +958,6 @@ else () SOURCES ${${arch}_SOURCES} DEFS ${BUILTIN_DEFS} CFLAGS ${BUILTIN_CFLAGS_${arch}} - C_STANDARD 11 - CXX_STANDARD 17 PARENT_TARGET builtins) cmake_pop_check_state() endif () diff --git a/compiler-rt/lib/builtins/aarch64/emupac.cpp b/compiler-rt/lib/builtins/aarch64/emupac.cpp deleted file mode 100644 index 4e28667718754..0000000000000 --- a/compiler-rt/lib/builtins/aarch64/emupac.cpp +++ /dev/null @@ -1,140 +0,0 @@ -//===--- emupac.cpp - Emulated PAC implementation -------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements Emulated PAC using SipHash_1_3 as the IMPDEF hashing -// scheme. -// -//===----------------------------------------------------------------------===// - -#include - -#include "siphash/SipHash.h" - -// EmuPAC implements runtime emulation of PAC instructions. If the current -// CPU supports PAC, EmuPAC uses real PAC instructions. Otherwise, it uses the -// emulation, which is effectively an implementation of PAC with an IMPDEF -// hashing scheme based on SipHash_1_3. -// -// The purpose of the emulation is to allow programs to be built to be portable -// to machines without PAC support, with some performance loss and increased -// probability of false positives (due to not being able to portably determine -// the VA size), while being functionally almost equivalent to running on a -// machine with PAC support. One example of a use case is if PAC is used in -// production as a security mitigation, but the testing environment is -// heterogeneous (i.e. some machines lack PAC support). In this case we would -// like the testing machines to be able to detect issues resulting -// from the use of PAC instructions that would affect production by running -// tests. This can be achieved by building test binaries with EmuPAC and -// production binaries with real PAC. -// -// EmuPAC should not be used in production and is only intended for testing use -// cases. This is not only because of the performance costs, which will exist -// even on PAC-supporting machines because of the function call overhead for -// each sign/auth operation, but because it provides weaker security compared to -// real PAC: the key is constant and public, which means that we do not mix a -// global secret. -// -// The emulation assumes that the VA size is at most 48 bits. 
The architecture -// as of ARMv8.2, which was the last architecture version in which PAC was not -// mandatory, permitted VA size up to 52 bits via ARMv8.2-LVA, but we are -// unaware of an ARMv8.2 CPU that implemented ARMv8.2-LVA. - -const uint64_t max_va_size = 48; -const uint64_t pac_mask = ((1ULL << 55) - 1) & ~((1ULL << max_va_size) - 1); -const uint64_t ttbr1_mask = 1ULL << 55; - -// Determine whether PAC is supported without accessing memory. This utilizes -// the XPACLRI instruction which will copy bit 55 of x30 into at least bit 54 if -// PAC is supported and acts as a NOP if PAC is not supported. -static bool pac_supported() { - register uintptr_t x30 __asm__("x30") = 1ULL << 55; - __asm__ __volatile__("xpaclri" : "+r"(x30)); - return x30 & (1ULL << 54); -} - -// This asm snippet is used to force the creation of a frame record when -// calling the EmuPAC functions. This is important because the EmuPAC functions -// may crash if an auth failure is detected and may be unwound past using a -// frame pointer based unwinder. -#ifdef __GCC_HAVE_DWARF2_CFI_ASM -#define CFI_INST(inst) inst -#else -#define CFI_INST(inst) -#endif - -// clang-format off -#define FRAME_POINTER_WRAP(sym) \ - CFI_INST(".cfi_startproc\n") \ - "stp x29, x30, [sp, #-16]!\n" \ - CFI_INST(".cfi_def_cfa_offset 16\n") \ - "mov x29, sp\n" \ - CFI_INST(".cfi_def_cfa w29, 16\n") \ - CFI_INST(".cfi_offset w30, -8\n") \ - CFI_INST(".cfi_offset w29, -16\n") \ - "bl " #sym "\n" \ - CFI_INST(".cfi_def_cfa wsp, 16\n") \ - "ldp x29, x30, [sp], #16\n" \ - CFI_INST(".cfi_def_cfa_offset 0\n") \ - CFI_INST(".cfi_restore w30\n") \ - CFI_INST(".cfi_restore w29\n") \ - "ret\n" \ - CFI_INST(".cfi_endproc\n") -// clang-format on - -// Emulated DA key value. 
-static const uint8_t emu_da_key[16] = {0xb5, 0xd4, 0xc9, 0xeb, 0x79, 0x10, - 0x4a, 0x79, 0x6f, 0xec, 0x8b, 0x1b, - 0x42, 0x87, 0x81, 0xd4}; - -extern "C" [[gnu::flatten]] uint64_t -__emupac_pacda_impl(uint64_t ptr, uint64_t disc) { - if (pac_supported()) { - __asm__ __volatile__(".arch_extension pauth\npacda %0, %1" - : "+r"(ptr) - : "r"(disc)); - return ptr; - } - if (ptr & ttbr1_mask) { - if ((ptr & pac_mask) != pac_mask) { - return ptr | pac_mask; - } - } else { - if (ptr & pac_mask) { - return ptr & ~pac_mask; - } - } - uint64_t hash; - siphash<1, 3>(reinterpret_cast(&ptr), 8, emu_da_key, - *reinterpret_cast(&hash)); - return (ptr & ~pac_mask) | (hash & pac_mask); -} - -__asm__(".globl __emupac_pacda\n" - "__emupac_pacda:\n" FRAME_POINTER_WRAP(__emupac_pacda_impl)); - -extern "C" [[gnu::flatten]] uint64_t -__emupac_autda_impl(uint64_t ptr, uint64_t disc) { - if (pac_supported()) { - __asm__ __volatile__(".arch_extension pauth\nautda %0, %1" - : "+r"(ptr) - : "r"(disc)); - return ptr; - } - uint64_t ptr_without_pac = - (ptr & ttbr1_mask) ? 
(ptr | pac_mask) : (ptr & ~pac_mask); - uint64_t hash; - siphash<1, 3>(reinterpret_cast(&ptr_without_pac), 8, emu_da_key, - *reinterpret_cast(&hash)); - if (((ptr & ~pac_mask) | (hash & pac_mask)) != ptr) { - __builtin_trap(); - } - return ptr_without_pac; -} - -__asm__(".globl __emupac_autda\n" - "__emupac_autda:\n" FRAME_POINTER_WRAP(__emupac_autda_impl)); diff --git a/compiler-rt/lib/builtins/int_types.h b/compiler-rt/lib/builtins/int_types.h index 7c7f8cb64aa9a..48862f3642175 100644 --- a/compiler-rt/lib/builtins/int_types.h +++ b/compiler-rt/lib/builtins/int_types.h @@ -223,7 +223,7 @@ typedef union { #define CRT_HAS_TF_MODE #endif -#if __STDC_VERSION__ >= 199901L && !defined(_MSC_VER) +#if __STDC_VERSION__ >= 199901L typedef float _Complex Fcomplex; typedef double _Complex Dcomplex; typedef long double _Complex Lcomplex; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_freebsd.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_freebsd.cpp index 4940062eeae47..4c1e005289230 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_freebsd.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_freebsd.cpp @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -87,10 +88,6 @@ #include #include -#define _KERNEL // to declare 'shminfo' structure -#include -#undef _KERNEL - #undef IOC_DIRMASK // Include these after system headers to avoid name clashes and ambiguities. 
@@ -141,8 +138,6 @@ unsigned struct_timeb_sz = sizeof(struct timeb); unsigned struct_msqid_ds_sz = sizeof(struct msqid_ds); unsigned struct_mq_attr_sz = sizeof(struct mq_attr); unsigned struct_statvfs_sz = sizeof(struct statvfs); -unsigned struct_shminfo_sz = sizeof(struct shminfo); -unsigned struct_shm_info_sz = sizeof(struct shm_info); unsigned struct_regmatch_sz = sizeof(regmatch_t); unsigned struct_regex_sz = sizeof(regex_t); unsigned struct_fstab_sz = sizeof(struct fstab); @@ -156,9 +151,6 @@ const uptr sig_err = (uptr)SIG_ERR; const uptr sa_siginfo = (uptr)SA_SIGINFO; int shmctl_ipc_stat = (int)IPC_STAT; -int shmctl_ipc_info = (int)IPC_INFO; -int shmctl_shm_info = (int)SHM_INFO; -int shmctl_shm_stat = (int)SHM_STAT; unsigned struct_utmpx_sz = sizeof(struct utmpx); int map_fixed = MAP_FIXED; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_freebsd.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_freebsd.h index 8ce73f206fd88..382b67ce78ebd 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_freebsd.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_freebsd.h @@ -419,12 +419,14 @@ struct __sanitizer_wordexp_t { typedef void __sanitizer_FILE; -extern unsigned struct_shminfo_sz; -extern unsigned struct_shm_info_sz; extern int shmctl_ipc_stat; -extern int shmctl_ipc_info; -extern int shmctl_shm_info; -extern int shmctl_shm_stat; + +// This simplifies generic code +#define struct_shminfo_sz -1 +#define struct_shm_info_sz -1 +#define shmctl_shm_stat -1 +#define shmctl_ipc_info -1 +#define shmctl_shm_info -1 extern unsigned struct_utmpx_sz; diff --git a/compiler-rt/test/builtins/Unit/aarch64/emupac.c b/compiler-rt/test/builtins/Unit/aarch64/emupac.c deleted file mode 100644 index 60ad9444801d8..0000000000000 --- a/compiler-rt/test/builtins/Unit/aarch64/emupac.c +++ /dev/null @@ -1,62 +0,0 @@ -// REQUIRES: librt_has_emupac -// RUN: %clang_builtins %s %librt -o %t -// RUN: %run %t 1 -// RUN: %run 
%t 2 -// RUN: %expect_crash %run %t 3 -// RUN: %expect_crash %run %t 4 - -#include -#include -#include - -uint64_t __emupac_pacda(uint64_t ptr, uint64_t disc); -uint64_t __emupac_autda(uint64_t ptr, uint64_t disc); - -int main(int argc, char **argv) { - char stack_object1; - uint64_t ptr1 = (uint64_t)&stack_object1; - - char stack_object2; - uint64_t ptr2 = (uint64_t)&stack_object2; - - switch (atoi(argv[1])) { - case 1: { - // Normal case: test that a pointer authenticated with the same - // discriminator is equal to the original pointer. - uint64_t signed_ptr = __emupac_pacda(ptr1, ptr2); - uint64_t authed_ptr = __emupac_autda(signed_ptr, ptr2); - if (authed_ptr != ptr1) { - printf("0x%lx != 0x%lx\n", authed_ptr, ptr1); - return 1; - } - break; - } - case 2: { - // Test that negative addresses (addresses controlled by TTBR1, - // conventionally kernel addresses) can be signed and authenticated. - uint64_t unsigned_ptr = -1ULL; - uint64_t signed_ptr = __emupac_pacda(unsigned_ptr, ptr2); - uint64_t authed_ptr = __emupac_autda(signed_ptr, ptr2); - if (authed_ptr != unsigned_ptr) { - printf("0x%lx != 0x%lx\n", authed_ptr, unsigned_ptr); - return 1; - } - break; - } - case 3: { - // Test that a corrupted signature crashes the program. - uint64_t signed_ptr = __emupac_pacda(ptr1, ptr2); - __emupac_autda(signed_ptr + (1ULL << 48), ptr2); - break; - } - case 4: { - // Test that signing a pointer with signature bits already set produces a pointer - // that would fail auth. - uint64_t signed_ptr = __emupac_pacda(ptr1 + (1ULL << 48), ptr2); - __emupac_autda(signed_ptr, ptr2); - break; - } - } - - return 0; -} diff --git a/flang/docs/ReleaseNotes.md b/flang/docs/ReleaseNotes.md index b2dbbcb5630f4..dfaa2eb6e03fe 100644 --- a/flang/docs/ReleaseNotes.md +++ b/flang/docs/ReleaseNotes.md @@ -25,6 +25,8 @@ page](https://llvm.org/releases/). ## Major New Features * Initial support for VOLATILE variables and procedure interface arguments has been added. 
+* OpenMP support is stable and no longer considered experimental. All of OpenMP 3.1 is + supported, along with much of OpenMP 4.0 and some parts of later standards. ## Bug Fixes diff --git a/flang/examples/FeatureList/FeatureList.cpp b/flang/examples/FeatureList/FeatureList.cpp index a36b8719e365d..e9aeed18ab0b7 100644 --- a/flang/examples/FeatureList/FeatureList.cpp +++ b/flang/examples/FeatureList/FeatureList.cpp @@ -472,7 +472,6 @@ struct NodeVisitor { READ_FEATURE(OmpIteration) READ_FEATURE(OmpIterationOffset) READ_FEATURE(OmpIterationVector) - READ_FEATURE(OmpEndAllocators) READ_FEATURE(OmpEndBlockDirective) READ_FEATURE(OmpEndCriticalDirective) READ_FEATURE(OmpEndLoopDirective) diff --git a/flang/lib/Lower/OpenMP/Clauses.h b/flang/include/flang/Lower/OpenMP/Clauses.h similarity index 99% rename from flang/lib/Lower/OpenMP/Clauses.h rename to flang/include/flang/Lower/OpenMP/Clauses.h index d7ab21d428e32..7f317f05f67b7 100644 --- a/flang/lib/Lower/OpenMP/Clauses.h +++ b/flang/include/flang/Lower/OpenMP/Clauses.h @@ -179,6 +179,7 @@ using IteratorSpecifier = tomp::type::IteratorSpecifierT; using DefinedOperator = tomp::type::DefinedOperatorT; using ProcedureDesignator = tomp::type::ProcedureDesignatorT; using ReductionOperator = tomp::type::ReductionIdentifierT; +using ReductionOperatorList = List; using DependenceType = tomp::type::DependenceType; using Prescriptiveness = tomp::type::Prescriptiveness; diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.h b/flang/include/flang/Lower/Support/ReductionProcessor.h similarity index 85% rename from flang/lib/Lower/OpenMP/ReductionProcessor.h rename to flang/include/flang/Lower/Support/ReductionProcessor.h index a7198b48f6b4e..72d8a0096f511 100644 --- a/flang/lib/Lower/OpenMP/ReductionProcessor.h +++ b/flang/include/flang/Lower/Support/ReductionProcessor.h @@ -13,13 +13,12 @@ #ifndef FORTRAN_LOWER_REDUCTIONPROCESSOR_H #define FORTRAN_LOWER_REDUCTIONPROCESSOR_H -#include "Clauses.h" +#include 
"flang/Lower/OpenMP/Clauses.h" #include "flang/Optimizer/Builder/FIRBuilder.h" #include "flang/Optimizer/Dialect/FIRType.h" #include "flang/Parser/parse-tree.h" #include "flang/Semantics/symbol.h" #include "flang/Semantics/type.h" -#include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "mlir/IR/Location.h" #include "mlir/IR/Types.h" @@ -65,6 +64,9 @@ class ReductionProcessor { static ReductionIdentifier getReductionType(omp::clause::DefinedOperator::IntrinsicOperator intrinsicOp); + static ReductionIdentifier + getReductionType(const fir::ReduceOperationEnum &pd); + static bool supportedIntrinsicProcReduction(const omp::clause::ProcedureDesignator &pd); @@ -78,10 +80,9 @@ class ReductionProcessor { const fir::KindMapping &kindMap, mlir::Type ty, bool isByRef); - static std::string - getReductionName(omp::clause::DefinedOperator::IntrinsicOperator intrinsicOp, - const fir::KindMapping &kindMap, mlir::Type ty, - bool isByRef); + static std::string getReductionName(ReductionIdentifier redId, + const fir::KindMapping &kindMap, + mlir::Type ty, bool isByRef); /// This function returns the identity value of the operator \p /// reductionOpName. For example: @@ -113,22 +114,23 @@ class ReductionProcessor { /// symbol table. The declaration has a constant initializer with the neutral /// value `initValue`, and the reduction combiner carried over from `reduce`. /// TODO: add atomic region. - static mlir::omp::DeclareReductionOp - createDeclareReduction(AbstractConverter &builder, - llvm::StringRef reductionOpName, - const ReductionIdentifier redId, mlir::Type type, - mlir::Location loc, bool isByRef); + template + static OpType createDeclareReduction(AbstractConverter &builder, + llvm::StringRef reductionOpName, + const ReductionIdentifier redId, + mlir::Type type, mlir::Location loc, + bool isByRef); /// Creates a reduction declaration and associates it with an OpenMP block /// directive. 
- template + template static void processReductionArguments( mlir::Location currentLocation, lower::AbstractConverter &converter, - const T &reduction, llvm::SmallVectorImpl &reductionVars, + const RedOperatorListTy &redOperatorList, + llvm::SmallVectorImpl &reductionVars, llvm::SmallVectorImpl &reduceVarByRef, llvm::SmallVectorImpl &reductionDeclSymbols, - llvm::SmallVectorImpl &reductionSymbols, - mlir::omp::ReductionModifierAttr *reductionMod = nullptr); + const llvm::SmallVectorImpl &reductionSymbols); }; template diff --git a/flang/include/flang/Optimizer/Dialect/FIRAttr.td b/flang/include/flang/Optimizer/Dialect/FIRAttr.td index 2845080030b92..7bd96ac3ea631 100644 --- a/flang/include/flang/Optimizer/Dialect/FIRAttr.td +++ b/flang/include/flang/Optimizer/Dialect/FIRAttr.td @@ -112,7 +112,7 @@ def fir_ReduceOperationEnum : I32BitEnumAttr<"ReduceOperationEnum", I32BitEnumAttrCaseBit<"MIN", 7, "min">, I32BitEnumAttrCaseBit<"IAND", 8, "iand">, I32BitEnumAttrCaseBit<"IOR", 9, "ior">, - I32BitEnumAttrCaseBit<"EIOR", 10, "eior"> + I32BitEnumAttrCaseBit<"IEOR", 10, "ieor"> ]> { let separator = ", "; let cppNamespace = "::fir"; diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td index f440580f0878a..e3f5c4403002a 100644 --- a/flang/include/flang/Optimizer/Dialect/FIROps.td +++ b/flang/include/flang/Optimizer/Dialect/FIROps.td @@ -3518,7 +3518,7 @@ def fir_BoxTotalElementsOp def YieldOp : fir_Op<"yield", [Pure, ReturnLike, Terminator, - ParentOneOf<["LocalitySpecifierOp"]>]> { + ParentOneOf<["LocalitySpecifierOp", "DeclareReductionOp"]>]> { let summary = "loop yield and termination operation"; let description = [{ "fir.yield" yields SSA values from a fir dialect op region and @@ -3656,6 +3656,103 @@ def fir_LocalitySpecifierOp : fir_Op<"local", [IsolatedFromAbove]> { let hasRegionVerifier = 1; } +def fir_DeclareReductionOp : fir_Op<"declare_reduction", [IsolatedFromAbove, + Symbol]> { + let summary = "declares a 
reduction kind"; + let description = [{ + Note: this operation is adapted from omp::DeclareReductionOp. There is a lot + duplication at the moment. TODO Combine both ops into one. See: + https://discourse.llvm.org/t/dialect-for-data-locality-sharing-specifiers-clauses-in-openmp-openacc-and-do-concurrent/86108. + + Declares a `do concurrent` reduction. This requires two mandatory and three + optional regions. + + 1. The optional alloc region specifies how to allocate the thread-local + reduction value. This region should not contain control flow and all + IR should be suitable for inlining straight into an entry block. In + the common case this is expected to contain only allocas. It is + expected to `fir.yield` the allocated value on all control paths. + If allocation is conditional (e.g. only allocate if the mold is + allocated), this should be done in the initilizer region and this + region not included. The alloc region is not used for by-value + reductions (where allocation is implicit). + 2. The initializer region specifies how to initialize the thread-local + reduction value. This is usually the neutral element of the reduction. + For convenience, the region has an argument that contains the value + of the reduction accumulator at the start of the reduction. If an alloc + region is specified, there is a second block argument containing the + address of the allocated memory. The initializer region is expected to + `fir.yield` the new value on all control flow paths. + 3. The reduction region specifies how to combine two values into one, i.e. + the reduction operator. It accepts the two values as arguments and is + expected to `fir.yield` the combined value on all control flow paths. + 4. The atomic reduction region is optional and specifies how two values + can be combined atomically given local accumulator variables. It is + expected to store the combined value in the first accumulator variable. + 5. 
The cleanup region is optional and specifies how to clean up any memory + allocated by the initializer region. The region has an argument that + contains the value of the thread-local reduction accumulator. This will + be executed after the reduction has completed. + + Note that the MLIR type system does not allow for type-polymorphic + reductions. Separate reduction declarations should be created for different + element and accumulator types. + + For initializer and reduction regions, the operand to `fir.yield` must + match the parent operation's results. + }]; + + let arguments = (ins SymbolNameAttr:$sym_name, + TypeAttr:$type); + + let regions = (region MaxSizedRegion<1>:$allocRegion, + AnyRegion:$initializerRegion, + AnyRegion:$reductionRegion, + AnyRegion:$atomicReductionRegion, + AnyRegion:$cleanupRegion); + + let assemblyFormat = "$sym_name `:` $type attr-dict-with-keyword " + "( `alloc` $allocRegion^ )? " + "`init` $initializerRegion " + "`combiner` $reductionRegion " + "( `atomic` $atomicReductionRegion^ )? " + "( `cleanup` $cleanupRegion^ )? "; + + let extraClassDeclaration = [{ + mlir::BlockArgument getAllocMoldArg() { + auto ®ion = getAllocRegion(); + return region.empty() ? nullptr : region.getArgument(0); + } + mlir::BlockArgument getInitializerMoldArg() { + return getInitializerRegion().getArgument(0); + } + mlir::BlockArgument getInitializerAllocArg() { + return getAllocRegion().empty() ? + nullptr : getInitializerRegion().getArgument(1); + } + mlir::BlockArgument getReductionLhsArg() { + return getReductionRegion().getArgument(0); + } + mlir::BlockArgument getReductionRhsArg() { + return getReductionRegion().getArgument(1); + } + mlir::BlockArgument getAtomicReductionLhsArg() { + auto ®ion = getAtomicReductionRegion(); + return region.empty() ? nullptr : region.getArgument(0); + } + mlir::BlockArgument getAtomicReductionRhsArg() { + auto ®ion = getAtomicReductionRegion(); + return region.empty() ? 
nullptr : region.getArgument(1); + } + mlir::BlockArgument getCleanupAllocArg() { + auto ®ion = getCleanupRegion(); + return region.empty() ? nullptr : region.getArgument(0); + } + }]; + + let hasRegionVerifier = 1; +} + def fir_DoConcurrentOp : fir_Op<"do_concurrent", [SingleBlock, AutomaticAllocationScope]> { let summary = "do concurrent loop wrapper"; @@ -3694,6 +3791,25 @@ def fir_LocalSpecifier { ); } +def fir_ReduceSpecifier { + dag arguments = (ins + Variadic:$reduce_vars, + OptionalAttr:$reduce_byref, + + // This introduces redundency in how reductions are modelled. In particular, + // a single reduction is represented by 2 attributes: + // + // 1. `$reduce_syms` which is a list of `DeclareReductionOp`s. + // 2. `$reduce_attrs` which is an array of `fir::ReduceAttr` values. + // + // The first makes it easier to map `do concurrent` to parallization models + // (e.g. OpenMP and OpenACC) while the second makes it easier to map it to + // nests of `fir.do_loop ... unodered` ops. + OptionalAttr:$reduce_syms, + OptionalAttr:$reduce_attrs + ); +} + def fir_DoConcurrentLoopOp : fir_Op<"do_concurrent.loop", [AttrSizedOperandSegments, DeclareOpInterfaceMethods, @@ -3703,7 +3819,7 @@ def fir_DoConcurrentLoopOp : fir_Op<"do_concurrent.loop", let description = [{ An operation that models a Fortran `do concurrent` loop's header and block. This is a single-region single-block terminator op that is expected to - terminate the region of a `omp.do_concurrent` wrapper op. + terminate the region of a `fir.do_concurrent` wrapper op. This op borrows from both `scf.parallel` and `fir.do_loop` ops. Similar to `scf.parallel`, a loop nest takes 3 groups of SSA values as operands that @@ -3741,8 +3857,6 @@ def fir_DoConcurrentLoopOp : fir_Op<"do_concurrent.loop", - `lowerBound`: The group of SSA values for the nest's lower bounds. - `upperBound`: The group of SSA values for the nest's upper bounds. - `step`: The group of SSA values for the nest's steps. 
- - `reduceOperands`: The reduction SSA values, if any. - - `reduceAttrs`: Attributes to store reduction operations, if any. - `loopAnnotation`: Loop metadata to be passed down the compiler pipeline to LLVM. }]; @@ -3751,12 +3865,12 @@ def fir_DoConcurrentLoopOp : fir_Op<"do_concurrent.loop", Variadic:$lowerBound, Variadic:$upperBound, Variadic:$step, - Variadic:$reduceOperands, - OptionalAttr:$reduceAttrs, OptionalAttr:$loopAnnotation ); - let arguments = !con(opArgs, fir_LocalSpecifier.arguments); + let arguments = !con(opArgs, + fir_LocalSpecifier.arguments, + fir_ReduceSpecifier.arguments); let regions = (region SizedRegion<1>:$region); @@ -3764,9 +3878,17 @@ def fir_DoConcurrentLoopOp : fir_Op<"do_concurrent.loop", let hasVerifier = 1; let extraClassDeclaration = [{ - unsigned getNumInductionVars() { return getLowerBound().size(); } + unsigned getNumInductionVars() { + return getLowerBound().size(); + } - unsigned getNumLocalOperands() { return getLocalVars().size(); } + unsigned getNumLocalOperands() { + return getLocalVars().size(); + } + + unsigned getNumReduceOperands() { + return getReduceVars().size(); + } mlir::Block::BlockArgListType getInductionVars() { return getBody()->getArguments().slice(0, getNumInductionVars()); @@ -3777,19 +3899,15 @@ def fir_DoConcurrentLoopOp : fir_Op<"do_concurrent.loop", getNumLocalOperands()); } + mlir::Block::BlockArgListType getRegionReduceArgs() { + return getBody()->getArguments().slice(getNumInductionVars() + + getNumLocalOperands(), + getNumReduceOperands()); + } + /// Number of operands controlling the loop unsigned getNumControlOperands() { return getLowerBound().size() * 3; } - // Get Number of reduction operands - unsigned getNumReduceOperands() { - return getReduceOperands().size(); - } - - mlir::Operation::operand_range getLocalOperands() { - return getOperands() - .slice(getNumControlOperands() + getNumReduceOperands(), - getNumLocalOperands()); - } }]; } diff --git 
a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h index e3eed6aed8079..32b6ca45609b6 100644 --- a/flang/include/flang/Parser/dump-parse-tree.h +++ b/flang/include/flang/Parser/dump-parse-tree.h @@ -578,7 +578,6 @@ class ParseTreeDumper { NODE(parser, OmpDetachClause) NODE(parser, OmpDoacrossClause) NODE(parser, OmpDestroyClause) - NODE(parser, OmpEndAllocators) NODE(parser, OmpEndBlockDirective) NODE(parser, OmpEndCriticalDirective) NODE(parser, OmpEndLoopDirective) @@ -711,8 +710,6 @@ class ParseTreeDumper { NODE(parser, OpenMPDepobjConstruct) NODE(parser, OpenMPUtilityConstruct) NODE(parser, OpenMPDispatchConstruct) - NODE(parser, OmpDispatchDirective) - NODE(parser, OmpEndDispatchDirective) NODE(parser, OpenMPFlushConstruct) NODE(parser, OpenMPLoopConstruct) NODE(parser, OpenMPExecutableAllocate) diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index 43954ff735361..ab2dde7d5dfbe 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -4598,8 +4598,11 @@ struct OmpClauseList { struct OmpDirectiveSpecification { ENUM_CLASS(Flags, None, DeprecatedSyntax); TUPLE_CLASS_BOILERPLATE(OmpDirectiveSpecification); + const OmpDirectiveName &DirName() const { + return std::get(t); + } llvm::omp::Directive DirId() const { // - return std::get(t).v; + return DirName().v; } const OmpArgumentList &Arguments() const; const OmpClauseList &Clauses() const; @@ -4839,17 +4842,17 @@ struct OpenMPExecutableAllocate { t; }; -EMPTY_CLASS(OmpEndAllocators); - -// 6.7 Allocators construct [OpenMP 5.2] -// allocators-construct -> ALLOCATORS [allocate-clause [,]] -// allocate-stmt -// [omp-end-allocators-construct] +// Ref: [5.2:180-181], [6.0:315] +// +// allocators-construct -> +// ALLOCATORS [allocate-clause...] 
+// block +// [END ALLOCATORS] struct OpenMPAllocatorsConstruct { TUPLE_CLASS_BOILERPLATE(OpenMPAllocatorsConstruct); CharBlock source; - std::tuple, - std::optional> + std::tuple> t; }; @@ -4936,19 +4939,11 @@ struct OpenMPDepobjConstruct { // nocontext-clause | // novariants-clause | // nowait-clause -struct OmpDispatchDirective { - TUPLE_CLASS_BOILERPLATE(OmpDispatchDirective); - CharBlock source; - std::tuple t; -}; - -EMPTY_CLASS(OmpEndDispatchDirective); - struct OpenMPDispatchConstruct { TUPLE_CLASS_BOILERPLATE(OpenMPDispatchConstruct); CharBlock source; - std::tuple> + std::tuple> t; }; diff --git a/flang/lib/Evaluate/tools.cpp b/flang/lib/Evaluate/tools.cpp index 3d9f06308d8c1..21e6b3c3dd50d 100644 --- a/flang/lib/Evaluate/tools.cpp +++ b/flang/lib/Evaluate/tools.cpp @@ -1157,23 +1157,28 @@ template semantics::UnorderedSymbolSet CollectCudaSymbols( bool HasCUDAImplicitTransfer(const Expr &expr) { semantics::UnorderedSymbolSet hostSymbols; semantics::UnorderedSymbolSet deviceSymbols; + semantics::UnorderedSymbolSet cudaSymbols{CollectCudaSymbols(expr)}; SymbolVector symbols{GetSymbolVector(expr)}; std::reverse(symbols.begin(), symbols.end()); bool skipNext{false}; for (const Symbol &sym : symbols) { - bool isComponent{sym.owner().IsDerivedType()}; - bool skipComponent{false}; - if (!skipNext) { - if (IsCUDADeviceSymbol(sym)) { - deviceSymbols.insert(sym); - } else if (isComponent) { - skipComponent = true; // Component is not device. Look on the base. - } else { - hostSymbols.insert(sym); + if (cudaSymbols.find(sym) != cudaSymbols.end()) { + bool isComponent{sym.owner().IsDerivedType()}; + bool skipComponent{false}; + if (!skipNext) { + if (IsCUDADeviceSymbol(sym)) { + deviceSymbols.insert(sym); + } else if (isComponent) { + skipComponent = true; // Component is not device. Look on the base. 
+ } else { + hostSymbols.insert(sym); + } } + skipNext = isComponent && !skipComponent; + } else { + skipNext = false; } - skipNext = isComponent && !skipComponent; } bool hasConstant{HasConstant(expr)}; return (hasConstant || (hostSymbols.size() > 0)) && deviceSymbols.size() > 0; diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index 987fd3095fdf6..33c1f1e7a3c3a 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -31,6 +31,7 @@ #include "flang/Lower/PFTBuilder.h" #include "flang/Lower/Runtime.h" #include "flang/Lower/StatementContext.h" +#include "flang/Lower/Support/ReductionProcessor.h" #include "flang/Lower/Support/Utils.h" #include "flang/Optimizer/Builder/BoxValue.h" #include "flang/Optimizer/Builder/CUFCommon.h" @@ -127,9 +128,8 @@ struct IncrementLoopInfo { bool isConcurrent; llvm::SmallVector localSymList; llvm::SmallVector localInitSymList; - llvm::SmallVector< - std::pair> - reduceSymList; + llvm::SmallVector reduceSymList; + llvm::SmallVector reduceOperatorList; llvm::SmallVector sharedSymList; mlir::Value loopVariable = nullptr; @@ -1993,7 +1993,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { case Fortran::parser::ReductionOperator::Operator::Ior: return fir::ReduceOperationEnum::IOR; case Fortran::parser::ReductionOperator::Operator::Ieor: - return fir::ReduceOperationEnum::EIOR; + return fir::ReduceOperationEnum::IEOR; } llvm_unreachable("illegal reduction operator"); } @@ -2027,8 +2027,8 @@ class FirConverter : public Fortran::lower::AbstractConverter { std::get(reduceList->t)); for (const Fortran::parser::Name &x : std::get>(reduceList->t)) { - info.reduceSymList.push_back( - std::make_pair(reduce_operation, x.symbol)); + info.reduceSymList.push_back(x.symbol); + info.reduceOperatorList.push_back(reduce_operation); } } } @@ -2089,6 +2089,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { assign.u = Fortran::evaluate::Assignment::BoundsSpec{}; genAssignment(assign); } 
+ for (const Fortran::semantics::Symbol *sym : info.sharedSymList) { const auto *hostDetails = sym->detailsIf(); @@ -2112,6 +2113,45 @@ class FirConverter : public Fortran::lower::AbstractConverter { } } + llvm::SmallVector reduceVarByRef; + llvm::SmallVector reductionDeclSymbols; + llvm::SmallVector nestReduceAttrs; + + for (const auto &reduceOp : info.reduceOperatorList) + nestReduceAttrs.push_back( + fir::ReduceAttr::get(builder->getContext(), reduceOp)); + + llvm::SmallVector reduceVars; + Fortran::lower::omp::ReductionProcessor rp; + rp.processReductionArguments( + toLocation(), *this, info.reduceOperatorList, reduceVars, + reduceVarByRef, reductionDeclSymbols, info.reduceSymList); + + doConcurrentLoopOp.getReduceVarsMutable().assign(reduceVars); + doConcurrentLoopOp.setReduceSymsAttr( + reductionDeclSymbols.empty() + ? nullptr + : mlir::ArrayAttr::get(builder->getContext(), + reductionDeclSymbols)); + doConcurrentLoopOp.setReduceAttrsAttr( + nestReduceAttrs.empty() + ? nullptr + : mlir::ArrayAttr::get(builder->getContext(), nestReduceAttrs)); + doConcurrentLoopOp.setReduceByrefAttr( + reduceVarByRef.empty() ? nullptr + : mlir::DenseBoolArrayAttr::get( + builder->getContext(), reduceVarByRef)); + + for (auto [sym, reduceVar] : + llvm::zip_equal(info.reduceSymList, reduceVars)) { + auto arg = doConcurrentLoopOp.getRegion().begin()->addArgument( + reduceVar.getType(), doConcurrentLoopOp.getLoc()); + bindSymbol(*sym, hlfir::translateToExtendedValue( + reduceVar.getLoc(), *builder, hlfir::Entity{arg}, + /*contiguousHint=*/true) + .first); + } + // Note that allocatable, types with ultimate components, and type // requiring finalization are forbidden in LOCAL/LOCAL_INIT (F2023 C1130), // so no clean-up needs to be generated for these entities. @@ -2203,6 +2243,12 @@ class FirConverter : public Fortran::lower::AbstractConverter { } } + // Introduce a `do concurrent` scope to bind symbols corresponding to local, + // local_init, and reduce region arguments. 
+ if (!incrementLoopNestInfo.empty() && + incrementLoopNestInfo.back().isConcurrent) + localSymbols.pushScope(); + // Increment loop begin code. (Infinite/while code was already generated.) if (!infiniteLoop && !whileCondition) genFIRIncrementLoopBegin(incrementLoopNestInfo, doStmtEval.dirs); @@ -2226,6 +2272,10 @@ class FirConverter : public Fortran::lower::AbstractConverter { // This call may generate a branch in some contexts. genFIR(endDoEval, unstructuredContext); + + if (!incrementLoopNestInfo.empty() && + incrementLoopNestInfo.back().isConcurrent) + localSymbols.popScope(); } /// Generate FIR to evaluate loop control values (lower, upper and step). @@ -2408,19 +2458,6 @@ class FirConverter : public Fortran::lower::AbstractConverter { info.stepVariable = builder->createTemporary(loc, stepValue.getType()); builder->create(loc, stepValue, info.stepVariable); } - - if (genDoConcurrent && nestReduceOperands.empty()) { - // Create DO CONCURRENT reduce operands and attributes - for (const auto &reduceSym : info.reduceSymList) { - const fir::ReduceOperationEnum reduceOperation = reduceSym.first; - const Fortran::semantics::Symbol *sym = reduceSym.second; - fir::ExtendedValue exv = getSymbolExtendedValue(*sym, nullptr); - nestReduceOperands.push_back(fir::getBase(exv)); - auto reduceAttr = - fir::ReduceAttr::get(builder->getContext(), reduceOperation); - nestReduceAttrs.push_back(reduceAttr); - } - } } for (auto [info, lowerValue, upperValue, stepValue] : @@ -2518,11 +2555,11 @@ class FirConverter : public Fortran::lower::AbstractConverter { builder->setInsertionPointToEnd(loopWrapperOp.getBody()); auto loopOp = builder->create( - loc, nestLBs, nestUBs, nestSts, nestReduceOperands, - nestReduceAttrs.empty() - ? 
nullptr - : mlir::ArrayAttr::get(builder->getContext(), nestReduceAttrs), - nullptr, /*local_vars=*/std::nullopt, /*local_syms=*/nullptr); + loc, nestLBs, nestUBs, nestSts, /*loopAnnotation=*/nullptr, + /*local_vars=*/std::nullopt, + /*local_syms=*/nullptr, /*reduce_vars=*/std::nullopt, + /*reduce_byref=*/nullptr, /*reduce_syms=*/nullptr, + /*reduce_attrs=*/nullptr); llvm::SmallVector loopBlockArgTypes( incrementLoopNestInfo.size(), builder->getIndexType()); @@ -4842,8 +4879,6 @@ class FirConverter : public Fortran::lower::AbstractConverter { .detailsIf()) { if (details->cudaDataAttr() && *details->cudaDataAttr() != Fortran::common::CUDADataAttr::Pinned) { - // TODO: This should probably being checked in semantic and give a - // proper error. assert( nbDeviceResidentObject <= 1 && "Only one reference to the device resident object is supported"); diff --git a/flang/lib/Lower/CMakeLists.txt b/flang/lib/Lower/CMakeLists.txt index 8049cdf333173..cd80aaf553869 100644 --- a/flang/lib/Lower/CMakeLists.txt +++ b/flang/lib/Lower/CMakeLists.txt @@ -29,11 +29,11 @@ add_flang_library(FortranLower OpenMP/DataSharingProcessor.cpp OpenMP/Decomposer.cpp OpenMP/OpenMP.cpp - OpenMP/ReductionProcessor.cpp OpenMP/Utils.cpp PFTBuilder.cpp Runtime.cpp Support/PrivateReductionUtils.cpp + Support/ReductionProcessor.cpp Support/Utils.cpp SymbolMap.cpp VectorSubscripts.cpp diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp index 42842bcb41a74..00c9cbf0d2a8f 100644 --- a/flang/lib/Lower/OpenACC.cpp +++ b/flang/lib/Lower/OpenACC.cpp @@ -164,14 +164,13 @@ createDataEntryOp(fir::FirOpBuilder &builder, mlir::Location loc, op.setStructured(structured); op.setImplicit(implicit); op.setDataClause(dataClause); - if (auto mappableTy = - mlir::dyn_cast(baseAddr.getType())) { - op.setVarType(baseAddr.getType()); + if (auto pointerLikeTy = + mlir::dyn_cast(baseAddr.getType())) { + op.setVarType(pointerLikeTy.getElementType()); } else { - assert(mlir::isa(baseAddr.getType()) && - 
"expected pointer-like"); - op.setVarType(mlir::cast(baseAddr.getType()) - .getElementType()); + assert(mlir::isa(baseAddr.getType()) && + "expected mappable"); + op.setVarType(baseAddr.getType()); } op->setAttr(Op::getOperandSegmentSizeAttr(), diff --git a/flang/lib/Lower/OpenMP/Atomic.cpp b/flang/lib/Lower/OpenMP/Atomic.cpp index 2ab91b239a3cc..6ea331c370640 100644 --- a/flang/lib/Lower/OpenMP/Atomic.cpp +++ b/flang/lib/Lower/OpenMP/Atomic.cpp @@ -7,13 +7,13 @@ //===----------------------------------------------------------------------===// #include "Atomic.h" -#include "Clauses.h" #include "flang/Evaluate/expression.h" #include "flang/Evaluate/fold.h" #include "flang/Evaluate/tools.h" #include "flang/Evaluate/traverse.h" #include "flang/Evaluate/type.h" #include "flang/Lower/AbstractConverter.h" +#include "flang/Lower/OpenMP/Clauses.h" #include "flang/Lower/PFTBuilder.h" #include "flang/Lower/StatementContext.h" #include "flang/Lower/SymbolMap.h" diff --git a/flang/lib/Lower/OpenMP/ClauseFinder.h b/flang/lib/Lower/OpenMP/ClauseFinder.h index 3b77f2ca1d4cb..af52585452833 100644 --- a/flang/lib/Lower/OpenMP/ClauseFinder.h +++ b/flang/lib/Lower/OpenMP/ClauseFinder.h @@ -12,7 +12,7 @@ #ifndef FORTRAN_LOWER_CLAUSEFINDER_H #define FORTRAN_LOWER_CLAUSEFINDER_H -#include "Clauses.h" +#include "flang/Lower/OpenMP/Clauses.h" namespace Fortran { namespace lower { diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp index 7bea427099a28..74087d42a8e6e 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp @@ -11,11 +11,12 @@ //===----------------------------------------------------------------------===// #include "ClauseProcessor.h" -#include "Clauses.h" #include "Utils.h" #include "flang/Lower/ConvertExprToHLFIR.h" +#include "flang/Lower/OpenMP/Clauses.h" #include "flang/Lower/PFTBuilder.h" +#include "flang/Lower/Support/ReductionProcessor.h" #include "flang/Parser/tools.h" 
#include "flang/Semantics/tools.h" #include "llvm/Frontend/OpenMP/OMP.h.inc" @@ -25,6 +26,21 @@ namespace Fortran { namespace lower { namespace omp { +using ReductionModifier = + Fortran::lower::omp::clause::Reduction::ReductionModifier; + +mlir::omp::ReductionModifier translateReductionModifier(ReductionModifier mod) { + switch (mod) { + case ReductionModifier::Default: + return mlir::omp::ReductionModifier::defaultmod; + case ReductionModifier::Inscan: + return mlir::omp::ReductionModifier::inscan; + case ReductionModifier::Task: + return mlir::omp::ReductionModifier::task; + } + return mlir::omp::ReductionModifier::defaultmod; +} + /// Check for unsupported map operand types. static void checkMapType(mlir::Location location, mlir::Type type) { if (auto refType = mlir::dyn_cast(type)) @@ -1076,6 +1092,18 @@ bool ClauseProcessor::processIf( }); return found; } + +template +void collectReductionSyms( + const T &reduction, + llvm::SmallVectorImpl &reductionSyms) { + const auto &objectList{std::get(reduction.t)}; + for (const Object &object : objectList) { + const semantics::Symbol *symbol = object.sym(); + reductionSyms.push_back(symbol); + } +} + bool ClauseProcessor::processInReduction( mlir::Location currentLocation, mlir::omp::InReductionClauseOps &result, llvm::SmallVectorImpl &outReductionSyms) const { @@ -1085,10 +1113,14 @@ bool ClauseProcessor::processInReduction( llvm::SmallVector inReduceVarByRef; llvm::SmallVector inReductionDeclSymbols; llvm::SmallVector inReductionSyms; + collectReductionSyms(clause, inReductionSyms); + ReductionProcessor rp; - rp.processReductionArguments( - currentLocation, converter, clause, inReductionVars, - inReduceVarByRef, inReductionDeclSymbols, inReductionSyms); + rp.processReductionArguments( + currentLocation, converter, + std::get(clause.t), + inReductionVars, inReduceVarByRef, inReductionDeclSymbols, + inReductionSyms); // Copy local lists into the output. 
llvm::copy(inReductionVars, std::back_inserter(result.inReductionVars)); @@ -1416,10 +1448,23 @@ bool ClauseProcessor::processReduction( llvm::SmallVector reduceVarByRef; llvm::SmallVector reductionDeclSymbols; llvm::SmallVector reductionSyms; + collectReductionSyms(clause, reductionSyms); + + auto mod = std::get>(clause.t); + if (mod.has_value()) { + if (mod.value() == ReductionModifier::Task) + TODO(currentLocation, "Reduction modifier `task` is not supported"); + else + result.reductionMod = mlir::omp::ReductionModifierAttr::get( + converter.getFirOpBuilder().getContext(), + translateReductionModifier(mod.value())); + } + ReductionProcessor rp; - rp.processReductionArguments( - currentLocation, converter, clause, reductionVars, reduceVarByRef, - reductionDeclSymbols, reductionSyms, &result.reductionMod); + rp.processReductionArguments( + currentLocation, converter, + std::get(clause.t), + reductionVars, reduceVarByRef, reductionDeclSymbols, reductionSyms); // Copy local lists into the output. 
llvm::copy(reductionVars, std::back_inserter(result.reductionVars)); llvm::copy(reduceVarByRef, std::back_inserter(result.reductionByref)); @@ -1435,21 +1480,25 @@ bool ClauseProcessor::processTaskReduction( return findRepeatableClause( [&](const omp::clause::TaskReduction &clause, const parser::CharBlock &) { llvm::SmallVector taskReductionVars; - llvm::SmallVector TaskReduceVarByRef; - llvm::SmallVector TaskReductionDeclSymbols; - llvm::SmallVector TaskReductionSyms; + llvm::SmallVector taskReduceVarByRef; + llvm::SmallVector taskReductionDeclSymbols; + llvm::SmallVector taskReductionSyms; + collectReductionSyms(clause, taskReductionSyms); + ReductionProcessor rp; - rp.processReductionArguments( - currentLocation, converter, clause, taskReductionVars, - TaskReduceVarByRef, TaskReductionDeclSymbols, TaskReductionSyms); + rp.processReductionArguments( + currentLocation, converter, + std::get(clause.t), + taskReductionVars, taskReduceVarByRef, taskReductionDeclSymbols, + taskReductionSyms); // Copy local lists into the output. 
llvm::copy(taskReductionVars, std::back_inserter(result.taskReductionVars)); - llvm::copy(TaskReduceVarByRef, + llvm::copy(taskReduceVarByRef, std::back_inserter(result.taskReductionByref)); - llvm::copy(TaskReductionDeclSymbols, + llvm::copy(taskReductionDeclSymbols, std::back_inserter(result.taskReductionSyms)); - llvm::copy(TaskReductionSyms, std::back_inserter(outReductionSyms)); + llvm::copy(taskReductionSyms, std::back_inserter(outReductionSyms)); }); } diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h index 3d8c4a337a4a4..f8a1f7983b79b 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.h +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h @@ -13,12 +13,11 @@ #define FORTRAN_LOWER_CLAUSEPROCESSOR_H #include "ClauseFinder.h" -#include "Clauses.h" -#include "ReductionProcessor.h" #include "Utils.h" #include "flang/Lower/AbstractConverter.h" #include "flang/Lower/Bridge.h" #include "flang/Lower/DirectivesCommon.h" +#include "flang/Lower/OpenMP/Clauses.h" #include "flang/Optimizer/Builder/Todo.h" #include "flang/Parser/dump-parse-tree.h" #include "flang/Parser/parse-tree.h" diff --git a/flang/lib/Lower/OpenMP/Clauses.cpp b/flang/lib/Lower/OpenMP/Clauses.cpp index b599d69a36272..22a07219d3a50 100644 --- a/flang/lib/Lower/OpenMP/Clauses.cpp +++ b/flang/lib/Lower/OpenMP/Clauses.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "Clauses.h" +#include "flang/Lower/OpenMP/Clauses.h" #include "flang/Common/idioms.h" #include "flang/Evaluate/expression.h" diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.h b/flang/lib/Lower/OpenMP/DataSharingProcessor.h index fded04c839fb4..ee2fc70d2e673 100644 --- a/flang/lib/Lower/OpenMP/DataSharingProcessor.h +++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.h @@ -12,9 +12,9 @@ #ifndef FORTRAN_LOWER_DATASHARINGPROCESSOR_H #define FORTRAN_LOWER_DATASHARINGPROCESSOR_H -#include "Clauses.h" #include 
"flang/Lower/AbstractConverter.h" #include "flang/Lower/OpenMP.h" +#include "flang/Lower/OpenMP/Clauses.h" #include "flang/Optimizer/Builder/FIRBuilder.h" #include "flang/Parser/parse-tree.h" #include "flang/Semantics/symbol.h" diff --git a/flang/lib/Lower/OpenMP/Decomposer.cpp b/flang/lib/Lower/OpenMP/Decomposer.cpp index 251cba9204adc..9bfbf67bec88c 100644 --- a/flang/lib/Lower/OpenMP/Decomposer.cpp +++ b/flang/lib/Lower/OpenMP/Decomposer.cpp @@ -12,8 +12,8 @@ #include "Decomposer.h" -#include "Clauses.h" #include "Utils.h" +#include "flang/Lower/OpenMP/Clauses.h" #include "flang/Lower/PFTBuilder.h" #include "flang/Semantics/semantics.h" #include "flang/Tools/CrossToolHelpers.h" diff --git a/flang/lib/Lower/OpenMP/Decomposer.h b/flang/lib/Lower/OpenMP/Decomposer.h index e3291b7c59e21..65492bd76280d 100644 --- a/flang/lib/Lower/OpenMP/Decomposer.h +++ b/flang/lib/Lower/OpenMP/Decomposer.h @@ -8,7 +8,7 @@ #ifndef FORTRAN_LOWER_OPENMP_DECOMPOSER_H #define FORTRAN_LOWER_OPENMP_DECOMPOSER_H -#include "Clauses.h" +#include "flang/Lower/OpenMP/Clauses.h" #include "mlir/IR/BuiltinOps.h" #include "llvm/Frontend/OpenMP/ConstructDecompositionT.h" #include "llvm/Frontend/OpenMP/OMP.h" diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 0a56e888ac44b..4458f62eea95a 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -14,16 +14,15 @@ #include "Atomic.h" #include "ClauseProcessor.h" -#include "Clauses.h" #include "DataSharingProcessor.h" #include "Decomposer.h" -#include "ReductionProcessor.h" #include "Utils.h" #include "flang/Common/idioms.h" #include "flang/Lower/Bridge.h" #include "flang/Lower/ConvertExpr.h" #include "flang/Lower/ConvertVariable.h" #include "flang/Lower/DirectivesCommon.h" +#include "flang/Lower/OpenMP/Clauses.h" #include "flang/Lower/StatementContext.h" #include "flang/Lower/SymbolMap.h" #include "flang/Optimizer/Builder/BoxValue.h" diff --git a/flang/lib/Lower/OpenMP/Utils.cpp 
b/flang/lib/Lower/OpenMP/Utils.cpp index c226c2558e7aa..2e53f01f1da6a 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -12,9 +12,8 @@ #include "Utils.h" -#include "Clauses.h" - #include "ClauseFinder.h" +#include "flang/Lower/OpenMP/Clauses.h" #include #include #include diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h index a7eb2dc5ee664..1526bd4e90233 100644 --- a/flang/lib/Lower/OpenMP/Utils.h +++ b/flang/lib/Lower/OpenMP/Utils.h @@ -9,7 +9,7 @@ #ifndef FORTRAN_LOWER_OPENMPUTILS_H #define FORTRAN_LOWER_OPENMPUTILS_H -#include "Clauses.h" +#include "flang/Lower/OpenMP/Clauses.h" #include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "mlir/IR/Location.h" #include "mlir/IR/Value.h" diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/Support/ReductionProcessor.cpp similarity index 77% rename from flang/lib/Lower/OpenMP/ReductionProcessor.cpp rename to flang/lib/Lower/Support/ReductionProcessor.cpp index 330cef7b54c74..14b2c9836748f 100644 --- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp +++ b/flang/lib/Lower/Support/ReductionProcessor.cpp @@ -10,10 +10,11 @@ // //===----------------------------------------------------------------------===// -#include "ReductionProcessor.h" +#include "flang/Lower/Support/ReductionProcessor.h" #include "flang/Lower/AbstractConverter.h" #include "flang/Lower/ConvertType.h" +#include "flang/Lower/OpenMP/Clauses.h" #include "flang/Lower/Support/PrivateReductionUtils.h" #include "flang/Lower/SymbolMap.h" #include "flang/Optimizer/Builder/Complex.h" @@ -21,8 +22,6 @@ #include "flang/Optimizer/Builder/Todo.h" #include "flang/Optimizer/Dialect/FIRType.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" -#include "flang/Optimizer/Support/FatalError.h" -#include "flang/Parser/tools.h" #include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "llvm/Support/CommandLine.h" #include @@ -40,35 +39,35 @@ namespace lower { namespace omp { // explicit template 
declarations -template void -ReductionProcessor::processReductionArguments( +template void ReductionProcessor::processReductionArguments< + mlir::omp::DeclareReductionOp, omp::clause::ReductionOperatorList>( mlir::Location currentLocation, lower::AbstractConverter &converter, - const omp::clause::Reduction &reduction, + const omp::clause::ReductionOperatorList &redOperatorList, llvm::SmallVectorImpl &reductionVars, llvm::SmallVectorImpl &reduceVarByRef, llvm::SmallVectorImpl &reductionDeclSymbols, - llvm::SmallVectorImpl &reductionSymbols, - mlir::omp::ReductionModifierAttr *reductionMod); + const llvm::SmallVectorImpl &reductionSymbols); -template void -ReductionProcessor::processReductionArguments( +template void ReductionProcessor::processReductionArguments< + fir::DeclareReductionOp, llvm::SmallVector>( mlir::Location currentLocation, lower::AbstractConverter &converter, - const omp::clause::TaskReduction &reduction, + const llvm::SmallVector &redOperatorList, llvm::SmallVectorImpl &reductionVars, llvm::SmallVectorImpl &reduceVarByRef, llvm::SmallVectorImpl &reductionDeclSymbols, - llvm::SmallVectorImpl &reductionSymbols, - mlir::omp::ReductionModifierAttr *reductionMod); + const llvm::SmallVectorImpl &reductionSymbols); -template void -ReductionProcessor::processReductionArguments( - mlir::Location currentLocation, lower::AbstractConverter &converter, - const omp::clause::InReduction &reduction, - llvm::SmallVectorImpl &reductionVars, - llvm::SmallVectorImpl &reduceVarByRef, - llvm::SmallVectorImpl &reductionDeclSymbols, - llvm::SmallVectorImpl &reductionSymbols, - mlir::omp::ReductionModifierAttr *reductionMod); +template mlir::omp::DeclareReductionOp +ReductionProcessor::createDeclareReduction( + AbstractConverter &converter, llvm::StringRef reductionOpName, + const ReductionIdentifier redId, mlir::Type type, mlir::Location loc, + bool isByRef); + +template fir::DeclareReductionOp +ReductionProcessor::createDeclareReduction( + AbstractConverter &converter, 
llvm::StringRef reductionOpName, + const ReductionIdentifier redId, mlir::Type type, mlir::Location loc, + bool isByRef); ReductionProcessor::ReductionIdentifier ReductionProcessor::getReductionType( const omp::clause::ProcedureDesignator &pd) { @@ -106,6 +105,37 @@ ReductionProcessor::ReductionIdentifier ReductionProcessor::getReductionType( } } +ReductionProcessor::ReductionIdentifier +ReductionProcessor::getReductionType(const fir::ReduceOperationEnum &redOp) { + switch (redOp) { + case fir::ReduceOperationEnum::Add: + return ReductionIdentifier::ADD; + case fir::ReduceOperationEnum::Multiply: + return ReductionIdentifier::MULTIPLY; + + case fir::ReduceOperationEnum::AND: + return ReductionIdentifier::AND; + case fir::ReduceOperationEnum::OR: + return ReductionIdentifier::OR; + + case fir::ReduceOperationEnum::EQV: + return ReductionIdentifier::EQV; + case fir::ReduceOperationEnum::NEQV: + return ReductionIdentifier::NEQV; + + case fir::ReduceOperationEnum::IAND: + return ReductionIdentifier::IAND; + case fir::ReduceOperationEnum::IEOR: + return ReductionIdentifier::IEOR; + case fir::ReduceOperationEnum::IOR: + return ReductionIdentifier::IOR; + case fir::ReduceOperationEnum::MAX: + return ReductionIdentifier::MAX; + case fir::ReduceOperationEnum::MIN: + return ReductionIdentifier::MIN; + } +} + bool ReductionProcessor::supportedIntrinsicProcReduction( const omp::clause::ProcedureDesignator &pd) { semantics::Symbol *sym = pd.v.sym(); @@ -136,28 +166,29 @@ ReductionProcessor::getReductionName(llvm::StringRef name, return fir::getTypeAsString(ty, kindMap, (name + byrefAddition).str()); } -std::string ReductionProcessor::getReductionName( - omp::clause::DefinedOperator::IntrinsicOperator intrinsicOp, - const fir::KindMapping &kindMap, mlir::Type ty, bool isByRef) { +std::string +ReductionProcessor::getReductionName(ReductionIdentifier redId, + const fir::KindMapping &kindMap, + mlir::Type ty, bool isByRef) { std::string reductionName; - switch (intrinsicOp) { - 
case omp::clause::DefinedOperator::IntrinsicOperator::Add: + switch (redId) { + case ReductionIdentifier::ADD: reductionName = "add_reduction"; break; - case omp::clause::DefinedOperator::IntrinsicOperator::Multiply: + case ReductionIdentifier::MULTIPLY: reductionName = "multiply_reduction"; break; - case omp::clause::DefinedOperator::IntrinsicOperator::AND: + case ReductionIdentifier::AND: reductionName = "and_reduction"; break; - case omp::clause::DefinedOperator::IntrinsicOperator::EQV: + case ReductionIdentifier::EQV: reductionName = "eqv_reduction"; break; - case omp::clause::DefinedOperator::IntrinsicOperator::OR: + case ReductionIdentifier::OR: reductionName = "or_reduction"; break; - case omp::clause::DefinedOperator::IntrinsicOperator::NEQV: + case ReductionIdentifier::NEQV: reductionName = "neqv_reduction"; break; default: @@ -334,8 +365,18 @@ mlir::Value ReductionProcessor::createScalarCombiner( return reductionOp; } +template +static void genYield(fir::FirOpBuilder &builder, mlir::Location loc, + mlir::Value yieldedValue) { + if constexpr (std::is_same_v) + builder.create(loc, yieldedValue); + else + builder.create(loc, yieldedValue); +} + /// Create reduction combiner region for reduction variables which are boxed /// arrays +template static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, ReductionProcessor::ReductionIdentifier redId, fir::BaseBoxType boxTy, mlir::Value lhs, @@ -369,7 +410,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value result = ReductionProcessor::createScalarCombiner( builder, loc, redId, eleTy, lhs, rhs); builder.create(loc, result, lhsValAddr); - builder.create(loc, lhsAddr); + genYield(builder, loc, lhsAddr); return; } @@ -408,10 +449,11 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, builder.create(loc, scalarReduction, lhsEleAddr); builder.setInsertionPointAfter(nest.outerOp); - builder.create(loc, lhsAddr); + genYield(builder, loc, 
lhsAddr); } // generate combiner region for reduction operations +template static void genCombiner(fir::FirOpBuilder &builder, mlir::Location loc, ReductionProcessor::ReductionIdentifier redId, mlir::Type ty, mlir::Value lhs, mlir::Value rhs, @@ -426,15 +468,15 @@ static void genCombiner(fir::FirOpBuilder &builder, mlir::Location loc, builder, loc, redId, ty, lhsLoaded, rhsLoaded); if (isByRef) { builder.create(loc, result, lhs); - builder.create(loc, lhs); + genYield(builder, loc, lhs); } else { - builder.create(loc, result); + genYield(builder, loc, result); } return; } // all arrays should have been boxed if (auto boxTy = mlir::dyn_cast(ty)) { - genBoxCombiner(builder, loc, redId, boxTy, lhs, rhs); + genBoxCombiner(builder, loc, redId, boxTy, lhs, rhs); return; } @@ -454,15 +496,13 @@ static mlir::Type unwrapSeqOrBoxedType(mlir::Type ty) { return ty; } +template static void createReductionAllocAndInitRegions( - AbstractConverter &converter, mlir::Location loc, - mlir::omp::DeclareReductionOp &reductionDecl, + AbstractConverter &converter, mlir::Location loc, OpType &reductionDecl, const ReductionProcessor::ReductionIdentifier redId, mlir::Type type, bool isByRef) { fir::FirOpBuilder &builder = converter.getFirOpBuilder(); - auto yield = [&](mlir::Value ret) { - builder.create(loc, ret); - }; + auto yield = [&](mlir::Value ret) { genYield(builder, loc, ret); }; mlir::Block *allocBlock = nullptr; mlir::Block *initBlock = nullptr; @@ -489,7 +529,9 @@ static void createReductionAllocAndInitRegions( converter, loc, type, initValue, initBlock, reductionDecl.getInitializerAllocArg(), reductionDecl.getInitializerMoldArg(), reductionDecl.getCleanupRegion(), - DeclOperationKind::Reduction); + DeclOperationKind::Reduction, /*sym=*/nullptr, + /*cannotHaveLowerBounds=*/false, + /*isDoConcurrent*/ std::is_same_v); } if (fir::isa_trivial(ty)) { @@ -512,7 +554,8 @@ static void createReductionAllocAndInitRegions( yield(boxAlloca); } -mlir::omp::DeclareReductionOp 
ReductionProcessor::createDeclareReduction( +template +OpType ReductionProcessor::createDeclareReduction( AbstractConverter &converter, llvm::StringRef reductionOpName, const ReductionIdentifier redId, mlir::Type type, mlir::Location loc, bool isByRef) { @@ -522,8 +565,7 @@ mlir::omp::DeclareReductionOp ReductionProcessor::createDeclareReduction( assert(!reductionOpName.empty()); - auto decl = - module.lookupSymbol(reductionOpName); + auto decl = module.lookupSymbol(reductionOpName); if (decl) return decl; @@ -532,8 +574,7 @@ mlir::omp::DeclareReductionOp ReductionProcessor::createDeclareReduction( if (!isByRef) type = valTy; - decl = modBuilder.create(loc, reductionOpName, - type); + decl = modBuilder.create(loc, reductionOpName, type); createReductionAllocAndInitRegions(converter, loc, decl, redId, type, isByRef); @@ -544,7 +585,7 @@ mlir::omp::DeclareReductionOp ReductionProcessor::createDeclareReduction( builder.setInsertionPointToEnd(&decl.getReductionRegion().back()); mlir::Value op1 = decl.getReductionRegion().front().getArgument(0); mlir::Value op2 = decl.getReductionRegion().front().getArgument(1); - genCombiner(builder, loc, redId, type, op1, op2, isByRef); + genCombiner(builder, loc, redId, type, op1, op2, isByRef); return decl; } @@ -563,64 +604,41 @@ static bool doReductionByRef(mlir::Value reductionVar) { return false; } -mlir::omp::ReductionModifier translateReductionModifier(ReductionModifier mod) { - switch (mod) { - case ReductionModifier::Default: - return mlir::omp::ReductionModifier::defaultmod; - case ReductionModifier::Inscan: - return mlir::omp::ReductionModifier::inscan; - case ReductionModifier::Task: - return mlir::omp::ReductionModifier::task; - } - return mlir::omp::ReductionModifier::defaultmod; -} - -template +template void ReductionProcessor::processReductionArguments( mlir::Location currentLocation, lower::AbstractConverter &converter, - const T &reduction, llvm::SmallVectorImpl &reductionVars, + const RedOperatorListTy 
&redOperatorList, + llvm::SmallVectorImpl &reductionVars, llvm::SmallVectorImpl &reduceVarByRef, llvm::SmallVectorImpl &reductionDeclSymbols, - llvm::SmallVectorImpl &reductionSymbols, - mlir::omp::ReductionModifierAttr *reductionMod) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - - if constexpr (std::is_same_v) { - auto mod = std::get>(reduction.t); - if (mod.has_value()) { - if (mod.value() == ReductionModifier::Task) - TODO(currentLocation, "Reduction modifier `task` is not supported"); - else - *reductionMod = mlir::omp::ReductionModifierAttr::get( - firOpBuilder.getContext(), translateReductionModifier(mod.value())); - } - } - - mlir::omp::DeclareReductionOp decl; - const auto &redOperatorList{ - std::get(reduction.t)}; - assert(redOperatorList.size() == 1 && "Expecting single operator"); - const auto &redOperator = redOperatorList.front(); - const auto &objectList{std::get(reduction.t)}; - - if (!std::holds_alternative(redOperator.u)) { - if (const auto *reductionIntrinsic = - std::get_if(&redOperator.u)) { - if (!ReductionProcessor::supportedIntrinsicProcReduction( - *reductionIntrinsic)) { + const llvm::SmallVectorImpl &reductionSymbols) { + if constexpr (std::is_same_v) { + // For OpenMP reduction clauses, check if the reduction operator is + // supported. 
+ assert(redOperatorList.size() == 1 && "Expecting single operator"); + const Fortran::lower::omp::clause::ReductionOperator &redOperator = + redOperatorList.front(); + + if (!std::holds_alternative(redOperator.u)) { + if (const auto *reductionIntrinsic = + std::get_if(&redOperator.u)) { + if (!ReductionProcessor::supportedIntrinsicProcReduction( + *reductionIntrinsic)) { + return; + } + } else { return; } - } else { - return; } } + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + // Reduction variable processing common to both intrinsic operators and // procedure designators fir::FirOpBuilder &builder = converter.getFirOpBuilder(); - for (const Object &object : objectList) { - const semantics::Symbol *symbol = object.sym(); - reductionSymbols.push_back(symbol); + for (const semantics::Symbol *symbol : reductionSymbols) { mlir::Value symVal = converter.getSymbolAddress(*symbol); mlir::Type eleType; auto refType = mlir::dyn_cast_or_null(symVal.getType()); @@ -672,52 +690,63 @@ void ReductionProcessor::processReductionArguments( reduceVarByRef.push_back(doReductionByRef(symVal)); } + unsigned idx = 0; for (auto [symVal, isByRef] : llvm::zip(reductionVars, reduceVarByRef)) { auto redType = mlir::cast(symVal.getType()); const auto &kindMap = firOpBuilder.getKindMap(); std::string reductionName; ReductionIdentifier redId; - if (const auto &redDefinedOp = - std::get_if(&redOperator.u)) { - const auto &intrinsicOp{ - std::get( - redDefinedOp->u)}; - redId = getReductionType(intrinsicOp); - switch (redId) { - case ReductionIdentifier::ADD: - case ReductionIdentifier::MULTIPLY: - case ReductionIdentifier::AND: - case ReductionIdentifier::EQV: - case ReductionIdentifier::OR: - case ReductionIdentifier::NEQV: - break; - default: - TODO(currentLocation, - "Reduction of some intrinsic operators is not supported"); - break; - } - - reductionName = getReductionName(intrinsicOp, kindMap, redType, isByRef); - } else if (const auto *reductionIntrinsic = - 
std::get_if( - &redOperator.u)) { - if (!ReductionProcessor::supportedIntrinsicProcReduction( - *reductionIntrinsic)) { - TODO(currentLocation, "Unsupported intrinsic proc reduction"); + if constexpr (std::is_same_v) { + const Fortran::lower::omp::clause::ReductionOperator &redOperator = + redOperatorList.front(); + if (const auto &redDefinedOp = + std::get_if(&redOperator.u)) { + const auto &intrinsicOp{ + std::get( + redDefinedOp->u)}; + redId = getReductionType(intrinsicOp); + switch (redId) { + case ReductionIdentifier::ADD: + case ReductionIdentifier::MULTIPLY: + case ReductionIdentifier::AND: + case ReductionIdentifier::EQV: + case ReductionIdentifier::OR: + case ReductionIdentifier::NEQV: + break; + default: + TODO(currentLocation, + "Reduction of some intrinsic operators is not supported"); + break; + } + + reductionName = getReductionName(redId, kindMap, redType, isByRef); + } else if (const auto *reductionIntrinsic = + std::get_if( + &redOperator.u)) { + if (!ReductionProcessor::supportedIntrinsicProcReduction( + *reductionIntrinsic)) { + TODO(currentLocation, "Unsupported intrinsic proc reduction"); + } + redId = getReductionType(*reductionIntrinsic); + reductionName = + getReductionName(getRealName(*reductionIntrinsic).ToString(), + kindMap, redType, isByRef); + } else { + TODO(currentLocation, "Unexpected reduction type"); } - redId = getReductionType(*reductionIntrinsic); - reductionName = - getReductionName(getRealName(*reductionIntrinsic).ToString(), kindMap, - redType, isByRef); } else { - TODO(currentLocation, "Unexpected reduction type"); + // `do concurrent` reductions + redId = getReductionType(redOperatorList[idx]); + reductionName = getReductionName(redId, kindMap, redType, isByRef); } - decl = createDeclareReduction(converter, reductionName, redId, redType, - currentLocation, isByRef); + OpType decl = createDeclareReduction( + converter, reductionName, redId, redType, currentLocation, isByRef); reductionDeclSymbols.push_back( 
mlir::SymbolRefAttr::get(firOpBuilder.getContext(), decl.getSymName())); + ++idx; } } diff --git a/flang/lib/Lower/Support/Utils.cpp b/flang/lib/Lower/Support/Utils.cpp index c65f51ce6cacd..b9d2574a76ad0 100644 --- a/flang/lib/Lower/Support/Utils.cpp +++ b/flang/lib/Lower/Support/Utils.cpp @@ -668,9 +668,7 @@ void privatizeSymbol( const semantics::Symbol *sym = isDoConcurrent ? &symToPrivatize->GetUltimate() : symToPrivatize; - const lower::SymbolBox hsb = isDoConcurrent - ? converter.shallowLookupSymbol(*sym) - : converter.lookupOneLevelUpSymbol(*sym); + const lower::SymbolBox hsb = converter.lookupOneLevelUpSymbol(*sym); assert(hsb && "Host symbol box not found"); mlir::Location symLoc = hsb.getAddr().getLoc(); diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp index b5cabdb830e5c..acd5a88a2582d 100644 --- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp +++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp @@ -286,6 +286,9 @@ mlir::Block *fir::FirOpBuilder::getAllocaBlock() { if (auto firLocalOp = getRegion().getParentOfType()) return &getRegion().front(); + if (auto firLocalOp = getRegion().getParentOfType()) + return &getRegion().front(); + return getEntryBlock(); } diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index 3bbc32f23bcfa..ecc04a6c9a2be 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -2239,18 +2239,17 @@ struct XReboxOpConversion : public EmboxCommonConversion { getSubcomponentIndices(rebox, rebox.getBox(), operands, fieldIndices); if (!rebox.getSubstr().empty()) substringOffset = operands[rebox.getSubstrOperandIndex()]; - base = - genBoxOffsetGep(rewriter, loc, base, llvmBaseObjectType, zero, - /*cstInteriorIndices=*/llvm::ArrayRef(), - fieldIndices, substringOffset); + base = genBoxOffsetGep(rewriter, loc, base, llvmBaseObjectType, zero, + /*cstInteriorIndices=*/{}, fieldIndices, + substringOffset); } if 
(rebox.getSlice().empty()) // The array section is of the form array[%component][substring], keep // the input array extents and strides. return finalizeRebox(rebox, adaptor, destBoxTy, dest, base, - /*lbounds*/ llvm::ArrayRef(), - inputExtents, inputStrides, rewriter); + /*lbounds*/ {}, inputExtents, inputStrides, + rewriter); // The slice is of the form array(i:j:k)[%component]. Compute new extents // and strides. @@ -2298,8 +2297,8 @@ struct XReboxOpConversion : public EmboxCommonConversion { } } return finalizeRebox(rebox, adaptor, destBoxTy, dest, base, - /*lbounds*/ llvm::ArrayRef(), - slicedExtents, slicedStrides, rewriter); + /*lbounds*/ {}, slicedExtents, slicedStrides, + rewriter); } /// Apply a new shape to the data described by a box given the base address, @@ -3342,26 +3341,26 @@ struct LoadOpConversion : public fir::FIROpConversion { } }; -struct LocalitySpecifierOpConversion - : public fir::FIROpConversion { - using FIROpConversion::FIROpConversion; +template +struct DoConcurrentSpecifierOpConversion : public fir::FIROpConversion { + using fir::FIROpConversion::FIROpConversion; llvm::LogicalResult - matchAndRewrite(fir::LocalitySpecifierOp localizer, OpAdaptor adaptor, + matchAndRewrite(OpTy specifier, typename OpTy::Adaptor adaptor, mlir::ConversionPatternRewriter &rewriter) const override { #ifdef EXPENSIVE_CHECKS auto uses = mlir::SymbolTable::getSymbolUses( - localizer, localizer->getParentOfType()); + specifier, specifier->getParentOfType()); - // `fir.local` ops are not supposed to have any uses at this point (i.e. - // during lowering to LLVM). In case of serialization, the - // `fir.do_concurrent` users are expected to have been lowered to + // `fir.local|fir.declare_reduction` ops are not supposed to have any uses + // at this point (i.e. during lowering to LLVM). In case of serialization, + // the `fir.do_concurrent` users are expected to have been lowered to // `fir.do_loop` nests. 
In case of parallelization, the `fir.do_concurrent` // users are expected to have been lowered to the target parallel model // (e.g. OpenMP). assert(uses && uses->empty()); #endif - rewriter.eraseOp(localizer); + rewriter.eraseOp(specifier); return mlir::success(); } }; @@ -3397,8 +3396,7 @@ static void genBrOp(A caseOp, mlir::Block *dest, std::optional destOps, if (destOps) rewriter.replaceOpWithNewOp(caseOp, *destOps, dest); else - rewriter.replaceOpWithNewOp( - caseOp, llvm::ArrayRef(), dest); + rewriter.replaceOpWithNewOp(caseOp, B{}, dest); } static void genCaseLadderStep(mlir::Location loc, mlir::Value cmp, @@ -4330,20 +4328,22 @@ void fir::populateFIRToLLVMConversionPatterns( BoxTypeCodeOpConversion, BoxTypeDescOpConversion, CallOpConversion, CmpcOpConversion, VolatileCastOpConversion, ConvertOpConversion, CoordinateOpConversion, CopyOpConversion, DTEntryOpConversion, - DeclareOpConversion, DivcOpConversion, EmboxOpConversion, - EmboxCharOpConversion, EmboxProcOpConversion, ExtractValueOpConversion, - FieldIndexOpConversion, FirEndOpConversion, FreeMemOpConversion, - GlobalLenOpConversion, GlobalOpConversion, InsertOnRangeOpConversion, - IsPresentOpConversion, LenParamIndexOpConversion, LoadOpConversion, - LocalitySpecifierOpConversion, MulcOpConversion, NegcOpConversion, - NoReassocOpConversion, SelectCaseOpConversion, SelectOpConversion, - SelectRankOpConversion, SelectTypeOpConversion, ShapeOpConversion, - ShapeShiftOpConversion, ShiftOpConversion, SliceOpConversion, - StoreOpConversion, StringLitOpConversion, SubcOpConversion, - TypeDescOpConversion, TypeInfoOpConversion, UnboxCharOpConversion, - UnboxProcOpConversion, UndefOpConversion, UnreachableOpConversion, - XArrayCoorOpConversion, XEmboxOpConversion, XReboxOpConversion, - ZeroOpConversion>(converter, options); + DeclareOpConversion, + DoConcurrentSpecifierOpConversion, + DoConcurrentSpecifierOpConversion, + DivcOpConversion, EmboxOpConversion, EmboxCharOpConversion, + EmboxProcOpConversion, 
ExtractValueOpConversion, FieldIndexOpConversion, + FirEndOpConversion, FreeMemOpConversion, GlobalLenOpConversion, + GlobalOpConversion, InsertOnRangeOpConversion, IsPresentOpConversion, + LenParamIndexOpConversion, LoadOpConversion, MulcOpConversion, + NegcOpConversion, NoReassocOpConversion, SelectCaseOpConversion, + SelectOpConversion, SelectRankOpConversion, SelectTypeOpConversion, + ShapeOpConversion, ShapeShiftOpConversion, ShiftOpConversion, + SliceOpConversion, StoreOpConversion, StringLitOpConversion, + SubcOpConversion, TypeDescOpConversion, TypeInfoOpConversion, + UnboxCharOpConversion, UnboxProcOpConversion, UndefOpConversion, + UnreachableOpConversion, XArrayCoorOpConversion, XEmboxOpConversion, + XReboxOpConversion, ZeroOpConversion>(converter, options); // Patterns that are populated without a type converter do not trigger // target materializations for the operands of the root op. diff --git a/flang/lib/Optimizer/CodeGen/PreCGRewrite.cpp b/flang/lib/Optimizer/CodeGen/PreCGRewrite.cpp index eca2c7f7c942f..b60ac11c7795a 100644 --- a/flang/lib/Optimizer/CodeGen/PreCGRewrite.cpp +++ b/flang/lib/Optimizer/CodeGen/PreCGRewrite.cpp @@ -107,9 +107,8 @@ class EmboxConversion : public mlir::OpRewritePattern { shapeOpers.push_back(extVal); } auto xbox = rewriter.create( - loc, embox.getType(), embox.getMemref(), shapeOpers, - llvm::ArrayRef(), llvm::ArrayRef(), - llvm::ArrayRef(), llvm::ArrayRef(), + loc, embox.getType(), embox.getMemref(), shapeOpers, mlir::ValueRange{}, + mlir::ValueRange{}, mlir::ValueRange{}, mlir::ValueRange{}, embox.getTypeparams(), embox.getSourceBox(), embox.getAllocatorIdxAttr()); LLVM_DEBUG(llvm::dbgs() << "rewriting " << embox << " to " << xbox << '\n'); diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp index ecfa2939e96a6..6b40e7015fdd8 100644 --- a/flang/lib/Optimizer/Dialect/FIROps.cpp +++ b/flang/lib/Optimizer/Dialect/FIROps.cpp @@ -5041,6 +5041,9 @@ void 
fir::BoxTotalElementsOp::getCanonicalizationPatterns( // LocalitySpecifierOp //===----------------------------------------------------------------------===// +// TODO This is a copy of omp::PrivateClauseOp::verifiyRegions(). Once we find a +// solution to merge both ops into one this duplication will not be needed. See: +// https://discourse.llvm.org/t/dialect-for-data-locality-sharing-specifiers-clauses-in-openmp-openacc-and-do-concurrent/86108. llvm::LogicalResult fir::LocalitySpecifierOp::verifyRegions() { mlir::Type argType = getArgType(); auto verifyTerminator = [&](mlir::Operation *terminator, @@ -5136,6 +5139,84 @@ llvm::LogicalResult fir::LocalitySpecifierOp::verifyRegions() { return llvm::success(); } +// TODO This is a copy of omp::DeclareReductionOp::verifiyRegions(). Once we +// find a solution to merge both ops into one this duplication will not be +// needed. +mlir::LogicalResult fir::DeclareReductionOp::verifyRegions() { + if (!getAllocRegion().empty()) { + for (YieldOp yieldOp : getAllocRegion().getOps()) { + if (yieldOp.getResults().size() != 1 || + yieldOp.getResults().getTypes()[0] != getType()) + return emitOpError() << "expects alloc region to yield a value " + "of the reduction type"; + } + } + + if (getInitializerRegion().empty()) + return emitOpError() << "expects non-empty initializer region"; + mlir::Block &initializerEntryBlock = getInitializerRegion().front(); + + if (initializerEntryBlock.getNumArguments() == 1) { + if (!getAllocRegion().empty()) + return emitOpError() << "expects two arguments to the initializer region " + "when an allocation region is used"; + } else if (initializerEntryBlock.getNumArguments() == 2) { + if (getAllocRegion().empty()) + return emitOpError() << "expects one argument to the initializer region " + "when no allocation region is used"; + } else { + return emitOpError() + << "expects one or two arguments to the initializer region"; + } + + for (mlir::Value arg : initializerEntryBlock.getArguments()) + if 
(arg.getType() != getType()) + return emitOpError() << "expects initializer region argument to match " + "the reduction type"; + + for (YieldOp yieldOp : getInitializerRegion().getOps()) { + if (yieldOp.getResults().size() != 1 || + yieldOp.getResults().getTypes()[0] != getType()) + return emitOpError() << "expects initializer region to yield a value " + "of the reduction type"; + } + + if (getReductionRegion().empty()) + return emitOpError() << "expects non-empty reduction region"; + mlir::Block &reductionEntryBlock = getReductionRegion().front(); + if (reductionEntryBlock.getNumArguments() != 2 || + reductionEntryBlock.getArgumentTypes()[0] != + reductionEntryBlock.getArgumentTypes()[1] || + reductionEntryBlock.getArgumentTypes()[0] != getType()) + return emitOpError() << "expects reduction region with two arguments of " + "the reduction type"; + for (YieldOp yieldOp : getReductionRegion().getOps()) { + if (yieldOp.getResults().size() != 1 || + yieldOp.getResults().getTypes()[0] != getType()) + return emitOpError() << "expects reduction region to yield a value " + "of the reduction type"; + } + + if (!getAtomicReductionRegion().empty()) { + mlir::Block &atomicReductionEntryBlock = getAtomicReductionRegion().front(); + if (atomicReductionEntryBlock.getNumArguments() != 2 || + atomicReductionEntryBlock.getArgumentTypes()[0] != + atomicReductionEntryBlock.getArgumentTypes()[1]) + return emitOpError() << "expects atomic reduction region with two " + "arguments of the same type"; + } + + if (getCleanupRegion().empty()) + return mlir::success(); + mlir::Block &cleanupEntryBlock = getCleanupRegion().front(); + if (cleanupEntryBlock.getNumArguments() != 1 || + cleanupEntryBlock.getArgument(0).getType() != getType()) + return emitOpError() << "expects cleanup region with one argument " + "of the reduction type"; + + return mlir::success(); +} + //===----------------------------------------------------------------------===// // DoConcurrentOp 
//===----------------------------------------------------------------------===// @@ -5157,6 +5238,97 @@ llvm::LogicalResult fir::DoConcurrentOp::verify() { // DoConcurrentLoopOp //===----------------------------------------------------------------------===// +static mlir::ParseResult parseSpecifierList( + mlir::OpAsmParser &parser, mlir::OperationState &result, + llvm::StringRef specifierKeyword, llvm::StringRef symsAttrName, + llvm::SmallVectorImpl ®ionArgs, + llvm::SmallVectorImpl ®ionArgTypes, + int32_t &numSpecifierOperands, bool isReduce = false) { + auto &builder = parser.getBuilder(); + llvm::SmallVector specifierOperands; + + if (failed(parser.parseOptionalKeyword(specifierKeyword))) + return mlir::success(); + + std::size_t oldArgTypesSize = regionArgTypes.size(); + if (failed(parser.parseLParen())) + return mlir::failure(); + + llvm::SmallVector isByRefVec; + llvm::SmallVector spceifierSymbolVec; + llvm::SmallVector attributes; + + if (failed(parser.parseCommaSeparatedList([&]() { + if (isReduce) + isByRefVec.push_back( + parser.parseOptionalKeyword("byref").succeeded()); + + if (failed(parser.parseAttribute(spceifierSymbolVec.emplace_back()))) + return mlir::failure(); + + if (isReduce && + failed(parser.parseAttribute(attributes.emplace_back()))) + return mlir::failure(); + + if (parser.parseOperand(specifierOperands.emplace_back()) || + parser.parseArrow() || + parser.parseArgument(regionArgs.emplace_back())) + return mlir::failure(); + + return mlir::success(); + }))) + return mlir::failure(); + + if (failed(parser.parseColon())) + return mlir::failure(); + + if (failed(parser.parseCommaSeparatedList([&]() { + if (failed(parser.parseType(regionArgTypes.emplace_back()))) + return mlir::failure(); + + return mlir::success(); + }))) + return mlir::failure(); + + if (regionArgs.size() != regionArgTypes.size()) + return parser.emitError(parser.getNameLoc(), "mismatch in number of " + + specifierKeyword.str() + + " arg and types"); + + if 
(failed(parser.parseRParen())) + return mlir::failure(); + + for (auto operandType : + llvm::zip_equal(specifierOperands, + llvm::drop_begin(regionArgTypes, oldArgTypesSize))) + if (parser.resolveOperand(std::get<0>(operandType), + std::get<1>(operandType), result.operands)) + return mlir::failure(); + + if (isReduce) + result.addAttribute( + fir::DoConcurrentLoopOp::getReduceByrefAttrName(result.name), + isByRefVec.empty() + ? nullptr + : mlir::DenseBoolArrayAttr::get(builder.getContext(), isByRefVec)); + + llvm::SmallVector symbolAttrs(spceifierSymbolVec.begin(), + spceifierSymbolVec.end()); + result.addAttribute(symsAttrName, builder.getArrayAttr(symbolAttrs)); + + if (isReduce) { + llvm::SmallVector arrayAttr(attributes.begin(), + attributes.end()); + result.addAttribute( + fir::DoConcurrentLoopOp::getReduceAttrsAttrName(result.name), + builder.getArrayAttr(arrayAttr)); + } + + numSpecifierOperands = specifierOperands.size(); + + return mlir::success(); +} + mlir::ParseResult fir::DoConcurrentLoopOp::parse(mlir::OpAsmParser &parser, mlir::OperationState &result) { auto &builder = parser.getBuilder(); @@ -5192,90 +5364,26 @@ mlir::ParseResult fir::DoConcurrentLoopOp::parse(mlir::OpAsmParser &parser, parser.resolveOperands(steps, builder.getIndexType(), result.operands)) return mlir::failure(); - llvm::SmallVector reduceOperands; - llvm::SmallVector reduceArgTypes; - if (succeeded(parser.parseOptionalKeyword("reduce"))) { - // Parse reduction attributes and variables. - llvm::SmallVector attributes; - if (failed(parser.parseCommaSeparatedList( - mlir::AsmParser::Delimiter::Paren, [&]() { - if (parser.parseAttribute(attributes.emplace_back()) || - parser.parseArrow() || - parser.parseOperand(reduceOperands.emplace_back()) || - parser.parseColonType(reduceArgTypes.emplace_back())) - return mlir::failure(); - return mlir::success(); - }))) - return mlir::failure(); - // Resolve input operands. 
- for (auto operand_type : llvm::zip(reduceOperands, reduceArgTypes)) - if (parser.resolveOperand(std::get<0>(operand_type), - std::get<1>(operand_type), result.operands)) - return mlir::failure(); - llvm::SmallVector arrayAttr(attributes.begin(), - attributes.end()); - result.addAttribute(getReduceAttrsAttrName(result.name), - builder.getArrayAttr(arrayAttr)); - } - - llvm::SmallVector localOperands; - if (succeeded(parser.parseOptionalKeyword("local"))) { - std::size_t oldArgTypesSize = argTypes.size(); - if (failed(parser.parseLParen())) - return mlir::failure(); - - llvm::SmallVector localSymbolVec; - if (failed(parser.parseCommaSeparatedList([&]() { - if (failed(parser.parseAttribute(localSymbolVec.emplace_back()))) - return mlir::failure(); - - if (parser.parseOperand(localOperands.emplace_back()) || - parser.parseArrow() || - parser.parseArgument(regionArgs.emplace_back())) - return mlir::failure(); - - return mlir::success(); - }))) - return mlir::failure(); - - if (failed(parser.parseColon())) - return mlir::failure(); - - if (failed(parser.parseCommaSeparatedList([&]() { - if (failed(parser.parseType(argTypes.emplace_back()))) - return mlir::failure(); - - return mlir::success(); - }))) - return mlir::failure(); - - if (regionArgs.size() != argTypes.size()) - return parser.emitError(parser.getNameLoc(), - "mismatch in number of local arg and types"); - - if (failed(parser.parseRParen())) - return mlir::failure(); - - for (auto operandType : llvm::zip_equal( - localOperands, llvm::drop_begin(argTypes, oldArgTypesSize))) - if (parser.resolveOperand(std::get<0>(operandType), - std::get<1>(operandType), result.operands)) - return mlir::failure(); + int32_t numLocalOperands = 0; + if (failed(parseSpecifierList(parser, result, "local", + getLocalSymsAttrName(result.name), regionArgs, + argTypes, numLocalOperands))) + return mlir::failure(); - llvm::SmallVector symbolAttrs(localSymbolVec.begin(), - localSymbolVec.end()); - 
result.addAttribute(getLocalSymsAttrName(result.name), - builder.getArrayAttr(symbolAttrs)); - } + int32_t numReduceOperands = 0; + if (failed(parseSpecifierList( + parser, result, "reduce", getReduceSymsAttrName(result.name), + regionArgs, argTypes, numReduceOperands, /*isReduce=*/true))) + return mlir::failure(); // Set `operandSegmentSizes` attribute. - result.addAttribute(DoConcurrentLoopOp::getOperandSegmentSizeAttr(), - builder.getDenseI32ArrayAttr( - {static_cast(lower.size()), - static_cast(upper.size()), - static_cast(steps.size()), - static_cast(reduceOperands.size()), - static_cast(localOperands.size())})); + result.addAttribute( + DoConcurrentLoopOp::getOperandSegmentSizeAttr(), + builder.getDenseI32ArrayAttr({static_cast(lower.size()), + static_cast(upper.size()), + static_cast(steps.size()), + static_cast(numLocalOperands), + static_cast(numReduceOperands)})); // Now parse the body. for (auto [arg, type] : llvm::zip_equal(regionArgs, argTypes)) @@ -5297,17 +5405,6 @@ void fir::DoConcurrentLoopOp::print(mlir::OpAsmPrinter &p) { << ") = (" << getLowerBound() << ") to (" << getUpperBound() << ") step (" << getStep() << ")"; - if (!getReduceOperands().empty()) { - p << " reduce("; - auto attrs = getReduceAttrsAttr(); - auto operands = getReduceOperands(); - llvm::interleaveComma(llvm::zip(attrs, operands), p, [&](auto it) { - p << std::get<0>(it) << " -> " << std::get<1>(it) << " : " - << std::get<1>(it).getType(); - }); - p << ')'; - } - if (!getLocalVars().empty()) { p << " local("; llvm::interleaveComma(llvm::zip_equal(getLocalSymsAttr(), getLocalVars(), @@ -5322,13 +5419,34 @@ void fir::DoConcurrentLoopOp::print(mlir::OpAsmPrinter &p) { p << ")"; } + if (!getReduceVars().empty()) { + p << " reduce("; + llvm::interleaveComma( + llvm::zip_equal(getReduceByrefAttr().asArrayRef(), getReduceSymsAttr(), + getReduceAttrsAttr(), getReduceVars(), + getRegionReduceArgs()), + p, [&](auto it) { + if (std::get<0>(it)) + p << "byref "; + + p << std::get<1>(it) << " 
" << std::get<2>(it) << " " + << std::get<3>(it) << " -> " << std::get<4>(it); + }); + p << " : "; + llvm::interleaveComma(getReduceVars(), p, + [&](auto it) { p << it.getType(); }); + p << ")"; + } + p << ' '; p.printRegion(getRegion(), /*printEntryBlockArgs=*/false); p.printOptionalAttrDict( (*this)->getAttrs(), /*elidedAttrs=*/{DoConcurrentLoopOp::getOperandSegmentSizeAttr(), + DoConcurrentLoopOp::getLocalSymsAttrName(), + DoConcurrentLoopOp::getReduceSymsAttrName(), DoConcurrentLoopOp::getReduceAttrsAttrName(), - DoConcurrentLoopOp::getLocalSymsAttrName()}); + DoConcurrentLoopOp::getReduceByrefAttrName()}); } llvm::SmallVector fir::DoConcurrentLoopOp::getLoopRegions() { @@ -5340,6 +5458,7 @@ llvm::LogicalResult fir::DoConcurrentLoopOp::verify() { mlir::Operation::operand_range ubValues = getUpperBound(); mlir::Operation::operand_range stepValues = getStep(); mlir::Operation::operand_range localVars = getLocalVars(); + mlir::Operation::operand_range reduceVars = getReduceVars(); if (lbValues.empty()) return emitOpError( @@ -5353,7 +5472,8 @@ llvm::LogicalResult fir::DoConcurrentLoopOp::verify() { // Check that the body defines the same number of block arguments as the // number of tuple elements in step. 
mlir::Block *body = getBody(); - unsigned numIndVarArgs = body->getNumArguments() - localVars.size(); + unsigned numIndVarArgs = + body->getNumArguments() - localVars.size() - reduceVars.size(); if (numIndVarArgs != stepValues.size()) return emitOpError() << "expects the same number of induction variables: " diff --git a/flang/lib/Optimizer/Dialect/FIRType.cpp b/flang/lib/Optimizer/Dialect/FIRType.cpp index 2ff1d6d945ba3..4a9579cfde37c 100644 --- a/flang/lib/Optimizer/Dialect/FIRType.cpp +++ b/flang/lib/Optimizer/Dialect/FIRType.cpp @@ -1533,7 +1533,9 @@ std::optional> fir::getTypeSizeAndAlignment(mlir::Location loc, mlir::Type ty, const mlir::DataLayout &dl, const fir::KindMapping &kindMap) { - if (mlir::isa(ty)) { + if (ty.isIntOrIndexOrFloat() || + mlir::isa(ty)) { llvm::TypeSize size = dl.getTypeSize(ty); unsigned short alignment = dl.getTypeABIAlignment(ty); return std::pair{size, alignment}; diff --git a/flang/lib/Optimizer/OpenACC/FIROpenACCTypeInterfaces.cpp b/flang/lib/Optimizer/OpenACC/FIROpenACCTypeInterfaces.cpp index 317a41a2129c3..0767733f53728 100644 --- a/flang/lib/Optimizer/OpenACC/FIROpenACCTypeInterfaces.cpp +++ b/flang/lib/Optimizer/OpenACC/FIROpenACCTypeInterfaces.cpp @@ -29,8 +29,9 @@ namespace fir::acc { -static mlir::TypedValue -getPtrFromVar(mlir::Value var) { +template +mlir::TypedValue +OpenACCMappableModel::getVarPtr(mlir::Type type, mlir::Value var) const { if (auto ptr = mlir::dyn_cast>(var)) return ptr; @@ -44,34 +45,51 @@ getPtrFromVar(mlir::Value var) { return {}; } -template <> -mlir::TypedValue -OpenACCMappableModel::getVarPtr(mlir::Type type, - mlir::Value var) const { - return getPtrFromVar(var); -} - -template <> -mlir::TypedValue +template mlir::TypedValue OpenACCMappableModel::getVarPtr(mlir::Type type, - mlir::Value var) const { - return getPtrFromVar(var); -} + mlir::Value var) const; -template <> -std::optional -OpenACCMappableModel::getSizeInBytes( +template mlir::TypedValue +OpenACCMappableModel::getVarPtr(mlir::Type 
type, + mlir::Value var) const; + +template mlir::TypedValue +OpenACCMappableModel::getVarPtr(mlir::Type type, + mlir::Value var) const; + +template mlir::TypedValue +OpenACCMappableModel::getVarPtr(mlir::Type type, + mlir::Value var) const; + +template +std::optional OpenACCMappableModel::getSizeInBytes( mlir::Type type, mlir::Value var, mlir::ValueRange accBounds, const mlir::DataLayout &dataLayout) const { - // TODO: Bounds operation affect the total size - add support to take them + // TODO: Bounds operation affect the size - add support to take them // into account. if (!accBounds.empty()) return {}; + // Class-type is either a polymorphic or unlimited polymorphic. In the latter + // case, the size is not computable. But in the former it should be - however, + // fir::getTypeSizeAndAlignment does not support polymorphic types. + if (mlir::isa(type)) { + return {}; + } + + // When requesting the size of a box entity or a reference, the intent + // is to get the size of the data that it is referring to. + mlir::Type eleTy = fir::dyn_cast_ptrOrBoxEleTy(type); + assert(eleTy && "expect to be able to unwrap the element type"); + + // If the type enclosed is a mappable type, then have it provide the size. + if (auto mappableTy = mlir::dyn_cast(eleTy)) + return mappableTy.getSizeInBytes(var, accBounds, dataLayout); + // Dynamic extents or unknown ranks generally do not have compile-time // computable dimensions. 
- auto seqType = mlir::cast(type); - if (seqType.hasDynamicExtents() || seqType.hasUnknownShape()) + auto seqType = mlir::dyn_cast(eleTy); + if (seqType && (seqType.hasDynamicExtents() || seqType.hasUnknownShape())) return {}; // Attempt to find an operation that a lookup for KindMapping can be done @@ -85,99 +103,113 @@ OpenACCMappableModel::getSizeInBytes( auto kindMap = fir::getKindMapping(kindMapSrcOp); auto sizeAndAlignment = - fir::getTypeSizeAndAlignment(var.getLoc(), type, dataLayout, kindMap); + fir::getTypeSizeAndAlignment(var.getLoc(), eleTy, dataLayout, kindMap); if (!sizeAndAlignment.has_value()) return {}; return {llvm::TypeSize::getFixed(sizeAndAlignment->first)}; } -template <> -std::optional +template std::optional OpenACCMappableModel::getSizeInBytes( mlir::Type type, mlir::Value var, mlir::ValueRange accBounds, - const mlir::DataLayout &dataLayout) const { - // If we have a box value instead of box reference, the intent is to - // get the size of the data not the box itself. - if (auto boxTy = mlir::dyn_cast(var.getType())) { - if (auto mappableTy = mlir::dyn_cast( - fir::unwrapRefType(boxTy.getEleTy()))) { - return mappableTy.getSizeInBytes(var, accBounds, dataLayout); - } - } - // Size for boxes is not computable until it gets materialized. 
- return {}; -} + const mlir::DataLayout &dataLayout) const; -template <> -std::optional -OpenACCMappableModel::getOffsetInBytes( +template std::optional +OpenACCMappableModel::getSizeInBytes( + mlir::Type type, mlir::Value var, mlir::ValueRange accBounds, + const mlir::DataLayout &dataLayout) const; + +template std::optional +OpenACCMappableModel::getSizeInBytes( + mlir::Type type, mlir::Value var, mlir::ValueRange accBounds, + const mlir::DataLayout &dataLayout) const; + +template std::optional +OpenACCMappableModel::getSizeInBytes( + mlir::Type type, mlir::Value var, mlir::ValueRange accBounds, + const mlir::DataLayout &dataLayout) const; + +template +std::optional OpenACCMappableModel::getOffsetInBytes( mlir::Type type, mlir::Value var, mlir::ValueRange accBounds, const mlir::DataLayout &dataLayout) const { - // TODO: Bounds operation affect the offset- add support to take them + // TODO: Bounds operation affect the offset - add support to take them // into account. if (!accBounds.empty()) return {}; + // Class-type does not behave like a normal box because it does not hold an + // element type. Thus special handle it here. + if (mlir::isa(type)) { + // The pointer to the class-type is always at the start address. + return {0}; + } + + mlir::Type eleTy = fir::dyn_cast_ptrOrBoxEleTy(type); + assert(eleTy && "expect to be able to unwrap the element type"); + + // If the type enclosed is a mappable type, then have it provide the offset. + if (auto mappableTy = mlir::dyn_cast(eleTy)) + return mappableTy.getOffsetInBytes(var, accBounds, dataLayout); + // Dynamic extents (aka descriptor-based arrays) - may have a offset. // For example, a negative stride may mean a negative offset to compute the // start of array. 
- auto seqType = mlir::cast(type); - if (seqType.hasDynamicExtents() || seqType.hasUnknownShape()) + auto seqType = mlir::dyn_cast(eleTy); + if (seqType && (seqType.hasDynamicExtents() || seqType.hasUnknownShape())) return {}; - // We have non-dynamic extents - but if for some reason the size is not - // computable - assume offset is not either. Otherwise, it is an offset of - // zero. + // If the size is computable and since there are no bounds or dynamic extents, + // then the offset relative to pointer must be zero. if (getSizeInBytes(type, var, accBounds, dataLayout).has_value()) { return {0}; } + + // The offset is not evident because it is relative to the pointer being held. + // And we don't have any further details about this type. return {}; } -template <> -std::optional OpenACCMappableModel::getOffsetInBytes( +template std::optional +OpenACCMappableModel::getOffsetInBytes( mlir::Type type, mlir::Value var, mlir::ValueRange accBounds, - const mlir::DataLayout &dataLayout) const { - // If we have a box value instead of box reference, the intent is to - // get the offset of the data not the offset of the box itself. - if (auto boxTy = mlir::dyn_cast(var.getType())) { - if (auto mappableTy = mlir::dyn_cast( - fir::unwrapRefType(boxTy.getEleTy()))) { - return mappableTy.getOffsetInBytes(var, accBounds, dataLayout); - } - } - // Until boxes get materialized, the offset is not evident because it is - // relative to the pointer being held. 
- return {}; -} + const mlir::DataLayout &dataLayout) const; -template <> -llvm::SmallVector -OpenACCMappableModel::generateAccBounds( - mlir::Type type, mlir::Value var, mlir::OpBuilder &builder) const { +template std::optional +OpenACCMappableModel::getOffsetInBytes( + mlir::Type type, mlir::Value var, mlir::ValueRange accBounds, + const mlir::DataLayout &dataLayout) const; + +template std::optional +OpenACCMappableModel::getOffsetInBytes( + mlir::Type type, mlir::Value var, mlir::ValueRange accBounds, + const mlir::DataLayout &dataLayout) const; + +template std::optional +OpenACCMappableModel::getOffsetInBytes( + mlir::Type type, mlir::Value var, mlir::ValueRange accBounds, + const mlir::DataLayout &dataLayout) const; + +static llvm::SmallVector +generateSeqTyAccBounds(fir::SequenceType seqType, mlir::Value var, + mlir::OpBuilder &builder) { assert((mlir::isa(var.getType()) || mlir::isa(var.getType())) && "must be pointer-like or mappable"); - fir::FirOpBuilder firBuilder(builder, var.getDefiningOp()); - auto seqType = mlir::cast(type); mlir::Location loc = var.getLoc(); - mlir::Value varPtr = - mlir::isa(var.getType()) - ? var - : mlir::cast(var.getType()).getVarPtr(var); - if (seqType.hasDynamicExtents() || seqType.hasUnknownShape()) { if (auto boxAddr = - mlir::dyn_cast_if_present(varPtr.getDefiningOp())) { + mlir::dyn_cast_if_present(var.getDefiningOp())) { mlir::Value box = boxAddr.getVal(); auto res = hlfir::translateToExtendedValue(loc, firBuilder, hlfir::Entity(box)); fir::ExtendedValue exv = res.first; mlir::Value boxRef = box; - if (auto boxPtr = getPtrFromVar(box)) { + if (auto boxPtr = mlir::cast(box.getType()) + .getVarPtr(box)) { boxRef = boxPtr; } // TODO: Handle Fortran optional. 
@@ -189,7 +221,7 @@ OpenACCMappableModel::generateAccBounds( firBuilder, loc, exv, info); } - if (mlir::isa(varPtr.getDefiningOp())) { + if (mlir::isa(var.getDefiningOp())) { mlir::Value zero = firBuilder.createIntegerConstant(loc, builder.getIndexType(), 0); mlir::Value one = @@ -197,10 +229,10 @@ OpenACCMappableModel::generateAccBounds( mlir::Value shape; if (auto declareOp = - mlir::dyn_cast_if_present(varPtr.getDefiningOp())) + mlir::dyn_cast_if_present(var.getDefiningOp())) shape = declareOp.getShape(); else if (auto declareOp = mlir::dyn_cast_if_present( - varPtr.getDefiningOp())) + var.getDefiningOp())) shape = declareOp.getShape(); const bool strideIncludeLowerExtent = true; @@ -265,9 +297,9 @@ OpenACCMappableModel::generateAccBounds( // TODO: Detect assumed-size case. const bool isAssumedSize = false; - auto valToCheck = varPtr; + auto valToCheck = var; if (auto boxAddr = - mlir::dyn_cast_if_present(varPtr.getDefiningOp())) { + mlir::dyn_cast_if_present(var.getDefiningOp())) { valToCheck = boxAddr.getVal(); } auto res = hlfir::translateToExtendedValue(loc, firBuilder, @@ -279,86 +311,34 @@ OpenACCMappableModel::generateAccBounds( /*isAssumedSize=*/isAssumedSize); } -template <> +template llvm::SmallVector -OpenACCMappableModel::generateAccBounds( - mlir::Type type, mlir::Value var, mlir::OpBuilder &builder) const { - // If we have a box value instead of box reference, the intent is to - // get the bounds of the data not the bounds of the box itself. - if (auto boxTy = mlir::dyn_cast(var.getType())) { - if (auto mappableTy = mlir::dyn_cast( - fir::unwrapRefType(boxTy.getEleTy()))) { - mlir::Value data = builder.create(var.getLoc(), var); - return mappableTy.generateAccBounds(data, builder); - } +OpenACCMappableModel::generateAccBounds(mlir::Type type, mlir::Value var, + mlir::OpBuilder &builder) const { + // acc bounds only make sense for arrays - thus look for sequence type. 
+ mlir::Type eleTy = fir::dyn_cast_ptrOrBoxEleTy(type); + if (auto seqTy = mlir::dyn_cast_if_present(eleTy)) { + return generateSeqTyAccBounds(seqTy, var, builder); } - // Box references are not arrays - thus generating acc.bounds does not make - // sense. - return {}; -} - -static bool isScalarLike(mlir::Type type) { - return fir::isa_trivial(type) || fir::isa_ref_type(type); -} - -static bool isArrayLike(mlir::Type type) { - return mlir::isa(type); -} -static bool isCompositeLike(mlir::Type type) { - // class(*) is not a composite type since it does not have a determined type. - if (fir::isUnlimitedPolymorphicType(type)) - return false; - - return mlir::isa(type); -} - -template <> -mlir::acc::VariableTypeCategory -OpenACCMappableModel::getTypeCategory( - mlir::Type type, mlir::Value var) const { - return mlir::acc::VariableTypeCategory::array; + return {}; } -template <> -mlir::acc::VariableTypeCategory -OpenACCMappableModel::getTypeCategory(mlir::Type type, - mlir::Value var) const { - // Class-type does not behave like a normal box because it does not hold an - // element type. Thus special handle it here. - if (mlir::isa(type)) { - // class(*) is not a composite type since it does not have a determined - // type. - if (fir::isUnlimitedPolymorphicType(type)) - return mlir::acc::VariableTypeCategory::uncategorized; - return mlir::acc::VariableTypeCategory::composite; - } - - mlir::Type eleTy = fir::dyn_cast_ptrOrBoxEleTy(type); - assert(eleTy && "expect to be able to unwrap the element type"); +template llvm::SmallVector +OpenACCMappableModel::generateAccBounds( + mlir::Type type, mlir::Value var, mlir::OpBuilder &builder) const; - // If the type enclosed by the box is a mappable type, then have it - // provide the type category. 
- if (auto mappableTy = mlir::dyn_cast(eleTy)) - return mappableTy.getTypeCategory(var); +template llvm::SmallVector +OpenACCMappableModel::generateAccBounds( + mlir::Type type, mlir::Value var, mlir::OpBuilder &builder) const; - // For all arrays, despite whether they are allocatable, pointer, assumed, - // etc, we'd like to categorize them as "array". - if (isArrayLike(eleTy)) - return mlir::acc::VariableTypeCategory::array; - - // We got here because we don't have an array nor a mappable type. At this - // point, we know we have a type that fits the "aggregate" definition since it - // is a type with a descriptor. Try to refine it by checking if it matches the - // "composite" definition. - if (isCompositeLike(eleTy)) - return mlir::acc::VariableTypeCategory::composite; +template llvm::SmallVector +OpenACCMappableModel::generateAccBounds( + mlir::Type type, mlir::Value var, mlir::OpBuilder &builder) const; - // Even if we have a scalar type - simply because it is wrapped in a box - // we want to categorize it as "nonscalar". Anything else would've been - // non-scalar anyway. - return mlir::acc::VariableTypeCategory::nonscalar; -} +template llvm::SmallVector +OpenACCMappableModel::generateAccBounds( + mlir::Type type, mlir::Value var, mlir::OpBuilder &builder) const; static mlir::Value getBaseRef(mlir::TypedValue varPtr) { @@ -389,33 +369,44 @@ getBaseRef(mlir::TypedValue varPtr) { return baseRef; } -static mlir::acc::VariableTypeCategory -categorizePointee(mlir::Type pointer, - mlir::TypedValue varPtr, - mlir::Type varType) { - // FIR uses operations to compute interior pointers. - // So for example, an array element or composite field access to a float - // value would both be represented as !fir.ref. We do not want to treat - // such a reference as a scalar. Thus unwrap interior pointer calculations. 
- auto baseRef = getBaseRef(varPtr); +static bool isScalarLike(mlir::Type type) { + return fir::isa_trivial(type) || fir::isa_ref_type(type); +} - if (auto mappableTy = - mlir::dyn_cast(baseRef.getType())) - return mappableTy.getTypeCategory(baseRef); +static bool isArrayLike(mlir::Type type) { + return mlir::isa(type); +} - // It must be a pointer-like type since it is not a MappableType. - auto ptrLikeTy = mlir::cast(baseRef.getType()); - mlir::Type eleTy = ptrLikeTy.getElementType(); +static bool isCompositeLike(mlir::Type type) { + // class(*) is not a composite type since it does not have a determined type. + if (fir::isUnlimitedPolymorphicType(type)) + return false; - if (auto mappableEleTy = mlir::dyn_cast(eleTy)) - return mappableEleTy.getTypeCategory(varPtr); + return mlir::isa(type); +} - if (isScalarLike(eleTy)) - return mlir::acc::VariableTypeCategory::scalar; +static mlir::acc::VariableTypeCategory +categorizeElemType(mlir::Type enclosingTy, mlir::Type eleTy, mlir::Value var) { + // If the type enclosed is a mappable type, then have it provide the type + // category. + if (auto mappableTy = mlir::dyn_cast(eleTy)) + return mappableTy.getTypeCategory(var); + + // For all arrays, despite whether they are allocatable, pointer, assumed, + // etc, we'd like to categorize them as "array". if (isArrayLike(eleTy)) return mlir::acc::VariableTypeCategory::array; + if (isCompositeLike(eleTy)) return mlir::acc::VariableTypeCategory::composite; + if (mlir::isa(enclosingTy)) { + // Even if we have a scalar type - simply because it is wrapped in a box + // we want to categorize it as "nonscalar". Anything else would've been + // non-scalar anyway. 
+ return mlir::acc::VariableTypeCategory::nonscalar; + } + if (isScalarLike(eleTy)) + return mlir::acc::VariableTypeCategory::scalar; if (mlir::isa(eleTy)) return mlir::acc::VariableTypeCategory::nonscalar; // Assumed-type (type(*))does not have a determined type that can be @@ -431,6 +422,77 @@ categorizePointee(mlir::Type pointer, return mlir::acc::VariableTypeCategory::uncategorized; } +template +mlir::acc::VariableTypeCategory +OpenACCMappableModel::getTypeCategory(mlir::Type type, + mlir::Value var) const { + // FIR uses operations to compute interior pointers. + // So for example, an array element or composite field access to a float + // value would both be represented as !fir.ref. We do not want to treat + // such a reference as a scalar. Thus unwrap interior pointer calculations. + mlir::Type eleTy = fir::dyn_cast_ptrOrBoxEleTy(type); + if (eleTy && isScalarLike(eleTy)) { + if (auto ptrLikeVar = mlir::dyn_cast_if_present< + mlir::TypedValue>(var)) { + auto baseRef = getBaseRef(ptrLikeVar); + if (baseRef != var) { + type = baseRef.getType(); + if (auto mappableTy = mlir::dyn_cast(type)) + return mappableTy.getTypeCategory(baseRef); + } + } + } + + // Class-type does not behave like a normal box because it does not hold an + // element type. Thus special handle it here. + if (mlir::isa(type)) { + // class(*) is not a composite type since it does not have a determined + // type. 
+ if (fir::isUnlimitedPolymorphicType(type)) + return mlir::acc::VariableTypeCategory::uncategorized; + return mlir::acc::VariableTypeCategory::composite; + } + + assert(eleTy && "expect to be able to unwrap the element type"); + return categorizeElemType(type, eleTy, var); +} + +template mlir::acc::VariableTypeCategory +OpenACCMappableModel::getTypeCategory(mlir::Type type, + mlir::Value var) const; + +template mlir::acc::VariableTypeCategory +OpenACCMappableModel::getTypeCategory( + mlir::Type type, mlir::Value var) const; + +template mlir::acc::VariableTypeCategory +OpenACCMappableModel::getTypeCategory(mlir::Type type, + mlir::Value var) const; + +template mlir::acc::VariableTypeCategory +OpenACCMappableModel::getTypeCategory(mlir::Type type, + mlir::Value var) const; + +static mlir::acc::VariableTypeCategory +categorizePointee(mlir::Type pointer, + mlir::TypedValue varPtr, + mlir::Type varType) { + // FIR uses operations to compute interior pointers. + // So for example, an array element or composite field access to a float + // value would both be represented as !fir.ref. We do not want to treat + // such a reference as a scalar. Thus unwrap interior pointer calculations. + auto baseRef = getBaseRef(varPtr); + + if (auto mappableTy = + mlir::dyn_cast(baseRef.getType())) + return mappableTy.getTypeCategory(baseRef); + + // It must be a pointer-like type since it is not a MappableType. 
+ auto ptrLikeTy = mlir::cast(baseRef.getType()); + mlir::Type eleTy = ptrLikeTy.getElementType(); + return categorizeElemType(pointer, eleTy, varPtr); +} + template <> mlir::acc::VariableTypeCategory OpenACCPointerLikeModel::getPointeeTypeCategory( diff --git a/flang/lib/Optimizer/OpenACC/RegisterOpenACCExtensions.cpp b/flang/lib/Optimizer/OpenACC/RegisterOpenACCExtensions.cpp index 5f174ad4b40fe..869f9c2429aa0 100644 --- a/flang/lib/Optimizer/OpenACC/RegisterOpenACCExtensions.cpp +++ b/flang/lib/Optimizer/OpenACC/RegisterOpenACCExtensions.cpp @@ -19,11 +19,14 @@ namespace fir::acc { void registerOpenACCExtensions(mlir::DialectRegistry ®istry) { registry.addExtension(+[](mlir::MLIRContext *ctx, fir::FIROpsDialect *dialect) { - fir::SequenceType::attachInterface>( - *ctx); fir::BoxType::attachInterface>(*ctx); fir::ClassType::attachInterface>( *ctx); + fir::ReferenceType::attachInterface< + OpenACCMappableModel>(*ctx); + fir::PointerType::attachInterface>( + *ctx); + fir::HeapType::attachInterface>(*ctx); fir::ReferenceType::attachInterface< OpenACCPointerLikeModel>(*ctx); @@ -31,6 +34,7 @@ void registerOpenACCExtensions(mlir::DialectRegistry ®istry) { OpenACCPointerLikeModel>(*ctx); fir::HeapType::attachInterface>( *ctx); + fir::LLVMPointerType::attachInterface< OpenACCPointerLikeModel>(*ctx); }); diff --git a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp index 28f6c8bf02813..31076f6eb328f 100644 --- a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp +++ b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp @@ -312,11 +312,24 @@ class DoConcurrentConversion bool isComposite) const { mlir::omp::WsloopOperands wsloopClauseOps; + auto cloneFIRRegionToOMP = [&rewriter](mlir::Region &firRegion, + mlir::Region &ompRegion) { + if (!firRegion.empty()) { + rewriter.cloneRegionBefore(firRegion, ompRegion, ompRegion.begin()); + auto firYield = + mlir::cast(ompRegion.back().getTerminator()); + 
rewriter.setInsertionPoint(firYield); + rewriter.create(firYield.getLoc(), + firYield.getOperands()); + rewriter.eraseOp(firYield); + } + }; + // For `local` (and `local_init`) opernads, emit corresponding `private` // clauses and attach these clauses to the workshare loop. - if (!loop.getLocalOperands().empty()) + if (!loop.getLocalVars().empty()) for (auto [op, sym, arg] : llvm::zip_equal( - loop.getLocalOperands(), + loop.getLocalVars(), loop.getLocalSymsAttr().getAsRange(), loop.getRegionLocalArgs())) { auto localizer = mlir::SymbolTable::lookupNearestSymbolFrom< @@ -326,50 +339,65 @@ class DoConcurrentConversion TODO(localizer.getLoc(), "local_init conversion is not supported yet"); - auto oldIP = rewriter.saveInsertionPoint(); + mlir::OpBuilder::InsertionGuard guard(rewriter); rewriter.setInsertionPointAfter(localizer); + auto privatizer = rewriter.create( localizer.getLoc(), sym.getLeafReference().str() + ".omp", localizer.getTypeAttr().getValue(), mlir::omp::DataSharingClauseType::Private); - if (!localizer.getInitRegion().empty()) { - rewriter.cloneRegionBefore(localizer.getInitRegion(), - privatizer.getInitRegion(), - privatizer.getInitRegion().begin()); - auto firYield = mlir::cast( - privatizer.getInitRegion().back().getTerminator()); - rewriter.setInsertionPoint(firYield); - rewriter.create(firYield.getLoc(), - firYield.getOperands()); - rewriter.eraseOp(firYield); - } - - if (!localizer.getDeallocRegion().empty()) { - rewriter.cloneRegionBefore(localizer.getDeallocRegion(), - privatizer.getDeallocRegion(), - privatizer.getDeallocRegion().begin()); - auto firYield = mlir::cast( - privatizer.getDeallocRegion().back().getTerminator()); - rewriter.setInsertionPoint(firYield); - rewriter.create(firYield.getLoc(), - firYield.getOperands()); - rewriter.eraseOp(firYield); - } - - rewriter.restoreInsertionPoint(oldIP); + cloneFIRRegionToOMP(localizer.getInitRegion(), + privatizer.getInitRegion()); + cloneFIRRegionToOMP(localizer.getDeallocRegion(), + 
privatizer.getDeallocRegion()); wsloopClauseOps.privateVars.push_back(op); wsloopClauseOps.privateSyms.push_back( mlir::SymbolRefAttr::get(privatizer)); } + if (!loop.getReduceVars().empty()) { + for (auto [op, byRef, sym, arg] : llvm::zip_equal( + loop.getReduceVars(), loop.getReduceByrefAttr().asArrayRef(), + loop.getReduceSymsAttr().getAsRange(), + loop.getRegionReduceArgs())) { + auto firReducer = + mlir::SymbolTable::lookupNearestSymbolFrom( + loop, sym); + + mlir::OpBuilder::InsertionGuard guard(rewriter); + rewriter.setInsertionPointAfter(firReducer); + + auto ompReducer = rewriter.create( + firReducer.getLoc(), sym.getLeafReference().str() + ".omp", + firReducer.getTypeAttr().getValue()); + + cloneFIRRegionToOMP(firReducer.getAllocRegion(), + ompReducer.getAllocRegion()); + cloneFIRRegionToOMP(firReducer.getInitializerRegion(), + ompReducer.getInitializerRegion()); + cloneFIRRegionToOMP(firReducer.getReductionRegion(), + ompReducer.getReductionRegion()); + cloneFIRRegionToOMP(firReducer.getAtomicReductionRegion(), + ompReducer.getAtomicReductionRegion()); + cloneFIRRegionToOMP(firReducer.getCleanupRegion(), + ompReducer.getCleanupRegion()); + + wsloopClauseOps.reductionVars.push_back(op); + wsloopClauseOps.reductionByref.push_back(byRef); + wsloopClauseOps.reductionSyms.push_back( + mlir::SymbolRefAttr::get(ompReducer)); + } + } + auto wsloopOp = rewriter.create(loop.getLoc(), wsloopClauseOps); wsloopOp.setComposite(isComposite); Fortran::common::openmp::EntryBlockArgs wsloopArgs; wsloopArgs.priv.vars = wsloopClauseOps.privateVars; + wsloopArgs.reduction.vars = wsloopClauseOps.reductionVars; Fortran::common::openmp::genEntryBlock(rewriter, wsloopArgs, wsloopOp.getRegion()); @@ -393,7 +421,8 @@ class DoConcurrentConversion clauseOps.loopLowerBounds.size()))) rewriter.replaceAllUsesWith(loopNestArg, wsloopArg); - for (unsigned i = 0; i < loop.getLocalVars().size(); ++i) + for (unsigned i = 0; + i < loop.getLocalVars().size() + loop.getReduceVars().size(); 
++i) loopNestOp.getRegion().eraseArgument(clauseOps.loopLowerBounds.size()); return loopNestOp; diff --git a/flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp b/flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp index e440852b3103a..506c8e66dbdfa 100644 --- a/flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp +++ b/flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp @@ -234,6 +234,10 @@ class DoConcurrentConversion loop.setLocalSymsAttr(nullptr); } + for (auto [reduceVar, reduceArg] : + llvm::zip_equal(loop.getReduceVars(), loop.getRegionReduceArgs())) + rewriter.replaceAllUsesWith(reduceArg, reduceVar); + // Collect iteration variable(s) allocations so that we can move them // outside the `fir.do_concurrent` wrapper. llvm::SmallVector opsToMove; @@ -257,12 +261,16 @@ class DoConcurrentConversion innermostUnorderdLoop = rewriter.create( doConcurentOp.getLoc(), lb, ub, st, /*unordred=*/true, /*finalCountValue=*/false, - /*iterArgs=*/std::nullopt, loop.getReduceOperands(), + /*iterArgs=*/std::nullopt, loop.getReduceVars(), loop.getReduceAttrsAttr()); ivArgs.push_back(innermostUnorderdLoop.getInductionVar()); rewriter.setInsertionPointToStart(innermostUnorderdLoop.getBody()); } + loop.getRegion().front().eraseArguments(loop.getNumInductionVars() + + loop.getNumLocalOperands(), + loop.getNumReduceOperands()); + rewriter.inlineBlockBefore( &loopBlock, innermostUnorderdLoop.getBody()->getTerminator(), ivArgs); rewriter.eraseOp(doConcurentOp); diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index 3016ce4ccd2f8..d70aaab82cbab 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -1276,6 +1276,58 @@ struct OmpEndDirectiveParser { llvm::omp::Directive dir_; }; +struct OmpAllocatorsConstructParser { + using resultType = OpenMPAllocatorsConstruct; + + std::optional Parse(ParseState &state) const { + auto dirSpec{Parser{}.Parse(state)}; + if (!dirSpec || dirSpec->DirId() != 
llvm::omp::Directive::OMPD_allocators) { + return std::nullopt; + } + + // This should be an allocate-stmt. That will be checked in semantics. + Block block; + if (auto stmt{attempt(Parser{}).Parse(state)}) { + block.emplace_back(std::move(*stmt)); + } + // Allow empty block. Check for this in semantics. + + auto end{OmpEndDirectiveParser{llvm::omp::Directive::OMPD_allocators}}; + return OpenMPAllocatorsConstruct{ + std::move(*dirSpec), std::move(block), *maybe(end).Parse(state)}; + } +}; + +TYPE_PARSER(sourced( // + construct( + "ALLOCATORS"_tok >= OmpAllocatorsConstructParser{}))) + +struct OmpDispatchConstructParser { + using resultType = OpenMPDispatchConstruct; + + std::optional Parse(ParseState &state) const { + auto dirSpec{Parser{}.Parse(state)}; + if (!dirSpec || dirSpec->DirId() != llvm::omp::Directive::OMPD_dispatch) { + return std::nullopt; + } + + // This should be a function call. That will be checked in semantics. + Block block; + if (auto stmt{attempt(Parser{}).Parse(state)}) { + block.emplace_back(std::move(*stmt)); + } + // Allow empty block. Check for this in semantics. + + auto end{OmpEndDirectiveParser{llvm::omp::Directive::OMPD_dispatch}}; + return OpenMPDispatchConstruct{ + std::move(*dirSpec), std::move(block), *maybe(end).Parse(state)}; + } +}; + +TYPE_PARSER(sourced( // + construct( + "DISPATCH"_tok >= OmpDispatchConstructParser{}))) + // Parser for an arbitrary OpenMP ATOMIC construct. 
// // Depending on circumstances, an ATOMIC construct applies to one or more @@ -1605,16 +1657,6 @@ TYPE_PARSER(sourced(construct(verbatim("CRITICAL"_tok), TYPE_PARSER(construct( Parser{}, block, Parser{})) -TYPE_PARSER(sourced(construct( - verbatim("DISPATCH"_tok), Parser{}))) - -TYPE_PARSER( - construct(startOmpLine >> "END DISPATCH"_tok)) - -TYPE_PARSER(sourced(construct( - Parser{} / endOmpLine, block, - maybe(Parser{} / endOmpLine)))) - // 2.11.3 Executable Allocate directive TYPE_PARSER( sourced(construct(verbatim("ALLOCATE"_tok), @@ -1622,16 +1664,6 @@ TYPE_PARSER( maybe(nonemptyList(Parser{})) / endOmpLine, statement(allocateStmt)))) -// 6.7 Allocators construct [OpenMP 5.2] -// allocators-construct -> ALLOCATORS [allocate-clause [,]] -// allocate-stmt -// [omp-end-allocators-construct] -TYPE_PARSER(sourced(construct( - verbatim("ALLOCATORS"_tok), Parser{} / endOmpLine, - statement(allocateStmt), maybe(Parser{} / endOmpLine)))) - -TYPE_PARSER(construct(startOmpLine >> "END ALLOCATORS"_tok)) - // 2.8.2 Declare Simd construct TYPE_PARSER(sourced(construct( verbatim("DECLARE SIMD"_tok) || verbatim("DECLARE_SIMD"_tok), diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index dcd1ac165adc1..b66d756bdbf2c 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -2571,7 +2571,7 @@ class UnparseVisitor { Word(ToUpperCaseLetters(common::EnumToString(x))); } - void Unparse(const OpenMPAtomicConstruct &x) { + template void UnparseBlockConstruct(const Construct &x) { BeginOpenMP(); Word("!$OMP "); Walk(std::get(x.t)); @@ -2587,6 +2587,10 @@ class UnparseVisitor { } } + void Unparse(const OpenMPAtomicConstruct &x) { // + UnparseBlockConstruct(x); + } + void Unparse(const OpenMPExecutableAllocate &x) { const auto &fields = std::get>>( @@ -2614,22 +2618,8 @@ class UnparseVisitor { Put("\n"); EndOpenMP(); } - void Unparse(const OmpEndAllocators &x) { - BeginOpenMP(); - Word("!$OMP END ALLOCATE"); - Put("\n"); - EndOpenMP(); - } 
- void Unparse(const OpenMPAllocatorsConstruct &x) { - BeginOpenMP(); - Word("!$OMP ALLOCATE"); - Walk(std::get(x.t)); - Put("\n"); - EndOpenMP(); - Walk(std::get>(x.t)); - if (const auto &end = std::get>(x.t)) { - Walk(*end); - } + void Unparse(const OpenMPAllocatorsConstruct &x) { // + UnparseBlockConstruct(x); } void Unparse(const OmpAssumeDirective &x) { BeginOpenMP(); @@ -2768,6 +2758,9 @@ class UnparseVisitor { Put("\n"); EndOpenMP(); } + void Unparse(const OpenMPDispatchConstruct &x) { // + UnparseBlockConstruct(x); + } void Unparse(const OpenMPRequiresConstruct &y) { BeginOpenMP(); Word("!$OMP REQUIRES "); @@ -2787,15 +2780,6 @@ class UnparseVisitor { Walk(x.v); return false; } - void Unparse(const OmpDispatchDirective &x) { - Word("!$OMP DISPATCH"); - Walk(x.t); - Put("\n"); - } - void Unparse(const OmpEndDispatchDirective &) { - Word("!$OMP END DISPATCH"); - Put("\n"); - } void Unparse(const OmpErrorDirective &x) { Word("!$OMP ERROR "); Walk(x.t); diff --git a/flang/lib/Semantics/check-omp-atomic.cpp b/flang/lib/Semantics/check-omp-atomic.cpp index 047c604693460..c5ed8796f0c34 100644 --- a/flang/lib/Semantics/check-omp-atomic.cpp +++ b/flang/lib/Semantics/check-omp-atomic.cpp @@ -47,43 +47,12 @@ static bool operator!=(const evaluate::Expr &e, const evaluate::Expr &f) { return !(e == f); } -// There is no consistent way to get the source of a given ActionStmt, so -// extract the source information from Statement when we can, -// and keep it around for error reporting in further analyses. 
-struct SourcedActionStmt { - const parser::ActionStmt *stmt{nullptr}; - parser::CharBlock source; - - operator bool() const { return stmt != nullptr; } -}; - struct AnalyzedCondStmt { SomeExpr cond{evaluate::NullPointer{}}; // Default ctor is deleted parser::CharBlock source; SourcedActionStmt ift, iff; }; -static SourcedActionStmt GetActionStmt( - const parser::ExecutionPartConstruct *x) { - if (x == nullptr) { - return SourcedActionStmt{}; - } - if (auto *exec{std::get_if(&x->u)}) { - using ActionStmt = parser::Statement; - if (auto *stmt{std::get_if(&exec->u)}) { - return SourcedActionStmt{&stmt->statement, stmt->source}; - } - } - return SourcedActionStmt{}; -} - -static SourcedActionStmt GetActionStmt(const parser::Block &block) { - if (block.size() == 1) { - return GetActionStmt(&block.front()); - } - return SourcedActionStmt{}; -} - // Compute the `evaluate::Assignment` from parser::ActionStmt. The assumption // is that the ActionStmt will be either an assignment or a pointer-assignment, // otherwise return std::nullopt. 
diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 89c1565bf66aa..2425265e196c6 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -495,6 +495,12 @@ template struct DirectiveSpellingVisitor { template bool Pre(const T &) { return true; } template void Post(const T &) {} + template + static const parser::OmpDirectiveName &GetDirName( + const std::tuple &t) { + return std::get(t).DirName(); + } + bool Pre(const parser::OmpSectionsDirective &x) { checker_(x.source, x.v); return false; @@ -503,8 +509,8 @@ template struct DirectiveSpellingVisitor { checker_(std::get(x.t).source, Directive::OMPD_allocate); return false; } - bool Pre(const parser::OmpDispatchDirective &x) { - checker_(std::get(x.t).source, Directive::OMPD_dispatch); + bool Pre(const parser::OpenMPDispatchConstruct &x) { + checker_(GetDirName(x.t).source, Directive::OMPD_dispatch); return false; } bool Pre(const parser::OmpErrorDirective &x) { @@ -520,8 +526,7 @@ template struct DirectiveSpellingVisitor { return false; } bool Pre(const parser::OpenMPAllocatorsConstruct &x) { - checker_( - std::get(x.t).source, Directive::OMPD_allocators); + checker_(GetDirName(x.t).source, Directive::OMPD_allocators); return false; } bool Pre(const parser::OmpAssumeDirective &x) { @@ -1590,28 +1595,31 @@ void OmpStructureChecker::Enter(const parser::OmpErrorDirective &x) { } void OmpStructureChecker::Enter(const parser::OpenMPDispatchConstruct &x) { - PushContextAndClauseSets(x.source, llvm::omp::Directive::OMPD_dispatch); + auto &dirSpec{std::get(x.t)}; const auto &block{std::get(x.t)}; - if (block.empty() || block.size() > 1) { + PushContextAndClauseSets( + dirSpec.DirName().source, llvm::omp::Directive::OMPD_dispatch); + + if (block.empty()) { context_.Say(x.source, - "The DISPATCH construct is empty or contains more than one statement"_err_en_US); + "The DISPATCH construct should contain a single 
function or subroutine call"_err_en_US); return; } - auto it{block.begin()}; bool passChecks{false}; - if (const parser::AssignmentStmt * - assignStmt{parser::Unwrap(*it)}) { + omp::SourcedActionStmt action{omp::GetActionStmt(block)}; + if (const auto *assignStmt{ + parser::Unwrap(*action.stmt)}) { if (parser::Unwrap(assignStmt->t)) { passChecks = true; } - } else if (parser::Unwrap(*it)) { + } else if (parser::Unwrap(*action.stmt)) { passChecks = true; } if (!passChecks) { - context_.Say(x.source, - "The DISPATCH construct does not contain a SUBROUTINE or FUNCTION"_err_en_US); + context_.Say(action.source, + "The body of the DISPATCH construct should be a function or a subroutine call"_err_en_US); } } @@ -1657,26 +1665,45 @@ void OmpStructureChecker::Leave(const parser::OpenMPExecutableAllocate &x) { void OmpStructureChecker::Enter(const parser::OpenMPAllocatorsConstruct &x) { isPredefinedAllocator = true; - const auto &dir{std::get(x.t)}; - PushContextAndClauseSets(dir.source, llvm::omp::Directive::OMPD_allocators); - const auto &clauseList{std::get(x.t)}; - for (const auto &clause : clauseList.v) { + + auto &dirSpec{std::get(x.t)}; + auto &block{std::get(x.t)}; + PushContextAndClauseSets( + dirSpec.DirName().source, llvm::omp::Directive::OMPD_allocators); + + if (block.empty()) { + context_.Say(dirSpec.source, + "The ALLOCATORS construct should contain a single ALLOCATE statement"_err_en_US); + return; + } + + omp::SourcedActionStmt action{omp::GetActionStmt(block)}; + const auto *allocate{ + action ? parser::Unwrap(action.stmt) : nullptr}; + + if (!allocate) { + const parser::CharBlock &source = action ? 
action.source : x.source; + context_.Say(source, + "The body of the ALLOCATORS construct should be an ALLOCATE statement"_err_en_US); + } + + for (const auto &clause : dirSpec.Clauses().v) { if (const auto *allocClause{ parser::Unwrap(clause)}) { CheckVarIsNotPartOfAnotherVar( - dir.source, std::get(allocClause->v.t)); + dirSpec.source, std::get(allocClause->v.t)); } } } void OmpStructureChecker::Leave(const parser::OpenMPAllocatorsConstruct &x) { - const auto &dir{std::get(x.t)}; - const auto &clauseList{std::get(x.t)}; - for (const auto &clause : clauseList.v) { + auto &dirSpec{std::get(x.t)}; + + for (const auto &clause : dirSpec.Clauses().v) { if (const auto *allocClause{ std::get_if(&clause.u)}) { CheckPredefinedAllocatorRestriction( - dir.source, std::get(allocClause->v.t)); + dirSpec.source, std::get(allocClause->v.t)); } } dirContext_.pop_back(); diff --git a/flang/lib/Semantics/openmp-utils.cpp b/flang/lib/Semantics/openmp-utils.cpp index fd9596a09cd52..f43d2cc75620e 100644 --- a/flang/lib/Semantics/openmp-utils.cpp +++ b/flang/lib/Semantics/openmp-utils.cpp @@ -38,6 +38,26 @@ namespace Fortran::semantics::omp { +SourcedActionStmt GetActionStmt(const parser::ExecutionPartConstruct *x) { + if (x == nullptr) { + return SourcedActionStmt{}; + } + if (auto *exec{std::get_if(&x->u)}) { + using ActionStmt = parser::Statement; + if (auto *stmt{std::get_if(&exec->u)}) { + return SourcedActionStmt{&stmt->statement, stmt->source}; + } + } + return SourcedActionStmt{}; +} + +SourcedActionStmt GetActionStmt(const parser::Block &block) { + if (block.size() == 1) { + return GetActionStmt(&block.front()); + } + return SourcedActionStmt{}; +} + std::string ThisVersion(unsigned version) { std::string tv{ std::to_string(version / 10) + "." 
+ std::to_string(version % 10)}; diff --git a/flang/lib/Semantics/openmp-utils.h b/flang/lib/Semantics/openmp-utils.h index dbb0565215357..a96c008fb26e7 100644 --- a/flang/lib/Semantics/openmp-utils.h +++ b/flang/lib/Semantics/openmp-utils.h @@ -29,6 +29,19 @@ class Symbol; // Add this namespace to avoid potential conflicts namespace omp { +// There is no consistent way to get the source of an ActionStmt, but there +// is "source" in Statement. This structure keeps the ActionStmt with the +// extracted source for further use. +struct SourcedActionStmt { + const parser::ActionStmt *stmt{nullptr}; + parser::CharBlock source; + + operator bool() const { return stmt != nullptr; } +}; + +SourcedActionStmt GetActionStmt(const parser::ExecutionPartConstruct *x); +SourcedActionStmt GetActionStmt(const parser::Block &block); + std::string ThisVersion(unsigned version); std::string TryVersion(unsigned version); diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 299bb6ff876e7..151f4ccae634e 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -10,6 +10,7 @@ #include "check-acc-structure.h" #include "check-omp-structure.h" +#include "openmp-utils.h" #include "resolve-names-utils.h" #include "flang/Common/idioms.h" #include "flang/Evaluate/fold.h" @@ -353,12 +354,6 @@ class OmpAttributeVisitor : DirectiveAttributeVisitor { return true; } - bool Pre(const parser::OmpDirectiveSpecification &x) { - PushContext(x.source, x.DirId()); - return true; - } - void Post(const parser::OmpDirectiveSpecification &) { PopContext(); } - bool Pre(const parser::OmpMetadirectiveDirective &x) { PushContext(x.source, llvm::omp::Directive::OMPD_metadirective); return true; @@ -372,6 +367,29 @@ class OmpAttributeVisitor : DirectiveAttributeVisitor { GetContext().withinConstruct = true; } + bool Pre(const parser::OpenMPStandaloneConstruct &x) { + common::visit( + [&](auto &&s) { + using TypeS 
= llvm::remove_cvref_t; + // These two cases are handled individually. + if constexpr ( // + !std::is_same_v && + !std::is_same_v) { + PushContext(x.source, s.v.DirId()); + } + }, + x.u); + return true; + } + + void Post(const parser::OpenMPStandaloneConstruct &x) { + // These two cases are handled individually. + if (!std::holds_alternative(x.u) && + !std::holds_alternative(x.u)) { + PopContext(); + } + } + bool Pre(const parser::OpenMPSimpleStandaloneConstruct &); void Post(const parser::OpenMPSimpleStandaloneConstruct &) { PopContext(); } @@ -2149,9 +2167,10 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPExecutableAllocate &x) { } bool OmpAttributeVisitor::Pre(const parser::OpenMPAllocatorsConstruct &x) { - PushContext(x.source, llvm::omp::Directive::OMPD_allocators); - const auto &clauseList{std::get(x.t)}; - for (const auto &clause : clauseList.v) { + auto &dirSpec{std::get(x.t)}; + PushContext(x.source, dirSpec.DirId()); + + for (const auto &clause : dirSpec.Clauses().v) { if (const auto *allocClause{ std::get_if(&clause.u)}) { ResolveOmpObjectList(std::get(allocClause->v.t), @@ -2234,28 +2253,43 @@ void OmpAttributeVisitor::Post(const parser::OpenMPExecutableAllocate &x) { } void OmpAttributeVisitor::Post(const parser::OpenMPAllocatorsConstruct &x) { - const auto &dir{std::get(x.t)}; - const auto &clauseList{std::get(x.t)}; - for (const auto &clause : clauseList.v) { - if (const auto *alloc{ - std::get_if(&clause.u)}) { - CheckAllNamesInAllocateStmt(dir.source, - std::get(alloc->v.t), - std::get>(x.t).statement); - - auto &modifiers{OmpGetModifiers(alloc->v)}; - bool hasAllocator{ - OmpGetUniqueModifier(modifiers) || - OmpGetUniqueModifier(modifiers)}; - - // TODO: As with allocate directive, exclude the case when a requires - // directive with the dynamic_allocators clause is present in - // the same compilation unit (OMP5.0 2.11.3). 
- if (IsNestedInDirective(llvm::omp::Directive::OMPD_target) && - !hasAllocator) { - context_.Say(x.source, - "ALLOCATORS directives that appear in a TARGET region " - "must specify an allocator"_err_en_US); + auto &dirSpec{std::get(x.t)}; + auto &block{std::get(x.t)}; + + omp::SourcedActionStmt action{omp::GetActionStmt(block)}; + const parser::AllocateStmt *allocate{[&]() { + if (action) { + if (auto *alloc{std::get_if>( + &action.stmt->u)}) { + return &alloc->value(); + } + } + return static_cast(nullptr); + }()}; + + if (allocate) { + for (const auto &clause : dirSpec.Clauses().v) { + if (auto *alloc{std::get_if(&clause.u)}) { + CheckAllNamesInAllocateStmt( + x.source, std::get(alloc->v.t), *allocate); + + using OmpAllocatorSimpleModifier = parser::OmpAllocatorSimpleModifier; + using OmpAllocatorComplexModifier = parser::OmpAllocatorComplexModifier; + + auto &modifiers{OmpGetModifiers(alloc->v)}; + bool hasAllocator{ + OmpGetUniqueModifier(modifiers) || + OmpGetUniqueModifier(modifiers)}; + + // TODO: As with allocate directive, exclude the case when a requires + // directive with the dynamic_allocators clause is present in + // the same compilation unit (OMP5.0 2.11.3). 
+ if (IsNestedInDirective(llvm::omp::Directive::OMPD_target) && + !hasAllocator) { + context_.Say(x.source, + "ALLOCATORS directives that appear in a TARGET region " + "must specify an allocator"_err_en_US); + } } } } diff --git a/flang/test/Fir/OpenACC/openacc-mappable.fir b/flang/test/Fir/OpenACC/openacc-mappable.fir index 3e3e455469f69..71576f4b71075 100644 --- a/flang/test/Fir/OpenACC/openacc-mappable.fir +++ b/flang/test/Fir/OpenACC/openacc-mappable.fir @@ -23,7 +23,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec : vector<2xi64>, // CHECK: Size: 40 // CHECK: Visiting: %{{.*}} = acc.copyin varPtr(%{{.*}} : !fir.ref>) -> !fir.ref> {name = "arr", structured = false} - // CHECK: Mappable: !fir.array<10xf32> + // CHECK: Pointer-like and Mappable: !fir.ref> // CHECK: Type category: array // CHECK: Size: 40 @@ -60,20 +60,17 @@ module attributes {dlti.dl_spec = #dlti.dl_spec : vector<2xi64>, } // CHECK: Visiting: %{{.*}} = acc.copyin varPtr(%{{.*}} : !fir.ref>) -> !fir.ref> {name = "arr1", structured = false} - // CHECK: Pointer-like: !fir.ref> - // CHECK: Mappable: !fir.array + // CHECK: Pointer-like and Mappable: !fir.ref> // CHECK: Type category: array // CHECK: Bound[0]: %{{.*}} = acc.bounds lowerbound(%c0{{.*}} : index) upperbound(%{{.*}} : index) extent(%{{.*}} : index) stride(%c1{{.*}} : index) startIdx(%c1{{.*}} : index) // CHECK: Visiting: %{{.*}} = acc.copyin varPtr(%{{.*}} : !fir.ref>) -> !fir.ref> {name = "arr2", structured = false} - // CHECK: Pointer-like: !fir.ref> - // CHECK: Mappable: !fir.array + // CHECK: Pointer-like and Mappable: !fir.ref> // CHECK: Type category: array // CHECK: Bound[0]: %{{.*}} = acc.bounds lowerbound(%c0{{.*}} : index) upperbound(%{{.*}} : index) extent(%{{.*}} : index) stride(%c1{{.*}} : index) startIdx(%c2{{.*}} : index) // CHECK: Visiting: %{{.*}} = acc.copyin varPtr(%{{.*}} : !fir.ref>) -> !fir.ref> {name = "arr3", structured = false} - // CHECK: Pointer-like: !fir.ref> - // CHECK: Mappable: !fir.array<10xf32> + // 
CHECK: Pointer-like and Mappable: !fir.ref> // CHECK: Type category: array // CHECK: Size: 40 // CHECK: Offset: 0 diff --git a/flang/test/Fir/OpenACC/openacc-type-categories-class.f90 b/flang/test/Fir/OpenACC/openacc-type-categories-class.f90 index 58025bfa556a5..e8951cceeeaeb 100644 --- a/flang/test/Fir/OpenACC/openacc-type-categories-class.f90 +++ b/flang/test/Fir/OpenACC/openacc-type-categories-class.f90 @@ -29,13 +29,13 @@ subroutine init_unlimited(this) ! CHECK: Mappable: !fir.class> ! CHECK: Type category: composite ! CHECK: Visiting: {{.*}} acc.copyin {{.*}} {name = "this%field", structured = false} -! CHECK: Pointer-like: !fir.ref +! CHECK: Pointer-like and Mappable: !fir.ref ! CHECK: Type category: composite ! For unlimited polymorphic entities and assumed types - they effectively have ! no declared type. Thus the type categorizer cannot categorize it. ! CHECK: Visiting: {{.*}} = acc.copyin {{.*}} {name = "var", structured = false} -! CHECK: Pointer-like: !fir.ref +! CHECK: Pointer-like and Mappable: !fir.ref ! CHECK: Type category: uncategorized ! CHECK: Visiting: {{.*}} = acc.copyin {{.*}} {name = "this", structured = false} ! CHECK: Mappable: !fir.class diff --git a/flang/test/Fir/OpenACC/openacc-type-categories.f90 b/flang/test/Fir/OpenACC/openacc-type-categories.f90 index c25c38422b755..3d6067db8224d 100644 --- a/flang/test/Fir/OpenACC/openacc-type-categories.f90 +++ b/flang/test/Fir/OpenACC/openacc-type-categories.f90 @@ -18,32 +18,32 @@ program main end program ! CHECK: Visiting: {{.*}} acc.copyin {{.*}} {name = "scalar", structured = false} -! CHECK: Pointer-like: !fir.ref +! CHECK: Pointer-like and Mappable: !fir.ref ! CHECK: Type category: scalar ! CHECK: Visiting: {{.*}} acc.copyin {{.*}} {name = "scalaralloc", structured = false} -! CHECK: Pointer-like: !fir.ref>> +! CHECK: Pointer-like and Mappable: !fir.ref>> ! CHECK: Type category: nonscalar ! CHECK: Visiting: {{.*}} acc.copyin {{.*}} {name = "ttvar", structured = false} -! 
CHECK: Pointer-like: !fir.ref}>> +! CHECK: Pointer-like and Mappable: !fir.ref}>> ! CHECK: Type category: composite ! CHECK: Visiting: {{.*}} acc.copyin {{.*}} {name = "arrayconstsize", structured = false} -! CHECK: Pointer-like: !fir.ref> +! CHECK: Pointer-like and Mappable: !fir.ref> ! CHECK: Type category: array ! CHECK: Visiting: {{.*}} acc.copyin {{.*}} {name = "arrayalloc", structured = false} -! CHECK: Pointer-like: !fir.ref>>> +! CHECK: Pointer-like and Mappable: !fir.ref>>> ! CHECK: Type category: array ! CHECK: Visiting: {{.*}} acc.copyin {{.*}} {name = "complexvar", structured = false} -! CHECK: Pointer-like: !fir.ref> +! CHECK: Pointer-like and Mappable: !fir.ref> ! CHECK: Type category: scalar ! CHECK: Visiting: {{.*}} acc.copyin {{.*}} {name = "charvar", structured = false} -! CHECK: Pointer-like: !fir.ref> +! CHECK: Pointer-like and Mappable: !fir.ref> ! CHECK: Type category: nonscalar ! CHECK: Visiting: {{.*}} acc.copyin {{.*}} {name = "ttvar%field", structured = false} -! CHECK: Pointer-like: !fir.ref +! CHECK: Pointer-like and Mappable: !fir.ref ! CHECK: Type category: composite ! CHECK: Visiting: {{.*}} acc.copyin {{.*}} {name = "ttvar%fieldarray", structured = false} -! CHECK: Pointer-like: !fir.ref> +! CHECK: Pointer-like and Mappable: !fir.ref> ! CHECK: Type category: array ! CHECK: Visiting: {{.*}} acc.copyin {{.*}} {name = "arrayconstsize(1)", structured = false} -! CHECK: Pointer-like: !fir.ref> +! CHECK: Pointer-like and Mappable: !fir.ref> ! 
CHECK: Type category: array diff --git a/flang/test/Fir/do_concurrent.fir b/flang/test/Fir/do_concurrent.fir index cc1197ba56bd7..6e2173447855e 100644 --- a/flang/test/Fir/do_concurrent.fir +++ b/flang/test/Fir/do_concurrent.fir @@ -63,7 +63,7 @@ func.func @dc_2d_reduction(%i_lb: index, %i_ub: index, %i_st: index, %j = fir.alloca i32 fir.do_concurrent.loop (%i_iv, %j_iv) = (%i_lb, %j_lb) to (%i_ub, %j_ub) step (%i_st, %j_st) - reduce(#fir.reduce_attr -> %sum : !fir.ref) { + reduce(@add_reduction_i32 #fir.reduce_attr %sum -> %sum_arg : !fir.ref) { %0 = fir.convert %i_iv : (index) -> i32 fir.store %0 to %i : !fir.ref @@ -83,7 +83,7 @@ func.func @dc_2d_reduction(%i_lb: index, %i_ub: index, %i_st: index, // CHECK: %[[I:.*]] = fir.alloca i32 // CHECK: %[[J:.*]] = fir.alloca i32 // CHECK: fir.do_concurrent.loop -// CHECK-SAME: (%[[I_IV:.*]], %[[J_IV:.*]]) = (%[[I_LB]], %[[J_LB]]) to (%[[I_UB]], %[[J_UB]]) step (%[[I_ST]], %[[J_ST]]) reduce(#fir.reduce_attr -> %[[SUM]] : !fir.ref) { +// CHECK-SAME: (%[[I_IV:.*]], %[[J_IV:.*]]) = (%[[I_LB]], %[[J_LB]]) to (%[[I_UB]], %[[J_UB]]) step (%[[I_ST]], %[[J_ST]]) reduce(@add_reduction_i32 #fir.reduce_attr %[[SUM]] -> %{{.*}} : !fir.ref) { // CHECK: %[[I_IV_CVT:.*]] = fir.convert %[[I_IV]] : (index) -> i32 // CHECK: fir.store %[[I_IV_CVT]] to %[[I]] : !fir.ref // CHECK: %[[J_IV_CVT:.*]] = fir.convert %[[J_IV]] : (index) -> i32 @@ -161,3 +161,62 @@ func.func @do_concurrent_with_locality_specs() { // CHECK: } // CHECK: return // CHECK: } + +func.func @dc_reduce() { + %3 = fir.alloca i32 {bindc_name = "s", uniq_name = "dc_reduce"} + %4:2 = hlfir.declare %3 {uniq_name = "dc_reduce"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %c1 = arith.constant 1 : index + fir.do_concurrent { + fir.do_concurrent.loop (%arg0) = (%c1) to (%c1) step (%c1) reduce(byref @add_reduction_i32 #fir.reduce_attr %4#0 -> %arg1 : !fir.ref) { + } + } + return +} + +// CHECK-LABEL: func.func @dc_reduce() { +// CHECK: %[[S_ALLOC:.*]] = fir.alloca i32 {bindc_name = "s", 
uniq_name = "dc_reduce"} +// CHECK: %[[S_DECL:.*]]:2 = hlfir.declare %[[S_ALLOC]] {uniq_name = "dc_reduce"} : (!fir.ref) -> (!fir.ref, !fir.ref) +// CHECK: fir.do_concurrent { +// CHECK: fir.do_concurrent.loop (%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) reduce(byref @add_reduction_i32 #fir.reduce_attr %[[S_DECL]]#0 -> %[[S_ARG:.*]] : !fir.ref) { +// CHECK: } +// CHECK: } +// CHECK: return +// CHECK: } + +func.func @dc_reduce_2() { + %3 = fir.alloca i32 {bindc_name = "s", uniq_name = "dc_reduce"} + %4:2 = hlfir.declare %3 {uniq_name = "dc_reduce"} : (!fir.ref) -> (!fir.ref, !fir.ref) + + %5 = fir.alloca i32 {bindc_name = "m", uniq_name = "dc_reduce"} + %6:2 = hlfir.declare %5 {uniq_name = "dc_reduce"} : (!fir.ref) -> (!fir.ref, !fir.ref) + + %c1 = arith.constant 1 : index + + fir.do_concurrent { + fir.do_concurrent.loop (%arg0) = (%c1) to (%c1) step (%c1) + reduce(@add_reduction_i32 #fir.reduce_attr %4#0 -> %arg1, + @mul_reduction_i32 #fir.reduce_attr %6#0 -> %arg2 + : !fir.ref, !fir.ref) { + } + } + + return +} + +// CHECK-LABEL: func.func @dc_reduce_2() { +// CHECK: %[[S_ALLOC:.*]] = fir.alloca i32 {bindc_name = "s", uniq_name = "dc_reduce"} +// CHECK: %[[S_DECL:.*]]:2 = hlfir.declare %[[S_ALLOC]] {uniq_name = "dc_reduce"} : (!fir.ref) -> (!fir.ref, !fir.ref) + +// CHECK: %[[M_ALLOC:.*]] = fir.alloca i32 {bindc_name = "m", uniq_name = "dc_reduce"} +// CHECK: %[[M_DECL:.*]]:2 = hlfir.declare %[[M_ALLOC]] {uniq_name = "dc_reduce"} : (!fir.ref) -> (!fir.ref, !fir.ref) +// CHECK: fir.do_concurrent { +// CHECK: fir.do_concurrent.loop (%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{[^[:space:]]+}}) +// CHECK-SAME: reduce( +// CHECK-SAME: @add_reduction_i32 #fir.reduce_attr %[[S_DECL]]#0 -> %[[S_ARG:[^,]+]], +// CHECK-SAME: @mul_reduction_i32 #fir.reduce_attr %[[M_DECL]]#0 -> %[[M_ARG:[^[:space:]]+]] +// CHECK-SAME: : !fir.ref, !fir.ref) { +// CHECK: } +// CHECK: } +// CHECK: return +// CHECK: } + diff --git a/flang/test/Fir/invalid.fir b/flang/test/Fir/invalid.fir 
index aca0ecc1abdc1..e32ea7ad3c729 100644 --- a/flang/test/Fir/invalid.fir +++ b/flang/test/Fir/invalid.fir @@ -1256,8 +1256,8 @@ func.func @dc_invalid_reduction(%arg0: index, %arg1: index) { %sum = fir.alloca i32 // expected-error@+2 {{'fir.do_concurrent.loop' op mismatch in number of reduction variables and reduction attributes}} fir.do_concurrent { - "fir.do_concurrent.loop"(%arg0, %arg1, %arg0, %sum) <{operandSegmentSizes = array}> ({ - ^bb0(%arg3: index): + "fir.do_concurrent.loop"(%arg0, %arg1, %arg0, %sum) <{operandSegmentSizes = array}> ({ + ^bb0(%arg3: index, %sum_arg: i32): %tmp = "fir.alloca"() <{in_type = i32, operandSegmentSizes = array}> : () -> !fir.ref }) : (index, index, index, !fir.ref) -> () } @@ -1266,6 +1266,20 @@ func.func @dc_invalid_reduction(%arg0: index, %arg1: index) { // ----- +func.func @dc_reduce_no_attr() { + %3 = fir.alloca i32 {bindc_name = "s", uniq_name = "dc_reduce"} + %4:2 = hlfir.declare %3 {uniq_name = "dc_reduce"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %c1 = arith.constant 1 : index + // expected-error@+2 {{expected attribute value}} + fir.do_concurrent { + fir.do_concurrent.loop (%arg0) = (%c1) to (%c1) step (%c1) reduce(@add_reduction_i32 %4#0 -> %arg1 : !fir.ref) { + } + } + return +} + +// ----- + // Should fail when volatility changes from a fir.convert func.func @bad_convert_volatile(%arg0: !fir.ref) -> !fir.ref { // expected-error@+1 {{op this conversion does not preserve volatility}} diff --git a/flang/test/HLFIR/fir-reduction-alloca-block.fir b/flang/test/HLFIR/fir-reduction-alloca-block.fir new file mode 100644 index 0000000000000..75857cfbe01d3 --- /dev/null +++ b/flang/test/HLFIR/fir-reduction-alloca-block.fir @@ -0,0 +1,31 @@ +// Tests that `fir.local` ops are able to provide an alloca block when required. 
+ +// RUN: fir-opt %s -convert-hlfir-to-fir | FileCheck %s + +fir.declare_reduction @add_reduction_byref_box_heap_UxUxf32 : !fir.ref>>> alloc { + %0 = fir.alloca !fir.box>> + fir.yield(%0 : !fir.ref>>>) +} init { +^bb0(%arg0: !fir.ref>>>, %arg1: !fir.ref>>>): + %cst = arith.constant 0.000000e+00 : f32 + %0 = fir.load %arg1 : !fir.ref>>> + hlfir.assign %cst to %0 : f32, !fir.box>> + fir.yield(%arg1 : !fir.ref>>>) +} combiner { +^bb0(%arg0: !fir.ref>>>, %arg1: !fir.ref>>>): + fir.yield(%arg0 : !fir.ref>>>) +} + +// CHECK-LABEL: fir.declare_reduction @add_reduction_byref_box_heap_UxUxf32 : !fir.ref>>> alloc { +// CHECK: %[[VAL_0:.*]] = fir.alloca !fir.box>> +// CHECK: fir.yield(%[[VAL_0]] : !fir.ref>>>) + +// CHECK-LABEL: } init { +// CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref>>>, %[[VAL_1:.*]]: !fir.ref>>>): +// CHECK: %[[VAL_2:.*]] = fir.alloca !fir.box>> +// CHECK: fir.yield(%[[VAL_1]] : !fir.ref>>>) + +// CHECK-LABEL: } combiner { +// CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref>>>, %[[VAL_1:.*]]: !fir.ref>>>): +// CHECK: fir.yield(%[[VAL_0]] : !fir.ref>>>) +// CHECK: } diff --git a/flang/test/Lower/CUDA/cuda-data-transfer.cuf b/flang/test/Lower/CUDA/cuda-data-transfer.cuf index 3a9b55996d9b1..3a4aff977b7a5 100644 --- a/flang/test/Lower/CUDA/cuda-data-transfer.cuf +++ b/flang/test/Lower/CUDA/cuda-data-transfer.cuf @@ -436,3 +436,11 @@ end subroutine ! CHECK-LABEL: func.func @_QPsub22() ! CHECK: cuf.data_transfer + +subroutine sub23(n) + integer :: n + real(8), device :: d(n,n), x(n) + x = sum(d,dim=2) ! Was triggering Unsupported CUDA data transfer +end subroutine + +! CHECK-LABEL: func.func @_QPsub23 diff --git a/flang/test/Lower/do_concurrent_reduce.f90 b/flang/test/Lower/do_concurrent_reduce.f90 new file mode 100644 index 0000000000000..8591a21e2b9e0 --- /dev/null +++ b/flang/test/Lower/do_concurrent_reduce.f90 @@ -0,0 +1,41 @@ +! 
RUN: %flang_fc1 -emit-hlfir -mmlir --enable-delayed-privatization-staging=true -o - %s | FileCheck %s + +subroutine do_concurrent_reduce + implicit none + integer :: s, i + + do concurrent (i=1:10) reduce(+:s) + s = s + 1 + end do +end + +! CHECK-LABEL: fir.declare_reduction @add_reduction_i32 : i32 init { +! CHECK: ^bb0(%[[ARG0:.*]]: i32): +! CHECK: %[[VAL_0:.*]] = arith.constant 0 : i32 +! CHECK: fir.yield(%[[VAL_0]] : i32) +! CHECK: } combiner { +! CHECK: ^bb0(%[[VAL_1:.*]]: i32, %[[VAL_2:.*]]: i32): +! CHECK: %[[VAL_3:.*]] = arith.addi %[[VAL_1]], %[[VAL_2]] : i32 +! CHECK: fir.yield(%[[VAL_3]] : i32) +! CHECK: } + +! CHECK-LABEL: func.func @_QPdo_concurrent_reduce() { +! CHECK: %[[S_ALLOC:.*]] = fir.alloca i32 {bindc_name = "s", uniq_name = "_QFdo_concurrent_reduceEs"} +! CHECK: %[[S_DECL:.*]]:2 = hlfir.declare %[[S_ALLOC]] {uniq_name = "_QFdo_concurrent_reduceEs"} : (!fir.ref) -> (!fir.ref, !fir.ref) + +! CHECK: fir.do_concurrent { +! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i"} +! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFdo_concurrent_reduceEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: fir.do_concurrent.loop (%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{[^[:space:]]+}}) +! CHECK-SAME: reduce(@add_reduction_i32 #fir.reduce_attr %[[S_DECL]]#0 -> %[[S_ARG:.*]] : !fir.ref) { + +! CHECK: %[[S_ARG_DECL:.*]]:2 = hlfir.declare %[[S_ARG]] {uniq_name = "_QFdo_concurrent_reduceEs"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[S_ARG_VAL:.*]] = fir.load %[[S_ARG_DECL]]#0 : !fir.ref +! CHECK: %[[C1:.*]] = arith.constant 1 : i32 +! CHECK: %[[RED_UPDATE:.*]] = arith.addi %[[S_ARG_VAL]], %[[C1]] : i32 +! CHECK: hlfir.assign %[[RED_UPDATE]] to %[[S_ARG_DECL]]#0 : i32, !fir.ref + +! CHECK: } +! CHECK: } +! CHECK: return +! 
CHECK: } diff --git a/flang/test/Lower/do_concurrent_reduce_allocatable.f90 b/flang/test/Lower/do_concurrent_reduce_allocatable.f90 new file mode 100644 index 0000000000000..873fd10dd1b97 --- /dev/null +++ b/flang/test/Lower/do_concurrent_reduce_allocatable.f90 @@ -0,0 +1,22 @@ +! RUN: %flang_fc1 -emit-hlfir -o - %s | FileCheck %s + +subroutine do_concurrent_allocatable + integer :: i + real, allocatable, dimension(:,:) :: x + + do concurrent (i = 1:10) reduce(+: x) + end do +end subroutine + +! CHECK: fir.declare_reduction @[[RED_OP:.*]] : ![[RED_TYPE:.*]] alloc { +! CHECK: %[[ALLOC:.*]] = fir.alloca +! CHECK: fir.yield(%[[ALLOC]] : ![[RED_TYPE]]) +! CHECK: } init { +! CHECK: ^bb0(%{{.*}}: ![[RED_TYPE]], %[[RED_ARG:.*]]: ![[RED_TYPE]]): +! CHECK: fir.yield(%[[RED_ARG]] : !{{.*}}) +! CHECK: } combiner { +! CHECK: ^bb0(%[[COMB_RES:.*]]: ![[RED_TYPE]], %{{.*}}: ![[RED_TYPE]]): +! CHECK: fir.yield(%[[COMB_RES]] : !{{.*}}) +! CHECK: } cleanup { +! CHECK: fir.yield +! CHECK: } diff --git a/flang/test/Lower/loops.f90 b/flang/test/Lower/loops.f90 index 60df27a591dc3..64f14ff972272 100644 --- a/flang/test/Lower/loops.f90 +++ b/flang/test/Lower/loops.f90 @@ -1,4 +1,4 @@ -! RUN: bbc -emit-fir -hlfir=false -o - %s | FileCheck %s +! RUN: bbc -emit-fir -hlfir=false --enable-delayed-privatization=false -o - %s | FileCheck %s ! CHECK-LABEL: loop_test subroutine loop_test diff --git a/flang/test/Lower/loops3.f90 b/flang/test/Lower/loops3.f90 index 84db1972cca16..2965b954b49a8 100644 --- a/flang/test/Lower/loops3.f90 +++ b/flang/test/Lower/loops3.f90 @@ -12,7 +12,7 @@ subroutine loop_test ! CHECK: %[[VAL_0:.*]] = fir.alloca f32 {bindc_name = "m", uniq_name = "_QFloop_testEm"} ! CHECK: %[[VAL_1:.*]] = fir.address_of(@_QFloop_testEsum) : !fir.ref - ! CHECK: fir.do_concurrent.loop ({{.*}}) = ({{.*}}) to ({{.*}}) step ({{.*}}) reduce(#fir.reduce_attr -> %[[VAL_1:.*]] : !fir.ref, #fir.reduce_attr -> %[[VAL_0:.*]] : !fir.ref) { + ! 
CHECK: fir.do_concurrent.loop ({{.*}}) = ({{.*}}) to ({{.*}}) step ({{.*}}) reduce(@add_reduction_i32 #fir.reduce_attr %[[VAL_1]] -> %{{.*}}, @other_reduction_f32 #fir.reduce_attr %[[VAL_0]] -> %{{.*}} : {{.*}}) { do concurrent (i=1:5, j=1:5, k=1:5) local(tmp) reduce(+:sum) reduce(max:m) tmp = i + j + k sum = tmp + sum diff --git a/flang/test/Parser/OpenMP/allocators-unparse.f90 b/flang/test/Parser/OpenMP/allocators-unparse.f90 index 5cd0230471fc4..70feb7a6b527e 100644 --- a/flang/test/Parser/OpenMP/allocators-unparse.f90 +++ b/flang/test/Parser/OpenMP/allocators-unparse.f90 @@ -15,48 +15,62 @@ subroutine allocate() !$omp allocators allocate(align(32): arr2) allocate(arr2(5, 3)) + !$omp end allocators end subroutine allocate !CHECK: INTEGER, ALLOCATABLE :: arr1(:), arr2(:,:) -!CHECK-NEXT:!$OMP ALLOCATE ALLOCATE(omp_default_mem_alloc: arr1) +!CHECK-NEXT:!$OMP ALLOCATORS ALLOCATE(omp_default_mem_alloc: arr1) !CHECK-NEXT: ALLOCATE(arr1(5)) -!CHECK-NEXT:!$OMP ALLOCATE ALLOCATE(ALLOCATOR(omp_default_mem_alloc), ALIGN(32): arr1) ALL& -!CHECK-NEXT:!$OMP&OCATE(omp_default_mem_alloc: arr2) +!CHECK-NEXT:!$OMP ALLOCATORS ALLOCATE(ALLOCATOR(omp_default_mem_alloc), ALIGN(32): arr1) A& +!CHECK-NEXT:!$OMP&LLOCATE(omp_default_mem_alloc: arr2) !CHECK-NEXT: ALLOCATE(arr1(10), arr2(3,2)) -!CHECK-NEXT:!$OMP ALLOCATE ALLOCATE(ALIGN(32): arr2) +!CHECK-NEXT:!$OMP ALLOCATORS ALLOCATE(ALIGN(32): arr2) !CHECK-NEXT: ALLOCATE(arr2(5,3)) !PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPAllocatorsConstruct -!PARSE-TREE-NEXT: Verbatim -!PARSE-TREE-NEXT: OmpClauseList -> OmpClause -> Allocate -> OmpAllocateClause -!PARSE-TREE-NEXT: Modifier -> OmpAllocatorSimpleModifier -> Scalar -> Integer -> Expr -> Designator -> DataRef -> Name = 'omp_default_mem_alloc' -!PARSE-TREE-NEXT: OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'arr1' -!PARSE-TREE-NEXT: AllocateStmt -!PARSE-TREE-NEXT: Allocation -!PARSE-TREE-NEXT: AllocateObject -> Name = 'arr1' 
+!PARSE-TREE-NEXT: | OmpDirectiveSpecification +!PARSE-TREE-NEXT: | | OmpDirectiveName -> llvm::omp::Directive = allocators +!PARSE-TREE-NEXT: | | OmpClauseList -> OmpClause -> Allocate -> OmpAllocateClause +!PARSE-TREE-NEXT: | | | Modifier -> OmpAllocatorSimpleModifier -> Scalar -> Integer -> Expr -> Designator -> DataRef -> Name = 'omp_default_mem_alloc' +!PARSE-TREE-NEXT: | | | OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'arr1' +!PARSE-TREE-NEXT: | | Flags = None +!PARSE-TREE-NEXT: | Block +!PARSE-TREE-NEXT: | | ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> AllocateStmt +!PARSE-TREE-NEXT: | | | Allocation +!PARSE-TREE-NEXT: | | | | AllocateObject -> Name = 'arr1' !PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPAllocatorsConstruct -!PARSE-TREE-NEXT: Verbatim -!PARSE-TREE-NEXT: OmpClauseList -> OmpClause -> Allocate -> OmpAllocateClause -!PARSE-TREE-NEXT: Modifier -> OmpAllocatorComplexModifier -> Scalar -> Integer -> Expr -> Designator -> DataRef -> Name = 'omp_default_mem_alloc' -!PARSE-TREE-NEXT: Modifier -> OmpAlignModifier -> Scalar -> Integer -> Expr -> LiteralConstant -> IntLiteralConstant = '32' -!PARSE-TREE-NEXT: OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'arr1' -!PARSE-TREE-NEXT: OmpClause -> Allocate -> OmpAllocateClause -!PARSE-TREE-NEXT: Modifier -> OmpAllocatorSimpleModifier -> Scalar -> Integer -> Expr -> Designator -> DataRef -> Name = 'omp_default_mem_alloc' -!PARSE-TREE-NEXT: OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'arr2' -!PARSE-TREE-NEXT: AllocateStmt -!PARSE-TREE-NEXT: Allocation -!PARSE-TREE-NEXT: AllocateObject -> Name = 'arr1' -!PARSE-TREE-NEXT: AllocateShapeSpec -!PARSE-TREE-NEXT: Scalar -> Integer -> Expr -> LiteralConstant -> IntLiteralConstant = '10' -!PARSE-TREE-NEXT: Allocation -!PARSE-TREE-NEXT: AllocateObject -> Name = 'arr2' +!PARSE-TREE-NEXT: | OmpDirectiveSpecification +!PARSE-TREE-NEXT: | | OmpDirectiveName -> 
llvm::omp::Directive = allocators +!PARSE-TREE-NEXT: | | OmpClauseList -> OmpClause -> Allocate -> OmpAllocateClause +!PARSE-TREE-NEXT: | | | Modifier -> OmpAllocatorComplexModifier -> Scalar -> Integer -> Expr -> Designator -> DataRef -> Name = 'omp_default_mem_alloc' +!PARSE-TREE-NEXT: | | | Modifier -> OmpAlignModifier -> Scalar -> Integer -> Expr -> LiteralConstant -> IntLiteralConstant = '32' +!PARSE-TREE-NEXT: | | | OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'arr1' +!PARSE-TREE-NEXT: | | OmpClause -> Allocate -> OmpAllocateClause +!PARSE-TREE-NEXT: | | | Modifier -> OmpAllocatorSimpleModifier -> Scalar -> Integer -> Expr -> Designator -> DataRef -> Name = 'omp_default_mem_alloc' +!PARSE-TREE-NEXT: | | | OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'arr2' +!PARSE-TREE-NEXT: | | Flags = None +!PARSE-TREE-NEXT: | Block +!PARSE-TREE-NEXT: | | ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> AllocateStmt +!PARSE-TREE-NEXT: | | | Allocation +!PARSE-TREE-NEXT: | | | | AllocateObject -> Name = 'arr1' !PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPAllocatorsConstruct -!PARSE-TREE-NEXT: Verbatim -!PARSE-TREE-NEXT: OmpClauseList -> OmpClause -> Allocate -> OmpAllocateClause -!PARSE-TREE-NEXT: Modifier -> OmpAlignModifier -> Scalar -> Integer -> Expr -> LiteralConstant -> IntLiteralConstant = '32' -!PARSE-TREE-NEXT: OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'arr2' -!PARSE-TREE-NEXT: AllocateStmt -!PARSE-TREE-NEXT: Allocation -!PARSE-TREE-NEXT: AllocateObject -> Name = 'arr2' +!PARSE-TREE-NEXT: | OmpDirectiveSpecification +!PARSE-TREE-NEXT: | | OmpDirectiveName -> llvm::omp::Directive = allocators +!PARSE-TREE-NEXT: | | OmpClauseList -> OmpClause -> Allocate -> OmpAllocateClause +!PARSE-TREE-NEXT: | | | Modifier -> OmpAlignModifier -> Scalar -> Integer -> Expr -> LiteralConstant -> IntLiteralConstant = '32' +!PARSE-TREE-NEXT: | | | OmpObjectList -> OmpObject -> 
Designator -> DataRef -> Name = 'arr2' +!PARSE-TREE-NEXT: | | Flags = None +!PARSE-TREE-NEXT: | Block +!PARSE-TREE-NEXT: | | ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> AllocateStmt +!PARSE-TREE-NEXT: | | | Allocation +!PARSE-TREE-NEXT: | | | | AllocateObject -> Name = 'arr2' +!PARSE-TREE-NEXT: | | | | AllocateShapeSpec +!PARSE-TREE-NEXT: | | | | | Scalar -> Integer -> Expr -> LiteralConstant -> IntLiteralConstant = '5' +!PARSE-TREE-NEXT: | | | | AllocateShapeSpec +!PARSE-TREE-NEXT: | | | | | Scalar -> Integer -> Expr -> LiteralConstant -> IntLiteralConstant = '3' +!PARSE-TREE-NEXT: | OmpDirectiveSpecification +!PARSE-TREE-NEXT: | | OmpDirectiveName -> llvm::omp::Directive = allocators +!PARSE-TREE-NEXT: | | OmpClauseList -> +!PARSE-TREE-NEXT: | | Flags = None diff --git a/flang/test/Parser/OpenMP/dispatch.f90 b/flang/test/Parser/OpenMP/dispatch.f90 index 98cd6090334f3..4076c00331225 100644 --- a/flang/test/Parser/OpenMP/dispatch.f90 +++ b/flang/test/Parser/OpenMP/dispatch.f90 @@ -1,4 +1,4 @@ -! RUN: %flang_fc1 -fopenmp -fdebug-dump-parse-tree %s | FileCheck %s +! RUN: %flang_fc1 -fopenmp -fdebug-dump-parse-tree %s | FileCheck %s --check-prefix=PARSE-TREE ! 
RUN: %flang_fc1 -fopenmp -fdebug-unparse %s | FileCheck %s --check-prefix="UNPARSE" integer function func(a, b, c) @@ -12,40 +12,57 @@ subroutine sub(x) integer :: r type(c_ptr) :: x integer :: a = 14, b = 7, c = 21 + !UNPARSE: !$OMP DISPATCH DEVICE(3_4) NOWAIT NOCONTEXT(.false._4) NOVARIANTS(.true._4) -!CHECK: | | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPDispatchConstruct -!CHECK-NEXT: | | | OmpDispatchDirective -!CHECK: | | | | OmpClauseList -> OmpClause -> Device -> OmpDeviceClause -!CHECK-NEXT: | | | | | Scalar -> Integer -> Expr = '3_4' -!CHECK-NEXT: | | | | | | LiteralConstant -> IntLiteralConstant = '3' -!CHECK-NEXT: | | | | OmpClause -> Nowait -!CHECK-NEXT: | | | | OmpClause -> Nocontext -> Scalar -> Logical -> Expr = '.false._4' -!CHECK-NEXT: | | | | | LiteralConstant -> LogicalLiteralConstant -!CHECK-NEXT: | | | | | | bool = 'false' -!CHECK-NEXT: | | | | OmpClause -> Novariants -> Scalar -> Logical -> Expr = '.true._4' -!CHECK-NEXT: | | | | | EQ -!CHECK-NEXT: | | | | | | Expr = '1_4' -!CHECK-NEXT: | | | | | | | LiteralConstant -> IntLiteralConstant = '1' -!CHECK-NEXT: | | | | | | Expr = '1_4' -!CHECK-NEXT: | | | | | | | LiteralConstant -> IntLiteralConstant = '1' -!CHECK-NEXT: | | | Block - +!UNPARSE: r=func(a,b,c) +!UNPARSE: !$OMP END DISPATCH + +!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPDispatchConstruct +!PARSE-TREE: | OmpDirectiveSpecification +!PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = dispatch +!PARSE-TREE: | | OmpClauseList -> OmpClause -> Device -> OmpDeviceClause +!PARSE-TREE: | | | Scalar -> Integer -> Expr = '3_4' +!PARSE-TREE: | | | | LiteralConstant -> IntLiteralConstant = '3' +!PARSE-TREE: | | OmpClause -> Nowait +!PARSE-TREE: | | OmpClause -> Nocontext -> Scalar -> Logical -> Expr = '.false._4' +!PARSE-TREE: | | | LiteralConstant -> LogicalLiteralConstant +!PARSE-TREE: | | | | bool = 'false' +!PARSE-TREE: | | OmpClause -> Novariants -> Scalar -> 
Logical -> Expr = '.true._4' +!PARSE-TREE: | | | EQ +!PARSE-TREE: | | | | Expr = '1_4' +!PARSE-TREE: | | | | | LiteralConstant -> IntLiteralConstant = '1' +!PARSE-TREE: | | | | Expr = '1_4' +!PARSE-TREE: | | | | | LiteralConstant -> IntLiteralConstant = '1' +!PARSE-TREE: | | Flags = None +!PARSE-TREE: | Block +!PARSE-TREE: | | ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> AssignmentStmt +![...] +!PARSE-TREE: | OmpDirectiveSpecification +!PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = dispatch +!PARSE-TREE: | | OmpClauseList -> +!PARSE-TREE: | | Flags = None + !$omp dispatch device(3) nowait nocontext(.false.) novariants(1.eq.1) r = func(a, b, c) -!UNPARSE: !$OMP END DISPATCH -!CHECK: | | | OmpEndDispatchDirective !$omp end dispatch !! Test the "no end dispatch" option. -!UNPARSE: !$OMP DISPATCH DEVICE(3_4) IS_DEVICE_PTR(x) -!CHECK: | | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPDispatchConstruct -!CHECK-NEXT: | | | OmpDispatchDirective -!CHECK: | | | | OmpClause -> IsDevicePtr -> OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'x' +!UNPARSE: !$OMP DISPATCH DEVICE(3_4) IS_DEVICE_PTR(x) +!UNPARSE: r=func(a+1_4,b+2_4,c+3_4) + +!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPDispatchConstruct +!PARSE-TREE: | OmpDirectiveSpecification +!PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = dispatch +!PARSE-TREE: | | OmpClauseList -> OmpClause -> Device -> OmpDeviceClause +!PARSE-TREE: | | | Scalar -> Integer -> Expr = '3_4' +!PARSE-TREE: | | | | LiteralConstant -> IntLiteralConstant = '3' +!PARSE-TREE: | | OmpClause -> IsDevicePtr -> OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'x' +!PARSE-TREE: | | Flags = None +!PARSE-TREE: | Block +!PARSE-TREE: | | ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> AssignmentStmt +!PARSE-TREE-NOT: OmpDirectiveSpecification + !$omp dispatch device(3) is_device_ptr(x) r = func(a+1, b+2, c+3) 
-!CHECK-NOT: | | | OmpEndDispatchDirective end subroutine sub - - - diff --git a/flang/test/Semantics/OpenMP/allocators07.f90 b/flang/test/Semantics/OpenMP/allocators07.f90 new file mode 100644 index 0000000000000..a28f706965cb1 --- /dev/null +++ b/flang/test/Semantics/OpenMP/allocators07.f90 @@ -0,0 +1,27 @@ +!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=52 + +subroutine f00 + implicit none + integer, allocatable :: a(:) + + !$omp allocators allocate(a) +!ERROR: The body of the ALLOCATORS construct should be an ALLOCATE statement + continue +end + +subroutine f01 + implicit none + integer, allocatable :: a(:) + +!ERROR: The ALLOCATORS construct should contain a single ALLOCATE statement + !$omp allocators allocate(a) + !$omp end allocators +end + +subroutine f02 + implicit none + integer, allocatable :: a(:) + +!ERROR: The ALLOCATORS construct should contain a single ALLOCATE statement + !$omp allocators allocate(a) +end diff --git a/flang/test/Semantics/OpenMP/dispatch.f90 b/flang/test/Semantics/OpenMP/dispatch.f90 index 7dfbeecb2fc1d..af0d6856ab948 100644 --- a/flang/test/Semantics/OpenMP/dispatch.f90 +++ b/flang/test/Semantics/OpenMP/dispatch.f90 @@ -1,24 +1,20 @@ -! RUN: %python %S/../test_errors.py %s %flang -fopenmp +! 
RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=52 subroutine sb1 integer :: r r = 1 - !ERROR: The DISPATCH construct does not contain a SUBROUTINE or FUNCTION !$omp dispatch nowait +!ERROR: The body of the DISPATCH construct should be a function or a subroutine call print *,r end subroutine + subroutine sb2 - integer :: r -!ERROR: The DISPATCH construct is empty or contains more than one statement +!ERROR: The DISPATCH construct should contain a single function or subroutine call !$omp dispatch - call foo() - r = bar() !$omp end dispatch -contains - subroutine foo - end subroutine foo - function bar - integer :: bar - bar = 2 - end function +end subroutine + +subroutine sb3 +!ERROR: The DISPATCH construct should contain a single function or subroutine call + !$omp dispatch end subroutine diff --git a/flang/test/Transforms/DoConcurrent/reduce_add.mlir b/flang/test/Transforms/DoConcurrent/reduce_add.mlir new file mode 100644 index 0000000000000..1ea3e3e527335 --- /dev/null +++ b/flang/test/Transforms/DoConcurrent/reduce_add.mlir @@ -0,0 +1,73 @@ +// Tests mapping reductions from fir to OpenMP. 
+ +// RUN: fir-opt --omp-do-concurrent-conversion="map-to=host" %s | FileCheck %s + +fir.declare_reduction @add_reduction_i32 : i32 init { +^bb0(%arg0: i32): + %c0_i32 = arith.constant 0 : i32 + fir.yield(%c0_i32 : i32) +} combiner { +^bb0(%arg0: i32, %arg1: i32): + %0 = arith.addi %arg0, %arg1 : i32 + fir.yield(%0 : i32) +} + +func.func @_QPdo_concurrent_reduce() { + %3 = fir.alloca i32 {bindc_name = "s", uniq_name = "_QFdo_concurrent_reduceEs"} + %4:2 = hlfir.declare %3 {uniq_name = "_QFdo_concurrent_reduceEs"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %c1 = arith.constant 1 : index + fir.do_concurrent { + %7 = fir.alloca i32 {bindc_name = "i"} + %8:2 = hlfir.declare %7 {uniq_name = "_QFdo_concurrent_reduceEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) + fir.do_concurrent.loop (%arg0) = (%c1) to (%c1) step (%c1) reduce(@add_reduction_i32 #fir.reduce_attr %4#0 -> %arg1 : !fir.ref) { + %9 = fir.convert %arg0 : (index) -> i32 + fir.store %9 to %8#0 : !fir.ref + %10:2 = hlfir.declare %arg1 {uniq_name = "_QFdo_concurrent_reduceEs"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %11 = fir.load %10#0 : !fir.ref + %c1_i32_0 = arith.constant 1 : i32 + %12 = arith.addi %11, %c1_i32_0 : i32 + hlfir.assign %12 to %10#0 : i32, !fir.ref + } + } + return +} + +// CHECK-LABEL: omp.declare_reduction @add_reduction_i32.omp : i32 init { +// CHECK: ^bb0(%[[VAL_0:.*]]: i32): +// CHECK: %[[VAL_1:.*]] = arith.constant 0 : i32 +// CHECK: omp.yield(%[[VAL_1]] : i32) + +// CHECK-LABEL: } combiner { +// CHECK: ^bb0(%[[VAL_0:.*]]: i32, %[[VAL_1:.*]]: i32): +// CHECK: %[[VAL_2:.*]] = arith.addi %[[VAL_0]], %[[VAL_1]] : i32 +// CHECK: omp.yield(%[[VAL_2]] : i32) +// CHECK: } + +// CHECK-LABEL: func.func @_QPdo_concurrent_reduce() { +// CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i"} +// CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFdo_concurrent_reduceEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +// CHECK: %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "s", uniq_name = 
"_QFdo_concurrent_reduceEs"} +// CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFdo_concurrent_reduceEs"} : (!fir.ref) -> (!fir.ref, !fir.ref) +// CHECK: %[[VAL_4:.*]] = arith.constant 1 : index +// CHECK: omp.parallel { +// CHECK: %[[VAL_5:.*]] = fir.alloca i32 {bindc_name = "i"} +// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFdo_concurrent_reduceEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) + +// CHECK: omp.wsloop reduction(@add_reduction_i32.omp %[[VAL_3]]#0 -> %[[VAL_7:.*]] : !fir.ref) { +// CHECK: omp.loop_nest (%[[VAL_8:.*]]) : index = (%[[VAL_4]]) to (%[[VAL_4]]) inclusive step (%[[VAL_4]]) { +// CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (index) -> i32 +// CHECK: fir.store %[[VAL_9]] to %[[VAL_6]]#0 : !fir.ref +// CHECK: %[[VAL_10:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFdo_concurrent_reduceEs"} : (!fir.ref) -> (!fir.ref, !fir.ref) +// CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_10]]#0 : !fir.ref +// CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 +// CHECK: %[[VAL_13:.*]] = arith.addi %[[VAL_11]], %[[VAL_12]] : i32 +// CHECK: hlfir.assign %[[VAL_13]] to %[[VAL_10]]#0 : i32, !fir.ref +// CHECK: omp.yield +// CHECK: } +// CHECK: } +// CHECK: omp.terminator +// CHECK: } + +// CHECK: return +// CHECK: } + diff --git a/flang/test/Transforms/DoConcurrent/reduce_all_regions.mlir b/flang/test/Transforms/DoConcurrent/reduce_all_regions.mlir new file mode 100644 index 0000000000000..3d5b8bf22af75 --- /dev/null +++ b/flang/test/Transforms/DoConcurrent/reduce_all_regions.mlir @@ -0,0 +1,70 @@ +// Tests mapping reductions from fir to OpenMP (all regions). 
+ +// RUN: fir-opt --omp-do-concurrent-conversion="map-to=host" %s | FileCheck %s + +fir.declare_reduction @add_reduction_i32 : i32 init { +^bb0(%arg0: i32): + fir.yield(%arg0 : i32) +} combiner { +^bb0(%arg0: i32, %arg1: i32): + fir.yield(%arg0 : i32) +} atomic { +^bb0(%arg0: !fir.ref, %arg1: !fir.ref): + fir.yield(%arg0 : !fir.ref) +} cleanup { +^bb0(%arg0: i32): + fir.yield +} + +func.func @_QPdo_concurrent_reduce() { + %3 = fir.alloca i32 {bindc_name = "s", uniq_name = "_QFdo_concurrent_reduceEs"} + %4:2 = hlfir.declare %3 {uniq_name = "_QFdo_concurrent_reduceEs"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %c1 = arith.constant 1 : index + fir.do_concurrent { + %7 = fir.alloca i32 {bindc_name = "i"} + %8:2 = hlfir.declare %7 {uniq_name = "_QFdo_concurrent_reduceEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) + fir.do_concurrent.loop (%arg0) = (%c1) to (%c1) step (%c1) reduce(@add_reduction_i32 #fir.reduce_attr %4#0 -> %arg1 : !fir.ref) { + %9 = fir.convert %arg0 : (index) -> i32 + fir.store %9 to %8#0 : !fir.ref + } + } + return +} + +// CHECK-LABEL: omp.declare_reduction @add_reduction_i32.omp : i32 init { +// CHECK: ^bb0(%[[VAL_0:.*]]: i32): +// CHECK: omp.yield(%[[VAL_0]] : i32) + +// CHECK-LABEL: } combiner { +// CHECK: ^bb0(%[[VAL_0:.*]]: i32, %[[VAL_1:.*]]: i32): +// CHECK: omp.yield(%[[VAL_0]] : i32) + +// CHECK-LABEL: } atomic { +// CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref, %[[VAL_1:.*]]: !fir.ref): +// CHECK: omp.yield(%[[VAL_0]] : !fir.ref) + +// CHECK-LABEL: } cleanup { +// CHECK: ^bb0(%[[VAL_0:.*]]: i32): +// CHECK: omp.yield +// CHECK: } + +// CHECK-LABEL: func.func @_QPdo_concurrent_reduce() { +// CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i"} +// CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFdo_concurrent_reduceEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +// CHECK: %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "s", uniq_name = "_QFdo_concurrent_reduceEs"} +// CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = 
"_QFdo_concurrent_reduceEs"} : (!fir.ref) -> (!fir.ref, !fir.ref) +// CHECK: %[[VAL_4:.*]] = arith.constant 1 : index +// CHECK: omp.parallel { +// CHECK: %[[VAL_5:.*]] = fir.alloca i32 {bindc_name = "i"} +// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFdo_concurrent_reduceEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +// CHECK: omp.wsloop reduction(@add_reduction_i32.omp %[[VAL_3]]#0 -> %[[VAL_7:.*]] : !fir.ref) { +// CHECK: omp.loop_nest (%[[VAL_8:.*]]) : index = (%[[VAL_4]]) to (%[[VAL_4]]) inclusive step (%[[VAL_4]]) { +// CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (index) -> i32 +// CHECK: fir.store %[[VAL_9]] to %[[VAL_6]]#0 : !fir.ref +// CHECK: omp.yield +// CHECK: } +// CHECK: } +// CHECK: omp.terminator +// CHECK: } +// CHECK: return +// CHECK: } diff --git a/flang/test/Transforms/DoConcurrent/reduce_local.mlir b/flang/test/Transforms/DoConcurrent/reduce_local.mlir new file mode 100644 index 0000000000000..0f667109e6e83 --- /dev/null +++ b/flang/test/Transforms/DoConcurrent/reduce_local.mlir @@ -0,0 +1,83 @@ +// Tests mapping reductions and local from fir to OpenMP. 
+ +// RUN: fir-opt --omp-do-concurrent-conversion="map-to=host" %s | FileCheck %s + +fir.declare_reduction @add_reduction_i32 : i32 init { +^bb0(%arg0: i32): + %c0_i32 = arith.constant 0 : i32 + fir.yield(%c0_i32 : i32) +} combiner { +^bb0(%arg0: i32, %arg1: i32): + %0 = arith.addi %arg0, %arg1 : i32 + fir.yield(%0 : i32) +} + fir.local {type = local} @_QFdo_concurrent_reduceEl_private_i32 : i32 + func.func @_QPdo_concurrent_reduce() { + %3 = fir.alloca i32 {bindc_name = "l", uniq_name = "_QFdo_concurrent_reduceEl"} + %4:2 = hlfir.declare %3 {uniq_name = "_QFdo_concurrent_reduceEl"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %5 = fir.alloca i32 {bindc_name = "s", uniq_name = "_QFdo_concurrent_reduceEs"} + %6:2 = hlfir.declare %5 {uniq_name = "_QFdo_concurrent_reduceEs"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %c1 = arith.constant 1 : index + fir.do_concurrent { + %9 = fir.alloca i32 {bindc_name = "i"} + %10:2 = hlfir.declare %9 {uniq_name = "_QFdo_concurrent_reduceEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) + fir.do_concurrent.loop (%arg0) = (%c1) to (%c1) step (%c1) local(@_QFdo_concurrent_reduceEl_private_i32 %4#0 -> %arg1 : !fir.ref) reduce(@add_reduction_i32 #fir.reduce_attr %6#0 -> %arg2 : !fir.ref) { + %11 = fir.convert %arg0 : (index) -> i32 + fir.store %11 to %10#0 : !fir.ref + %12:2 = hlfir.declare %arg1 {uniq_name = "_QFdo_concurrent_reduceEl"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %13:2 = hlfir.declare %arg2 {uniq_name = "_QFdo_concurrent_reduceEs"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %c1_i32_0 = arith.constant 1 : i32 + hlfir.assign %c1_i32_0 to %12#0 : i32, !fir.ref + %14 = fir.load %13#0 : !fir.ref + %15 = fir.load %12#0 : !fir.ref + %16 = arith.addi %14, %15 : i32 + hlfir.assign %16 to %13#0 : i32, !fir.ref + } + } + return +} + +// CHECK-LABEL: omp.declare_reduction @add_reduction_i32.omp : i32 init { +// CHECK: ^bb0(%[[VAL_0:.*]]: i32): +// CHECK: %[[VAL_1:.*]] = arith.constant 0 : i32 +// CHECK: omp.yield(%[[VAL_1]] : i32) + +// CHECK-LABEL: } 
combiner { +// CHECK: ^bb0(%[[VAL_0:.*]]: i32, %[[VAL_1:.*]]: i32): +// CHECK: %[[VAL_2:.*]] = arith.addi %[[VAL_0]], %[[VAL_1]] : i32 +// CHECK: omp.yield(%[[VAL_2]] : i32) +// CHECK: } + +// CHECK: omp.private {type = private} @_QFdo_concurrent_reduceEl_private_i32.omp : i32 + +// CHECK-LABEL: func.func @_QPdo_concurrent_reduce() { +// CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i"} +// CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFdo_concurrent_reduceEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +// CHECK: %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "l", uniq_name = "_QFdo_concurrent_reduceEl"} +// CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFdo_concurrent_reduceEl"} : (!fir.ref) -> (!fir.ref, !fir.ref) +// CHECK: %[[VAL_4:.*]] = fir.alloca i32 {bindc_name = "s", uniq_name = "_QFdo_concurrent_reduceEs"} +// CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = "_QFdo_concurrent_reduceEs"} : (!fir.ref) -> (!fir.ref, !fir.ref) +// CHECK: %[[VAL_6:.*]] = arith.constant 1 : index +// CHECK: omp.parallel { +// CHECK: %[[VAL_7:.*]] = fir.alloca i32 {bindc_name = "i"} +// CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFdo_concurrent_reduceEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +// CHECK: omp.wsloop private(@_QFdo_concurrent_reduceEl_private_i32.omp %[[VAL_3]]#0 -> %[[VAL_9:.*]] : !fir.ref) reduction(@add_reduction_i32.omp %[[VAL_5]]#0 -> %[[VAL_10:.*]] : !fir.ref) { +// CHECK: omp.loop_nest (%[[VAL_11:.*]]) : index = (%[[VAL_6]]) to (%[[VAL_6]]) inclusive step (%[[VAL_6]]) { +// CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (index) -> i32 +// CHECK: fir.store %[[VAL_12]] to %[[VAL_8]]#0 : !fir.ref +// CHECK: %[[VAL_13:.*]]:2 = hlfir.declare %[[VAL_9]] {uniq_name = "_QFdo_concurrent_reduceEl"} : (!fir.ref) -> (!fir.ref, !fir.ref) +// CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFdo_concurrent_reduceEs"} : (!fir.ref) -> (!fir.ref, !fir.ref) +// CHECK: %[[VAL_15:.*]] 
= arith.constant 1 : i32 +// CHECK: hlfir.assign %[[VAL_15]] to %[[VAL_13]]#0 : i32, !fir.ref +// CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref +// CHECK: %[[VAL_17:.*]] = fir.load %[[VAL_13]]#0 : !fir.ref +// CHECK: %[[VAL_18:.*]] = arith.addi %[[VAL_16]], %[[VAL_17]] : i32 +// CHECK: hlfir.assign %[[VAL_18]] to %[[VAL_14]]#0 : i32, !fir.ref +// CHECK: omp.yield +// CHECK: } +// CHECK: } +// CHECK: omp.terminator +// CHECK: } +// CHECK: return +// CHECK: } + diff --git a/flang/test/Transforms/do_concurrent-to-do_loop-unodered.fir b/flang/test/Transforms/do_concurrent-to-do_loop-unodered.fir index d9ef36b175598..c550ab8a97d4c 100644 --- a/flang/test/Transforms/do_concurrent-to-do_loop-unodered.fir +++ b/flang/test/Transforms/do_concurrent-to-do_loop-unodered.fir @@ -86,7 +86,7 @@ func.func @dc_2d_reduction(%i_lb: index, %i_ub: index, %i_st: index, %j = fir.alloca i32 fir.do_concurrent.loop (%i_iv, %j_iv) = (%i_lb, %j_lb) to (%i_ub, %j_ub) step (%i_st, %j_st) - reduce(#fir.reduce_attr -> %sum : !fir.ref) { + reduce(@add_reduction_i32 #fir.reduce_attr %sum -> %sum_arg : !fir.ref) { %0 = fir.convert %i_iv : (index) -> i32 fir.store %0 to %i : !fir.ref diff --git a/flang/test/lib/OpenACC/TestOpenACCInterfaces.cpp b/flang/test/lib/OpenACC/TestOpenACCInterfaces.cpp index e72b96fe7cd10..de6cb1d09080d 100644 --- a/flang/test/lib/OpenACC/TestOpenACCInterfaces.cpp +++ b/flang/test/lib/OpenACC/TestOpenACCInterfaces.cpp @@ -58,8 +58,18 @@ struct TestFIROpenACCInterfaces llvm::errs() << "Visiting: " << *op << "\n"; llvm::errs() << "\tVar: " << var << "\n"; - if (auto ptrTy = dyn_cast_if_present(typeOfVar)) { + if (mlir::isa(typeOfVar) && + mlir::isa(typeOfVar)) { + llvm::errs() << "\tPointer-like and Mappable: " << typeOfVar << "\n"; + } else if (mlir::isa(typeOfVar)) { llvm::errs() << "\tPointer-like: " << typeOfVar << "\n"; + } else { + assert( + mlir::isa(typeOfVar) && "expected mappable"); + llvm::errs() << "\tMappable: " << typeOfVar << "\n"; + } + + if (auto 
ptrTy = dyn_cast_if_present(typeOfVar)) { // If the pointee is not mappable, print details about it. Otherwise, // we defer to the mappable printing below to print those details. if (!mappableTy) { @@ -72,8 +82,6 @@ struct TestFIROpenACCInterfaces } if (mappableTy) { - llvm::errs() << "\tMappable: " << mappableTy << "\n"; - acc::VariableTypeCategory typeCategory = mappableTy.getTypeCategory(var); llvm::errs() << "\t\tType category: " << typeCategory << "\n"; diff --git a/libc/include/llvm-libc-macros/wchar-macros.h b/libc/include/llvm-libc-macros/wchar-macros.h index 5b211f5276b62..2a0cabd6133a4 100644 --- a/libc/include/llvm-libc-macros/wchar-macros.h +++ b/libc/include/llvm-libc-macros/wchar-macros.h @@ -9,8 +9,10 @@ #ifndef LLVM_LIBC_MACROS_WCHAR_MACROS_H #define LLVM_LIBC_MACROS_WCHAR_MACROS_H +#include "../llvm-libc-types/wint_t.h" + #ifndef WEOF -#define WEOF 0xffffffffu +#define WEOF ((wint_t)(0xffffffffu)) #endif #endif // LLVM_LIBC_MACROS_WCHAR_MACROS_H diff --git a/libc/shared/math.h b/libc/shared/math.h index 4ddc29c7ae834..9db53b69041d0 100644 --- a/libc/shared/math.h +++ b/libc/shared/math.h @@ -12,5 +12,6 @@ #include "libc_common.h" #include "math/expf.h" +#include "math/expf16.h" #endif // LLVM_LIBC_SHARED_MATH_H diff --git a/libc/shared/math/expf16.h b/libc/shared/math/expf16.h new file mode 100644 index 0000000000000..a6a3e89e680d4 --- /dev/null +++ b/libc/shared/math/expf16.h @@ -0,0 +1,29 @@ +//===-- Shared expf16 function ----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SHARED_MATH_EXPF16_H +#define LLVM_LIBC_SHARED_MATH_EXPF16_H + +#include "include/llvm-libc-macros/float16-macros.h" +#include "shared/libc_common.h" + +#ifdef LIBC_TYPES_HAS_FLOAT16 + +#include "src/__support/math/expf16.h" + +namespace LIBC_NAMESPACE_DECL { +namespace shared { + +using math::expf16; + +} // namespace shared +} // namespace LIBC_NAMESPACE_DECL + +#endif // LIBC_TYPES_HAS_FLOAT16 + +#endif // LLVM_LIBC_SHARED_MATH_EXPF16_H diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt index 7e85136c08851..294d68474bd53 100644 --- a/libc/src/__support/CMakeLists.txt +++ b/libc/src/__support/CMakeLists.txt @@ -180,6 +180,19 @@ add_header_library( libc.src.__support.common ) +add_header_library( + wcs_to_integer + HDRS + wcs_to_integer.h + DEPENDS + .wctype_utils + .str_to_num_result + libc.hdr.errno_macros + libc.src.__support.CPP.limits + libc.src.__support.CPP.type_traits + libc.src.__support.common +) + add_header_library( integer_to_string HDRS diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt index 66c1d19a1cab0..4c73fba6613fa 100644 --- a/libc/src/__support/math/CMakeLists.txt +++ b/libc/src/__support/math/CMakeLists.txt @@ -22,3 +22,36 @@ add_header_library( libc.src.__support.macros.config libc.src.__support.macros.optimization ) + +add_header_library( + expf16_utils + HDRS + expf16_utils.h + DEPENDS + libc.src.__support.CPP.array + libc.src.__support.FPUtil.nearest_integer + libc.src.__support.FPUtil.polyeval + libc.src.__support.macros.attributes + libc.include.llvm-libc-macros.float16_macros +) + +add_header_library( + expf16 + HDRS + expf16.h + DEPENDS + .expf16_utils + libc.hdr.errno_macros + libc.hdr.fenv_macros + libc.src.__support.CPP.array + libc.src.__support.FPUtil.cast + 
libc.src.__support.FPUtil.except_value_utils + libc.src.__support.FPUtil.fenv_impl + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.nearest_integer + libc.src.__support.FPUtil.polyeval + libc.src.__support.FPUtil.rounding_mode + libc.src.__support.macros.optimization + libc.include.llvm-libc-macros.float16_macros +) diff --git a/libc/src/__support/math/expf16.h b/libc/src/__support/math/expf16.h new file mode 100644 index 0000000000000..ded28c7dba500 --- /dev/null +++ b/libc/src/__support/math/expf16.h @@ -0,0 +1,141 @@ +//===-- Implementation header for expf16 ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXPF16_H +#define LLVM_LIBC_SRC___SUPPORT_MATH_EXPF16_H + +#include "include/llvm-libc-macros/float16-macros.h" + +#ifdef LIBC_TYPES_HAS_FLOAT16 + +#include "hdr/errno_macros.h" +#include "hdr/fenv_macros.h" +#include "src/__support/FPUtil/FEnvImpl.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/PolyEval.h" +#include "src/__support/FPUtil/cast.h" +#include "src/__support/FPUtil/except_value_utils.h" +#include "src/__support/FPUtil/rounding_mode.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" +#include "src/__support/macros/optimization.h" + +#include "expf16_utils.h" + +namespace LIBC_NAMESPACE_DECL { + +namespace math { + +static constexpr float16 expf16(float16 x) { +#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS + constexpr fputil::ExceptValues EXPF16_EXCEPTS_LO = {{ + // (input, RZ output, RU offset, RD offset, RN offset) + // x = 0x1.de4p-8, expf16(x) = 0x1.01cp+0 (RZ) + {0x1f79U, 0x3c07U, 1U, 0U, 0U}, + // x = 0x1.73cp-6, 
expf16(x) = 0x1.05cp+0 (RZ) + {0x25cfU, 0x3c17U, 1U, 0U, 0U}, + }}; + + constexpr fputil::ExceptValues EXPF16_EXCEPTS_HI = {{ + // (input, RZ output, RU offset, RD offset, RN offset) + // x = 0x1.c34p+0, expf16(x) = 0x1.74cp+2 (RZ) + {0x3f0dU, 0x45d3U, 1U, 0U, 1U}, + // x = -0x1.488p-5, expf16(x) = 0x1.ebcp-1 (RZ) + {0xa922U, 0x3bafU, 1U, 0U, 0U}, + // x = -0x1.55p-5, expf16(x) = 0x1.ebp-1 (RZ) + {0xa954U, 0x3bacU, 1U, 0U, 0U}, + }}; +#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS + + using FPBits = fputil::FPBits; + FPBits x_bits(x); + + uint16_t x_u = x_bits.uintval(); + uint16_t x_abs = x_u & 0x7fffU; + + // When 0 < |x| <= 2^(-5), or |x| >= 12, or x is NaN. + if (LIBC_UNLIKELY(x_abs <= 0x2800U || x_abs >= 0x4a00U)) { + // exp(NaN) = NaN + if (x_bits.is_nan()) { + if (x_bits.is_signaling_nan()) { + fputil::raise_except_if_required(FE_INVALID); + return FPBits::quiet_nan().get_val(); + } + + return x; + } + + // When x >= 12. + if (x_bits.is_pos() && x_abs >= 0x4a00U) { + // exp(+inf) = +inf + if (x_bits.is_inf()) + return FPBits::inf().get_val(); + + switch (fputil::quick_get_round()) { + case FE_TONEAREST: + case FE_UPWARD: + fputil::set_errno_if_required(ERANGE); + fputil::raise_except_if_required(FE_OVERFLOW); + return FPBits::inf().get_val(); + default: + return FPBits::max_normal().get_val(); + } + } + + // When x <= -18. + if (x_u >= 0xcc80U) { + // exp(-inf) = +0 + if (x_bits.is_inf()) + return FPBits::zero().get_val(); + + fputil::set_errno_if_required(ERANGE); + fputil::raise_except_if_required(FE_UNDERFLOW | FE_INEXACT); + + switch (fputil::quick_get_round()) { + case FE_UPWARD: + return FPBits::min_subnormal().get_val(); + default: + return FPBits::zero().get_val(); + } + } + + // When 0 < |x| <= 2^(-5). 
+ if (x_abs <= 0x2800U && !x_bits.is_zero()) { +#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS + if (auto r = EXPF16_EXCEPTS_LO.lookup(x_u); LIBC_UNLIKELY(r.has_value())) + return r.value(); +#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS + + float xf = x; + // Degree-3 minimax polynomial generated by Sollya with the following + // commands: + // > display = hexadecimal; + // > P = fpminimax(expm1(x)/x, 2, [|SG...|], [-2^-5, 2^-5]); + // > 1 + x * P; + return fputil::cast( + fputil::polyeval(xf, 0x1p+0f, 0x1p+0f, 0x1.0004p-1f, 0x1.555778p-3f)); + } + } + +#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS + if (auto r = EXPF16_EXCEPTS_HI.lookup(x_u); LIBC_UNLIKELY(r.has_value())) + return r.value(); +#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS + + // exp(x) = exp(hi + mid) * exp(lo) + auto [exp_hi_mid, exp_lo] = exp_range_reduction(x); + return fputil::cast(exp_hi_mid * exp_lo); +} + +} // namespace math + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LIBC_TYPES_HAS_FLOAT16 + +#endif // LLVM_LIBC_SRC___SUPPORT_MATH_EXPF16_H diff --git a/libc/src/__support/math/expf16_utils.h b/libc/src/__support/math/expf16_utils.h new file mode 100644 index 0000000000000..bebb72b09b886 --- /dev/null +++ b/libc/src/__support/math/expf16_utils.h @@ -0,0 +1,89 @@ +//===-- Common utils for expf16 functions -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXPF16_UTILS_H +#define LLVM_LIBC_SRC___SUPPORT_MATH_EXPF16_UTILS_H + +#include "include/llvm-libc-macros/float16-macros.h" + +#ifdef LIBC_TYPES_HAS_FLOAT16 + +#include "src/__support/CPP/array.h" +#include "src/__support/FPUtil/PolyEval.h" +#include "src/__support/FPUtil/nearest_integer.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +// Generated by Sollya with the following commands: +// > display = hexadecimal; +// > for i from -18 to 12 do print(round(exp(i), SG, RN)); +static constexpr cpp::array EXP_HI = { + 0x1.05a628p-26f, 0x1.639e32p-25f, 0x1.e355bcp-24f, 0x1.4875cap-22f, + 0x1.be6c7p-21f, 0x1.2f6054p-19f, 0x1.9c54c4p-18f, 0x1.183542p-16f, + 0x1.7cd79cp-15f, 0x1.02cf22p-13f, 0x1.5fc21p-12f, 0x1.de16bap-11f, + 0x1.44e52p-9f, 0x1.b993fep-8f, 0x1.2c155cp-6f, 0x1.97db0cp-5f, + 0x1.152aaap-3f, 0x1.78b564p-2f, 0x1p+0f, 0x1.5bf0a8p+1f, + 0x1.d8e64cp+2f, 0x1.415e5cp+4f, 0x1.b4c902p+5f, 0x1.28d38ap+7f, + 0x1.936dc6p+8f, 0x1.122886p+10f, 0x1.749ea8p+11f, 0x1.fa7158p+12f, + 0x1.5829dcp+14f, 0x1.d3c448p+15f, 0x1.3de166p+17f, +}; + +// Generated by Sollya with the following commands: +// > display = hexadecimal; +// > for i from 0 to 7 do print(round(exp(i * 2^-3), SG, RN)); +static constexpr cpp::array EXP_MID = { + 0x1p+0f, 0x1.221604p+0f, 0x1.48b5e4p+0f, 0x1.747a52p+0f, + 0x1.a61298p+0f, 0x1.de455ep+0f, 0x1.0ef9dcp+1f, 0x1.330e58p+1f, +}; + +struct ExpRangeReduction { + float exp_hi_mid; + float exp_lo; +}; + +static constexpr ExpRangeReduction exp_range_reduction(float16 x) { + // For -18 < x < 12, to compute exp(x), we perform the following range + // reduction: find hi, mid, lo, such that: + // x = hi + mid + lo, in which + // hi is an integer, + // mid * 2^3 is an integer, + // -2^(-4) <= lo < 2^(-4). 
+ // In particular, + // hi + mid = round(x * 2^3) * 2^(-3). + // Then, + // exp(x) = exp(hi + mid + lo) = exp(hi) * exp(mid) * exp(lo). + // We store exp(hi) and exp(mid) in the lookup tables EXP_HI and EXP_MID + // respectively. exp(lo) is computed using a degree-3 minimax polynomial + // generated by Sollya. + + float xf = x; + float kf = fputil::nearest_integer(xf * 0x1.0p+3f); + int x_hi_mid = static_cast(kf); + int x_hi = x_hi_mid >> 3; + int x_mid = x_hi_mid & 0x7; + // lo = x - (hi + mid) = round(x * 2^3) * (-2^(-3)) + x + float lo = fputil::multiply_add(kf, -0x1.0p-3f, xf); + + float exp_hi = EXP_HI[x_hi + 18]; + float exp_mid = EXP_MID[x_mid]; + // Degree-3 minimax polynomial generated by Sollya with the following + // commands: + // > display = hexadecimal; + // > P = fpminimax(expm1(x)/x, 2, [|SG...|], [-2^-4, 2^-4]); + // > 1 + x * P; + float exp_lo = + fputil::polyeval(lo, 0x1p+0f, 0x1p+0f, 0x1.001p-1f, 0x1.555ddep-3f); + return {exp_hi * exp_mid, exp_lo}; +} + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LIBC_TYPES_HAS_FLOAT16 + +#endif // LLVM_LIBC_SRC___SUPPORT_MATH_EXPF16_UTILS_H diff --git a/libc/src/__support/str_to_float.h b/libc/src/__support/str_to_float.h index 0748e1cb8a8b4..a7dd7ce0ae25a 100644 --- a/libc/src/__support/str_to_float.h +++ b/libc/src/__support/str_to_float.h @@ -1135,7 +1135,7 @@ LIBC_INLINE StrToNumResult strtofloatingpoint(const char *__restrict src) { int error = 0; - size_t index = static_cast(first_non_whitespace(src) - src); + size_t index = first_non_whitespace(src); if (src[index] == '+' || src[index] == '-') { sign = src[index]; diff --git a/libc/src/__support/str_to_integer.h b/libc/src/__support/str_to_integer.h index 76a99a8948941..d332c929f2c31 100644 --- a/libc/src/__support/str_to_integer.h +++ b/libc/src/__support/str_to_integer.h @@ -29,17 +29,16 @@ namespace LIBC_NAMESPACE_DECL { namespace internal { -// Returns a pointer to the first character in src that is not a whitespace +// Returns the idx to the 
first character in src that is not a whitespace // character (as determined by isspace()) -// TODO: Change from returning a pointer to returning a length. -LIBC_INLINE const char * +LIBC_INLINE size_t first_non_whitespace(const char *__restrict src, size_t src_len = cpp::numeric_limits::max()) { size_t src_cur = 0; while (src_cur < src_len && internal::isspace(src[src_cur])) { ++src_cur; } - return src + src_cur; + return src_cur; } // checks if the next 3 characters of the string pointer are the start of a @@ -96,7 +95,7 @@ strtointeger(const char *__restrict src, int base, if (base < 0 || base == 1 || base > 36) return {0, 0, EINVAL}; - src_cur = static_cast(first_non_whitespace(src, src_len) - src); + src_cur = first_non_whitespace(src, src_len); char result_sign = '+'; if (src[src_cur] == '+' || src[src_cur] == '-') { diff --git a/libc/src/__support/wcs_to_integer.h b/libc/src/__support/wcs_to_integer.h new file mode 100644 index 0000000000000..4254bd860f77a --- /dev/null +++ b/libc/src/__support/wcs_to_integer.h @@ -0,0 +1,155 @@ +//===-- Widechar string to integer conversion utils -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_WCS_TO_INTEGER_H +#define LLVM_LIBC_SRC___SUPPORT_WCS_TO_INTEGER_H + +#include "hdr/errno_macros.h" // For ERANGE +#include "src/__support/CPP/limits.h" +#include "src/__support/CPP/type_traits.h" +#include "src/__support/CPP/type_traits/make_unsigned.h" +#include "src/__support/big_int.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" +#include "src/__support/str_to_num_result.h" +#include "src/__support/uint128.h" +#include "src/__support/wctype_utils.h" + +namespace LIBC_NAMESPACE_DECL { +namespace internal { + +// Returns the idx of the first character in src that is not a whitespace +// character (as determined by iswspace()) +LIBC_INLINE size_t +first_non_whitespace(const wchar_t *__restrict src, + size_t src_len = cpp::numeric_limits::max()) { + size_t src_cur = 0; + while (src_cur < src_len && internal::iswspace(src[src_cur])) { + ++src_cur; + } + return src_cur; +} + +// checks if the next 3 characters of the string pointer are the start of a +// hexadecimal number. Does not advance the string pointer. +LIBC_INLINE bool +is_hex_start(const wchar_t *__restrict src, + size_t src_len = cpp::numeric_limits::max()) { + if (src_len < 3) + return false; + return *src == L'0' && towlower(*(src + 1)) == L'x' && iswalnum(*(src + 2)) && + b36_wchar_to_int(*(src + 2)) < 16; +} + +// Takes the address of the string pointer and parses the base from the start of +// it. +LIBC_INLINE int infer_base(const wchar_t *__restrict src, size_t src_len) { + // A hexadecimal number is defined as "the prefix 0x or 0X followed by a + // sequence of the decimal digits and the letters a (or A) through f (or F) + // with values 10 through 15 respectively." 
(C standard 6.4.4.1) + if (is_hex_start(src, src_len)) + return 16; + // An octal number is defined as "the prefix 0 optionally followed by a + // sequence of the digits 0 through 7 only" (C standard 6.4.4.1) and so any + // number that starts with 0, including just 0, is an octal number. + if (src_len > 0 && src[0] == L'0') + return 8; + // A decimal number is defined as beginning "with a nonzero digit and + // consist[ing] of a sequence of decimal digits." (C standard 6.4.4.1) + return 10; +} + +template +LIBC_INLINE StrToNumResult +wcstointeger(const wchar_t *__restrict src, int base, + const size_t src_len = cpp::numeric_limits::max()) { + using ResultType = make_integral_or_big_int_unsigned_t; + + ResultType result = 0; + + bool is_number = false; + size_t src_cur = 0; + int error_val = 0; + + if (src_len == 0) + return {0, 0, 0}; + + if (base < 0 || base == 1 || base > 36) + return {0, 0, EINVAL}; + + src_cur = first_non_whitespace(src, src_len); + + wchar_t result_sign = L'+'; + if (src[src_cur] == L'+' || src[src_cur] == L'-') { + result_sign = src[src_cur]; + ++src_cur; + } + + if (base == 0) + base = infer_base(src + src_cur, src_len - src_cur); + + if (base == 16 && is_hex_start(src + src_cur, src_len - src_cur)) + src_cur = src_cur + 2; + + constexpr bool IS_UNSIGNED = cpp::is_unsigned_v; + const bool is_positive = (result_sign == L'+'); + + ResultType constexpr NEGATIVE_MAX = + !IS_UNSIGNED ? static_cast(cpp::numeric_limits::max()) + 1 + : cpp::numeric_limits::max(); + ResultType const abs_max = + (is_positive ? 
cpp::numeric_limits::max() : NEGATIVE_MAX); + ResultType const abs_max_div_by_base = + abs_max / static_cast(base); + + while (src_cur < src_len && iswalnum(src[src_cur])) { + int cur_digit = b36_wchar_to_int(src[src_cur]); + if (cur_digit >= base) + break; + + is_number = true; + ++src_cur; + + // If the number has already hit the maximum value for the current type then + // the result cannot change, but we still need to advance src to the end of + // the number. + if (result == abs_max) { + error_val = ERANGE; + continue; + } + + if (result > abs_max_div_by_base) { + result = abs_max; + error_val = ERANGE; + } else { + result = result * static_cast(base); + } + if (result > abs_max - static_cast(cur_digit)) { + result = abs_max; + error_val = ERANGE; + } else { + result = result + static_cast(cur_digit); + } + } + + ptrdiff_t str_len = is_number ? static_cast(src_cur) : 0; + + if (error_val == ERANGE) { + if (is_positive || IS_UNSIGNED) + return {cpp::numeric_limits::max(), str_len, error_val}; + else // T is signed and there is a negative overflow + return {cpp::numeric_limits::min(), str_len, error_val}; + } + + return {static_cast(is_positive ? 
result : -result), str_len, error_val}; +} + +} // namespace internal +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC___SUPPORT_WCS_TO_INTEGER_H diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index fd1e6c0d648aa..6c3f28f423c7b 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -1332,19 +1332,8 @@ add_entrypoint_object( HDRS ../expf16.h DEPENDS - .expxf16 - libc.hdr.errno_macros - libc.hdr.fenv_macros - libc.src.__support.CPP.array - libc.src.__support.FPUtil.cast - libc.src.__support.FPUtil.except_value_utils - libc.src.__support.FPUtil.fenv_impl - libc.src.__support.FPUtil.fp_bits - libc.src.__support.FPUtil.multiply_add - libc.src.__support.FPUtil.nearest_integer - libc.src.__support.FPUtil.polyeval - libc.src.__support.FPUtil.rounding_mode - libc.src.__support.macros.optimization + libc.src.__support.math.expf16 + libc.src.errno.errno ) add_entrypoint_object( @@ -5075,11 +5064,10 @@ add_header_library( HDRS expxf16.h DEPENDS - libc.src.__support.CPP.array libc.src.__support.FPUtil.cast libc.src.__support.FPUtil.fp_bits libc.src.__support.FPUtil.multiply_add libc.src.__support.FPUtil.nearest_integer - libc.src.__support.FPUtil.polyeval libc.src.__support.macros.attributes + libc.src.__support.math.expf16_utils ) diff --git a/libc/src/math/generic/expf16.cpp b/libc/src/math/generic/expf16.cpp index 1af9b3ec9ad6e..ad213e237f021 100644 --- a/libc/src/math/generic/expf16.cpp +++ b/libc/src/math/generic/expf16.cpp @@ -7,120 +7,11 @@ //===----------------------------------------------------------------------===// #include "src/math/expf16.h" -#include "expxf16.h" -#include "hdr/errno_macros.h" -#include "hdr/fenv_macros.h" -#include "src/__support/FPUtil/FEnvImpl.h" -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/PolyEval.h" -#include "src/__support/FPUtil/cast.h" -#include "src/__support/FPUtil/except_value_utils.h" -#include 
"src/__support/FPUtil/rounding_mode.h" -#include "src/__support/common.h" -#include "src/__support/macros/config.h" -#include "src/__support/macros/optimization.h" -namespace LIBC_NAMESPACE_DECL { - -#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS -static constexpr fputil::ExceptValues EXPF16_EXCEPTS_LO = {{ - // (input, RZ output, RU offset, RD offset, RN offset) - // x = 0x1.de4p-8, expf16(x) = 0x1.01cp+0 (RZ) - {0x1f79U, 0x3c07U, 1U, 0U, 0U}, - // x = 0x1.73cp-6, expf16(x) = 0x1.05cp+0 (RZ) - {0x25cfU, 0x3c17U, 1U, 0U, 0U}, -}}; - -static constexpr fputil::ExceptValues EXPF16_EXCEPTS_HI = {{ - // (input, RZ output, RU offset, RD offset, RN offset) - // x = 0x1.c34p+0, expf16(x) = 0x1.74cp+2 (RZ) - {0x3f0dU, 0x45d3U, 1U, 0U, 1U}, - // x = -0x1.488p-5, expf16(x) = 0x1.ebcp-1 (RZ) - {0xa922U, 0x3bafU, 1U, 0U, 0U}, - // x = -0x1.55p-5, expf16(x) = 0x1.ebp-1 (RZ) - {0xa954U, 0x3bacU, 1U, 0U, 0U}, -}}; -#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS - -LLVM_LIBC_FUNCTION(float16, expf16, (float16 x)) { - using FPBits = fputil::FPBits; - FPBits x_bits(x); - - uint16_t x_u = x_bits.uintval(); - uint16_t x_abs = x_u & 0x7fffU; - - // When 0 < |x| <= 2^(-5), or |x| >= 12, or x is NaN. - if (LIBC_UNLIKELY(x_abs <= 0x2800U || x_abs >= 0x4a00U)) { - // exp(NaN) = NaN - if (x_bits.is_nan()) { - if (x_bits.is_signaling_nan()) { - fputil::raise_except_if_required(FE_INVALID); - return FPBits::quiet_nan().get_val(); - } - - return x; - } - - // When x >= 12. - if (x_bits.is_pos() && x_abs >= 0x4a00U) { - // exp(+inf) = +inf - if (x_bits.is_inf()) - return FPBits::inf().get_val(); +#include "src/__support/math/expf16.h" - switch (fputil::quick_get_round()) { - case FE_TONEAREST: - case FE_UPWARD: - fputil::set_errno_if_required(ERANGE); - fputil::raise_except_if_required(FE_OVERFLOW); - return FPBits::inf().get_val(); - default: - return FPBits::max_normal().get_val(); - } - } - - // When x <= -18. 
- if (x_u >= 0xcc80U) { - // exp(-inf) = +0 - if (x_bits.is_inf()) - return FPBits::zero().get_val(); - - fputil::set_errno_if_required(ERANGE); - fputil::raise_except_if_required(FE_UNDERFLOW | FE_INEXACT); - - switch (fputil::quick_get_round()) { - case FE_UPWARD: - return FPBits::min_subnormal().get_val(); - default: - return FPBits::zero().get_val(); - } - } - - // When 0 < |x| <= 2^(-5). - if (x_abs <= 0x2800U && !x_bits.is_zero()) { -#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS - if (auto r = EXPF16_EXCEPTS_LO.lookup(x_u); LIBC_UNLIKELY(r.has_value())) - return r.value(); -#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS - - float xf = x; - // Degree-3 minimax polynomial generated by Sollya with the following - // commands: - // > display = hexadecimal; - // > P = fpminimax(expm1(x)/x, 2, [|SG...|], [-2^-5, 2^-5]); - // > 1 + x * P; - return fputil::cast( - fputil::polyeval(xf, 0x1p+0f, 0x1p+0f, 0x1.0004p-1f, 0x1.555778p-3f)); - } - } - -#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS - if (auto r = EXPF16_EXCEPTS_HI.lookup(x_u); LIBC_UNLIKELY(r.has_value())) - return r.value(); -#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS +namespace LIBC_NAMESPACE_DECL { - // exp(x) = exp(hi + mid) * exp(lo) - auto [exp_hi_mid, exp_lo] = exp_range_reduction(x); - return fputil::cast(exp_hi_mid * exp_lo); -} +LLVM_LIBC_FUNCTION(float16, expf16, (float16 x)) { return math::expf16(x); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/expxf16.h b/libc/src/math/generic/expxf16.h index 67bb248307519..05ac95d586823 100644 --- a/libc/src/math/generic/expxf16.h +++ b/libc/src/math/generic/expxf16.h @@ -9,9 +9,7 @@ #ifndef LLVM_LIBC_SRC_MATH_GENERIC_EXPXF16_H #define LLVM_LIBC_SRC_MATH_GENERIC_EXPXF16_H -#include "src/__support/CPP/array.h" #include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/PolyEval.h" #include "src/__support/FPUtil/cast.h" #include "src/__support/FPUtil/multiply_add.h" #include "src/__support/FPUtil/nearest_integer.h" @@ -19,69 +17,9 @@ 
#include "src/__support/macros/config.h" #include -namespace LIBC_NAMESPACE_DECL { - -// Generated by Sollya with the following commands: -// > display = hexadecimal; -// > for i from -18 to 12 do print(round(exp(i), SG, RN)); -static constexpr cpp::array EXP_HI = { - 0x1.05a628p-26f, 0x1.639e32p-25f, 0x1.e355bcp-24f, 0x1.4875cap-22f, - 0x1.be6c7p-21f, 0x1.2f6054p-19f, 0x1.9c54c4p-18f, 0x1.183542p-16f, - 0x1.7cd79cp-15f, 0x1.02cf22p-13f, 0x1.5fc21p-12f, 0x1.de16bap-11f, - 0x1.44e52p-9f, 0x1.b993fep-8f, 0x1.2c155cp-6f, 0x1.97db0cp-5f, - 0x1.152aaap-3f, 0x1.78b564p-2f, 0x1p+0f, 0x1.5bf0a8p+1f, - 0x1.d8e64cp+2f, 0x1.415e5cp+4f, 0x1.b4c902p+5f, 0x1.28d38ap+7f, - 0x1.936dc6p+8f, 0x1.122886p+10f, 0x1.749ea8p+11f, 0x1.fa7158p+12f, - 0x1.5829dcp+14f, 0x1.d3c448p+15f, 0x1.3de166p+17f, -}; - -// Generated by Sollya with the following commands: -// > display = hexadecimal; -// > for i from 0 to 7 do print(round(exp(i * 2^-3), SG, RN)); -static constexpr cpp::array EXP_MID = { - 0x1p+0f, 0x1.221604p+0f, 0x1.48b5e4p+0f, 0x1.747a52p+0f, - 0x1.a61298p+0f, 0x1.de455ep+0f, 0x1.0ef9dcp+1f, 0x1.330e58p+1f, -}; - -struct ExpRangeReduction { - float exp_hi_mid; - float exp_lo; -}; +#include "src/__support/math/expf16_utils.h" -LIBC_INLINE ExpRangeReduction exp_range_reduction(float16 x) { - // For -18 < x < 12, to compute exp(x), we perform the following range - // reduction: find hi, mid, lo, such that: - // x = hi + mid + lo, in which - // hi is an integer, - // mid * 2^3 is an integer, - // -2^(-4) <= lo < 2^(-4). - // In particular, - // hi + mid = round(x * 2^3) * 2^(-3). - // Then, - // exp(x) = exp(hi + mid + lo) = exp(hi) * exp(mid) * exp(lo). - // We store exp(hi) and exp(mid) in the lookup tables EXP_HI and EXP_MID - // respectively. exp(lo) is computed using a degree-3 minimax polynomial - // generated by Sollya. 
- - float xf = x; - float kf = fputil::nearest_integer(xf * 0x1.0p+3f); - int x_hi_mid = static_cast(kf); - int x_hi = x_hi_mid >> 3; - int x_mid = x_hi_mid & 0x7; - // lo = x - (hi + mid) = round(x * 2^3) * (-2^(-3)) + x - float lo = fputil::multiply_add(kf, -0x1.0p-3f, xf); - - float exp_hi = EXP_HI[x_hi + 18]; - float exp_mid = EXP_MID[x_mid]; - // Degree-3 minimax polynomial generated by Sollya with the following - // commands: - // > display = hexadecimal; - // > P = fpminimax(expm1(x)/x, 2, [|SG...|], [-2^-4, 2^-4]); - // > 1 + x * P; - float exp_lo = - fputil::polyeval(lo, 0x1p+0f, 0x1p+0f, 0x1.001p-1f, 0x1.555ddep-3f); - return {exp_hi * exp_mid, exp_lo}; -} +namespace LIBC_NAMESPACE_DECL { // Generated by Sollya with the following commands: // > display = hexadecimal; diff --git a/libc/test/src/__support/CMakeLists.txt b/libc/test/src/__support/CMakeLists.txt index 9f626ed31cc07..e54d7a5c9638b 100644 --- a/libc/test/src/__support/CMakeLists.txt +++ b/libc/test/src/__support/CMakeLists.txt @@ -141,6 +141,17 @@ add_libc_test( libc.src.__support.str_to_integer ) +add_libc_test( + wcs_to_integer_test + SUITE + libc-support-tests + SRCS + wcs_to_integer_test.cpp + DEPENDS + libc.src.__support.integer_literals + libc.src.__support.wcs_to_integer +) + add_libc_test( integer_to_string_test SUITE diff --git a/libc/test/src/__support/wcs_to_integer_test.cpp b/libc/test/src/__support/wcs_to_integer_test.cpp new file mode 100644 index 0000000000000..e4107929c15fc --- /dev/null +++ b/libc/test/src/__support/wcs_to_integer_test.cpp @@ -0,0 +1,239 @@ +//===-- Unittests for wcs_to_integer --------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/libc_errno.h" +#include "src/__support/wcs_to_integer.h" +#include + +#include "test/UnitTest/Test.h" + +// This file is for testing the src_len argument and other internal interface +// features. Primary testing is done through the public interface. + +TEST(LlvmLibcStrToIntegerTest, SimpleLength) { + auto result = LIBC_NAMESPACE::internal::wcstointeger(L"12345", 10, 10); + EXPECT_FALSE(result.has_error()); + EXPECT_EQ(result.parsed_len, ptrdiff_t(5)); + ASSERT_EQ(result.value, 12345); + + result = LIBC_NAMESPACE::internal::wcstointeger(L"12345", 10, 2); + EXPECT_FALSE(result.has_error()); + EXPECT_EQ(result.parsed_len, ptrdiff_t(2)); + ASSERT_EQ(result.value, 12); + + result = LIBC_NAMESPACE::internal::wcstointeger(L"12345", 10, 0); + EXPECT_FALSE(result.has_error()); + EXPECT_EQ(result.parsed_len, ptrdiff_t(0)); + ASSERT_EQ(result.value, 0); +} + +TEST(LlvmLibcStrToIntegerTest, LeadingSpaces) { + auto result = + LIBC_NAMESPACE::internal::wcstointeger(L" 12345", 10, 15); + EXPECT_FALSE(result.has_error()); + EXPECT_EQ(result.parsed_len, ptrdiff_t(10)); + ASSERT_EQ(result.value, 12345); + + result = LIBC_NAMESPACE::internal::wcstointeger(L" 12345", 10, 10); + EXPECT_FALSE(result.has_error()); + EXPECT_EQ(result.parsed_len, ptrdiff_t(10)); + ASSERT_EQ(result.value, 12345); + + result = LIBC_NAMESPACE::internal::wcstointeger(L" 12345", 10, 7); + EXPECT_FALSE(result.has_error()); + EXPECT_EQ(result.parsed_len, ptrdiff_t(7)); + ASSERT_EQ(result.value, 12); + + result = LIBC_NAMESPACE::internal::wcstointeger(L" 12345", 10, 5); + EXPECT_FALSE(result.has_error()); + EXPECT_EQ(result.parsed_len, ptrdiff_t(0)); + ASSERT_EQ(result.value, 0); + + result = LIBC_NAMESPACE::internal::wcstointeger(L" 12345", 10, 0); + EXPECT_FALSE(result.has_error()); + EXPECT_EQ(result.parsed_len, ptrdiff_t(0)); + 
ASSERT_EQ(result.value, 0); +} + +TEST(LlvmLibcStrToIntegerTest, LeadingSign) { + auto result = LIBC_NAMESPACE::internal::wcstointeger(L"+12345", 10, 10); + EXPECT_FALSE(result.has_error()); + EXPECT_EQ(result.parsed_len, ptrdiff_t(6)); + ASSERT_EQ(result.value, 12345); + + result = LIBC_NAMESPACE::internal::wcstointeger(L"-12345", 10, 10); + EXPECT_FALSE(result.has_error()); + EXPECT_EQ(result.parsed_len, ptrdiff_t(6)); + ASSERT_EQ(result.value, -12345); + + result = LIBC_NAMESPACE::internal::wcstointeger(L"+12345", 10, 6); + EXPECT_FALSE(result.has_error()); + EXPECT_EQ(result.parsed_len, ptrdiff_t(6)); + ASSERT_EQ(result.value, 12345); + + result = LIBC_NAMESPACE::internal::wcstointeger(L"-12345", 10, 6); + EXPECT_FALSE(result.has_error()); + EXPECT_EQ(result.parsed_len, ptrdiff_t(6)); + ASSERT_EQ(result.value, -12345); + + result = LIBC_NAMESPACE::internal::wcstointeger(L"+12345", 10, 3); + EXPECT_FALSE(result.has_error()); + EXPECT_EQ(result.parsed_len, ptrdiff_t(3)); + ASSERT_EQ(result.value, 12); + + result = LIBC_NAMESPACE::internal::wcstointeger(L"-12345", 10, 3); + EXPECT_FALSE(result.has_error()); + EXPECT_EQ(result.parsed_len, ptrdiff_t(3)); + ASSERT_EQ(result.value, -12); + + result = LIBC_NAMESPACE::internal::wcstointeger(L"+12345", 10, 1); + EXPECT_FALSE(result.has_error()); + EXPECT_EQ(result.parsed_len, ptrdiff_t(0)); + ASSERT_EQ(result.value, 0); + + result = LIBC_NAMESPACE::internal::wcstointeger(L"-12345", 10, 1); + EXPECT_FALSE(result.has_error()); + EXPECT_EQ(result.parsed_len, ptrdiff_t(0)); + ASSERT_EQ(result.value, 0); + + result = LIBC_NAMESPACE::internal::wcstointeger(L"+12345", 10, 0); + EXPECT_FALSE(result.has_error()); + EXPECT_EQ(result.parsed_len, ptrdiff_t(0)); + ASSERT_EQ(result.value, 0); + + result = LIBC_NAMESPACE::internal::wcstointeger(L"-12345", 10, 0); + EXPECT_FALSE(result.has_error()); + EXPECT_EQ(result.parsed_len, ptrdiff_t(0)); + ASSERT_EQ(result.value, 0); +} + +TEST(LlvmLibcStrToIntegerTest, Base16PrefixAutoSelect) { 
+ auto result = LIBC_NAMESPACE::internal::wcstointeger(L"0x12345", 0, 10); + EXPECT_FALSE(result.has_error()); + EXPECT_EQ(result.parsed_len, ptrdiff_t(7)); + ASSERT_EQ(result.value, 0x12345); + + result = LIBC_NAMESPACE::internal::wcstointeger(L"0x12345", 0, 7); + EXPECT_FALSE(result.has_error()); + EXPECT_EQ(result.parsed_len, ptrdiff_t(7)); + ASSERT_EQ(result.value, 0x12345); + + result = LIBC_NAMESPACE::internal::wcstointeger(L"0x12345", 0, 5); + EXPECT_FALSE(result.has_error()); + EXPECT_EQ(result.parsed_len, ptrdiff_t(5)); + ASSERT_EQ(result.value, 0x123); + + result = LIBC_NAMESPACE::internal::wcstointeger(L"0x12345", 0, 2); + EXPECT_FALSE(result.has_error()); + EXPECT_EQ(result.parsed_len, ptrdiff_t(1)); + ASSERT_EQ(result.value, 0); + + result = LIBC_NAMESPACE::internal::wcstointeger(L"0x12345", 0, 0); + EXPECT_FALSE(result.has_error()); + EXPECT_EQ(result.parsed_len, ptrdiff_t(0)); + ASSERT_EQ(result.value, 0); +} + +TEST(LlvmLibcStrToIntegerTest, Base16PrefixManualSelect) { + auto result = LIBC_NAMESPACE::internal::wcstointeger(L"0x12345", 16, 10); + EXPECT_FALSE(result.has_error()); + EXPECT_EQ(result.parsed_len, ptrdiff_t(7)); + ASSERT_EQ(result.value, 0x12345); + + result = LIBC_NAMESPACE::internal::wcstointeger(L"0x12345", 16, 7); + EXPECT_FALSE(result.has_error()); + EXPECT_EQ(result.parsed_len, ptrdiff_t(7)); + ASSERT_EQ(result.value, 0x12345); + + result = LIBC_NAMESPACE::internal::wcstointeger(L"0x12345", 16, 5); + EXPECT_FALSE(result.has_error()); + EXPECT_EQ(result.parsed_len, ptrdiff_t(5)); + ASSERT_EQ(result.value, 0x123); + + result = LIBC_NAMESPACE::internal::wcstointeger(L"0x12345", 16, 2); + EXPECT_FALSE(result.has_error()); + EXPECT_EQ(result.parsed_len, ptrdiff_t(1)); + ASSERT_EQ(result.value, 0); + + result = LIBC_NAMESPACE::internal::wcstointeger(L"0x12345", 16, 0); + EXPECT_FALSE(result.has_error()); + EXPECT_EQ(result.parsed_len, ptrdiff_t(0)); + ASSERT_EQ(result.value, 0); +} + +TEST(LlvmLibcStrToIntegerTest, Base8PrefixAutoSelect) 
{ + auto result = LIBC_NAMESPACE::internal::wcstointeger(L"012345", 0, 10); + EXPECT_FALSE(result.has_error()); + EXPECT_EQ(result.parsed_len, ptrdiff_t(6)); + ASSERT_EQ(result.value, 012345); + + result = LIBC_NAMESPACE::internal::wcstointeger(L"012345", 0, 6); + EXPECT_FALSE(result.has_error()); + EXPECT_EQ(result.parsed_len, ptrdiff_t(6)); + ASSERT_EQ(result.value, 012345); + + result = LIBC_NAMESPACE::internal::wcstointeger(L"012345", 0, 4); + EXPECT_FALSE(result.has_error()); + EXPECT_EQ(result.parsed_len, ptrdiff_t(4)); + ASSERT_EQ(result.value, 0123); + + result = LIBC_NAMESPACE::internal::wcstointeger(L"012345", 0, 1); + EXPECT_FALSE(result.has_error()); + EXPECT_EQ(result.parsed_len, ptrdiff_t(1)); + ASSERT_EQ(result.value, 0); + + result = LIBC_NAMESPACE::internal::wcstointeger(L"012345", 0, 0); + EXPECT_FALSE(result.has_error()); + EXPECT_EQ(result.parsed_len, ptrdiff_t(0)); + ASSERT_EQ(result.value, 0); +} + +TEST(LlvmLibcStrToIntegerTest, Base8PrefixManualSelect) { + auto result = LIBC_NAMESPACE::internal::wcstointeger(L"012345", 8, 10); + EXPECT_FALSE(result.has_error()); + EXPECT_EQ(result.parsed_len, ptrdiff_t(6)); + ASSERT_EQ(result.value, 012345); + + result = LIBC_NAMESPACE::internal::wcstointeger(L"012345", 8, 6); + EXPECT_FALSE(result.has_error()); + EXPECT_EQ(result.parsed_len, ptrdiff_t(6)); + ASSERT_EQ(result.value, 012345); + + result = LIBC_NAMESPACE::internal::wcstointeger(L"012345", 8, 4); + EXPECT_FALSE(result.has_error()); + EXPECT_EQ(result.parsed_len, ptrdiff_t(4)); + ASSERT_EQ(result.value, 0123); + + result = LIBC_NAMESPACE::internal::wcstointeger(L"012345", 8, 1); + EXPECT_FALSE(result.has_error()); + EXPECT_EQ(result.parsed_len, ptrdiff_t(1)); + ASSERT_EQ(result.value, 0); + + result = LIBC_NAMESPACE::internal::wcstointeger(L"012345", 8, 0); + EXPECT_FALSE(result.has_error()); + EXPECT_EQ(result.parsed_len, ptrdiff_t(0)); + ASSERT_EQ(result.value, 0); +} + +TEST(LlvmLibcStrToIntegerTest, CombinedTests) { + auto result = + 
LIBC_NAMESPACE::internal::wcstointeger(L" -0x123", 0, 10); + EXPECT_FALSE(result.has_error()); + EXPECT_EQ(result.parsed_len, ptrdiff_t(10)); + ASSERT_EQ(result.value, -0x123); + + result = LIBC_NAMESPACE::internal::wcstointeger(L" -0x123", 0, 8); + EXPECT_FALSE(result.has_error()); + EXPECT_EQ(result.parsed_len, ptrdiff_t(8)); + ASSERT_EQ(result.value, -0x1); + + result = LIBC_NAMESPACE::internal::wcstointeger(L" -0x123", 0, 7); + EXPECT_FALSE(result.has_error()); + EXPECT_EQ(result.parsed_len, ptrdiff_t(6)); + ASSERT_EQ(result.value, 0); +} diff --git a/libc/test/src/math/smoke/RoundToIntegerTest.h b/libc/test/src/math/smoke/RoundToIntegerTest.h index 745ccbc748ecd..2b460aef6ef32 100644 --- a/libc/test/src/math/smoke/RoundToIntegerTest.h +++ b/libc/test/src/math/smoke/RoundToIntegerTest.h @@ -113,7 +113,8 @@ class RoundToIntegerTestTemplate } void testSubnormalRange(RoundToIntegerFunc func) { - constexpr int COUNT = 1'000'001; + // Arbitrary, trades off completeness with testing time (esp. on failure) + constexpr int COUNT = 1'000; constexpr StorageType STEP = LIBC_NAMESPACE::cpp::max( static_cast((MAX_SUBNORMAL - MIN_SUBNORMAL) / COUNT), StorageType(1)); diff --git a/libcxx/src/atomic.cpp b/libcxx/src/atomic.cpp index 903084da053a1..ed825aa0a2adf 100644 --- a/libcxx/src/atomic.cpp +++ b/libcxx/src/atomic.cpp @@ -41,6 +41,10 @@ // OpenBSD has no indirect syscalls # define _LIBCPP_FUTEX(...) 
+# include <os/os_sync_wait_on_address.h>
0 : ULF_WAKE_ALL), const_cast<__cxx_atomic_contention_t*>(__ptr), 0); + os_sync_wake_by_address_all(const_cast<__cxx_atomic_contention_t*>(__ptr), 8, OS_SYNC_WAKE_BY_ADDRESS_NONE); } #elif defined(__FreeBSD__) && __SIZEOF_LONG__ == 8 diff --git a/libcxx/test/configs/stdlib-libstdc++.cfg.in b/libcxx/test/configs/stdlib-libstdc++.cfg.in index b9672f038a763..3ff0c542f0630 100644 --- a/libcxx/test/configs/stdlib-libstdc++.cfg.in +++ b/libcxx/test/configs/stdlib-libstdc++.cfg.in @@ -9,7 +9,8 @@ # # $ ./libcxx/utils/libcxx-lit -sv libcxx/test/std --param libstdcxx_install_prefix=/opt/homebrew/Cellar/gcc/14.1.0_1 \ # --param libstdcxx_version=14 \ -# --param libstdcxx_triple=aarch64-apple-darwin22 +# --param libstdcxx_triple=aarch64-apple-darwin22 \ +# --param stdlib=libstdc++ # lit_config.load_config(config, '@CMAKE_CURRENT_BINARY_DIR@/cmake-bridge.cfg') diff --git a/libcxx/test/libcxx/language.support/support.types/cstddef.compile.pass.cpp b/libcxx/test/extensions/all/cstddef.compile.pass.cpp similarity index 99% rename from libcxx/test/libcxx/language.support/support.types/cstddef.compile.pass.cpp rename to libcxx/test/extensions/all/cstddef.compile.pass.cpp index 514353a103029..c6f670d7bfc06 100644 --- a/libcxx/test/libcxx/language.support/support.types/cstddef.compile.pass.cpp +++ b/libcxx/test/extensions/all/cstddef.compile.pass.cpp @@ -11,6 +11,7 @@ // appear to provide that behavior too. 
#include + #include "test_macros.h" using PtrdiffT = ::ptrdiff_t; diff --git a/libcxx/test/libcxx/clang_modules_include.gen.py b/libcxx/test/extensions/clang/clang_modules_include.gen.py similarity index 100% rename from libcxx/test/libcxx/clang_modules_include.gen.py rename to libcxx/test/extensions/clang/clang_modules_include.gen.py diff --git a/libcxx/test/extensions/clang/lit.local.cfg b/libcxx/test/extensions/clang/lit.local.cfg new file mode 100644 index 0000000000000..b0a1c7d9b17e4 --- /dev/null +++ b/libcxx/test/extensions/clang/lit.local.cfg @@ -0,0 +1,4 @@ + +# Only libc++ supports clang-specific extensions +if "stdlib=libc++" not in config.available_features: + config.unsupported = True diff --git a/libcxx/test/libcxx/include_as_c.sh.cpp b/libcxx/test/extensions/libcxx/include_as_c.sh.cpp similarity index 100% rename from libcxx/test/libcxx/include_as_c.sh.cpp rename to libcxx/test/extensions/libcxx/include_as_c.sh.cpp diff --git a/libcxx/test/libcxx/libcpp_version.gen.py b/libcxx/test/extensions/libcxx/libcpp_version.gen.py similarity index 93% rename from libcxx/test/libcxx/libcpp_version.gen.py rename to libcxx/test/extensions/libcxx/libcpp_version.gen.py index b30623fe2c388..ebeab777c934f 100644 --- a/libcxx/test/libcxx/libcpp_version.gen.py +++ b/libcxx/test/extensions/libcxx/libcpp_version.gen.py @@ -6,7 +6,7 @@ # # ===----------------------------------------------------------------------===## -# Test that all headers define the _LIBCPP_VERSION macro. +# Test that all public headers define the _LIBCPP_VERSION macro. 
# RUN: %{python} %s %{libcxx-dir}/utils diff --git a/libcxx/test/extensions/libcxx/lit.local.cfg b/libcxx/test/extensions/libcxx/lit.local.cfg new file mode 100644 index 0000000000000..2b128105558b2 --- /dev/null +++ b/libcxx/test/extensions/libcxx/lit.local.cfg @@ -0,0 +1,3 @@ + +if "stdlib=libc++" not in config.available_features: + config.unsupported = True diff --git a/libcxx/test/libcxx/no_assert_include.gen.py b/libcxx/test/extensions/libcxx/no_assert_include.gen.py similarity index 100% rename from libcxx/test/libcxx/no_assert_include.gen.py rename to libcxx/test/extensions/libcxx/no_assert_include.gen.py diff --git a/libcxx/test/selftest/lit.local.cfg b/libcxx/test/selftest/lit.local.cfg deleted file mode 100644 index 4467d8070cc70..0000000000000 --- a/libcxx/test/selftest/lit.local.cfg +++ /dev/null @@ -1,5 +0,0 @@ -# The tests in this directory need to run Python -import shlex -import sys - -config.substitutions.append(("%{python}", shlex.quote(sys.executable))) diff --git a/libcxx/test/libcxx/double_include.gen.py b/libcxx/test/std/double_include.gen.py similarity index 93% rename from libcxx/test/libcxx/double_include.gen.py rename to libcxx/test/std/double_include.gen.py index f58e72f94a353..fcf3b9a8fa2e0 100644 --- a/libcxx/test/libcxx/double_include.gen.py +++ b/libcxx/test/std/double_include.gen.py @@ -28,6 +28,9 @@ {lit_header_restrictions.get(header, '')} {lit_header_undeprecations.get(header, '')} +// We're using compiler-specific flags in this test +// REQUIRES: (gcc || clang) + // RUN: %{{cxx}} -c %s -o %t.first.o %{{flags}} %{{compile_flags}} // RUN: %{{cxx}} -c %s -o %t.second.o -DWITH_MAIN %{{flags}} %{{compile_flags}} // RUN: %{{cxx}} -o %t.exe %t.first.o %t.second.o %{{flags}} %{{link_flags}} diff --git a/libcxx/test/libcxx/header_inclusions.gen.py b/libcxx/test/std/header_inclusions.gen.py similarity index 91% rename from libcxx/test/libcxx/header_inclusions.gen.py rename to libcxx/test/std/header_inclusions.gen.py index 
+// TODO: This is currently a libc++-specific way of testing the includes, but is a requirement for all implementations
+// XFAIL: LIBCXX-ANDROID-FIXME + // type_traits // is_bounded_array diff --git a/lld/Common/DriverDispatcher.cpp b/lld/Common/DriverDispatcher.cpp index fe18c320983fa..34f0ed24b3df0 100644 --- a/lld/Common/DriverDispatcher.cpp +++ b/lld/Common/DriverDispatcher.cpp @@ -45,7 +45,7 @@ static cl::TokenizerCallback getDefaultQuotingStyle() { static bool isPETargetName(StringRef s) { return s == "i386pe" || s == "i386pep" || s == "thumb2pe" || s == "arm64pe" || - s == "arm64ecpe"; + s == "arm64ecpe" || s == "arm64xpe"; } static std::optional isPETarget(llvm::ArrayRef args) { diff --git a/lld/MinGW/Driver.cpp b/lld/MinGW/Driver.cpp index 98d48bdfcf311..5098dbd77b4fd 100644 --- a/lld/MinGW/Driver.cpp +++ b/lld/MinGW/Driver.cpp @@ -448,6 +448,8 @@ bool link(ArrayRef argsArr, llvm::raw_ostream &stdoutOS, add("-machine:arm64"); else if (s == "arm64ecpe") add("-machine:arm64ec"); + else if (s == "arm64xpe") + add("-machine:arm64x"); else error("unknown parameter: -m" + s); } diff --git a/lld/test/MinGW/driver.test b/lld/test/MinGW/driver.test index 907d2d87dda5c..618b888504320 100644 --- a/lld/test/MinGW/driver.test +++ b/lld/test/MinGW/driver.test @@ -31,6 +31,12 @@ ARM64EC-SAME: -machine:arm64ec ARM64EC-SAME: -alternatename:__image_base__=__ImageBase ARM64EC-SAME: foo.o +RUN: ld.lld -### foo.o -m arm64xpe 2>&1 | FileCheck -check-prefix=ARM64X %s +ARM64X: -out:a.exe +ARM64X-SAME: -machine:arm64x +ARM64X-SAME: -alternatename:__image_base__=__ImageBase +ARM64X-SAME: foo.o + RUN: ld.lld -### foo.o -m i386pep -shared 2>&1 | FileCheck -check-prefix=SHARED %s RUN: ld.lld -### foo.o -m i386pep --shared 2>&1 | FileCheck -check-prefix=SHARED %s RUN: ld.lld -### foo.o -m i386pep --dll 2>&1 | FileCheck -check-prefix=SHARED %s diff --git a/lldb/include/lldb/Breakpoint/Breakpoint.h b/lldb/include/lldb/Breakpoint/Breakpoint.h index b200a1e4893df..26a5e901a0d7e 100644 --- a/lldb/include/lldb/Breakpoint/Breakpoint.h +++ b/lldb/include/lldb/Breakpoint/Breakpoint.h @@ -397,16 +397,12 @@ class 
Breakpoint : public std::enable_shared_from_this, /// Set the breakpoint's condition. /// /// \param[in] condition - /// The condition expression to evaluate when the breakpoint is hit. - /// Pass in nullptr to clear the condition. - void SetCondition(const char *condition); + /// The condition to evaluate when the breakpoint is hit. + /// Pass in an empty condition to clear the condition. + void SetCondition(StopCondition condition); - /// Return a pointer to the text of the condition expression. - /// - /// \return - /// A pointer to the condition expression text, or nullptr if no - // condition has been set. - const char *GetConditionText() const; + /// Return the breakpoint condition. + const StopCondition &GetCondition() const; // The next section are various utility functions. diff --git a/lldb/include/lldb/Breakpoint/BreakpointLocation.h b/lldb/include/lldb/Breakpoint/BreakpointLocation.h index ce3a21f92bd46..ab2e5e170559d 100644 --- a/lldb/include/lldb/Breakpoint/BreakpointLocation.h +++ b/lldb/include/lldb/Breakpoint/BreakpointLocation.h @@ -128,15 +128,11 @@ class BreakpointLocation /// Set the breakpoint location's condition. /// /// \param[in] condition - /// The condition expression to evaluate when the breakpoint is hit. - void SetCondition(const char *condition); + /// The condition to evaluate when the breakpoint is hit. + void SetCondition(StopCondition condition); - /// Return a pointer to the text of the condition expression. - /// - /// \return - /// A pointer to the condition expression text, or nullptr if no - // condition has been set. - const char *GetConditionText(size_t *hash = nullptr) const; + /// Return the breakpoint condition. 
+ const StopCondition &GetCondition() const; bool ConditionSaysStop(ExecutionContext &exe_ctx, Status &error); diff --git a/lldb/include/lldb/Breakpoint/BreakpointOptions.h b/lldb/include/lldb/Breakpoint/BreakpointOptions.h index 7bf545717422f..2f73473c07e62 100644 --- a/lldb/include/lldb/Breakpoint/BreakpointOptions.h +++ b/lldb/include/lldb/Breakpoint/BreakpointOptions.h @@ -12,6 +12,7 @@ #include #include +#include "lldb/Breakpoint/StopCondition.h" #include "lldb/Utility/Baton.h" #include "lldb/Utility/Flags.h" #include "lldb/Utility/StringList.h" @@ -245,18 +246,15 @@ friend class Breakpoint; const Baton *GetBaton() const; // Condition - /// Set the breakpoint option's condition. + /// Set the breakpoint stop condition. /// /// \param[in] condition - /// The condition expression to evaluate when the breakpoint is hit. - void SetCondition(const char *condition); + /// The condition to evaluate when the breakpoint is hit. + void SetCondition(StopCondition condition); - /// Return a pointer to the text of the condition expression. - /// - /// \return - /// A pointer to the condition expression text, or nullptr if no - // condition has been set. - const char *GetConditionText(size_t *hash = nullptr) const; + /// Return the breakpoint condition. + const StopCondition &GetCondition() const; + StopCondition &GetCondition(); // Enabled/Ignore Count @@ -390,9 +388,7 @@ friend class Breakpoint; /// Thread for which this breakpoint will stop. std::unique_ptr m_thread_spec_up; /// The condition to test. - std::string m_condition_text; - /// Its hash, so that locations know when the condition is updated. - size_t m_condition_text_hash; + StopCondition m_condition; /// If set, inject breakpoint condition into process. bool m_inject_condition; /// If set, auto-continue from breakpoint. 
+  void SetText(std::string text) {
+    static std::hash<std::string> hasher;
+    m_text = std::move(text);
+    // Hash the stored member: `text` has just been moved-from here, so
+    // hashing it would record the hash of an empty string for every
+    // condition and defeat the update detection that relies on m_hash.
+    m_hash = hasher(m_text);
+  }
+ lldb::LanguageType m_language = lldb::eLanguageTypeUnknown; +}; + +} // namespace lldb_private + +#endif // LLDB_BREAKPOINT_STOPCONDITION_H diff --git a/lldb/include/lldb/Host/HostThread.h b/lldb/include/lldb/Host/HostThread.h index d3477e115e2d8..c969492f5b20a 100644 --- a/lldb/include/lldb/Host/HostThread.h +++ b/lldb/include/lldb/Host/HostThread.h @@ -43,6 +43,8 @@ class HostThread { bool EqualsThread(lldb::thread_t thread) const; + bool HasThread() const; + private: std::shared_ptr m_native_thread; }; diff --git a/lldb/include/lldb/Target/Process.h b/lldb/include/lldb/Target/Process.h index a8892e9c43225..637b0774ec7db 100644 --- a/lldb/include/lldb/Target/Process.h +++ b/lldb/include/lldb/Target/Process.h @@ -2547,6 +2547,8 @@ void PruneThreadPlans(); bool CurrentThreadIsPrivateStateThread(); + bool CurrentThreadPosesAsPrivateStateThread(); + virtual Status SendEventData(const char *data) { return Status::FromErrorString( "Sending an event is not supported for this process."); diff --git a/lldb/include/lldb/Utility/LLDBLog.h b/lldb/include/lldb/Utility/LLDBLog.h index c7de41e74e85b..18e4a3ca73507 100644 --- a/lldb/include/lldb/Utility/LLDBLog.h +++ b/lldb/include/lldb/Utility/LLDBLog.h @@ -49,7 +49,8 @@ enum class LLDBLog : Log::MaskType { Watchpoints = Log::ChannelFlag<30>, OnDemand = Log::ChannelFlag<31>, Source = Log::ChannelFlag<32>, - LLVM_MARK_AS_BITMASK_ENUM(OnDemand), + Disassembler = Log::ChannelFlag<33>, + LLVM_MARK_AS_BITMASK_ENUM(Disassembler), }; LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py index 68f58bf1349a7..d9516670e3a89 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py @@ -908,7 +908,7 @@ def request_launch( disableASLR=False, disableSTDIO=False, shellExpandArguments=False, - runInTerminal=False, + 
console: Optional[str] = None, enableAutoVariableSummaries=False, displayExtendedBacktrace=False, enableSyntheticChildDebugging=False, @@ -958,8 +958,8 @@ def request_launch( args_dict["launchCommands"] = launchCommands if sourceMap: args_dict["sourceMap"] = sourceMap - if runInTerminal: - args_dict["runInTerminal"] = runInTerminal + if console: + args_dict["console"] = console if postRunCommands: args_dict["postRunCommands"] = postRunCommands if customFrameFormat: diff --git a/lldb/source/API/SBBreakpoint.cpp b/lldb/source/API/SBBreakpoint.cpp index 397afc1f10f94..07c0a2ea907ba 100644 --- a/lldb/source/API/SBBreakpoint.cpp +++ b/lldb/source/API/SBBreakpoint.cpp @@ -275,7 +275,7 @@ void SBBreakpoint::SetCondition(const char *condition) { if (bkpt_sp) { std::lock_guard guard( bkpt_sp->GetTarget().GetAPIMutex()); - bkpt_sp->SetCondition(condition); + bkpt_sp->SetCondition(StopCondition(condition)); } } @@ -288,7 +288,7 @@ const char *SBBreakpoint::GetCondition() { std::lock_guard guard( bkpt_sp->GetTarget().GetAPIMutex()); - return ConstString(bkpt_sp->GetConditionText()).GetCString(); + return ConstString(bkpt_sp->GetCondition().GetText()).GetCString(); } void SBBreakpoint::SetAutoContinue(bool auto_continue) { diff --git a/lldb/source/API/SBBreakpointLocation.cpp b/lldb/source/API/SBBreakpointLocation.cpp index 479354a62627d..e786435c4f8af 100644 --- a/lldb/source/API/SBBreakpointLocation.cpp +++ b/lldb/source/API/SBBreakpointLocation.cpp @@ -160,7 +160,7 @@ void SBBreakpointLocation::SetCondition(const char *condition) { if (loc_sp) { std::lock_guard guard( loc_sp->GetTarget().GetAPIMutex()); - loc_sp->SetCondition(condition); + loc_sp->SetCondition(StopCondition(condition)); } } @@ -173,7 +173,7 @@ const char *SBBreakpointLocation::GetCondition() { std::lock_guard guard( loc_sp->GetTarget().GetAPIMutex()); - return ConstString(loc_sp->GetConditionText()).GetCString(); + return ConstString(loc_sp->GetCondition().GetText()).GetCString(); } void 
SBBreakpointLocation::SetAutoContinue(bool auto_continue) { diff --git a/lldb/source/API/SBBreakpointName.cpp b/lldb/source/API/SBBreakpointName.cpp index 831260d44e8e7..0b588c38d5114 100644 --- a/lldb/source/API/SBBreakpointName.cpp +++ b/lldb/source/API/SBBreakpointName.cpp @@ -303,7 +303,7 @@ void SBBreakpointName::SetCondition(const char *condition) { std::lock_guard guard( m_impl_up->GetTarget()->GetAPIMutex()); - bp_name->GetOptions().SetCondition(condition); + bp_name->GetOptions().SetCondition(StopCondition(condition)); UpdateName(*bp_name); } @@ -317,7 +317,8 @@ const char *SBBreakpointName::GetCondition() { std::lock_guard guard( m_impl_up->GetTarget()->GetAPIMutex()); - return ConstString(bp_name->GetOptions().GetConditionText()).GetCString(); + return ConstString(bp_name->GetOptions().GetCondition().GetText()) + .GetCString(); } void SBBreakpointName::SetAutoContinue(bool auto_continue) { diff --git a/lldb/source/Breakpoint/Breakpoint.cpp b/lldb/source/Breakpoint/Breakpoint.cpp index ec27a7dc7b41f..d757bc41cdc32 100644 --- a/lldb/source/Breakpoint/Breakpoint.cpp +++ b/lldb/source/Breakpoint/Breakpoint.cpp @@ -440,13 +440,13 @@ const char *Breakpoint::GetQueueName() const { return m_options.GetThreadSpecNoCreate()->GetQueueName(); } -void Breakpoint::SetCondition(const char *condition) { - m_options.SetCondition(condition); +void Breakpoint::SetCondition(StopCondition condition) { + m_options.SetCondition(std::move(condition)); SendBreakpointChangedEvent(eBreakpointEventTypeConditionChanged); } -const char *Breakpoint::GetConditionText() const { - return m_options.GetConditionText(); +const StopCondition &Breakpoint::GetCondition() const { + return m_options.GetCondition(); } // This function is used when "baton" doesn't need to be freed diff --git a/lldb/source/Breakpoint/BreakpointLocation.cpp b/lldb/source/Breakpoint/BreakpointLocation.cpp index 7ac9c8f5ddc4d..443d4f50833d3 100644 --- a/lldb/source/Breakpoint/BreakpointLocation.cpp +++ 
b/lldb/source/Breakpoint/BreakpointLocation.cpp @@ -203,14 +203,13 @@ void BreakpointLocation::ClearCallback() { GetLocationOptions().ClearCallback(); } -void BreakpointLocation::SetCondition(const char *condition) { - GetLocationOptions().SetCondition(condition); +void BreakpointLocation::SetCondition(StopCondition condition) { + GetLocationOptions().SetCondition(std::move(condition)); SendBreakpointLocationChangedEvent(eBreakpointEventTypeConditionChanged); } -const char *BreakpointLocation::GetConditionText(size_t *hash) const { - return GetOptionsSpecifyingKind(BreakpointOptions::eCondition) - .GetConditionText(hash); +const StopCondition &BreakpointLocation::GetCondition() const { + return GetOptionsSpecifyingKind(BreakpointOptions::eCondition).GetCondition(); } bool BreakpointLocation::ConditionSaysStop(ExecutionContext &exe_ctx, @@ -219,10 +218,9 @@ bool BreakpointLocation::ConditionSaysStop(ExecutionContext &exe_ctx, std::lock_guard guard(m_condition_mutex); - size_t condition_hash; - const char *condition_text = GetConditionText(&condition_hash); + StopCondition condition = GetCondition(); - if (!condition_text) { + if (!condition) { m_user_expression_sp.reset(); return false; } @@ -231,19 +229,22 @@ bool BreakpointLocation::ConditionSaysStop(ExecutionContext &exe_ctx, DiagnosticManager diagnostics; - if (condition_hash != m_condition_hash || !m_user_expression_sp || + if (condition.GetHash() != m_condition_hash || !m_user_expression_sp || !m_user_expression_sp->IsParseCacheable() || !m_user_expression_sp->MatchesContext(exe_ctx)) { - LanguageType language = eLanguageTypeUnknown; - // See if we can figure out the language from the frame, otherwise use the - // default language: - CompileUnit *comp_unit = m_address.CalculateSymbolContextCompileUnit(); - if (comp_unit) - language = comp_unit->GetLanguage(); + LanguageType language = condition.GetLanguage(); + if (language == lldb::eLanguageTypeUnknown) { + // See if we can figure out the language from the 
frame, otherwise use the + // default language: + if (CompileUnit *comp_unit = + m_address.CalculateSymbolContextCompileUnit()) + language = comp_unit->GetLanguage(); + } m_user_expression_sp.reset(GetTarget().GetUserExpressionForLanguage( - condition_text, llvm::StringRef(), language, Expression::eResultTypeAny, - EvaluateExpressionOptions(), nullptr, error)); + condition.GetText(), llvm::StringRef(), language, + Expression::eResultTypeAny, EvaluateExpressionOptions(), nullptr, + error)); if (error.Fail()) { LLDB_LOGF(log, "Error getting condition expression: %s.", error.AsCString()); @@ -262,7 +263,7 @@ bool BreakpointLocation::ConditionSaysStop(ExecutionContext &exe_ctx, return true; } - m_condition_hash = condition_hash; + m_condition_hash = condition.GetHash(); } // We need to make sure the user sees any parse errors in their condition, so diff --git a/lldb/source/Breakpoint/BreakpointOptions.cpp b/lldb/source/Breakpoint/BreakpointOptions.cpp index 08e48c4921078..b0b794f0f93bf 100644 --- a/lldb/source/Breakpoint/BreakpointOptions.cpp +++ b/lldb/source/Breakpoint/BreakpointOptions.cpp @@ -106,8 +106,8 @@ const char *BreakpointOptions::g_option_names[( BreakpointOptions::BreakpointOptions(bool all_flags_set) : m_callback(nullptr), m_baton_is_command_baton(false), m_callback_is_synchronous(false), m_enabled(true), m_one_shot(false), - m_ignore_count(0), m_condition_text_hash(0), m_inject_condition(false), - m_auto_continue(false), m_set_flags(0) { + m_ignore_count(0), m_inject_condition(false), m_auto_continue(false), + m_set_flags(0) { if (all_flags_set) m_set_flags.Set(~((Flags::ValueType)0)); } @@ -117,11 +117,11 @@ BreakpointOptions::BreakpointOptions(const char *condition, bool enabled, bool auto_continue) : m_callback(nullptr), m_baton_is_command_baton(false), m_callback_is_synchronous(false), m_enabled(enabled), - m_one_shot(one_shot), m_ignore_count(ignore), m_condition_text_hash(0), + m_one_shot(one_shot), m_ignore_count(ignore), m_condition(condition), 
m_inject_condition(false), m_auto_continue(auto_continue) { m_set_flags.Set(eEnabled | eIgnoreCount | eOneShot | eAutoContinue); if (condition && *condition != '\0') { - SetCondition(condition); + SetCondition(StopCondition(condition)); } } @@ -135,8 +135,7 @@ BreakpointOptions::BreakpointOptions(const BreakpointOptions &rhs) m_auto_continue(rhs.m_auto_continue), m_set_flags(rhs.m_set_flags) { if (rhs.m_thread_spec_up != nullptr) m_thread_spec_up = std::make_unique(*rhs.m_thread_spec_up); - m_condition_text = rhs.m_condition_text; - m_condition_text_hash = rhs.m_condition_text_hash; + m_condition = rhs.m_condition; } // BreakpointOptions assignment operator @@ -151,8 +150,7 @@ operator=(const BreakpointOptions &rhs) { m_ignore_count = rhs.m_ignore_count; if (rhs.m_thread_spec_up != nullptr) m_thread_spec_up = std::make_unique(*rhs.m_thread_spec_up); - m_condition_text = rhs.m_condition_text; - m_condition_text_hash = rhs.m_condition_text_hash; + m_condition = rhs.m_condition; m_inject_condition = rhs.m_inject_condition; m_auto_continue = rhs.m_auto_continue; m_set_flags = rhs.m_set_flags; @@ -187,13 +185,11 @@ void BreakpointOptions::CopyOverSetOptions(const BreakpointOptions &incoming) if (incoming.m_set_flags.Test(eCondition)) { // If we're copying over an empty condition, mark it as unset. 
- if (incoming.m_condition_text.empty()) { - m_condition_text.clear(); - m_condition_text_hash = 0; + if (!incoming.m_condition) { + m_condition = StopCondition(); m_set_flags.Clear(eCondition); } else { - m_condition_text = incoming.m_condition_text; - m_condition_text_hash = incoming.m_condition_text_hash; + m_condition = incoming.m_condition; m_set_flags.Set(eCondition); } } @@ -363,7 +359,7 @@ StructuredData::ObjectSP BreakpointOptions::SerializeToStructuredData() { m_ignore_count); if (m_set_flags.Test(eCondition)) options_dict_sp->AddStringItem(GetKey(OptionNames::ConditionText), - m_condition_text); + m_condition.GetText()); if (m_set_flags.Test(eCallback) && m_baton_is_command_baton) { auto cmd_baton = @@ -464,29 +460,21 @@ bool BreakpointOptions::GetCommandLineCallbacks(StringList &command_list) { return true; } -void BreakpointOptions::SetCondition(const char *condition) { - if (!condition || condition[0] == '\0') { - condition = ""; +void BreakpointOptions::SetCondition(StopCondition condition) { + if (!condition) m_set_flags.Clear(eCondition); - } else m_set_flags.Set(eCondition); - m_condition_text.assign(condition); - std::hash hasher; - m_condition_text_hash = hasher(m_condition_text); + m_condition = std::move(condition); } -const char *BreakpointOptions::GetConditionText(size_t *hash) const { - if (!m_condition_text.empty()) { - if (hash) - *hash = m_condition_text_hash; - - return m_condition_text.c_str(); - } - return nullptr; +const StopCondition &BreakpointOptions::GetCondition() const { + return m_condition; } +StopCondition &BreakpointOptions::GetCondition() { return m_condition; } + const ThreadSpec *BreakpointOptions::GetThreadSpecNoCreate() const { return m_thread_spec_up.get(); } @@ -555,10 +543,10 @@ void BreakpointOptions::GetDescription(Stream *s, s->GetIndentLevel()); } } - if (!m_condition_text.empty()) { + if (m_condition) { if (level != eDescriptionLevelBrief) { s->EOL(); - s->Printf("Condition: %s\n", m_condition_text.c_str()); + 
s->Printf("Condition: %s\n", m_condition.GetText().data()); } } } @@ -652,5 +640,5 @@ void BreakpointOptions::Clear() m_baton_is_command_baton = false; m_callback_is_synchronous = false; m_enabled = false; - m_condition_text.clear(); + m_condition = StopCondition(); } diff --git a/lldb/source/Commands/CommandObjectBreakpoint.cpp b/lldb/source/Commands/CommandObjectBreakpoint.cpp index 2440a7e46e961..38ec375c03070 100644 --- a/lldb/source/Commands/CommandObjectBreakpoint.cpp +++ b/lldb/source/Commands/CommandObjectBreakpoint.cpp @@ -72,7 +72,7 @@ class lldb_private::BreakpointOptionGroup : public OptionGroup { case 'c': // Normally an empty breakpoint condition marks is as unset. But we need // to say it was passed in. - m_bp_opts.SetCondition(option_arg.str().c_str()); + m_bp_opts.GetCondition().SetText(option_arg.str()); m_bp_opts.m_set_flags.Set(BreakpointOptions::eCondition); break; case 'C': @@ -154,6 +154,21 @@ class lldb_private::BreakpointOptionGroup : public OptionGroup { m_bp_opts.GetThreadSpec()->SetIndex(thread_index); } } break; + case 'Y': { + LanguageType language = Language::GetLanguageTypeFromString(option_arg); + + LanguageSet languages_for_expressions = + Language::GetLanguagesSupportingTypeSystemsForExpressions(); + if (language == eLanguageTypeUnknown) + error = Status::FromError(CreateOptionParsingError( + option_arg, short_option, long_option, "invalid language")); + else if (!languages_for_expressions[language]) + error = Status::FromError( + CreateOptionParsingError(option_arg, short_option, long_option, + "no expression support for language")); + else + m_bp_opts.GetCondition().SetLanguage(language); + } break; default: llvm_unreachable("Unimplemented option"); } diff --git a/lldb/source/Commands/Options.td b/lldb/source/Commands/Options.td index e543566e4ff1e..acb741081cac3 100644 --- a/lldb/source/Commands/Options.td +++ b/lldb/source/Commands/Options.td @@ -95,6 +95,12 @@ let Command = "breakpoint modify" in { def 
breakpoint_modify_condition : Option<"condition", "c">, Group<1>, Arg<"Expression">, Desc<"The breakpoint stops only if this condition " "expression evaluates to true.">; + def breakpoint_modify_condition_language + : Option<"condition-language", "Y">, + Group<1>, + Arg<"Language">, + Desc<"Specifies the Language to use when executing the breakpoint's " + "condition expression.">; def breakpoint_modify_auto_continue : Option<"auto-continue", "G">, Group<1>, Arg<"Boolean">, Desc<"The breakpoint will auto-continue after running its commands.">; diff --git a/lldb/source/Host/common/HostThread.cpp b/lldb/source/Host/common/HostThread.cpp index eec029be1c091..8822be016b0a1 100644 --- a/lldb/source/Host/common/HostThread.cpp +++ b/lldb/source/Host/common/HostThread.cpp @@ -44,3 +44,9 @@ lldb::thread_result_t HostThread::GetResult() const { bool HostThread::EqualsThread(lldb::thread_t thread) const { return m_native_thread->EqualsThread(thread); } + +bool HostThread::HasThread() const { + if (!m_native_thread) + return false; + return m_native_thread->GetSystemHandle() != LLDB_INVALID_HOST_THREAD; +} diff --git a/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp b/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp index ed6047f8f4ef3..644084ba8d57a 100644 --- a/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp +++ b/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp @@ -1146,7 +1146,7 @@ class InstructionLLVMC : public lldb_private::Instruction { } } - if (Log *log = GetLog(LLDBLog::Process)) { + if (Log *log = GetLog(LLDBLog::Process | LLDBLog::Disassembler)) { StreamString ss; ss.Printf("[%s] expands to %zu operands:\n", operands_string, diff --git a/lldb/source/Plugins/Language/CPlusPlus/CMakeLists.txt b/lldb/source/Plugins/Language/CPlusPlus/CMakeLists.txt index 3ec3cad4b8178..296159ea28407 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/CMakeLists.txt +++ b/lldb/source/Plugins/Language/CPlusPlus/CMakeLists.txt @@ -34,6 
+34,7 @@ add_lldb_library(lldbPluginCPlusPlusLanguage PLUGIN LibStdcppTuple.cpp LibStdcppUniquePointer.cpp MsvcStl.cpp + MsvcStlSmartPointer.cpp MSVCUndecoratedNameParser.cpp LINK_COMPONENTS diff --git a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp index 17963c0273ba8..2db3e6f0ca315 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp @@ -1540,16 +1540,6 @@ static void LoadLibStdcppFormatters(lldb::TypeCategoryImplSP cpp_category_sp) { lldb_private::formatters::LibStdcppUniquePtrSyntheticFrontEndCreator, "std::unique_ptr synthetic children", "^std::unique_ptr<.+>(( )?&)?$", stl_synth_flags, true); - AddCXXSynthetic( - cpp_category_sp, - lldb_private::formatters::LibStdcppSharedPtrSyntheticFrontEndCreator, - "std::shared_ptr synthetic children", "^std::shared_ptr<.+>(( )?&)?$", - stl_synth_flags, true); - AddCXXSynthetic( - cpp_category_sp, - lldb_private::formatters::LibStdcppSharedPtrSyntheticFrontEndCreator, - "std::weak_ptr synthetic children", "^std::weak_ptr<.+>(( )?&)?$", - stl_synth_flags, true); AddCXXSynthetic( cpp_category_sp, lldb_private::formatters::LibStdcppTupleSyntheticFrontEndCreator, @@ -1580,14 +1570,6 @@ static void LoadLibStdcppFormatters(lldb::TypeCategoryImplSP cpp_category_sp) { lldb_private::formatters::LibStdcppUniquePointerSummaryProvider, "libstdc++ std::unique_ptr summary provider", "^std::unique_ptr<.+>(( )?&)?$", stl_summary_flags, true); - AddCXXSummary(cpp_category_sp, - lldb_private::formatters::LibStdcppSmartPointerSummaryProvider, - "libstdc++ std::shared_ptr summary provider", - "^std::shared_ptr<.+>(( )?&)?$", stl_summary_flags, true); - AddCXXSummary(cpp_category_sp, - lldb_private::formatters::LibStdcppSmartPointerSummaryProvider, - "libstdc++ std::weak_ptr summary provider", - "^std::weak_ptr<.+>(( )?&)?$", stl_summary_flags, true); 
AddCXXSummary(cpp_category_sp, lldb_private::formatters::StdlibCoroutineHandleSummaryProvider, "libstdc++ std::coroutine_handle summary provider", @@ -1598,6 +1580,25 @@ static void LoadLibStdcppFormatters(lldb::TypeCategoryImplSP cpp_category_sp) { "^std::optional<.+>(( )?&)?$", stl_summary_flags, true); } +static lldb_private::SyntheticChildrenFrontEnd * +GenericSmartPointerSyntheticFrontEndCreator(CXXSyntheticChildren *children, + lldb::ValueObjectSP valobj_sp) { + if (!valobj_sp) + return nullptr; + + if (IsMsvcStlSmartPointer(*valobj_sp)) + return MsvcStlSmartPointerSyntheticFrontEndCreator(valobj_sp); + return LibStdcppSharedPtrSyntheticFrontEndCreator(children, valobj_sp); +} + +static bool +GenericSmartPointerSummaryProvider(ValueObject &valobj, Stream &stream, + const TypeSummaryOptions &options) { + if (IsMsvcStlSmartPointer(valobj)) + return MsvcStlSmartPointerSummaryProvider(valobj, stream, options); + return LibStdcppSmartPointerSummaryProvider(valobj, stream, options); +} + /// Load formatters that are formatting types from more than one STL static void LoadCommonStlFormatters(lldb::TypeCategoryImplSP cpp_category_sp) { if (!cpp_category_sp) @@ -1611,6 +1612,10 @@ static void LoadCommonStlFormatters(lldb::TypeCategoryImplSP cpp_category_sp) { .SetDontShowValue(false) .SetShowMembersOneLiner(false) .SetHideItemNames(false); + SyntheticChildren::Flags stl_synth_flags; + stl_synth_flags.SetCascades(true).SetSkipPointers(false).SetSkipReferences( + false); + using StringElementType = StringPrinter::StringElementType; RegisterStdStringSummaryProvider( @@ -1636,6 +1641,20 @@ static void LoadCommonStlFormatters(lldb::TypeCategoryImplSP cpp_category_sp) { return LibStdcppStringSummaryProvider(valobj, stream, options); }, "MSVC STL/libstdc++ std::wstring summary provider")); + + AddCXXSynthetic(cpp_category_sp, GenericSmartPointerSyntheticFrontEndCreator, + "std::shared_ptr synthetic children", + "^std::shared_ptr<.+>(( )?&)?$", stl_synth_flags, true); + 
AddCXXSynthetic(cpp_category_sp, GenericSmartPointerSyntheticFrontEndCreator, + "std::weak_ptr synthetic children", + "^std::weak_ptr<.+>(( )?&)?$", stl_synth_flags, true); + + AddCXXSummary(cpp_category_sp, GenericSmartPointerSummaryProvider, + "MSVC STL/libstdc++ std::shared_ptr summary provider", + "^std::shared_ptr<.+>(( )?&)?$", stl_summary_flags, true); + AddCXXSummary(cpp_category_sp, GenericSmartPointerSummaryProvider, + "MSVC STL/libstdc++ std::weak_ptr summary provider", + "^std::weak_ptr<.+>(( )?&)?$", stl_summary_flags, true); } static void LoadMsvcStlFormatters(lldb::TypeCategoryImplSP cpp_category_sp) { diff --git a/lldb/source/Plugins/Language/CPlusPlus/Generic.cpp b/lldb/source/Plugins/Language/CPlusPlus/Generic.cpp index b237a8a27090c..bfe86e4665f65 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/Generic.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/Generic.cpp @@ -7,6 +7,8 @@ //===---------------------------------------------------------------------===// #include "Generic.h" +#include "LibStdcpp.h" +#include "MsvcStl.h" lldb::ValueObjectSP lldb_private::formatters::GetDesugaredSmartPointerValue( ValueObject &ptr, ValueObject &container) { @@ -16,7 +18,8 @@ lldb::ValueObjectSP lldb_private::formatters::GetDesugaredSmartPointerValue( auto arg = container_type.GetTypeTemplateArgument(0); if (!arg) - return nullptr; + // If there isn't enough debug info, use the pointer type as is + return ptr.GetSP(); return ptr.Cast(arg.GetPointerType()); } diff --git a/lldb/source/Plugins/Language/CPlusPlus/MsvcStl.h b/lldb/source/Plugins/Language/CPlusPlus/MsvcStl.h index e4ed923033aa7..edf3f4e8a5387 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/MsvcStl.h +++ b/lldb/source/Plugins/Language/CPlusPlus/MsvcStl.h @@ -29,6 +29,14 @@ bool MsvcStlWStringSummaryProvider( ValueObject &valobj, Stream &stream, const TypeSummaryOptions &options); // VC 2015+ std::wstring +// MSVC STL std::shared_ptr<> and std::weak_ptr<> +bool IsMsvcStlSmartPointer(ValueObject 
&valobj); +bool MsvcStlSmartPointerSummaryProvider(ValueObject &valobj, Stream &stream, + const TypeSummaryOptions &options); + +lldb_private::SyntheticChildrenFrontEnd * +MsvcStlSmartPointerSyntheticFrontEndCreator(lldb::ValueObjectSP valobj_sp); + } // namespace formatters } // namespace lldb_private diff --git a/lldb/source/Plugins/Language/CPlusPlus/MsvcStlSmartPointer.cpp b/lldb/source/Plugins/Language/CPlusPlus/MsvcStlSmartPointer.cpp new file mode 100644 index 0000000000000..b1aecc4b6611a --- /dev/null +++ b/lldb/source/Plugins/Language/CPlusPlus/MsvcStlSmartPointer.cpp @@ -0,0 +1,165 @@ +//===-- MsvcStlSmartPointer.cpp -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Generic.h" +#include "MsvcStl.h" + +#include "lldb/DataFormatters/FormattersHelpers.h" +#include "lldb/DataFormatters/TypeSynthetic.h" + +using namespace lldb; + +bool lldb_private::formatters::IsMsvcStlSmartPointer(ValueObject &valobj) { + if (auto valobj_sp = valobj.GetNonSyntheticValue()) + return valobj_sp->GetChildMemberWithName("_Ptr") != nullptr; + + return false; +} + +bool lldb_private::formatters::MsvcStlSmartPointerSummaryProvider( + ValueObject &valobj, Stream &stream, const TypeSummaryOptions &options) { + ValueObjectSP valobj_sp(valobj.GetNonSyntheticValue()); + if (!valobj_sp) + return false; + + ValueObjectSP ptr_sp(valobj_sp->GetChildMemberWithName("_Ptr")); + ValueObjectSP ctrl_sp(valobj_sp->GetChildMemberWithName("_Rep")); + if (!ctrl_sp || !ptr_sp) + return false; + + DumpCxxSmartPtrPointerSummary(stream, *ptr_sp, options); + + bool success; + uint64_t ctrl_addr = ctrl_sp->GetValueAsUnsigned(0, &success); + // Empty control field (expired) + if (!success || ctrl_addr == 0) 
+ return true; + + uint64_t uses = 0; + if (auto uses_sp = ctrl_sp->GetChildMemberWithName("_Uses")) { + bool success; + uses = uses_sp->GetValueAsUnsigned(0, &success); + if (!success) + return false; + + stream.Printf(" strong=%" PRIu64, uses); + } + + // _Weaks is the number of weak references - (_Uses != 0). + if (auto weak_count_sp = ctrl_sp->GetChildMemberWithName("_Weaks")) { + bool success; + uint64_t count = weak_count_sp->GetValueAsUnsigned(0, &success); + if (!success) + return false; + + stream.Printf(" weak=%" PRIu64, count - (uses != 0)); + } + + return true; +} + +namespace lldb_private { +namespace formatters { + +class MsvcStlSmartPointerSyntheticFrontEnd : public SyntheticChildrenFrontEnd { +public: + MsvcStlSmartPointerSyntheticFrontEnd(lldb::ValueObjectSP valobj_sp); + + llvm::Expected CalculateNumChildren() override; + + lldb::ValueObjectSP GetChildAtIndex(uint32_t idx) override; + + lldb::ChildCacheState Update() override; + + llvm::Expected GetIndexOfChildWithName(ConstString name) override; + + ~MsvcStlSmartPointerSyntheticFrontEnd() override; + +private: + ValueObject *m_ptr_obj = nullptr; +}; + +} // namespace formatters +} // namespace lldb_private + +lldb_private::formatters::MsvcStlSmartPointerSyntheticFrontEnd:: + MsvcStlSmartPointerSyntheticFrontEnd(lldb::ValueObjectSP valobj_sp) + : SyntheticChildrenFrontEnd(*valobj_sp) { + if (valobj_sp) + Update(); +} + +llvm::Expected lldb_private::formatters:: + MsvcStlSmartPointerSyntheticFrontEnd::CalculateNumChildren() { + return (m_ptr_obj ? 
1 : 0); +} + +lldb::ValueObjectSP +lldb_private::formatters::MsvcStlSmartPointerSyntheticFrontEnd::GetChildAtIndex( + uint32_t idx) { + if (!m_ptr_obj) + return lldb::ValueObjectSP(); + + ValueObjectSP valobj_sp = m_backend.GetSP(); + if (!valobj_sp) + return lldb::ValueObjectSP(); + + if (idx == 0) + return m_ptr_obj->GetSP(); + + if (idx == 1) { + Status status; + ValueObjectSP value_sp = m_ptr_obj->Dereference(status); + if (status.Success()) + return value_sp; + } + + return lldb::ValueObjectSP(); +} + +lldb::ChildCacheState +lldb_private::formatters::MsvcStlSmartPointerSyntheticFrontEnd::Update() { + m_ptr_obj = nullptr; + + ValueObjectSP valobj_sp = m_backend.GetSP(); + if (!valobj_sp) + return lldb::ChildCacheState::eRefetch; + + auto ptr_obj_sp = valobj_sp->GetChildMemberWithName("_Ptr"); + if (!ptr_obj_sp) + return lldb::ChildCacheState::eRefetch; + + auto cast_ptr_sp = GetDesugaredSmartPointerValue(*ptr_obj_sp, *valobj_sp); + if (!cast_ptr_sp) + return lldb::ChildCacheState::eRefetch; + + m_ptr_obj = cast_ptr_sp->Clone(ConstString("pointer")).get(); + return lldb::ChildCacheState::eRefetch; +} + +llvm::Expected +lldb_private::formatters::MsvcStlSmartPointerSyntheticFrontEnd:: + GetIndexOfChildWithName(ConstString name) { + if (name == "pointer") + return 0; + + if (name == "object" || name == "$$dereference$$") + return 1; + + return llvm::createStringError("Type has no child named '%s'", + name.AsCString()); +} + +lldb_private::formatters::MsvcStlSmartPointerSyntheticFrontEnd:: + ~MsvcStlSmartPointerSyntheticFrontEnd() = default; + +lldb_private::SyntheticChildrenFrontEnd * +lldb_private::formatters::MsvcStlSmartPointerSyntheticFrontEndCreator( + lldb::ValueObjectSP valobj_sp) { + return new MsvcStlSmartPointerSyntheticFrontEnd(valobj_sp); +} diff --git a/lldb/source/Target/Process.cpp b/lldb/source/Target/Process.cpp index bba1230c79920..2aa02fd58335e 100644 --- a/lldb/source/Target/Process.cpp +++ b/lldb/source/Target/Process.cpp @@ -1271,7 +1271,7 @@ 
uint32_t Process::AssignIndexIDToThread(uint64_t thread_id) { } StateType Process::GetState() { - if (CurrentThreadIsPrivateStateThread()) + if (CurrentThreadPosesAsPrivateStateThread()) return m_private_state.GetValue(); else return m_public_state.GetValue(); @@ -3144,16 +3144,19 @@ void Process::CompleteAttach() { } } - if (!m_os_up) { + // If we don't have an operating system plugin loaded yet, see if + // LoadOperatingSystemPlugin can find one (and stuff it in m_os_up). + if (!m_os_up) LoadOperatingSystemPlugin(false); - if (m_os_up) { - // Somebody might have gotten threads before now, but we need to force the - // update after we've loaded the OperatingSystem plugin or it won't get a - // chance to process the threads. - m_thread_list.Clear(); - UpdateThreadListIfNeeded(); - } + + if (m_os_up) { + // Somebody might have gotten threads before we loaded the OS Plugin above, + // so we need to force the update now or the newly loaded plugin won't get + // a chance to process the threads. + m_thread_list.Clear(); + UpdateThreadListIfNeeded(); } + // Figure out which one is the executable, and set that in our target: ModuleSP new_executable_module_sp; for (ModuleSP module_sp : GetTarget().GetImages().Modules()) { @@ -5856,6 +5859,13 @@ bool Process::CurrentThreadIsPrivateStateThread() return m_private_state_thread.EqualsThread(Host::GetCurrentThread()); } +bool Process::CurrentThreadPosesAsPrivateStateThread() { + // If we haven't started up the private state thread yet, then whatever thread + // is fetching this event should be temporarily the private state thread. 
+ if (!m_private_state_thread.HasThread()) + return true; + return m_private_state_thread.EqualsThread(Host::GetCurrentThread()); +} void Process::Flush() { m_thread_list.Flush(); diff --git a/lldb/source/Target/StackFrameList.cpp b/lldb/source/Target/StackFrameList.cpp index 9c6208e9e0a65..16cd2548c2784 100644 --- a/lldb/source/Target/StackFrameList.cpp +++ b/lldb/source/Target/StackFrameList.cpp @@ -723,7 +723,7 @@ void StackFrameList::SelectMostRelevantFrame() { // Don't call into the frame recognizers on the private state thread as // they can cause code to run in the target, and that can cause deadlocks // when fetching stop events for the expression. - if (m_thread.GetProcess()->CurrentThreadIsPrivateStateThread()) + if (m_thread.GetProcess()->CurrentThreadPosesAsPrivateStateThread()) return; Log *log = GetLog(LLDBLog::Thread); diff --git a/lldb/source/Target/StopInfo.cpp b/lldb/source/Target/StopInfo.cpp index 3160446ae1d17..19f89b8246926 100644 --- a/lldb/source/Target/StopInfo.cpp +++ b/lldb/source/Target/StopInfo.cpp @@ -465,7 +465,7 @@ class StopInfoBreakpoint : public StopInfo { // should stop, then we'll run the callback for the breakpoint. If // the callback says we shouldn't stop that will win. 
- if (bp_loc_sp->GetConditionText() == nullptr) + if (!bp_loc_sp->GetCondition()) actually_hit_any_locations = true; else { Status condition_error; @@ -484,7 +484,7 @@ class StopInfoBreakpoint : public StopInfo { strm << "stopped due to an error evaluating condition of " "breakpoint "; bp_loc_sp->GetDescription(&strm, eDescriptionLevelBrief); - strm << ": \"" << bp_loc_sp->GetConditionText() << "\"\n"; + strm << ": \"" << bp_loc_sp->GetCondition().GetText() << "\"\n"; strm << err_str; Debugger::ReportError( diff --git a/lldb/source/Utility/LLDBLog.cpp b/lldb/source/Utility/LLDBLog.cpp index b193bd4eb07dc..613dae42064a8 100644 --- a/lldb/source/Utility/LLDBLog.cpp +++ b/lldb/source/Utility/LLDBLog.cpp @@ -64,6 +64,9 @@ static constexpr Log::Category g_categories[] = { {"log symbol on-demand related activities"}, LLDBLog::OnDemand}, {{"source"}, {"log source related activities"}, LLDBLog::Source}, + {{"disassembler"}, + {"log disassembler related activities"}, + LLDBLog::Disassembler}, }; static Log::Channel g_log_channel(g_categories, diff --git a/lldb/test/API/functionalities/breakpoint/breakpoint_conditions/TestBreakpointConditions.py b/lldb/test/API/functionalities/breakpoint/breakpoint_conditions/TestBreakpointConditions.py index 4e7a8ccb9fbeb..a4c9c49bc89b6 100644 --- a/lldb/test/API/functionalities/breakpoint/breakpoint_conditions/TestBreakpointConditions.py +++ b/lldb/test/API/functionalities/breakpoint/breakpoint_conditions/TestBreakpointConditions.py @@ -19,6 +19,16 @@ def test_breakpoint_condition_inline_and_run_command(self): self.build() self.breakpoint_conditions(inline=True) + def test_breakpoint_condition_and_run_command_language(self): + """Exercise breakpoint condition with 'breakpoint modify -c id'.""" + self.build() + self.breakpoint_conditions(cpp=True) + + def test_breakpoint_condition_inline_and_run_command_language(self): + """Exercise breakpoint condition inline with 'breakpoint set'.""" + self.build() + 
self.breakpoint_conditions(inline=True, cpp=True) + @add_test_categories(["pyapi"]) def test_breakpoint_condition_and_python_api(self): """Use Python APIs to set breakpoint conditions.""" @@ -42,17 +52,24 @@ def setUp(self): "main.c", "// Find the line number of c's parent call here." ) - def breakpoint_conditions(self, inline=False): + def breakpoint_conditions(self, inline=False, cpp=False): """Exercise breakpoint condition with 'breakpoint modify -c id'.""" exe = self.getBuildArtifact("a.out") self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET) + if cpp: + condition = "&val != nullptr && val == 3" + cmd_args = " -c '{}' -Y c++".format(condition) + else: + condition = "val == 3" + cmd_args = "-c '{}'".format(condition) + if inline: # Create a breakpoint by function name 'c' and set the condition. lldbutil.run_break_set_by_symbol( self, "c", - extra_options="-c 'val == 3'", + extra_options=cmd_args, num_expected_locations=1, sym_exact=True, ) @@ -63,7 +80,7 @@ def breakpoint_conditions(self, inline=False): ) # And set a condition on the breakpoint to stop on when 'val == 3'. - self.runCmd("breakpoint modify -c 'val == 3' 1") + self.runCmd("breakpoint modify " + cmd_args + " 1") # Now run the program. 
self.runCmd("run", RUN_SUCCEEDED) @@ -82,7 +99,11 @@ def breakpoint_conditions(self, inline=False): self.expect( "breakpoint list -f", BREAKPOINT_HIT_ONCE, - substrs=["resolved = 1", "Condition: val == 3", "hit count = 1"], + substrs=[ + "resolved = 1", + "Condition: {}".format(condition), + "hit count = 1", + ], ) # The frame #0 should correspond to main.c:36, the executable statement diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/iterator/Makefile b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/iterator/Makefile similarity index 54% rename from lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/iterator/Makefile rename to lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/iterator/Makefile index 564cbada74e08..99998b20bcb05 100644 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/iterator/Makefile +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/iterator/Makefile @@ -1,6 +1,3 @@ CXX_SOURCES := main.cpp -USE_LIBCPP := 1 - -CXXFLAGS_EXTRAS := -O0 include Makefile.rules diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/iterator/TestDataFormatterLibccIterator.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/iterator/TestDataFormatterStdIterator.py similarity index 69% rename from lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/iterator/TestDataFormatterLibccIterator.py rename to lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/iterator/TestDataFormatterStdIterator.py index c43ee46fb658a..373b1c9a2c8e8 100644 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/iterator/TestDataFormatterLibccIterator.py +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/iterator/TestDataFormatterStdIterator.py @@ -2,14 +2,13 @@ Test lldb data formatter subsystem. 
""" - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * from lldbsuite.test import lldbutil -class LibcxxIteratorDataFormatterTestCase(TestBase): +class StdIteratorDataFormatterTestCase(TestBase): def setUp(self): # Call super's setUp(). TestBase.setUp(self) @@ -17,10 +16,8 @@ def setUp(self): self.line = line_number("main.cpp", "// Set break point at this line.") self.namespace = "std" - @add_test_categories(["libc++"]) - def test_with_run_command(self): - """Test that libc++ iterators format properly.""" - self.build() + def do_test(self): + """Test that iterators format properly.""" self.runCmd("file " + self.getBuildArtifact("a.out"), CURRENT_EXECUTABLE_SET) lldbutil.run_break_set_by_file_and_line( @@ -69,18 +66,12 @@ def cleanup(): self.expect("frame variable svI", substrs=['item = "hello"']) self.expect("expr svI", substrs=['item = "hello"']) - self.expect("frame variable iiumI", substrs=["first = 61453", "second = 51966"]) - self.expect("expr iiumI", substrs=["first = 61453", "second = 51966"]) - - self.expect("frame variable siumI", substrs=['first = "hello"', "second = 137"]) - self.expect("expr siumI", substrs=['first = "hello"', "second = 137"]) - - self.expect("frame variable iiumI.first", substrs=["first = 61453"]) - self.expect("frame variable iiumI.first", substrs=["second"], matching=False) - self.expect("frame variable iiumI.second", substrs=["second = 51966"]) - self.expect("frame variable iiumI.second", substrs=["first"], matching=False) - - self.expect("frame variable siumI.first", substrs=['first = "hello"']) - self.expect("frame variable siumI.first", substrs=["second"], matching=False) - self.expect("frame variable siumI.second", substrs=["second = 137"]) - self.expect("frame variable siumI.second", substrs=["first"], matching=False) + @add_test_categories(["libc++"]) + def test_libcxx(self): + self.build(dictionary={"USE_LIBCPP": 1}) + self.do_test() + + @add_test_categories(["libstdcxx"]) + def 
test_libstdcxx(self): + self.build(dictionary={"USE_LIBSTDCPP": 1}) + self.do_test() diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/iterator/main.cpp b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/iterator/main.cpp similarity index 67% rename from lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/iterator/main.cpp rename to lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/iterator/main.cpp index e53c0f167c325..8c11df075f247 100644 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/iterator/main.cpp +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/iterator/main.cpp @@ -5,16 +5,11 @@ typedef std::map intint_map; typedef std::map strint_map; -typedef std::unordered_map intint_umap; -typedef std::unordered_map strint_umap; - typedef std::vector int_vector; typedef std::vector string_vector; typedef intint_map::iterator ii_map_iter; typedef strint_map::iterator si_map_iter; -typedef intint_umap::iterator ii_umap_iter; -typedef strint_umap::iterator si_umap_iter; typedef int_vector::iterator ivter; typedef string_vector::iterator svter; @@ -26,12 +21,6 @@ int main() { strint_map sim; sim["world"] = 42; - intint_umap iium; - iium[0xF00D] = 0xCAFE; - - strint_umap sium; - sium["hello"] = 137; - int_vector iv; iv.push_back(3); @@ -40,8 +29,6 @@ int main() { ii_map_iter iimI = iim.begin(); si_map_iter simI = sim.begin(); - ii_umap_iter iiumI = iium.begin(); - si_umap_iter siumI = sium.begin(); ivter ivI = iv.begin(); svter svI = sv.begin(); diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/shared_ptr/TestDataFormatterStdSharedPtr.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/shared_ptr/TestDataFormatterStdSharedPtr.py index 3d8569da0332e..d71fbf8d5f81a 100644 --- 
a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/shared_ptr/TestDataFormatterStdSharedPtr.py +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/shared_ptr/TestDataFormatterStdSharedPtr.py @@ -118,3 +118,9 @@ def test_libcxx(self): def test_libstdcxx(self): self.build(dictionary={"USE_LIBSTDCPP": 1}) self.do_test() + + @add_test_categories(["msvcstl"]) + def test_msvcstl(self): + # No flags, because the "msvcstl" category checks that the MSVC STL is used by default. + self.build() + self.do_test() diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/iterator/Makefile b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/iterator/Makefile deleted file mode 100644 index c825977b1a5dc..0000000000000 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/iterator/Makefile +++ /dev/null @@ -1,6 +0,0 @@ -CXX_SOURCES := main.cpp - -CFLAGS_EXTRAS := -O0 -USE_LIBSTDCPP := 1 - -include Makefile.rules diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/iterator/TestDataFormatterStdIterator.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/iterator/TestDataFormatterStdIterator.py deleted file mode 100644 index a0d34fb56f970..0000000000000 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/iterator/TestDataFormatterStdIterator.py +++ /dev/null @@ -1,60 +0,0 @@ -""" -Test lldb data formatter subsystem. -""" - - -import lldb -from lldbsuite.test.decorators import * -from lldbsuite.test.lldbtest import * -from lldbsuite.test import lldbutil - - -class StdIteratorDataFormatterTestCase(TestBase): - def setUp(self): - # Call super's setUp(). - TestBase.setUp(self) - # Find the line number to break at. 
- self.line = line_number("main.cpp", "// Set break point at this line.") - - @add_test_categories(["libstdcxx"]) - @expectedFailureAll(bugnumber="llvm.org/pr50861", compiler="gcc") - def test_with_run_command(self): - """Test that libstdcpp iterators format properly.""" - self.build() - self.runCmd("file " + self.getBuildArtifact("a.out"), CURRENT_EXECUTABLE_SET) - - lldbutil.run_break_set_by_file_and_line( - self, "main.cpp", self.line, num_expected_locations=-1 - ) - - self.runCmd("run", RUN_SUCCEEDED) - - # The stop reason of the thread should be breakpoint. - self.expect( - "thread list", - STOPPED_DUE_TO_BREAKPOINT, - substrs=["stopped", "stop reason = breakpoint"], - ) - - # This is the function to remove the custom formats in order to have a - # clean slate for the next test case. - def cleanup(): - self.runCmd("type format clear", check=False) - self.runCmd("type summary clear", check=False) - self.runCmd("type filter clear", check=False) - self.runCmd("type synth clear", check=False) - - # Execute the cleanup function during test case tear down. 
- self.addTearDownHook(cleanup) - - self.expect("frame variable ivI", substrs=["item = 3"]) - self.expect("expr ivI", substrs=["item = 3"]) - - self.expect("frame variable iimI", substrs=["first = 0", "second = 12"]) - self.expect("expr iimI", substrs=["first = 0", "second = 12"]) - - self.expect("frame variable simI", substrs=['first = "world"', "second = 42"]) - self.expect("expr simI", substrs=['first = "world"', "second = 42"]) - - self.expect("frame variable svI", substrs=['item = "hello"']) - self.expect("expr svI", substrs=['item = "hello"']) diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/iterator/main.cpp b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/iterator/main.cpp deleted file mode 100644 index 7ddffd19012e7..0000000000000 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/iterator/main.cpp +++ /dev/null @@ -1,38 +0,0 @@ -#include -#include -#include - -typedef std::map intint_map; -typedef std::map strint_map; - -typedef std::vector int_vector; -typedef std::vector string_vector; - -typedef intint_map::iterator iimter; -typedef strint_map::iterator simter; - -typedef int_vector::iterator ivter; -typedef string_vector::iterator svter; - -int main() -{ - intint_map iim; - iim[0] = 12; - - strint_map sim; - sim["world"] = 42; - - int_vector iv; - iv.push_back(3); - - string_vector sv; - sv.push_back("hello"); - - iimter iimI = iim.begin(); - simter simI = sim.begin(); - - ivter ivI = iv.begin(); - svter svI = sv.begin(); - - return 0; // Set break point at this line. 
-} diff --git a/lldb/test/API/functionalities/plugins/python_os_plugin/operating_system.py b/lldb/test/API/functionalities/plugins/python_os_plugin/operating_system.py index f4404d78492f9..de9900cae4b75 100644 --- a/lldb/test/API/functionalities/plugins/python_os_plugin/operating_system.py +++ b/lldb/test/API/functionalities/plugins/python_os_plugin/operating_system.py @@ -24,6 +24,10 @@ def create_thread(self, tid, context): return None def get_thread_info(self): + if self.process.state != lldb.eStateStopped: + print("Error: get_thread_info called with state not stopped") + return [] + if not self.threads: self.threads = [ { diff --git a/lldb/test/API/functionalities/plugins/python_os_plugin/os_plugin_in_dsym/Makefile b/lldb/test/API/functionalities/plugins/python_os_plugin/os_plugin_in_dsym/Makefile new file mode 100644 index 0000000000000..93618844a7a4d --- /dev/null +++ b/lldb/test/API/functionalities/plugins/python_os_plugin/os_plugin_in_dsym/Makefile @@ -0,0 +1,4 @@ +C_SOURCES := main.c +ENABLE_THREADS := YES + +include Makefile.rules diff --git a/lldb/test/API/functionalities/plugins/python_os_plugin/os_plugin_in_dsym/TestOSIndSYM.py b/lldb/test/API/functionalities/plugins/python_os_plugin/os_plugin_in_dsym/TestOSIndSYM.py new file mode 100644 index 0000000000000..f0d192be661bb --- /dev/null +++ b/lldb/test/API/functionalities/plugins/python_os_plugin/os_plugin_in_dsym/TestOSIndSYM.py @@ -0,0 +1,153 @@ +""" +Test that an OS plugin in a dSYM sees the right process state +when run from a dSYM on attach +""" + +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +import lldbsuite.test.lldbutil as lldbutil +from lldbgdbserverutils import get_debugserver_exe + +import os +import lldb +import time +import socket +import shutil + + +class TestOSPluginIndSYM(TestBase): + NO_DEBUG_INFO_TESTCASE = True + + # The port used by debugserver. + PORT = 54638 + + # The number of attempts. 
+ ATTEMPTS = 10 + + # Time given to the binary to launch and to debugserver to attach to it for + # every attempt. We'll wait a maximum of 10 times 2 seconds while the + # inferior will wait 10 times 10 seconds. + TIMEOUT = 2 + + def no_debugserver(self): + if get_debugserver_exe() is None: + return "no debugserver" + return None + + def port_not_available(self): + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + if s.connect_ex(("127.0.0.1", self.PORT)) == 0: + return "{} not available".format(self.PORT) + return None + + @skipUnlessDarwin + def test_python_os_plugin(self): + self.do_test_python_os_plugin(False) + + @skipTestIfFn(no_debugserver) + @skipTestIfFn(port_not_available) + def test_python_os_plugin_remote(self): + self.do_test_python_os_plugin(True) + + def do_test_python_os_plugin(self, remote): + """Test that the environment for os plugins in dSYM's is correct""" + executable = self.build_dsym("my_binary") + + # Make sure we're set up to load the symbol file's python + self.runCmd("settings set target.load-script-from-symbol-file true") + + target = self.dbg.CreateTarget(None) + + error = lldb.SBError() + + # Now run the process, and then attach. When the attach + # succeeds, make sure that we were in the right state when + # the OS plugins were run. 
+ if not remote: + popen = self.spawnSubprocess(executable, []) + + process = target.AttachToProcessWithID(lldb.SBListener(), popen.pid, error) + self.assertSuccess(error, "Attach succeeded") + else: + self.setup_remote_platform(executable) + process = target.process + self.assertTrue(process.IsValid(), "Got a valid process from debugserver") + + # We should have figured out the target from the result of the attach: + self.assertTrue(target.IsValid, "Got a valid target") + + # Make sure that we got the right plugin: + self.expect( + "settings show target.process.python-os-plugin-path", + substrs=["operating_system.py"], + ) + + for thread in process.threads: + stack_depth = thread.num_frames + reg_threads = thread.frames[0].reg + + # OKAY, that realized the threads, now see if the creation + # state was correct. The way we use the OS plugin, it doesn't need + # to create a thread, and doesn't have to call get_register_info, + # so we don't expect those to get called. + self.expect( + "test_report_command", + substrs=[ + "in_init=1", + "in_get_thread_info=1", + "in_create_thread=2", + "in_get_register_info=2", + "in_get_register_data=1", + ], + ) + + def build_dsym(self, name): + self.build(debug_info="dsym", dictionary={"EXE": name}) + executable = self.getBuildArtifact(name) + dsym_path = self.getBuildArtifact(name + ".dSYM") + python_dir_path = dsym_path + python_dir_path = os.path.join(dsym_path, "Contents", "Resources", "Python") + if not os.path.exists(python_dir_path): + os.mkdir(python_dir_path) + python_file_name = name + ".py" + + os_plugin_dir = os.path.join(python_dir_path, "OS_Plugin") + if not os.path.exists(os_plugin_dir): + os.mkdir(os_plugin_dir) + + plugin_dest_path = os.path.join(os_plugin_dir, "operating_system.py") + plugin_origin_path = os.path.join(self.getSourceDir(), "operating_system.py") + shutil.copy(plugin_origin_path, plugin_dest_path) + + module_dest_path = os.path.join(python_dir_path, python_file_name) + with open(module_dest_path, 
"w") as f: + f.write("def __lldb_init_module(debugger, unused):\n") + f.write( + f" debugger.HandleCommand(\"settings set target.process.python-os-plugin-path '{plugin_dest_path}'\")\n" + ) + f.close() + + return executable + + def setup_remote_platform(self, exe): + # Get debugserver to start up our process for us, and then we + # can use `process connect` to attach to it. + debugserver = get_debugserver_exe() + debugserver_args = ["localhost:{}".format(self.PORT), exe] + self.spawnSubprocess(debugserver, debugserver_args) + + # Select the platform. + self.runCmd("platform select remote-gdb-server") + + # Connect to debugserver + interpreter = self.dbg.GetCommandInterpreter() + connected = False + for i in range(self.ATTEMPTS): + result = lldb.SBCommandReturnObject() + interpreter.HandleCommand(f"gdb-remote localhost:{self.PORT}", result) + connected = result.Succeeded() + if connected: + break + time.sleep(self.TIMEOUT) + + self.assertTrue(connected, "could not connect to debugserver") diff --git a/lldb/test/API/functionalities/plugins/python_os_plugin/os_plugin_in_dsym/main.c b/lldb/test/API/functionalities/plugins/python_os_plugin/os_plugin_in_dsym/main.c new file mode 100644 index 0000000000000..8e03f395e6110 --- /dev/null +++ b/lldb/test/API/functionalities/plugins/python_os_plugin/os_plugin_in_dsym/main.c @@ -0,0 +1,8 @@ +#include + +int main() { + while (1) { + sleep(1); + } + return 0; +} diff --git a/lldb/test/API/functionalities/plugins/python_os_plugin/os_plugin_in_dsym/operating_system.py b/lldb/test/API/functionalities/plugins/python_os_plugin/os_plugin_in_dsym/operating_system.py new file mode 100644 index 0000000000000..0f9cec670b73f --- /dev/null +++ b/lldb/test/API/functionalities/plugins/python_os_plugin/os_plugin_in_dsym/operating_system.py @@ -0,0 +1,83 @@ +#!/usr/bin/env python + +import lldb +import struct + +# Value is: +# 0 called - state is not stopped +# 1 called - state is stopped +# 2 not called + +stop_state = { + "in_init": 2, + 
"in_get_thread_info": 2, + "in_create_thread": 2, + "in_get_register_info": 2, + "in_get_register_data": 2, +} + + +def ReportCommand(debugger, command, exe_ctx, result, unused): + global stop_state + for state in stop_state: + result.AppendMessage(f"{state}={stop_state[state]}\n") + result.SetStatus(lldb.eReturnStatusSuccessFinishResult) + + +class OperatingSystemPlugIn: + """This class checks that all the""" + + def __init__(self, process): + """Initialization needs a valid.SBProcess object. + global stop_state + + This plug-in will get created after a live process is valid and has stopped for the + first time.""" + self.process = process + stop_state["in_init"] = self.state_is_stopped() + interp = process.target.debugger.GetCommandInterpreter() + result = lldb.SBCommandReturnObject() + cmd_str = ( + f"command script add test_report_command -o -f {__name__}.ReportCommand" + ) + interp.HandleCommand(cmd_str, result) + + def state_is_stopped(self): + if self.process.state == lldb.eStateStopped: + return 1 + else: + return 0 + + def does_plugin_report_all_threads(self): + return True + + def create_thread(self, tid, context): + global stop_state + stop_state["in_create_thread"] = self.state_is_stopped() + + return None + + def get_thread_info(self): + global stop_state + stop_state["in_get_thread_info"] = self.state_is_stopped() + idx = self.process.threads[0].idx + return [ + { + "tid": 0x111111111, + "name": "one", + "queue": "queue1", + "state": "stopped", + "stop_reason": "breakpoint", + "core": idx, + } + ] + + def get_register_info(self): + global stop_state + stop_state["in_get_register_info"] = self.state_is_stopped() + return None + + def get_register_data(self, tid): + global stop_state + stop_state["in_get_register_data"] = self.state_is_stopped() + return None diff --git a/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py b/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py index ae8142ae4f484..a611cc30c1897 100644 --- 
a/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py +++ b/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py @@ -44,22 +44,39 @@ def test_failing_launch_program(self): "'{0}' does not exist".format(program), response["body"]["error"]["format"] ) - def test_failing_launch_commands_and_run_in_terminal(self): + def test_failing_launch_commands_and_console(self): """ - Tests launching with an invalid program. + Tests launching with launch commands in an integrated terminal. """ program = self.getBuildArtifact("a.out") self.create_debug_adapter() response = self.launch( - program, launchCommands=["a b c"], runInTerminal=True, expectFailure=True + program, + launchCommands=["a b c"], + console="integratedTerminal", + expectFailure=True, ) self.assertFalse(response["success"]) self.assertTrue(self.get_dict_value(response, ["body", "error", "showUser"])) self.assertEqual( - "'launchCommands' and 'runInTerminal' are mutually exclusive", + "'launchCommands' and non-internal 'console' are mutually exclusive", self.get_dict_value(response, ["body", "error", "format"]), ) + def test_failing_console(self): + """ + Tests launching in console with an invalid terminal type. 
+ """ + program = self.getBuildArtifact("a.out") + self.create_debug_adapter() + response = self.launch(program, console="invalid", expectFailure=True) + self.assertFalse(response["success"]) + self.assertTrue(self.get_dict_value(response, ["body", "error", "showUser"])) + self.assertRegex( + response["body"]["error"]["format"], + r"unexpected value, expected 'internalConsole\', 'integratedTerminal\' or 'externalTerminal\' at arguments.console", + ) + @skipIfWindows def test_termination(self): """ diff --git a/lldb/test/API/tools/lldb-dap/optimized/TestDAP_optimized.py b/lldb/test/API/tools/lldb-dap/optimized/TestDAP_optimized.py index 9cfa9b20f6051..3b769d2dd89ce 100644 --- a/lldb/test/API/tools/lldb-dap/optimized/TestDAP_optimized.py +++ b/lldb/test/API/tools/lldb-dap/optimized/TestDAP_optimized.py @@ -28,7 +28,7 @@ def test_stack_frame_name(self): parent_frame = self.dap_server.get_stackFrame(frameIndex=1) self.assertTrue(parent_frame["name"].endswith(" [opt]")) - @skipIfAsan # On ASAN builds this test intermittently fails https://github.com/llvm/llvm-project/issues/111061 + @skipIfAsan # On ASAN builds this test intermittently fails https://github.com/llvm/llvm-project/issues/111061 @skipIfWindows def test_optimized_variable(self): """Test optimized variable value contains error.""" @@ -50,9 +50,8 @@ def test_optimized_variable(self): value.startswith("&1 | FileCheck %s --check-prefix INVALID +INVALID: error: Invalid value ('bogus') for -Y (condition-language): invalid language + +RUN: not %lldb -b -o 'break set -n foo -c bar -Y python' 2>&1 | FileCheck %s --check-prefix NOEXPRSUPPORT +NOEXPRSUPPORT: error: Invalid value ('python') for -Y (condition-language): no expression support for language diff --git a/lldb/tools/lldb-dap/Handler/LaunchRequestHandler.cpp b/lldb/tools/lldb-dap/Handler/LaunchRequestHandler.cpp index 1d7b4b7009462..553cbeaf849e2 100644 --- a/lldb/tools/lldb-dap/Handler/LaunchRequestHandler.cpp +++ 
b/lldb/tools/lldb-dap/Handler/LaunchRequestHandler.cpp @@ -23,9 +23,10 @@ namespace lldb_dap { /// Launch request; value of command field is 'launch'. Error LaunchRequestHandler::Run(const LaunchRequestArguments &arguments) const { // Validate that we have a well formed launch request. - if (!arguments.launchCommands.empty() && arguments.runInTerminal) + if (!arguments.launchCommands.empty() && + arguments.console != protocol::eConsoleInternal) return make_error( - "'launchCommands' and 'runInTerminal' are mutually exclusive"); + "'launchCommands' and non-internal 'console' are mutually exclusive"); dap.SetConfiguration(arguments.configuration, /*is_attach=*/false); dap.last_launch_request = arguments; diff --git a/lldb/tools/lldb-dap/Handler/RequestHandler.cpp b/lldb/tools/lldb-dap/Handler/RequestHandler.cpp index 93bc80a38e29d..4fadf1c22e0e3 100644 --- a/lldb/tools/lldb-dap/Handler/RequestHandler.cpp +++ b/lldb/tools/lldb-dap/Handler/RequestHandler.cpp @@ -80,7 +80,8 @@ RunInTerminal(DAP &dap, const protocol::LaunchRequestArguments &arguments) { llvm::json::Object reverse_request = CreateRunInTerminalReverseRequest( arguments.configuration.program, arguments.args, arguments.env, - arguments.cwd, comm_file.m_path, debugger_pid); + arguments.cwd, comm_file.m_path, debugger_pid, + arguments.console == protocol::eConsoleExternalTerminal); dap.SendReverseRequest("runInTerminal", std::move(reverse_request)); @@ -192,7 +193,7 @@ llvm::Error BaseRequestHandler::LaunchProcess( // about process state changes during the launch. 
ScopeSyncMode scope_sync_mode(dap.debugger); - if (arguments.runInTerminal) { + if (arguments.console != protocol::eConsoleInternal) { if (llvm::Error err = RunInTerminal(dap, arguments)) return err; } else if (launchCommands.empty()) { diff --git a/lldb/tools/lldb-dap/Handler/RequestHandler.h b/lldb/tools/lldb-dap/Handler/RequestHandler.h index 07b079d19896d..16f8062f97d7b 100644 --- a/lldb/tools/lldb-dap/Handler/RequestHandler.h +++ b/lldb/tools/lldb-dap/Handler/RequestHandler.h @@ -540,11 +540,14 @@ class ThreadsRequestHandler Run(const protocol::ThreadsArguments &) const override; }; -class VariablesRequestHandler : public LegacyRequestHandler { +class VariablesRequestHandler + : public RequestHandler> { public: - using LegacyRequestHandler::LegacyRequestHandler; + using RequestHandler::RequestHandler; static llvm::StringLiteral GetCommand() { return "variables"; } - void operator()(const llvm::json::Object &request) const override; + llvm::Expected + Run(const protocol::VariablesArguments &) const override; }; class LocationsRequestHandler : public LegacyRequestHandler { diff --git a/lldb/tools/lldb-dap/Handler/VariablesRequestHandler.cpp b/lldb/tools/lldb-dap/Handler/VariablesRequestHandler.cpp index 19bcca2b22b9b..5fa2b1ef5e20d 100644 --- a/lldb/tools/lldb-dap/Handler/VariablesRequestHandler.cpp +++ b/lldb/tools/lldb-dap/Handler/VariablesRequestHandler.cpp @@ -8,107 +8,37 @@ #include "DAP.h" #include "EventHelper.h" +#include "Handler/RequestHandler.h" #include "JSONUtils.h" -#include "RequestHandler.h" +#include "ProtocolUtils.h" + +using namespace llvm; +using namespace lldb_dap::protocol; namespace lldb_dap { -// "VariablesRequest": { -// "allOf": [ { "$ref": "#/definitions/Request" }, { -// "type": "object", -// "description": "Variables request; value of command field is 'variables'. -// Retrieves all child variables for the given variable reference. 
An -// optional filter can be used to limit the fetched children to either named -// or indexed children.", "properties": { -// "command": { -// "type": "string", -// "enum": [ "variables" ] -// }, -// "arguments": { -// "$ref": "#/definitions/VariablesArguments" -// } -// }, -// "required": [ "command", "arguments" ] -// }] -// }, -// "VariablesArguments": { -// "type": "object", -// "description": "Arguments for 'variables' request.", -// "properties": { -// "variablesReference": { -// "type": "integer", -// "description": "The Variable reference." -// }, -// "filter": { -// "type": "string", -// "enum": [ "indexed", "named" ], -// "description": "Optional filter to limit the child variables to either -// named or indexed. If ommited, both types are fetched." -// }, -// "start": { -// "type": "integer", -// "description": "The index of the first variable to return; if omitted -// children start at 0." -// }, -// "count": { -// "type": "integer", -// "description": "The number of variables to return. If count is missing -// or 0, all variables are returned." -// }, -// "format": { -// "$ref": "#/definitions/ValueFormat", -// "description": "Specifies details on how to format the Variable -// values." -// } -// }, -// "required": [ "variablesReference" ] -// }, -// "VariablesResponse": { -// "allOf": [ { "$ref": "#/definitions/Response" }, { -// "type": "object", -// "description": "Response to 'variables' request.", -// "properties": { -// "body": { -// "type": "object", -// "properties": { -// "variables": { -// "type": "array", -// "items": { -// "$ref": "#/definitions/Variable" -// }, -// "description": "All (or a range) of variables for the given -// variable reference." 
-// } -// }, -// "required": [ "variables" ] -// } -// }, -// "required": [ "body" ] -// }] -// } -void VariablesRequestHandler::operator()( - const llvm::json::Object &request) const { - llvm::json::Object response; - FillResponse(request, response); - llvm::json::Array variables; - const auto *arguments = request.getObject("arguments"); - const auto variablesReference = - GetInteger(arguments, "variablesReference").value_or(0); - const auto start = GetInteger(arguments, "start").value_or(0); - const auto count = GetInteger(arguments, "count").value_or(0); +/// Retrieves all child variables for the given variable reference. +/// +/// A filter can be used to limit the fetched children to either named or +/// indexed children. +Expected +VariablesRequestHandler::Run(const VariablesArguments &arguments) const { + const uint64_t var_ref = arguments.variablesReference; + const uint64_t count = arguments.count; + const uint64_t start = arguments.start; bool hex = false; - const auto *format = arguments->getObject("format"); - if (format) - hex = GetBoolean(format, "hex").value_or(false); + if (arguments.format) + hex = arguments.format->hex; + + std::vector variables; - if (lldb::SBValueList *top_scope = - dap.variables.GetTopLevelScope(variablesReference)) { + if (lldb::SBValueList *top_scope = dap.variables.GetTopLevelScope(var_ref)) { // variablesReference is one of our scopes, not an actual variable it is // asking for the list of args, locals or globals. int64_t start_idx = 0; int64_t num_children = 0; - if (variablesReference == VARREF_REGS) { + if (var_ref == VARREF_REGS) { // Change the default format of any pointer sized registers in the first // register set to be the lldb::eFormatAddressInfo so we show the pointer // and resolve what the pointer resolves to. 
Only change the format if the @@ -128,7 +58,7 @@ void VariablesRequestHandler::operator()( } num_children = top_scope->GetSize(); - if (num_children == 0 && variablesReference == VARREF_LOCALS) { + if (num_children == 0 && var_ref == VARREF_LOCALS) { // Check for an error in the SBValueList that might explain why we don't // have locals. If we have an error display it as the sole value in the // the locals. @@ -145,12 +75,11 @@ void VariablesRequestHandler::operator()( // errors are only set when there is a problem that the user could // fix, so no error will show up when you have no debug info, only when // we do have debug info and something that is fixable can be done. - llvm::json::Object object; - EmplaceSafeString(object, "name", ""); - EmplaceSafeString(object, "type", "const char *"); - EmplaceSafeString(object, "value", var_err); - object.try_emplace("variablesReference", (int64_t)0); - variables.emplace_back(std::move(object)); + Variable var; + var.name = ""; + var.type = "const char *"; + var.value = var_err; + variables.emplace_back(var); } } const int64_t end_idx = start_idx + ((count == 0) ? 
num_children : count); @@ -165,7 +94,7 @@ void VariablesRequestHandler::operator()( } // Show return value if there is any ( in the local top frame ) - if (variablesReference == VARREF_LOCALS) { + if (var_ref == VARREF_LOCALS) { auto process = dap.target.GetProcess(); auto selected_thread = process.GetSelectedThread(); lldb::SBValue stop_return_value = selected_thread.GetStopReturnValue(); @@ -194,32 +123,35 @@ void VariablesRequestHandler::operator()( if (!variable.IsValid()) break; - int64_t var_ref = + const int64_t frame_var_ref = dap.variables.InsertVariable(variable, /*is_permanent=*/false); variables.emplace_back(CreateVariable( - variable, var_ref, hex, dap.configuration.enableAutoVariableSummaries, + variable, frame_var_ref, hex, + dap.configuration.enableAutoVariableSummaries, dap.configuration.enableSyntheticChildDebugging, variable_name_counts[GetNonNullVariableName(variable)] > 1)); } } else { // We are expanding a variable that has children, so we will return its // children. 
- lldb::SBValue variable = dap.variables.GetVariable(variablesReference); + lldb::SBValue variable = dap.variables.GetVariable(var_ref); if (variable.IsValid()) { + const bool is_permanent = + dap.variables.IsPermanentVariableReference(var_ref); auto addChild = [&](lldb::SBValue child, std::optional custom_name = {}) { if (!child.IsValid()) return; - bool is_permanent = - dap.variables.IsPermanentVariableReference(variablesReference); - int64_t var_ref = dap.variables.InsertVariable(child, is_permanent); - variables.emplace_back(CreateVariable( - child, var_ref, hex, dap.configuration.enableAutoVariableSummaries, - dap.configuration.enableSyntheticChildDebugging, - /*is_name_duplicated=*/false, custom_name)); + const int64_t child_var_ref = + dap.variables.InsertVariable(child, is_permanent); + variables.emplace_back( + CreateVariable(child, child_var_ref, hex, + dap.configuration.enableAutoVariableSummaries, + dap.configuration.enableSyntheticChildDebugging, + /*is_name_duplicated=*/false, custom_name)); }; const int64_t num_children = variable.GetNumChildren(); - int64_t end_idx = start + ((count == 0) ? num_children : count); + const int64_t end_idx = start + ((count == 0) ? 
num_children : count); int64_t i = start; for (; i < end_idx && i < num_children; ++i) addChild(variable.GetChildAtIndex(i)); @@ -233,10 +165,8 @@ void VariablesRequestHandler::operator()( addChild(variable.GetNonSyntheticValue(), "[raw]"); } } - llvm::json::Object body; - body.try_emplace("variables", std::move(variables)); - response.try_emplace("body", std::move(body)); - dap.SendJSON(llvm::json::Value(std::move(response))); + + return VariablesResponseBody{variables}; } } // namespace lldb_dap diff --git a/lldb/tools/lldb-dap/JSONUtils.cpp b/lldb/tools/lldb-dap/JSONUtils.cpp index 553c52605c998..41ca29a405ac9 100644 --- a/lldb/tools/lldb-dap/JSONUtils.cpp +++ b/lldb/tools/lldb-dap/JSONUtils.cpp @@ -120,6 +120,42 @@ DecodeMemoryReference(llvm::StringRef memoryReference) { return addr; } +bool DecodeMemoryReference(const llvm::json::Value &v, llvm::StringLiteral key, + lldb::addr_t &out, llvm::json::Path path, + bool required) { + const llvm::json::Object *v_obj = v.getAsObject(); + if (!v_obj) { + path.report("expected object"); + return false; + } + + const llvm::json::Value *mem_ref_value = v_obj->get(key); + if (!mem_ref_value) { + if (!required) + return true; + + path.field(key).report("missing value"); + return false; + } + + const std::optional mem_ref_str = + mem_ref_value->getAsString(); + if (!mem_ref_str) { + path.field(key).report("expected string"); + return false; + } + + const std::optional addr_opt = + DecodeMemoryReference(*mem_ref_str); + if (!addr_opt) { + path.field(key).report("malformed memory reference"); + return false; + } + + out = *addr_opt; + return true; +} + std::vector GetStrings(const llvm::json::Object *obj, llvm::StringRef key) { std::vector strs; @@ -768,38 +804,6 @@ VariableDescription::VariableDescription(lldb::SBValue v, evaluate_name = llvm::StringRef(evaluateStream.GetData()).str(); } -llvm::json::Object VariableDescription::GetVariableExtensionsJSON() { - llvm::json::Object extensions; - if (error) - 
EmplaceSafeString(extensions, "error", *error); - if (!value.empty()) - EmplaceSafeString(extensions, "value", value); - if (!summary.empty()) - EmplaceSafeString(extensions, "summary", summary); - if (auto_summary) - EmplaceSafeString(extensions, "autoSummary", *auto_summary); - - if (lldb::SBDeclaration decl = v.GetDeclaration(); decl.IsValid()) { - llvm::json::Object decl_obj; - if (lldb::SBFileSpec file = decl.GetFileSpec(); file.IsValid()) { - char path[PATH_MAX] = ""; - if (file.GetPath(path, sizeof(path)) && - lldb::SBFileSpec::ResolvePath(path, path, PATH_MAX)) { - decl_obj.try_emplace("path", std::string(path)); - } - } - - if (int line = decl.GetLine()) - decl_obj.try_emplace("line", line); - if (int column = decl.GetColumn()) - decl_obj.try_emplace("column", column); - - if (!decl_obj.empty()) - extensions.try_emplace("declaration", std::move(decl_obj)); - } - return extensions; -} - std::string VariableDescription::GetResult(llvm::StringRef context) { // In repl context, the results can be displayed as multiple lines so more // detailed descriptions can be returned. @@ -836,226 +840,6 @@ std::pair UnpackLocation(int64_t location_id) { return std::pair{location_id >> 1, location_id & 1}; } -// "Variable": { -// "type": "object", -// "description": "A Variable is a name/value pair. Optionally a variable -// can have a 'type' that is shown if space permits or when -// hovering over the variable's name. An optional 'kind' is -// used to render additional properties of the variable, -// e.g. different icons can be used to indicate that a -// variable is public or private. If the value is -// structured (has children), a handle is provided to -// retrieve the children with the VariablesRequest. If -// the number of named or indexed children is large, the -// numbers should be returned via the optional -// 'namedVariables' and 'indexedVariables' attributes. 
The -// client can use this optional information to present the -// children in a paged UI and fetch them in chunks.", -// "properties": { -// "name": { -// "type": "string", -// "description": "The variable's name." -// }, -// "value": { -// "type": "string", -// "description": "The variable's value. This can be a multi-line text, -// e.g. for a function the body of a function." -// }, -// "type": { -// "type": "string", -// "description": "The type of the variable's value. Typically shown in -// the UI when hovering over the value." -// }, -// "presentationHint": { -// "$ref": "#/definitions/VariablePresentationHint", -// "description": "Properties of a variable that can be used to determine -// how to render the variable in the UI." -// }, -// "evaluateName": { -// "type": "string", -// "description": "Optional evaluatable name of this variable which can -// be passed to the 'EvaluateRequest' to fetch the -// variable's value." -// }, -// "variablesReference": { -// "type": "integer", -// "description": "If variablesReference is > 0, the variable is -// structured and its children can be retrieved by -// passing variablesReference to the VariablesRequest." -// }, -// "namedVariables": { -// "type": "integer", -// "description": "The number of named child variables. The client can -// use this optional information to present the children -// in a paged UI and fetch them in chunks." -// }, -// "indexedVariables": { -// "type": "integer", -// "description": "The number of indexed child variables. The client -// can use this optional information to present the -// children in a paged UI and fetch them in chunks." -// }, -// "memoryReference": { -// "type": "string", -// "description": "A memory reference associated with this variable. -// For pointer type variables, this is generally a -// reference to the memory address contained in the -// pointer. For executable data, this reference may later -// be used in a `disassemble` request. 
This attribute may -// be returned by a debug adapter if corresponding -// capability `supportsMemoryReferences` is true." -// }, -// "declarationLocationReference": { -// "type": "integer", -// "description": "A reference that allows the client to request the -// location where the variable is declared. This should be -// present only if the adapter is likely to be able to -// resolve the location.\n\nThis reference shares the same -// lifetime as the `variablesReference`. See 'Lifetime of -// Object References' in the Overview section for -// details." -// }, -// "valueLocationReference": { -// "type": "integer", -// "description": "A reference that allows the client to request the -// location where the variable's value is declared. For -// example, if the variable contains a function pointer, -// the adapter may be able to look up the function's -// location. This should be present only if the adapter -// is likely to be able to resolve the location.\n\nThis -// reference shares the same lifetime as the -// `variablesReference`. See 'Lifetime of Object -// References' in the Overview section for details." -// }, -// -// "$__lldb_extensions": { -// "description": "Unofficial extensions to the protocol", -// "properties": { -// "declaration": { -// "type": "object", -// "description": "The source location where the variable was -// declared. This value won't be present if no -// declaration is available. -// Superseded by `declarationLocationReference`", -// "properties": { -// "path": { -// "type": "string", -// "description": "The source file path where the variable was -// declared." -// }, -// "line": { -// "type": "number", -// "description": "The 1-indexed source line where the variable -// was declared." -// }, -// "column": { -// "type": "number", -// "description": "The 1-indexed source column where the variable -// was declared." 
-// } -// } -// }, -// "value": { -// "type": "string", -// "description": "The internal value of the variable as returned by -// This is effectively SBValue.GetValue(). The other -// `value` entry in the top-level variable response -// is, on the other hand, just a display string for -// the variable." -// }, -// "summary": { -// "type": "string", -// "description": "The summary string of the variable. This is -// effectively SBValue.GetSummary()." -// }, -// "autoSummary": { -// "type": "string", -// "description": "The auto generated summary if using -// `enableAutoVariableSummaries`." -// }, -// "error": { -// "type": "string", -// "description": "An error message generated if LLDB couldn't inspect -// the variable." -// } -// } -// } -// }, -// "required": [ "name", "value", "variablesReference" ] -// } -llvm::json::Value CreateVariable(lldb::SBValue v, int64_t var_ref, - bool format_hex, bool auto_variable_summaries, - bool synthetic_child_debugging, - bool is_name_duplicated, - std::optional custom_name) { - VariableDescription desc(v, auto_variable_summaries, format_hex, - is_name_duplicated, custom_name); - llvm::json::Object object; - EmplaceSafeString(object, "name", desc.name); - EmplaceSafeString(object, "value", desc.display_value); - - if (!desc.evaluate_name.empty()) - EmplaceSafeString(object, "evaluateName", desc.evaluate_name); - - // If we have a type with many children, we would like to be able to - // give a hint to the IDE that the type has indexed children so that the - // request can be broken up in grabbing only a few children at a time. We - // want to be careful and only call "v.GetNumChildren()" if we have an array - // type or if we have a synthetic child provider producing indexed children. - // We don't want to call "v.GetNumChildren()" on all objects as class, struct - // and union types don't need to be completed if they are never expanded. 
So - // we want to avoid calling this to only cases where we it makes sense to keep - // performance high during normal debugging. - - // If we have an array type, say that it is indexed and provide the number - // of children in case we have a huge array. If we don't do this, then we - // might take a while to produce all children at onces which can delay your - // debug session. - if (desc.type_obj.IsArrayType()) { - object.try_emplace("indexedVariables", v.GetNumChildren()); - } else if (v.IsSynthetic()) { - // For a type with a synthetic child provider, the SBType of "v" won't tell - // us anything about what might be displayed. Instead, we check if the first - // child's name is "[0]" and then say it is indexed. We call - // GetNumChildren() only if the child name matches to avoid a potentially - // expensive operation. - if (lldb::SBValue first_child = v.GetChildAtIndex(0)) { - llvm::StringRef first_child_name = first_child.GetName(); - if (first_child_name == "[0]") { - size_t num_children = v.GetNumChildren(); - // If we are creating a "[raw]" fake child for each synthetic type, we - // have to account for it when returning indexed variables. - if (synthetic_child_debugging) - ++num_children; - object.try_emplace("indexedVariables", num_children); - } - } - } - EmplaceSafeString(object, "type", desc.display_type_name); - - // A unique variable identifier to help in properly identifying variables with - // the same name. This is an extension to the VS protocol. 
- object.try_emplace("id", var_ref); - - if (v.MightHaveChildren()) - object.try_emplace("variablesReference", var_ref); - else - object.try_emplace("variablesReference", 0); - - if (v.GetDeclaration().IsValid()) - object.try_emplace("declarationLocationReference", - PackLocation(var_ref, false)); - - if (ValuePointsToCode(v)) - object.try_emplace("valueLocationReference", PackLocation(var_ref, true)); - - if (lldb::addr_t addr = v.GetLoadAddress(); addr != LLDB_INVALID_ADDRESS) - object.try_emplace("memoryReference", EncodeMemoryReference(addr)); - - object.try_emplace("$__lldb_extensions", desc.GetVariableExtensionsJSON()); - return llvm::json::Value(std::move(object)); -} - llvm::json::Value CreateCompileUnit(lldb::SBCompileUnit &unit) { llvm::json::Object object; char unit_path_arr[PATH_MAX]; @@ -1070,12 +854,16 @@ llvm::json::Value CreateCompileUnit(lldb::SBCompileUnit &unit) { llvm::json::Object CreateRunInTerminalReverseRequest( llvm::StringRef program, const std::vector &args, const llvm::StringMap &env, llvm::StringRef cwd, - llvm::StringRef comm_file, lldb::pid_t debugger_pid) { + llvm::StringRef comm_file, lldb::pid_t debugger_pid, bool external) { llvm::json::Object run_in_terminal_args; - // This indicates the IDE to open an embedded terminal, instead of opening - // the terminal in a new window. - run_in_terminal_args.try_emplace("kind", "integrated"); - + if (external) { + // This indicates the IDE to open an external terminal window. + run_in_terminal_args.try_emplace("kind", "external"); + } else { + // This indicates the IDE to open an embedded terminal, instead of opening + // the terminal in a new window. 
+ run_in_terminal_args.try_emplace("kind", "integrated"); + } // The program path must be the first entry in the "args" field std::vector req_args = {DAP::debug_adapter_path.str(), "--comm-file", comm_file.str()}; diff --git a/lldb/tools/lldb-dap/JSONUtils.h b/lldb/tools/lldb-dap/JSONUtils.h index 0424438ad5b72..e9094f67b94ec 100644 --- a/lldb/tools/lldb-dap/JSONUtils.h +++ b/lldb/tools/lldb-dap/JSONUtils.h @@ -138,6 +138,30 @@ std::string EncodeMemoryReference(lldb::addr_t addr); std::optional DecodeMemoryReference(llvm::StringRef memoryReference); +/// Decodes a memory reference from the given json value. +/// +/// \param[in] v +/// A JSON value that we expected to contain the memory reference. +/// +/// \param[in] key +/// The key of the memory reference. +/// +/// \param[out] out +/// The memory address, if successfully decoded. +/// +/// \param[in] path +/// The path for reporting errors. +/// +/// \param[in] required +/// Indicates if the key is required to be present, otherwise report an error +/// if the key is missing. +/// +/// \return +/// Returns \b true if the address was decoded successfully. +bool DecodeMemoryReference(const llvm::json::Value &v, llvm::StringLiteral key, + lldb::addr_t &out, llvm::json::Path path, + bool required); + /// Extract an array of strings for the specified key from an object. /// /// String values in the array will be extracted without any quotes @@ -326,10 +350,6 @@ struct VariableDescription { bool format_hex = false, bool is_name_duplicated = false, std::optional custom_name = {}); - /// Create a JSON object that represents these extensions to the DAP variable - /// response. - llvm::json::Object GetVariableExtensionsJSON(); - /// Returns a description of the value appropriate for the specified context. 
std::string GetResult(llvm::StringRef context); }; @@ -344,61 +364,6 @@ int64_t PackLocation(int64_t var_ref, bool is_value_location); /// Reverse of `PackLocation` std::pair UnpackLocation(int64_t location_id); -/// Create a "Variable" object for a LLDB thread object. -/// -/// This function will fill in the following keys in the returned -/// object: -/// "name" - the name of the variable -/// "value" - the value of the variable as a string -/// "type" - the typename of the variable as a string -/// "id" - a unique identifier for a value in case there are multiple -/// variables with the same name. Other parts of the DAP -/// protocol refer to values by name so this can help -/// disambiguate such cases if a IDE passes this "id" value -/// back down. -/// "variablesReference" - Zero if the variable has no children, -/// non-zero integer otherwise which can be used to expand -/// the variable. -/// "evaluateName" - The name of the variable to use in expressions -/// as a string. -/// -/// \param[in] v -/// The LLDB value to use when populating out the "Variable" -/// object. -/// -/// \param[in] var_ref -/// The variable reference. Used to identify the value, e.g. -/// in the `variablesReference` or `declarationLocationReference` -/// properties. -/// -/// \param[in] format_hex -/// If set to true the variable will be formatted as hex in -/// the "value" key value pair for the value of the variable. -/// -/// \param[in] auto_variable_summaries -/// IF set to true the variable will create an automatic variable summary. -/// -/// \param[in] is_name_duplicated -/// Whether the same variable name appears multiple times within the same -/// context (e.g. locals). This can happen due to shadowed variables in -/// nested blocks. -/// -/// As VSCode doesn't render two of more variables with the same name, we -/// apply a suffix to distinguish duplicated variables. 
-/// -/// \param[in] custom_name -/// A provided custom name that is used instead of the SBValue's when -/// creating the JSON representation. -/// -/// \return -/// A "Variable" JSON object with that follows the formal JSON -/// definition outlined by Microsoft. -llvm::json::Value CreateVariable(lldb::SBValue v, int64_t var_ref, - bool format_hex, bool auto_variable_summaries, - bool synthetic_child_debugging, - bool is_name_duplicated = false, - std::optional custom_name = {}); - llvm::json::Value CreateCompileUnit(lldb::SBCompileUnit &unit); /// Create a runInTerminal reverse request object @@ -423,13 +388,17 @@ llvm::json::Value CreateCompileUnit(lldb::SBCompileUnit &unit); /// launcher uses it on Linux tell the kernel that it should allow the /// debugger process to attach. /// +/// \param[in] external +/// If set to true, the program will run in an external terminal window +/// instead of IDE's integrated terminal. +/// /// \return /// A "runInTerminal" JSON object that follows the specification outlined by /// Microsoft. llvm::json::Object CreateRunInTerminalReverseRequest( llvm::StringRef program, const std::vector &args, const llvm::StringMap &env, llvm::StringRef cwd, - llvm::StringRef comm_file, lldb::pid_t debugger_pid); + llvm::StringRef comm_file, lldb::pid_t debugger_pid, bool external); /// Create a "Terminated" JSON object that contains statistics /// diff --git a/lldb/tools/lldb-dap/Protocol/ProtocolRequests.cpp b/lldb/tools/lldb-dap/Protocol/ProtocolRequests.cpp index 83a205f118fc0..d9d688b4c41fe 100644 --- a/lldb/tools/lldb-dap/Protocol/ProtocolRequests.cpp +++ b/lldb/tools/lldb-dap/Protocol/ProtocolRequests.cpp @@ -262,6 +262,34 @@ json::Value toJSON(const BreakpointLocationsResponseBody &BLRB) { return json::Object{{"breakpoints", BLRB.breakpoints}}; } +bool fromJSON(const json::Value &Params, Console &C, json::Path P) { + auto oldFormatConsole = Params.getAsBoolean(); + if (oldFormatConsole) { + C = *oldFormatConsole ? 
eConsoleIntegratedTerminal : eConsoleInternal; + return true; + } + auto newFormatConsole = Params.getAsString(); + if (!newFormatConsole) { + P.report("expected a string"); + return false; + } + + std::optional console = + StringSwitch>(*newFormatConsole) + .Case("internalConsole", eConsoleInternal) + .Case("integratedTerminal", eConsoleIntegratedTerminal) + .Case("externalTerminal", eConsoleExternalTerminal) + .Default(std::nullopt); + if (!console) { + P.report("unexpected value, expected 'internalConsole', " + "'integratedTerminal' or 'externalTerminal'"); + return false; + } + + C = *console; + return true; +} + bool fromJSON(const json::Value &Params, LaunchRequestArguments &LRA, json::Path P) { json::ObjectMapper O(Params, P); @@ -273,9 +301,8 @@ bool fromJSON(const json::Value &Params, LaunchRequestArguments &LRA, O.mapOptional("disableASLR", LRA.disableASLR) && O.mapOptional("disableSTDIO", LRA.disableSTDIO) && O.mapOptional("shellExpandArguments", LRA.shellExpandArguments) && - - O.mapOptional("runInTerminal", LRA.runInTerminal) && - parseEnv(Params, LRA.env, P); + O.mapOptional("runInTerminal", LRA.console) && + O.mapOptional("console", LRA.console) && parseEnv(Params, LRA.env, P); } bool fromJSON(const json::Value &Params, AttachRequestArguments &ARA, @@ -531,6 +558,41 @@ json::Value toJSON(const ModulesResponseBody &MR) { return result; } +bool fromJSON(const json::Value &Param, VariablesArguments::VariablesFilter &VA, + json::Path Path) { + auto rawFilter = Param.getAsString(); + if (!rawFilter) { + Path.report("expected a string"); + return false; + } + std::optional filter = + StringSwitch>( + *rawFilter) + .Case("indexed", VariablesArguments::eVariablesFilterIndexed) + .Case("named", VariablesArguments::eVariablesFilterNamed) + .Default(std::nullopt); + if (!filter) { + Path.report("unexpected value, expected 'named' or 'indexed'"); + return false; + } + + VA = *filter; + return true; +} + +bool fromJSON(const json::Value &Param, VariablesArguments 
&VA, + json::Path Path) { + json::ObjectMapper O(Param, Path); + return O && O.map("variablesReference", VA.variablesReference) && + O.mapOptional("filter", VA.filter) && + O.mapOptional("start", VA.start) && O.mapOptional("count", VA.count) && + O.mapOptional("format", VA.format); +} + +json::Value toJSON(const VariablesResponseBody &VRB) { + return json::Object{{"variables", VRB.variables}}; +} + bool fromJSON(const json::Value &Params, WriteMemoryArguments &WMA, json::Path P) { json::ObjectMapper O(Params, P); diff --git a/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h b/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h index 1544815be9389..07c4afbaa8700 100644 --- a/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h +++ b/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h @@ -242,6 +242,12 @@ struct Configuration { std::string platformName; }; +enum Console : unsigned { + eConsoleInternal, + eConsoleIntegratedTerminal, + eConsoleExternalTerminal +}; + /// lldb-dap specific launch arguments. struct LaunchRequestArguments { /// Common lldb-dap configuration values for launching/attaching operations. @@ -290,9 +296,9 @@ struct LaunchRequestArguments { /// Set whether to shell expand arguments to the process when launching. bool shellExpandArguments = false; - /// Launch the program inside an integrated terminal in the IDE. Useful for - /// debugging interactive command line programs. - bool runInTerminal = false; + /// Specify where to launch the program: internal console, integrated + /// terminal or external terminal. + Console console = eConsoleInternal; /// @} }; @@ -896,6 +902,54 @@ struct ModulesResponseBody { }; llvm::json::Value toJSON(const ModulesResponseBody &); +/// Arguments for `variables` request. +struct VariablesArguments { + /// The variable for which to retrieve its children. The `variablesReference` + /// must have been obtained in the current suspended state. See 'Lifetime of + /// Object References' in the Overview section for details. 
+ uint64_t variablesReference; + + enum VariablesFilter : unsigned { + eVariablesFilterBoth = 0, + eVariablesFilterIndexed = 1 << 0, + eVariablesFilterNamed = 1 << 1, + }; + + /// Filter to limit the child variables to either named or indexed. If + /// omitted, both types are fetched. + VariablesFilter filter = eVariablesFilterBoth; + + /// The index of the first variable to return; if omitted children start at 0. + /// + /// The attribute is only honored by a debug adapter if the corresponding + /// capability `supportsVariablePaging` is true. + uint64_t start = 0; + + /// The number of variables to return. If count is missing or 0, all variables + /// are returned. + /// + /// The attribute is only honored by a debug adapter if the corresponding + /// capability `supportsVariablePaging` is true. + uint64_t count = 0; + + /// Specifies details on how to format the Variable values. + /// + /// The attribute is only honored by a debug adapter if the corresponding + /// capability `supportsValueFormattingOptions` is true. + std::optional format; +}; +bool fromJSON(const llvm::json::Value &Param, + VariablesArguments::VariablesFilter &VA, llvm::json::Path Path); +bool fromJSON(const llvm::json::Value &, VariablesArguments &, + llvm::json::Path); + +/// Response to `variables` request. +struct VariablesResponseBody { + /// All (or a range) of variables for the given variable reference. + std::vector variables; +}; +llvm::json::Value toJSON(const VariablesResponseBody &); + /// Arguments for `writeMemory` request. struct WriteMemoryArguments { /// Memory reference to the base location to which data should be written. 
diff --git a/lldb/tools/lldb-dap/Protocol/ProtocolTypes.cpp b/lldb/tools/lldb-dap/Protocol/ProtocolTypes.cpp index 9b5c9ef348ca4..785830c693104 100644 --- a/lldb/tools/lldb-dap/Protocol/ProtocolTypes.cpp +++ b/lldb/tools/lldb-dap/Protocol/ProtocolTypes.cpp @@ -9,6 +9,7 @@ #include "Protocol/ProtocolTypes.h" #include "JSONUtils.h" #include "ProtocolUtils.h" +#include "lldb/lldb-defines.h" #include "lldb/lldb-types.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" @@ -953,4 +954,71 @@ json::Value toJSON(const Module &M) { return result; } +json::Value toJSON(const VariablePresentationHint &VPH) { + json::Object result{}; + + if (!VPH.kind.empty()) + result.insert({"kind", VPH.kind}); + if (!VPH.attributes.empty()) + result.insert({"attributes", VPH.attributes}); + if (!VPH.visibility.empty()) + result.insert({"visibility", VPH.visibility}); + if (VPH.lazy) + result.insert({"lazy", VPH.lazy}); + + return result; +} + +bool fromJSON(const json::Value &Param, VariablePresentationHint &VPH, + json::Path Path) { + json::ObjectMapper O(Param, Path); + return O && O.mapOptional("kind", VPH.kind) && + O.mapOptional("attributes", VPH.attributes) && + O.mapOptional("visibility", VPH.visibility) && + O.mapOptional("lazy", VPH.lazy); +} + +json::Value toJSON(const Variable &V) { + json::Object result{{"name", V.name}, + {"variablesReference", V.variablesReference}, + {"value", V.value}}; + + if (!V.type.empty()) + result.insert({"type", V.type}); + if (V.presentationHint) + result.insert({"presentationHint", *V.presentationHint}); + if (!V.evaluateName.empty()) + result.insert({"evaluateName", V.evaluateName}); + if (V.namedVariables) + result.insert({"namedVariables", V.namedVariables}); + if (V.indexedVariables) + result.insert({"indexedVariables", V.indexedVariables}); + if (V.memoryReference != LLDB_INVALID_ADDRESS) + result.insert( + {"memoryReference", EncodeMemoryReference(V.memoryReference)}); + if (V.declarationLocationReference) + result.insert( + 
{"declarationLocationReference", V.declarationLocationReference}); + if (V.valueLocationReference) + result.insert({"valueLocationReference", V.valueLocationReference}); + + return result; +} + +bool fromJSON(const json::Value &Param, Variable &V, json::Path Path) { + json::ObjectMapper O(Param, Path); + return O && O.map("name", V.name) && + O.map("variablesReference", V.variablesReference) && + O.map("value", V.value) && O.mapOptional("type", V.type) && + O.mapOptional("presentationHint", *V.presentationHint) && + O.mapOptional("evaluateName", V.evaluateName) && + O.mapOptional("namedVariables", V.namedVariables) && + O.mapOptional("indexedVariables", V.indexedVariables) && + O.mapOptional("declarationLocationReference", + V.declarationLocationReference) && + O.mapOptional("valueLocationReference", V.valueLocationReference) && + DecodeMemoryReference(Param, "memoryReference", V.memoryReference, + Path, /*required=*/false); +} + } // namespace lldb_dap::protocol diff --git a/lldb/tools/lldb-dap/Protocol/ProtocolTypes.h b/lldb/tools/lldb-dap/Protocol/ProtocolTypes.h index 2bb765e956256..89122c8f66307 100644 --- a/lldb/tools/lldb-dap/Protocol/ProtocolTypes.h +++ b/lldb/tools/lldb-dap/Protocol/ProtocolTypes.h @@ -475,7 +475,7 @@ llvm::json::Value toJSON(const Thread &); /// Provides formatting information for a value. struct ValueFormat { /// Display the value in hex. - std::optional hex; + bool hex = false; }; bool fromJSON(const llvm::json::Value &, ValueFormat &, llvm::json::Path); @@ -789,6 +789,137 @@ struct Module { }; llvm::json::Value toJSON(const Module &); +/// Properties of a variable that can be used to determine how to render the +/// variable in the UI. +struct VariablePresentationHint { + /// The kind of variable. Before introducing additional values, try to use the + /// listed values. + std::string kind; + + /// Set of attributes represented as an array of strings. Before introducing + /// additional values, try to use the listed values. 
+ std::vector attributes; + + /// Visibility of variable. Before introducing additional values, try to use + /// the listed values. + std::string visibility; + + /// If true, clients can present the variable with a UI that supports a + /// specific gesture to trigger its evaluation. + /// + /// This mechanism can be used for properties that require executing code when + /// retrieving their value and where the code execution can be expensive + /// and/or produce side-effects. A typical example are properties based on a + /// getter function. + /// + /// Please note that in addition to the `lazy` flag, the variable's + /// `variablesReference` is expected to refer to a variable that will provide + /// the value through another `variable` request. + bool lazy = false; +}; +llvm::json::Value toJSON(const VariablePresentationHint &); +bool fromJSON(const llvm::json::Value &, VariablePresentationHint &, + llvm::json::Path); + +/// A Variable is a name/value pair. +/// +/// The `type` attribute is shown if space permits or when hovering over the +/// variable's name. +/// +/// The `kind` attribute is used to render additional properties of the +/// variable, e.g. different icons can be used to indicate that a variable is +/// public or private. +/// +/// If the value is structured (has children), a handle is provided to retrieve +/// the children with the `variables` request. +/// +/// If the number of named or indexed children is large, the numbers should be +/// returned via the `namedVariables` and `indexedVariables` attributes. +/// +/// The client can use this information to present the children in a paged UI +/// and fetch them in chunks. +struct Variable { + /// The variable's name. + std::string name; + + /// The variable's value. + /// + /// This can be a multi-line text, e.g. for a function the body of a function. 
+ /// + /// For structured variables (which do not have a simple value), it is + /// recommended to provide a one-line representation of the structured object. + /// This helps to identify the structured object in the collapsed state when + /// its children are not yet visible. + /// + /// An empty string can be used if no value should be shown in the UI. + std::string value; + + /// The type of the variable's value. Typically shown in the UI when hovering + /// over the value. + /// + /// This attribute should only be returned by a debug adapter if the + /// corresponding capability `supportsVariableType` is true. + std::string type; + + /// Properties of a variable that can be used to determine how to render the + /// variable in the UI. + std::optional presentationHint; + + /// The evaluatable name of this variable which can be passed to the + /// `evaluate` request to fetch the variable's value. + std::string evaluateName; + + /// If `variablesReference` is > 0, the variable is structured and its + /// children can be retrieved by passing `variablesReference` to the + /// `variables` request as long as execution remains suspended. See 'Lifetime + /// of Object References' in the Overview section for details. + uint64_t variablesReference = 0; + + /// The number of named child variables. + /// + /// The client can use this information to present the children in a paged UI + /// and fetch them in chunks. + uint64_t namedVariables = 0; + + /// The number of indexed child variables. + /// + /// The client can use this information to present the children in a paged UI + /// and fetch them in chunks. + uint64_t indexedVariables = 0; + + /// A memory reference associated with this variable. + /// + /// For pointer type variables, this is generally a reference to the memory + /// address contained in the pointer. + /// + /// For executable data, this reference may later be used in a `disassemble` + /// request. 
+ /// + /// This attribute may be returned by a debug adapter if corresponding + /// capability `supportsMemoryReferences` is true. + lldb::addr_t memoryReference = LLDB_INVALID_ADDRESS; + + /// A reference that allows the client to request the location where the + /// variable is declared. This should be present only if the adapter is likely + /// to be able to resolve the location. + /// + /// This reference shares the same lifetime as the `variablesReference`. See + /// 'Lifetime of Object References' in the Overview section for details. + uint64_t declarationLocationReference = 0; + + /// A reference that allows the client to request the location where the + /// variable's value is declared. For example, if the variable contains a + /// function pointer, the adapter may be able to look up the function's + /// location. This should be present only if the adapter is likely to be able + /// to resolve the location. + /// + /// This reference shares the same lifetime as the `variablesReference`. See + /// 'Lifetime of Object References' in the Overview section for details. 
+ uint64_t valueLocationReference = 0; +}; +llvm::json::Value toJSON(const Variable &); +bool fromJSON(const llvm::json::Value &, Variable &, llvm::json::Path); + } // namespace lldb_dap::protocol #endif diff --git a/lldb/tools/lldb-dap/ProtocolUtils.cpp b/lldb/tools/lldb-dap/ProtocolUtils.cpp index f9e373db74618..775c82fbb7716 100644 --- a/lldb/tools/lldb-dap/ProtocolUtils.cpp +++ b/lldb/tools/lldb-dap/ProtocolUtils.cpp @@ -7,9 +7,11 @@ //===----------------------------------------------------------------------===// #include "ProtocolUtils.h" +#include "JSONUtils.h" #include "LLDBUtils.h" #include "lldb/API/SBDebugger.h" +#include "lldb/API/SBDeclaration.h" #include "lldb/API/SBFormat.h" #include "lldb/API/SBMutex.h" #include "lldb/API/SBStream.h" @@ -227,9 +229,9 @@ std::vector GetThreads(lldb::SBProcess process, return threads; } -protocol::ExceptionBreakpointsFilter +ExceptionBreakpointsFilter CreateExceptionBreakpointFilter(const ExceptionBreakpoint &bp) { - protocol::ExceptionBreakpointsFilter filter; + ExceptionBreakpointsFilter filter; filter.filter = bp.GetFilter(); filter.label = bp.GetLabel(); filter.description = bp.GetLabel(); @@ -238,4 +240,68 @@ CreateExceptionBreakpointFilter(const ExceptionBreakpoint &bp) { return filter; } +Variable CreateVariable(lldb::SBValue v, int64_t var_ref, bool format_hex, + bool auto_variable_summaries, + bool synthetic_child_debugging, bool is_name_duplicated, + std::optional custom_name) { + VariableDescription desc(v, auto_variable_summaries, format_hex, + is_name_duplicated, custom_name); + Variable var; + var.name = desc.name; + var.value = desc.display_value; + var.type = desc.display_type_name; + + if (!desc.evaluate_name.empty()) + var.evaluateName = desc.evaluate_name; + + // If we have a type with many children, we would like to be able to + // give a hint to the IDE that the type has indexed children so that the + // request can be broken up in grabbing only a few children at a time. 
We + // want to be careful and only call "v.GetNumChildren()" if we have an array + // type or if we have a synthetic child provider producing indexed children. + // We don't want to call "v.GetNumChildren()" on all objects as class, struct + // and union types don't need to be completed if they are never expanded. So + // we want to avoid calling this to only cases where we it makes sense to keep + // performance high during normal debugging. + + // If we have an array type, say that it is indexed and provide the number + // of children in case we have a huge array. If we don't do this, then we + // might take a while to produce all children at onces which can delay your + // debug session. + if (desc.type_obj.IsArrayType()) { + var.indexedVariables = v.GetNumChildren(); + } else if (v.IsSynthetic()) { + // For a type with a synthetic child provider, the SBType of "v" won't tell + // us anything about what might be displayed. Instead, we check if the first + // child's name is "[0]" and then say it is indexed. We call + // GetNumChildren() only if the child name matches to avoid a potentially + // expensive operation. + if (lldb::SBValue first_child = v.GetChildAtIndex(0)) { + llvm::StringRef first_child_name = first_child.GetName(); + if (first_child_name == "[0]") { + size_t num_children = v.GetNumChildren(); + // If we are creating a "[raw]" fake child for each synthetic type, we + // have to account for it when returning indexed variables. 
+ if (synthetic_child_debugging) + ++num_children; + var.indexedVariables = num_children; + } + } + } + + if (v.MightHaveChildren()) + var.variablesReference = var_ref; + + if (v.GetDeclaration().IsValid()) + var.declarationLocationReference = PackLocation(var_ref, false); + + if (ValuePointsToCode(v)) + var.valueLocationReference = PackLocation(var_ref, true); + + if (lldb::addr_t addr = v.GetLoadAddress(); addr != LLDB_INVALID_ADDRESS) + var.memoryReference = addr; + + return var; +} + } // namespace lldb_dap diff --git a/lldb/tools/lldb-dap/ProtocolUtils.h b/lldb/tools/lldb-dap/ProtocolUtils.h index d906d8e881158..a1f7ae0661914 100644 --- a/lldb/tools/lldb-dap/ProtocolUtils.h +++ b/lldb/tools/lldb-dap/ProtocolUtils.h @@ -106,6 +106,48 @@ CreateExceptionBreakpointFilter(const ExceptionBreakpoint &bp); /// "2 MB"). std::string ConvertDebugInfoSizeToString(uint64_t debug_size); +/// Create a protocol Variable for the given value. +/// +/// \param[in] v +/// The LLDB value to use when populating out the "Variable" +/// object. +/// +/// \param[in] var_ref +/// The variable reference. Used to identify the value, e.g. +/// in the `variablesReference` or `declarationLocationReference` +/// properties. +/// +/// \param[in] format_hex +/// If set to true the variable will be formatted as hex in +/// the "value" key value pair for the value of the variable. +/// +/// \param[in] auto_variable_summaries +/// If set to true the variable will create an automatic variable summary. +/// +/// \param[in] synthetic_child_debugging +/// Whether to include synthetic children when listing properties of the +/// value. +/// +/// \param[in] is_name_duplicated +/// Whether the same variable name appears multiple times within the same +/// context (e.g. locals). This can happen due to shadowed variables in +/// nested blocks. +/// +/// As VSCode doesn't render two or more variables with the same name, we +/// apply a suffix to distinguish duplicated variables. 
+/// +/// \param[in] custom_name +/// A provided custom name that is used instead of the SBValue's when +/// creating the JSON representation. +/// +/// \return +/// A Variable representing the given value. +protocol::Variable CreateVariable(lldb::SBValue v, int64_t var_ref, + bool format_hex, bool auto_variable_summaries, + bool synthetic_child_debugging, + bool is_name_duplicated, + std::optional custom_name = {}); + } // namespace lldb_dap #endif diff --git a/lldb/tools/lldb-dap/README.md b/lldb/tools/lldb-dap/README.md index 18bfa9d518b98..f88f3ced6f25f 100644 --- a/lldb/tools/lldb-dap/README.md +++ b/lldb/tools/lldb-dap/README.md @@ -235,7 +235,8 @@ contain the following key/value pairs: | **cwd** | string | | The program working directory. | **env** | dictionary | | Environment variables to set when launching the program. The format of each environment variable string is "VAR=VALUE" for environment variables with values or just "VAR" for environment variables with no values. | **stopOnEntry** | boolean | | Whether to stop program immediately after launching. -| **runInTerminal** | boolean | | Launch the program inside an integrated terminal in the IDE. Useful for debugging interactive command line programs. +| **runInTerminal** (deprecated) | boolean | | Launch the program inside an integrated terminal in the IDE. Useful for debugging interactive command line programs. +| **console** | string | | Specify where to launch the program: internal console (`internalConsole`), integrated terminal (`integratedTerminal`) or external terminal (`externalTerminal`). Supported from lldb-dap 21.0 version. | **launchCommands** | [string] | | LLDB commands executed to launch the program. 
For JSON configurations of `"type": "attach"`, the JSON configuration can contain diff --git a/lldb/tools/lldb-dap/package.json b/lldb/tools/lldb-dap/package.json index b150dee792c34..801abe73edd7d 100644 --- a/lldb/tools/lldb-dap/package.json +++ b/lldb/tools/lldb-dap/package.json @@ -528,7 +528,23 @@ "runInTerminal": { "type": "boolean", "description": "Launch the program inside an integrated terminal in the IDE. Useful for debugging interactive command line programs", - "default": false + "default": false, + "deprecationMessage": "Attribute 'runInTerminal' is deprecated, use 'console' instead." + }, + "console": { + "type": "string", + "enum": [ + "internalConsole", + "integratedTerminal", + "externalTerminal" + ], + "enumDescriptions": [ + "Use Debug Console for output (input is not supported).", + "Launch the program inside an integrated terminal in the IDE.", + "Launch the program inside an external terminal window." + ], + "description": "Specify where to launch the program: internal console, integrated terminal or external terminal.", + "default": "internalConsole" }, "timeout": { "type": "number", diff --git a/lldb/unittests/DAP/JSONUtilsTest.cpp b/lldb/unittests/DAP/JSONUtilsTest.cpp index 876980eb4bf4a..86ba2d171a2c0 100644 --- a/lldb/unittests/DAP/JSONUtilsTest.cpp +++ b/lldb/unittests/DAP/JSONUtilsTest.cpp @@ -7,9 +7,9 @@ //===----------------------------------------------------------------------===// #include "JSONUtils.h" -#include "lldb/API/SBModule.h" -#include "lldb/API/SBTarget.h" +#include "lldb/lldb-defines.h" #include "llvm/Support/JSON.h" +#include "llvm/Testing/Support/Error.h" #include "gtest/gtest.h" #include @@ -182,3 +182,66 @@ TEST(JSONUtilsTest, GetStrings_NestedArray) { ASSERT_EQ(result.size(), 1UL); EXPECT_EQ(result[0], "string"); } + +TEST(JSONUtilsTest, DecodeMemoryReference) { + EXPECT_EQ(DecodeMemoryReference(""), std::nullopt); + EXPECT_EQ(DecodeMemoryReference("123"), std::nullopt); + EXPECT_EQ(DecodeMemoryReference("0o123"), 
std::nullopt); + EXPECT_EQ(DecodeMemoryReference("0b1010101"), std::nullopt); + EXPECT_EQ(DecodeMemoryReference("0x123"), 291u); + + { + addr_t addr = LLDB_INVALID_ADDRESS; + json::Path::Root root; + EXPECT_TRUE(DecodeMemoryReference(json::Object{{"mem_ref", "0x123"}}, + "mem_ref", addr, root, + /*required=*/true)); + EXPECT_EQ(addr, 291u); + } + + { + addr_t addr = LLDB_INVALID_ADDRESS; + json::Path::Root root; + EXPECT_TRUE(DecodeMemoryReference(json::Object{}, "mem_ref", addr, root, + /*required=*/false)); + } + + { + addr_t addr = LLDB_INVALID_ADDRESS; + json::Path::Root root; + EXPECT_FALSE(DecodeMemoryReference(json::Value{"string"}, "mem_ref", addr, + root, + /*required=*/true)); + EXPECT_THAT_ERROR(root.getError(), FailedWithMessage("expected object")); + } + + { + addr_t addr = LLDB_INVALID_ADDRESS; + json::Path::Root root; + EXPECT_FALSE(DecodeMemoryReference(json::Object{}, "mem_ref", addr, root, + /*required=*/true)); + EXPECT_THAT_ERROR(root.getError(), + FailedWithMessage("missing value at (root).mem_ref")); + } + + { + addr_t addr = LLDB_INVALID_ADDRESS; + json::Path::Root root; + EXPECT_FALSE(DecodeMemoryReference(json::Object{{"mem_ref", 123}}, + "mem_ref", addr, root, + /*required=*/true)); + EXPECT_THAT_ERROR(root.getError(), + FailedWithMessage("expected string at (root).mem_ref")); + } + + { + addr_t addr = LLDB_INVALID_ADDRESS; + json::Path::Root root; + EXPECT_FALSE(DecodeMemoryReference(json::Object{{"mem_ref", "123"}}, + "mem_ref", addr, root, + /*required=*/true)); + EXPECT_THAT_ERROR( + root.getError(), + FailedWithMessage("malformed memory reference at (root).mem_ref")); + } +} diff --git a/lldb/unittests/DAP/ProtocolTypesTest.cpp b/lldb/unittests/DAP/ProtocolTypesTest.cpp index b5cf06bd6f0b6..8add315f47036 100644 --- a/lldb/unittests/DAP/ProtocolTypesTest.cpp +++ b/lldb/unittests/DAP/ProtocolTypesTest.cpp @@ -883,3 +883,120 @@ TEST(ProtocolTypesTest, ModulesResponseBody) { ASSERT_THAT_EXPECTED(expected, llvm::Succeeded()); 
EXPECT_EQ(pp(*expected), pp(response)); } + +TEST(ProtocolTypesTest, VariablePresentationHint) { + VariablePresentationHint hint; + hint.kind = "kind"; + hint.attributes = {"a", "b", "c"}; + hint.visibility = "public"; + hint.lazy = true; + + const StringRef json = R"({ + "attributes": [ + "a", + "b", + "c" + ], + "kind": "kind", + "lazy": true, + "visibility": "public" +})"; + + EXPECT_EQ(pp(Value(hint)), json); + EXPECT_THAT_EXPECTED(json::parse(json), HasValue(Value(hint))); +} + +TEST(ProtocolTypesTest, Variable) { + Variable var; + var.name = "var1"; + var.variablesReference = 42; + var.value = "value"; + var.type = "type"; + + VariablePresentationHint hint; + hint.kind = "kind"; + var.presentationHint = std::move(hint); + var.evaluateName = "my_name"; + var.namedVariables = 7; + var.indexedVariables = 7; + var.memoryReference = 291u; + var.declarationLocationReference = 24; + var.valueLocationReference = 100; + + const StringRef json = R"({ + "declarationLocationReference": 24, + "evaluateName": "my_name", + "indexedVariables": 7, + "memoryReference": "0x123", + "name": "var1", + "namedVariables": 7, + "presentationHint": { + "kind": "kind" + }, + "type": "type", + "value": "value", + "valueLocationReference": 100, + "variablesReference": 42 +})"; + + EXPECT_EQ(pp(Value(var)), json); + EXPECT_THAT_EXPECTED(json::parse(json), HasValue(Value(var))); +} + +TEST(ProtocolTypesTest, VariablesArguments) { + llvm::Expected expected = parse(R"({ + "variablesReference": 42, + "filter": "indexed", + "start": 10, + "count": 5, + "format": { + "hex": true + } + })"); + ASSERT_THAT_EXPECTED(expected, llvm::Succeeded()); + EXPECT_EQ(expected->variablesReference, 42u); + EXPECT_EQ(expected->filter, VariablesArguments::eVariablesFilterIndexed); + EXPECT_EQ(expected->start, 10u); + EXPECT_EQ(expected->count, 5u); + EXPECT_EQ(expected->format->hex, true); + + EXPECT_THAT_EXPECTED( + parse(R"({})"), + FailedWithMessage("missing value at (root).variablesReference")); + 
EXPECT_THAT_EXPECTED( + parse( + R"({"variablesReference": 42, "filter": "my-filter"})"), + FailedWithMessage( + "unexpected value, expected 'named' or 'indexed' at (root).filter")); +} + +TEST(ProtocolTypesTest, VariablesResponseBody) { + Variable var1; + var1.name = "var1"; + var1.variablesReference = 42; + var1.value = ""; + + Variable var2; + var2.name = "var2"; + var2.variablesReference = 3; + var2.value = ""; + + VariablesResponseBody response{{var1, var2}}; + + Expected expected = json::parse(R"({ + "variables": [ + { + "name": "var1", + "value": "", + "variablesReference": 42 + }, + { + "name": "var2", + "value": "", + "variablesReference": 3 + } + ] + })"); + ASSERT_THAT_EXPECTED(expected, llvm::Succeeded()); + EXPECT_EQ(pp(*expected), pp(response)); +} diff --git a/llvm/cmake/modules/TableGen.cmake b/llvm/cmake/modules/TableGen.cmake index 67a628d4953c3..9a2e73a1e3718 100644 --- a/llvm/cmake/modules/TableGen.cmake +++ b/llvm/cmake/modules/TableGen.cmake @@ -4,10 +4,6 @@ # Adds the name of the generated file to TABLEGEN_OUTPUT. include(LLVMDistributionSupport) -# Clear out any pre-existing compile_commands file before processing. This -# allows for generating a clean compile_commands on each configure. -file(REMOVE ${CMAKE_BINARY_DIR}/tablegen_compile_commands.yml) - function(tablegen project ofn) cmake_parse_arguments(ARG "" "" "DEPENDS;EXTRA_INCLUDES" ${ARGN}) @@ -250,3 +246,11 @@ macro(add_tablegen target project) set_property(GLOBAL APPEND PROPERTY ${export_upper}_EXPORTS ${target}) endif() endmacro() + +# Make sure 'tablegen_compile_commands.yml' is only deleted once the very +# first time this file is included. +include_guard(GLOBAL) + +# Clear out any pre-existing compile_commands file before processing. This +# allows for generating a clean compile_commands on each configure. 
+file(REMOVE ${CMAKE_BINARY_DIR}/tablegen_compile_commands.yml) diff --git a/llvm/docs/Remarks.rst b/llvm/docs/Remarks.rst index 3be66e5adac95..c89940f9ff4d5 100644 --- a/llvm/docs/Remarks.rst +++ b/llvm/docs/Remarks.rst @@ -57,6 +57,11 @@ Enabling optimization remarks There are two modes that are supported for enabling optimization remarks in LLVM: through remark diagnostics, or through serialized remarks. +See also the clang flags +`-Rpass `_ +and +`-fsave-optimization-record `_. + Remark diagnostics ------------------ diff --git a/llvm/include/llvm-c/DebugInfo.h b/llvm/include/llvm-c/DebugInfo.h index 7d1caa6438906..212c2e1c86a65 100644 --- a/llvm/include/llvm-c/DebugInfo.h +++ b/llvm/include/llvm-c/DebugInfo.h @@ -688,6 +688,77 @@ LLVM_C_ABI LLVMMetadataRef LLVMDIBuilderCreateArrayType( LLVMDIBuilderRef Builder, uint64_t Size, uint32_t AlignInBits, LLVMMetadataRef Ty, LLVMMetadataRef *Subscripts, unsigned NumSubscripts); +/** + * Create debugging information entry for a set. + * \param Builder The DIBuilder. + * \param Scope The scope in which the set is defined. + * \param Name A name that uniquely identifies this set. + * \param NameLen The length of the C string passed to \c Name. + * \param File File where the set is located. + * \param Line Line number of the declaration. + * \param SizeInBits Set size. + * \param AlignInBits Set alignment. + * \param BaseTy The base type of the set. + */ +LLVMMetadataRef LLVMDIBuilderCreateSetType( + LLVMDIBuilderRef Builder, LLVMMetadataRef Scope, const char *Name, + size_t NameLen, LLVMMetadataRef File, unsigned LineNumber, + uint64_t SizeInBits, uint32_t AlignInBits, LLVMMetadataRef BaseTy); + +/** + * Create a descriptor for a subrange with dynamic bounds. + * \param Builder The DIBuilder. + * \param Scope The scope in which the subrange is defined. + * \param Name A name that uniquely identifies this subrange. + * \param NameLen The length of the C string passed to \c Name. + * \param LineNo Line number. 
+ * \param File File where the subrange is located. + * \param SizeInBits Member size. + * \param AlignInBits Member alignment. + * \param Flags Flags. + * \param BaseTy The base type of the subrange, e.g. integer or enumeration + * \param LowerBound Lower bound of the subrange. + * \param UpperBound Upper bound of the subrange. + * \param Stride Stride of the subrange. + * \param Bias Bias of the subrange. + */ +LLVMMetadataRef LLVMDIBuilderCreateSubrangeType( + LLVMDIBuilderRef Builder, LLVMMetadataRef Scope, const char *Name, + size_t NameLen, unsigned LineNo, LLVMMetadataRef File, uint64_t SizeInBits, + uint32_t AlignInBits, LLVMDIFlags Flags, LLVMMetadataRef BaseTy, + LLVMMetadataRef LowerBound, LLVMMetadataRef UpperBound, + LLVMMetadataRef Stride, LLVMMetadataRef Bias); + +/** + * Create debugging information entry for a dynamic array. + * \param Builder The DIBuilder. + * \param Size Array size. + * \param AlignInBits Alignment. + * \param Ty Element type. + * \param Subscripts Subscripts. + * \param NumSubscripts Number of subscripts. + * \param DataLocation DataLocation. (DIVariable, DIExpression or NULL) + * \param Associated Associated. (DIVariable, DIExpression or NULL) + * \param Allocated Allocated. (DIVariable, DIExpression or NULL) + * \param Rank Rank. (DIVariable, DIExpression or NULL) + * \param BitStride BitStride. + */ +LLVMMetadataRef LLVMDIBuilderCreateDynamicArrayType( + LLVMDIBuilderRef Builder, LLVMMetadataRef Scope, const char *Name, + size_t NameLen, unsigned LineNo, LLVMMetadataRef File, uint64_t Size, + uint32_t AlignInBits, LLVMMetadataRef Ty, LLVMMetadataRef *Subscripts, + unsigned NumSubscripts, LLVMMetadataRef DataLocation, + LLVMMetadataRef Associated, LLVMMetadataRef Allocated, LLVMMetadataRef Rank, + LLVMMetadataRef BitStride); + +/** + * Replace arrays. 
+ * + * @see DIBuilder::replaceArrays() + */ +void LLVMReplaceArrays(LLVMDIBuilderRef Builder, LLVMMetadataRef *T, + LLVMMetadataRef *Elements, unsigned NumElements); + /** * Create debugging information entry for a vector type. * \param Builder The DIBuilder. diff --git a/llvm/include/llvm/ADT/DenseMapInfo.h b/llvm/include/llvm/ADT/DenseMapInfo.h index 07c37e353a40b..b850223c953da 100644 --- a/llvm/include/llvm/ADT/DenseMapInfo.h +++ b/llvm/include/llvm/ADT/DenseMapInfo.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -320,6 +321,28 @@ struct DenseMapInfo>> { static bool isEqual(const Enum &LHS, const Enum &RHS) { return LHS == RHS; } }; + +template struct DenseMapInfo> { + using Optional = std::optional; + using Info = DenseMapInfo; + + static inline Optional getEmptyKey() { return {Info::getEmptyKey()}; } + + static inline Optional getTombstoneKey() { return {Info::getTombstoneKey()}; } + + static unsigned getHashValue(const Optional &OptionalVal) { + return detail::combineHashValue( + OptionalVal.has_value(), + Info::getHashValue(OptionalVal.value_or(Info::getEmptyKey()))); + } + + static bool isEqual(const Optional &LHS, const Optional &RHS) { + if (LHS && RHS) { + return Info::isEqual(LHS.value(), RHS.value()); + } + return !LHS && !RHS; + } +}; } // end namespace llvm #endif // LLVM_ADT_DENSEMAPINFO_H diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h index 1415da14a3494..73bfe1aabb4e0 100644 --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -423,6 +423,11 @@ class MemoryDepChecker { getDependenceDistanceStrideAndSize(const MemAccessInfo &A, Instruction *AInst, const MemAccessInfo &B, Instruction *BInst); + + // Return true if we can prove that \p Sink only accesses memory after \p + // Src's end or vice versa. 
+ bool areAccessesCompletelyBeforeOrAfter(const SCEV *Src, Type *SrcTy, + const SCEV *Sink, Type *SinkTy); }; class RuntimePointerChecking; diff --git a/llvm/include/llvm/Bitcode/BitcodeWriter.h b/llvm/include/llvm/Bitcode/BitcodeWriter.h index e9b573733451b..5c81c48a80303 100644 --- a/llvm/include/llvm/Bitcode/BitcodeWriter.h +++ b/llvm/include/llvm/Bitcode/BitcodeWriter.h @@ -23,6 +23,7 @@ #include #include #include +#include "llvm/ADT/DenseMap.h" namespace llvm { @@ -93,6 +94,18 @@ class BitcodeWriter { bool GenerateHash = false, ModuleHash *ModHash = nullptr); + + +// This is for writing backward-compatible bitcode with non-opaque types + +LLVM_ABI void writeBitcodeWithNonOpaqueTypes(const Module &M, + bool ShouldPreserveUseListOrder = false, + const ModuleSummaryIndex *Index = nullptr, + bool GenerateHash = false, + ModuleHash *ModHash = nullptr, + bool WriteNonOpaqueTypes = true, + DenseMap *NonOpaqueTypeMap = nullptr); + /// Write the specified thin link bitcode file (i.e., the minimized bitcode /// file) to the buffer specified at construction time. 
The thin link /// bitcode file is used for thin link, and it only contains the necessary diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h index 08e6a0e3ef629..f0cfa7663c5fa 100644 --- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h +++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h @@ -19,7 +19,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" -#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" #include "llvm/InitializePasses.h" @@ -27,12 +26,13 @@ #include "llvm/Support/Error.h" #include "llvm/Support/LineIterator.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/UniqueBBID.h" #include "llvm/Target/TargetMachine.h" namespace llvm { // This struct represents the cluster information for a machine basic block, -// which is specifed by a unique ID (`MachineBasicBlock::BBID`). +// which is specified by a unique basic block ID. struct BBClusterInfo { // Basic block ID. UniqueBBID BBID; @@ -52,27 +52,6 @@ struct FunctionPathAndClusterInfo { SmallVector> ClonePaths; }; -// Provides DenseMapInfo for UniqueBBID. 
-template <> struct DenseMapInfo { - static inline UniqueBBID getEmptyKey() { - unsigned EmptyKey = DenseMapInfo::getEmptyKey(); - return UniqueBBID{EmptyKey, EmptyKey}; - } - static inline UniqueBBID getTombstoneKey() { - unsigned TombstoneKey = DenseMapInfo::getTombstoneKey(); - return UniqueBBID{TombstoneKey, TombstoneKey}; - } - static unsigned getHashValue(const UniqueBBID &Val) { - std::pair PairVal = - std::make_pair(Val.BaseID, Val.CloneID); - return DenseMapInfo>::getHashValue(PairVal); - } - static bool isEqual(const UniqueBBID &LHS, const UniqueBBID &RHS) { - return DenseMapInfo::isEqual(LHS.BaseID, RHS.BaseID) && - DenseMapInfo::isEqual(LHS.CloneID, RHS.CloneID); - } -}; - class BasicBlockSectionsProfileReader { public: friend class BasicBlockSectionsProfileReaderWrapperPass; diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index fefd36ec54ae2..f6936b98bf3e4 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1773,6 +1773,39 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { } } + if (ICA.getID() == Intrinsic::vp_scatter) { + if (ICA.isTypeBasedOnly()) { + IntrinsicCostAttributes MaskedScatter( + *VPIntrinsic::getFunctionalIntrinsicIDForVP(ICA.getID()), + ICA.getReturnType(), ArrayRef(ICA.getArgTypes()).drop_back(1), + ICA.getFlags()); + return getTypeBasedIntrinsicInstrCost(MaskedScatter, CostKind); + } + Align Alignment; + if (auto *VPI = dyn_cast_or_null(ICA.getInst())) + Alignment = VPI->getPointerAlignment().valueOrOne(); + bool VarMask = isa(ICA.getArgs()[2]); + return thisT()->getGatherScatterOpCost( + Instruction::Store, ICA.getArgTypes()[0], ICA.getArgs()[1], VarMask, + Alignment, CostKind, nullptr); + } + if (ICA.getID() == Intrinsic::vp_gather) { + if (ICA.isTypeBasedOnly()) { + IntrinsicCostAttributes MaskedGather( + *VPIntrinsic::getFunctionalIntrinsicIDForVP(ICA.getID()), + ICA.getReturnType(), 
ArrayRef(ICA.getArgTypes()).drop_back(1), + ICA.getFlags()); + return getTypeBasedIntrinsicInstrCost(MaskedGather, CostKind); + } + Align Alignment; + if (auto *VPI = dyn_cast_or_null(ICA.getInst())) + Alignment = VPI->getPointerAlignment().valueOrOne(); + bool VarMask = isa(ICA.getArgs()[1]); + return thisT()->getGatherScatterOpCost( + Instruction::Load, ICA.getReturnType(), ICA.getArgs()[0], VarMask, + Alignment, CostKind, nullptr); + } + if (ICA.getID() == Intrinsic::vp_select || ICA.getID() == Intrinsic::vp_merge) { TTI::OperandValueInfo OpInfoX, OpInfoY; diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index 7a598bb77b356..756c0b24a6f8b 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -2424,6 +2424,11 @@ class LLVM_ABI MachineIRBuilder { return buildInstr(TargetOpcode::G_RESET_FPMODE, {}, {}); } + /// Build and insert \p Dst = G_GET_ROUNDING + MachineInstrBuilder buildGetRounding(const DstOp &Dst) { + return buildInstr(TargetOpcode::G_GET_ROUNDING, {Dst}, {}); + } + virtual MachineInstrBuilder buildInstr(unsigned Opc, ArrayRef DstOps, ArrayRef SrcOps, std::optional Flags = std::nullopt); diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h index 3d2da01f2c856..938d71dd030e8 100644 --- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h +++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h @@ -25,6 +25,7 @@ #include "llvm/MC/LaneBitmask.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/UniqueBBID.h" #include #include #include @@ -99,13 +100,6 @@ template <> struct DenseMapInfo { } }; -// This structure represents the information for a basic block pertaining to -// the basic block sections profile. 
-struct UniqueBBID { - unsigned BaseID; - unsigned CloneID; -}; - template <> struct ilist_traits { private: friend class MachineBasicBlock; // Set by the owning MachineBasicBlock. diff --git a/llvm/include/llvm/CodeGen/MachinePipeliner.h b/llvm/include/llvm/CodeGen/MachinePipeliner.h index e4e794c434adb..e50443d25cc60 100644 --- a/llvm/include/llvm/CodeGen/MachinePipeliner.h +++ b/llvm/include/llvm/CodeGen/MachinePipeliner.h @@ -120,14 +120,17 @@ class SwingSchedulerDDGEdge { SUnit *Dst = nullptr; SDep Pred; unsigned Distance = 0; + bool IsValidationOnly = false; public: /// Creates an edge corresponding to an edge represented by \p PredOrSucc and /// \p Dep in the original DAG. This pair has no information about the /// direction of the edge, so we need to pass an additional argument \p /// IsSucc. - SwingSchedulerDDGEdge(SUnit *PredOrSucc, const SDep &Dep, bool IsSucc) - : Dst(PredOrSucc), Pred(Dep), Distance(0u) { + SwingSchedulerDDGEdge(SUnit *PredOrSucc, const SDep &Dep, bool IsSucc, + bool IsValidationOnly) + : Dst(PredOrSucc), Pred(Dep), Distance(0u), + IsValidationOnly(IsValidationOnly) { SUnit *Src = Dep.getSUnit(); if (IsSucc) { @@ -188,6 +191,10 @@ class SwingSchedulerDDGEdge { /// functions. We ignore the back-edge recurrence in order to avoid unbounded /// recursion in the calculation of the ASAP, ALAP, etc functions. bool ignoreDependence(bool IgnoreAnti) const; + + /// Returns true if this edge is intended to be used only for validating the + /// schedule. + bool isValidationOnly() const { return IsValidationOnly; } }; /// Represents loop-carried dependencies. Because SwingSchedulerDAG doesn't @@ -208,25 +215,21 @@ struct LoopCarriedEdges { return &Ite->second; } - /// Retruns true if the edge from \p From to \p To is a back-edge that should - /// be used when scheduling. - bool shouldUseWhenScheduling(const SUnit *From, const SUnit *To) const; - /// Adds some edges to the original DAG that correspond to loop-carried /// dependencies. 
Historically, loop-carried edges are represented by using /// non-loop-carried edges in the original DAG. This function appends such /// edges to preserve the previous behavior. - void modifySUnits(std::vector &SUnits); + void modifySUnits(std::vector &SUnits, const TargetInstrInfo *TII); void dump(SUnit *SU, const TargetRegisterInfo *TRI, const MachineRegisterInfo *MRI) const; }; -/// Represents dependencies between instructions. This class is a wrapper of -/// `SUnits` and its dependencies to manipulate back-edges in a natural way. -/// Currently it only supports back-edges via PHI, which are expressed as -/// anti-dependencies in the original DAG. -/// FIXME: Support any other loop-carried dependencies +/// This class provides APIs to retrieve edges from/to an SUnit node, with a +/// particular focus on loop-carried dependencies. Since SUnit is not designed +/// to represent such edges, handling them directly using its APIs has required +/// non-trivial logic in the past. This class serves as a wrapper around SUnit, +/// offering a simpler interface for managing these dependencies. class SwingSchedulerDDG { using EdgesType = SmallVector; @@ -244,17 +247,26 @@ class SwingSchedulerDDG { SwingSchedulerDDGEdges EntrySUEdges; SwingSchedulerDDGEdges ExitSUEdges; + /// Edges that are used only when validating the schedule. These edges are + /// not considered to drive the optimization heuristics. + SmallVector ValidationOnlyEdges; + + /// Adds a NON-validation-only edge to the DDG. Assumes to be called only by + /// the ctor. 
void addEdge(const SUnit *SU, const SwingSchedulerDDGEdge &Edge); SwingSchedulerDDGEdges &getEdges(const SUnit *SU); const SwingSchedulerDDGEdges &getEdges(const SUnit *SU) const; public: - SwingSchedulerDDG(std::vector &SUnits, SUnit *EntrySU, SUnit *ExitSU); + SwingSchedulerDDG(std::vector &SUnits, SUnit *EntrySU, SUnit *ExitSU, + const LoopCarriedEdges &LCE); const EdgesType &getInEdges(const SUnit *SU) const; const EdgesType &getOutEdges(const SUnit *SU) const; + + bool isValidSchedule(const SMSchedule &Schedule) const; }; /// This class builds the dependence graph for the instructions in a loop, diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index b856b4786573b..657951ddafd4f 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -2488,8 +2488,7 @@ class SelectionDAG { /// Check if a value \op N is a constant using the target's BooleanContent for /// its type. - LLVM_ABI std::optional - isBoolConstant(SDValue N, bool AllowTruncation = false) const; + LLVM_ABI std::optional isBoolConstant(SDValue N) const; /// Set CallSiteInfo to be associated with Node. void addCallSiteInfo(const SDNode *Node, CallSiteInfo &&CallInfo) { diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index c9e5d9999138f..a248eb7444b20 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -4371,6 +4371,11 @@ class LLVM_ABI TargetLowering : public TargetLoweringBase { Op.getOpcode() == ISD::SPLAT_VECTOR_PARTS; } + /// Return true if the given select/vselect should be considered canonical and + /// not be transformed. Currently only used for "vselect (not Cond), N1, N2 -> + /// vselect Cond, N2, N1". + virtual bool isTargetCanonicalSelect(SDNode *N) const { return false; } + struct DAGCombinerInfo { void *DC; // The DAG Combiner object. 
CombineLevel Level; diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index f2610011a7e04..1b94657dfae1e 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -955,11 +955,12 @@ def OMP_Parallel : Directive<[Spelling<"parallel">]> { VersionedClause, VersionedClause, ]; - let allowedOnceClauses = [ - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, + let allowedOnceClauses = [VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, ]; let association = AS_Block; let category = CA_Executable; diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 16885f331e9dd..8016757cf0f3c 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -3510,6 +3510,18 @@ def int_amdgcn_ashr_pk_u8_i32 : ClangBuiltin<"__builtin_amdgcn_ashr_pk_u8_i32">, // gfx1250 intrinsics // ===----------------------------------------------------------------------===// +// Async waits decrement ASYNCcnt and tensor waits decrement TENSORcnt which is +// modeled as InaccessibleMem. 
+class AMDGPUWaitAsyncIntrinsic : + Intrinsic<[], [llvm_i16_ty], + [IntrInaccessibleMemOnly, ImmArg>, IntrWillReturn, IntrNoCallback, + IntrNoFree]>; + +def int_amdgcn_s_wait_asynccnt : + ClangBuiltin<"__builtin_amdgcn_s_wait_asynccnt">, AMDGPUWaitAsyncIntrinsic; +def int_amdgcn_s_wait_tensorcnt : + ClangBuiltin<"__builtin_amdgcn_s_wait_tensorcnt">, AMDGPUWaitAsyncIntrinsic; + def int_amdgcn_ds_atomic_async_barrier_arrive_b64 : ClangBuiltin<"__builtin_amdgcn_ds_atomic_async_barrier_arrive_b64">, Intrinsic<[], [local_ptr_ty], diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.td b/llvm/include/llvm/IR/RuntimeLibcalls.td index cd676e1661d62..c236e698759cc 100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.td +++ b/llvm/include/llvm/IR/RuntimeLibcalls.td @@ -387,6 +387,34 @@ def HEXAGON_MEMCPY_LIKELY_ALIGNED_MIN32BYTES_MULT8BYTES : RuntimeLibcall; // XCore calls def MEMCPY_ALIGN_4 : RuntimeLibcall; +// Objective-C calls +def OBJC_AUTORELEASE : RuntimeLibcall; +def OBJC_AUTORELEASEPOOLPOP : RuntimeLibcall; +def OBJC_AUTORELEASEPOOLPUSH : RuntimeLibcall; +def OBJC_AUTORELEASERETURNVALUE : RuntimeLibcall; +def OBJC_COPYWEAK : RuntimeLibcall; +def OBJC_DESTROYWEAK : RuntimeLibcall; +def OBJC_INITWEAK : RuntimeLibcall; +def OBJC_LOADWEAK : RuntimeLibcall; +def OBJC_LOADWEAKRETAINED : RuntimeLibcall; +def OBJC_MOVEWEAK : RuntimeLibcall; +def OBJC_RELEASE : RuntimeLibcall; +def OBJC_RETAIN : RuntimeLibcall; +def OBJC_RETAINAUTORELEASE : RuntimeLibcall; +def OBJC_RETAINAUTORELEASERETURNVALUE : RuntimeLibcall; +def OBJC_RETAINAUTORELEASEDRETURNVALUE : RuntimeLibcall; +def OBJC_CLAIMAUTORELEASEDRETURNVALUE : RuntimeLibcall; +def OBJC_RETAINBLOCK : RuntimeLibcall; +def OBJC_STORESTRONG : RuntimeLibcall; +def OBJC_STOREWEAK : RuntimeLibcall; +def OBJC_UNSAFECLAIMAUTORELEASEDRETURNVALUE : RuntimeLibcall; +def OBJC_RETAINEDOBJECT : RuntimeLibcall; +def OBJC_UNRETAINEDOBJECT : RuntimeLibcall; +def OBJC_UNRETAINEDPOINTER : RuntimeLibcall; +def OBJC_RETAIN_AUTORELEASE : RuntimeLibcall; 
+def OBJC_SYNC_ENTER : RuntimeLibcall; +def OBJC_SYNC_EXIT : RuntimeLibcall; + //-------------------------------------------------------------------- // Define implementation default libcalls //-------------------------------------------------------------------- @@ -1032,6 +1060,37 @@ defvar LibmHasSinCosF80 = LibcallImpls<(add sincos_f80), hasSinCos>; defvar LibmHasSinCosF128 = LibcallImpls<(add sincos_f128), hasSinCos>; defvar LibmHasSinCosPPCF128 = LibcallImpls<(add sincos_ppcf128), hasSinCos>; +//===----------------------------------------------------------------------===// +// Objective-C Runtime Libcalls +//===----------------------------------------------------------------------===// + +def objc_autorelease : RuntimeLibcallImpl; +def objc_autoreleasePoolPop : RuntimeLibcallImpl; +def objc_autoreleasePoolPush : RuntimeLibcallImpl; +def objc_autoreleaseReturnValue : RuntimeLibcallImpl; +def objc_copyWeak : RuntimeLibcallImpl; +def objc_destroyWeak : RuntimeLibcallImpl; +def objc_initWeak : RuntimeLibcallImpl; +def objc_loadWeak : RuntimeLibcallImpl; +def objc_loadWeakRetained : RuntimeLibcallImpl; +def objc_moveWeak : RuntimeLibcallImpl; +def objc_release : RuntimeLibcallImpl; +def objc_retain : RuntimeLibcallImpl; +def objc_retainAutorelease : RuntimeLibcallImpl; +def objc_retainAutoreleaseReturnValue : RuntimeLibcallImpl; +def objc_retainAutoreleasedReturnValue : RuntimeLibcallImpl; +def objc_claimAutoreleasedReturnValue : RuntimeLibcallImpl; +def objc_retainBlock : RuntimeLibcallImpl; +def objc_storeStrong : RuntimeLibcallImpl; +def objc_storeWeak : RuntimeLibcallImpl; +def objc_unsafeClaimAutoreleasedReturnValue : RuntimeLibcallImpl; +def objc_retainedObject : RuntimeLibcallImpl; +def objc_unretainedObject : RuntimeLibcallImpl; +def objc_unretainedPointer : RuntimeLibcallImpl; +def objc_retain_autorelease : RuntimeLibcallImpl; +def objc_sync_enter : RuntimeLibcallImpl; +def objc_sync_exit : RuntimeLibcallImpl; + 
//===----------------------------------------------------------------------===// // AArch64 Runtime Libcalls //===----------------------------------------------------------------------===// diff --git a/llvm/include/llvm/Object/ELFTypes.h b/llvm/include/llvm/Object/ELFTypes.h index d2d0f22309fd0..f22b376c3ab5b 100644 --- a/llvm/include/llvm/Object/ELFTypes.h +++ b/llvm/include/llvm/Object/ELFTypes.h @@ -18,6 +18,7 @@ #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/UniqueBBID.h" #include #include #include @@ -926,6 +927,8 @@ struct BBAddrMap { : ID(ID), Offset(Offset), Size(Size), MD(MD), CallsiteOffsets(std::move(CallsiteOffsets)) {} + UniqueBBID getID() const { return {ID, 0}; } + bool operator==(const BBEntry &Other) const { return ID == Other.ID && Offset == Other.Offset && Size == Other.Size && MD == Other.MD && CallsiteOffsets == Other.CallsiteOffsets; diff --git a/llvm/include/llvm/Support/Endian.h b/llvm/include/llvm/Support/Endian.h index 574f9508420a0..02a3194e09784 100644 --- a/llvm/include/llvm/Support/Endian.h +++ b/llvm/include/llvm/Support/Endian.h @@ -223,10 +223,11 @@ struct packed_endian_specific_integral { explicit packed_endian_specific_integral(value_type val) { *this = val; } - operator value_type() const { + value_type value() const { return endian::read( (const void*)Value.buffer); } + operator value_type() const { return value(); } void operator=(value_type newValue) { endian::write( diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def index 6ba0290cc77a6..b905576b61791 100644 --- a/llvm/include/llvm/Support/TargetOpcodes.def +++ b/llvm/include/llvm/Support/TargetOpcodes.def @@ -744,6 +744,8 @@ HANDLE_TARGET_OPCODE(G_GET_FPMODE) HANDLE_TARGET_OPCODE(G_SET_FPMODE) HANDLE_TARGET_OPCODE(G_RESET_FPMODE) +HANDLE_TARGET_OPCODE(G_GET_ROUNDING) + /// Generic pointer offset HANDLE_TARGET_OPCODE(G_PTR_ADD) diff --git 
a/llvm/include/llvm/Support/UniqueBBID.h b/llvm/include/llvm/Support/UniqueBBID.h new file mode 100644 index 0000000000000..a5715cd107629 --- /dev/null +++ b/llvm/include/llvm/Support/UniqueBBID.h @@ -0,0 +1,50 @@ +//===- llvm/Support/UniqueBBID.h --------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Unique fixed ID assigned to basic blocks upon their creation. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_UNIQUEBBID_H +#define LLVM_SUPPORT_UNIQUEBBID_H + +#include "llvm/ADT/DenseMapInfo.h" + +namespace llvm { + +// This structure represents the information for a basic block pertaining to +// the basic block sections profile. +struct UniqueBBID { + unsigned BaseID; + unsigned CloneID; +}; + +// Provides DenseMapInfo for UniqueBBID. 
+template <> struct DenseMapInfo { + static inline UniqueBBID getEmptyKey() { + unsigned EmptyKey = DenseMapInfo::getEmptyKey(); + return UniqueBBID{EmptyKey, EmptyKey}; + } + static inline UniqueBBID getTombstoneKey() { + unsigned TombstoneKey = DenseMapInfo::getTombstoneKey(); + return UniqueBBID{TombstoneKey, TombstoneKey}; + } + static unsigned getHashValue(const UniqueBBID &Val) { + std::pair PairVal = + std::make_pair(Val.BaseID, Val.CloneID); + return DenseMapInfo>::getHashValue(PairVal); + } + static bool isEqual(const UniqueBBID &LHS, const UniqueBBID &RHS) { + return DenseMapInfo::isEqual(LHS.BaseID, RHS.BaseID) && + DenseMapInfo::isEqual(LHS.CloneID, RHS.CloneID); + } +}; + +} // end namespace llvm + +#endif // LLVM_SUPPORT_UNIQUEBBID_H diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td index bcf49b448e782..ce4750db88c9a 100644 --- a/llvm/include/llvm/Target/GenericOpcodes.td +++ b/llvm/include/llvm/Target/GenericOpcodes.td @@ -1267,6 +1267,12 @@ def G_READSTEADYCOUNTER : GenericInstruction { let hasSideEffects = true; } +def G_GET_ROUNDING : GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins); + let hasSideEffects = true; +} + //------------------------------------------------------------------------------ // Memory ops //------------------------------------------------------------------------------ diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp index 5b85060f9caa1..428342f51ad2e 100644 --- a/llvm/lib/Analysis/DependenceAnalysis.cpp +++ b/llvm/lib/Analysis/DependenceAnalysis.cpp @@ -187,6 +187,20 @@ static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA, OS << " da analyze - "; if (auto D = DA->depends(&*SrcI, &*DstI, /*UnderRuntimeAssumptions=*/true)) { + +#ifndef NDEBUG + // Verify that the distance being zero is equivalent to the + // direction being EQ. 
+ for (unsigned Level = 1; Level <= D->getLevels(); Level++) { + const SCEV *Distance = D->getDistance(Level); + bool IsDistanceZero = Distance && Distance->isZero(); + bool IsDirectionEQ = + D->getDirection(Level) == Dependence::DVEntry::EQ; + assert(IsDistanceZero == IsDirectionEQ && + "Inconsistent distance and direction."); + } +#endif + // Normalize negative direction vectors if required by clients. if (NormalizeResults && D->normalize(&SE)) OS << "normalized - "; @@ -3991,6 +4005,28 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, if (CompleteLoops[II]) Result.DV[II - 1].Scalar = false; + // Set the distance to zero if the direction is EQ. + // TODO: Ideally, the distance should be set to 0 immediately simultaneously + // with the corresponding direction being set to EQ. + for (unsigned II = 1; II <= Result.getLevels(); ++II) { + if (Result.getDirection(II) == Dependence::DVEntry::EQ) { + if (Result.DV[II - 1].Distance == nullptr) + Result.DV[II - 1].Distance = SE->getZero(SrcSCEV->getType()); + else + assert(Result.DV[II - 1].Distance->isZero() && + "Inconsistency between distance and direction"); + } + +#ifndef NDEBUG + // Check that the converse (i.e., if the distance is zero, then the + // direction is EQ) holds. + const SCEV *Distance = Result.getDistance(II); + if (Distance && Distance->isZero()) + assert(Result.getDirection(II) == Dependence::DVEntry::EQ && + "Distance is zero, but direction is not EQ"); +#endif + } + if (PossiblyLoopIndependent) { // Make sure the LoopIndependent flag is set correctly. 
// All directions must include equal, otherwise no diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 7b71dad23948c..be6ffdbb1aabe 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -1954,6 +1954,37 @@ static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride, return Distance % Stride; } +bool MemoryDepChecker::areAccessesCompletelyBeforeOrAfter(const SCEV *Src, + Type *SrcTy, + const SCEV *Sink, + Type *SinkTy) { + const SCEV *BTC = PSE.getBackedgeTakenCount(); + const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount(); + ScalarEvolution &SE = *PSE.getSE(); + const auto &[SrcStart_, SrcEnd_] = getStartAndEndForAccess( + InnermostLoop, Src, SrcTy, BTC, SymbolicMaxBTC, &SE, &PointerBounds); + if (isa(SrcStart_) || isa(SrcEnd_)) + return false; + + const auto &[SinkStart_, SinkEnd_] = getStartAndEndForAccess( + InnermostLoop, Sink, SinkTy, BTC, SymbolicMaxBTC, &SE, &PointerBounds); + if (isa(SinkStart_) || + isa(SinkEnd_)) + return false; + + if (!LoopGuards) + LoopGuards.emplace(ScalarEvolution::LoopGuards::collect(InnermostLoop, SE)); + + auto SrcEnd = SE.applyLoopGuards(SrcEnd_, *LoopGuards); + auto SinkStart = SE.applyLoopGuards(SinkStart_, *LoopGuards); + if (SE.isKnownPredicate(CmpInst::ICMP_ULE, SrcEnd, SinkStart)) + return true; + + auto SinkEnd = SE.applyLoopGuards(SinkEnd_, *LoopGuards); + auto SrcStart = SE.applyLoopGuards(SrcStart_, *LoopGuards); + return SE.isKnownPredicate(CmpInst::ICMP_ULE, SinkEnd, SrcStart); +} + std::variant MemoryDepChecker::getDependenceDistanceStrideAndSize( @@ -2001,37 +2032,13 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize( LLVM_DEBUG(dbgs() << "LAA: Distance for " << *AInst << " to " << *BInst << ": " << *Dist << "\n"); - // Check if we can prove that Sink only accesses memory after Src's end or - // vice versa. 
At the moment this is limited to cases where either source or + // At the moment this is limited to cases where either source or // sink are loop invariant to avoid compile-time increases. This is not // required for correctness. if (SE.isLoopInvariant(Src, InnermostLoop) || SE.isLoopInvariant(Sink, InnermostLoop)) { - const SCEV *BTC = PSE.getBackedgeTakenCount(); - const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount(); - const auto &[SrcStart_, SrcEnd_] = - getStartAndEndForAccess(InnermostLoop, Src, ATy, BTC, SymbolicMaxBTC, - PSE.getSE(), &PointerBounds); - const auto &[SinkStart_, SinkEnd_] = - getStartAndEndForAccess(InnermostLoop, Sink, BTy, BTC, SymbolicMaxBTC, - PSE.getSE(), &PointerBounds); - if (!isa(SrcStart_) && - !isa(SrcEnd_) && - !isa(SinkStart_) && - !isa(SinkEnd_)) { - if (!LoopGuards) - LoopGuards.emplace( - ScalarEvolution::LoopGuards::collect(InnermostLoop, SE)); - auto SrcEnd = SE.applyLoopGuards(SrcEnd_, *LoopGuards); - auto SinkStart = SE.applyLoopGuards(SinkStart_, *LoopGuards); - if (SE.isKnownPredicate(CmpInst::ICMP_ULE, SrcEnd, SinkStart)) - return MemoryDepChecker::Dependence::NoDep; - - auto SinkEnd = SE.applyLoopGuards(SinkEnd_, *LoopGuards); - auto SrcStart = SE.applyLoopGuards(SrcStart_, *LoopGuards); - if (SE.isKnownPredicate(CmpInst::ICMP_ULE, SinkEnd, SrcStart)) - return MemoryDepChecker::Dependence::NoDep; - } + if (areAccessesCompletelyBeforeOrAfter(Src, ATy, Sink, BTy)) + return Dependence::NoDep; } // Need accesses with constant strides and the same direction for further diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 1c66f5c877f59..24adfa346c642 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -601,6 +601,9 @@ static int CompareValueComplexity(const LoopInfo *const LI, Value *LV, if (const auto *LGV = dyn_cast(LV)) { const auto *RGV = cast(RV); + if (auto L = LGV->getLinkage() - RGV->getLinkage()) + return L; + 
const auto IsGVNameSemantic = [&](const GlobalValue *GV) { auto LT = GV->getLinkage(); return !(GlobalValue::isPrivateLinkage(LT) || diff --git a/llvm/lib/Analysis/UniformityAnalysis.cpp b/llvm/lib/Analysis/UniformityAnalysis.cpp index 2101fdfacfc8f..15107c262980c 100644 --- a/llvm/lib/Analysis/UniformityAnalysis.cpp +++ b/llvm/lib/Analysis/UniformityAnalysis.cpp @@ -146,12 +146,12 @@ char UniformityInfoWrapperPass::ID = 0; UniformityInfoWrapperPass::UniformityInfoWrapperPass() : FunctionPass(ID) {} INITIALIZE_PASS_BEGIN(UniformityInfoWrapperPass, "uniformity", - "Uniformity Analysis", true, true) + "Uniformity Analysis", false, true) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(CycleInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_END(UniformityInfoWrapperPass, "uniformity", - "Uniformity Analysis", true, true) + "Uniformity Analysis", false, true) void UniformityInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 7e0d81ff4b196..617b4dffa2199 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -56,6 +56,7 @@ #include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" +#include "llvm/IR/TypedPointerType.h" #include "llvm/IR/UseListOrder.h" #include "llvm/IR/Value.h" #include "llvm/IR/ValueSymbolTable.h" @@ -315,7 +316,7 @@ class ModuleBitcodeWriter : public ModuleBitcodeWriterBase { /// Emit the current module to the bitstream. 
void write(); -private: +protected: uint64_t bitcodeStartBit() { return BitcodeStartBit; } size_t addToStrtab(StringRef Str); @@ -5567,6 +5568,986 @@ void BitcodeWriter::writeIndex( IndexWriter.write(); } + + + + + +class NonOpaqueTypeModuleWriter : public ModuleBitcodeWriter { + + public: + NonOpaqueTypeModuleWriter(const Module &M, StringTableBuilder &StrtabBuilder, + BitstreamWriter &Stream, + bool ShouldPreserveUseListOrder, + const ModuleSummaryIndex &Index, + bool GenerateHash, + const ModuleHash &ModHash, + DenseMap *NonOpaqueTypeMap) + : ModuleBitcodeWriter(M, StrtabBuilder, Stream, + ShouldPreserveUseListOrder, &Index, + GenerateHash, const_cast(&ModHash)), + NonOpaqueTypeMap(NonOpaqueTypeMap) {} + + void write(); + + + + private: + DenseMap *NonOpaqueTypeMap; + void writeTypeTable(); + void writeFunction(const Function &F, DenseMap &FunctionToBitcodeIndex); + void writeInstruction(const Instruction &I, unsigned InstID, SmallVectorImpl &Vals); +}; + + +void NonOpaqueTypeModuleWriter::writeTypeTable() { + const ValueEnumerator::TypeList &TypeList = VE.getTypes(); + + Stream.EnterSubblock(bitc::TYPE_BLOCK_ID_NEW, 4 /*count from # abbrevs */); + SmallVector TypeVals; + + uint64_t NumBits = VE.computeBitsRequiredForTypeIndices(); + + + // Abbrev for TYPE_CODE_POINTER. + auto Abbv = std::make_shared(); + Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_POINTER)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits)); + Abbv->Add(BitCodeAbbrevOp(0)); // Addrspace = 0 + unsigned PtrAbbrev = Stream.EmitAbbrev(std::move(Abbv)); + + // Abbrev for TYPE_CODE_FUNCTION. + Abbv = std::make_shared(); + Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_FUNCTION)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // isvararg + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits)); + unsigned FunctionAbbrev = Stream.EmitAbbrev(std::move(Abbv)); + + // Abbrev for TYPE_CODE_STRUCT_ANON. 
+ Abbv = std::make_shared(); + Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_ANON)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ispacked + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits)); + unsigned StructAnonAbbrev = Stream.EmitAbbrev(std::move(Abbv)); + + // Abbrev for TYPE_CODE_STRUCT_NAME. + Abbv = std::make_shared(); + Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_NAME)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); + unsigned StructNameAbbrev = Stream.EmitAbbrev(std::move(Abbv)); + + // Abbrev for TYPE_CODE_STRUCT_NAMED. + Abbv = std::make_shared(); + Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_NAMED)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ispacked + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits)); + unsigned StructNamedAbbrev = Stream.EmitAbbrev(std::move(Abbv)); + + // Abbrev for TYPE_CODE_ARRAY. + Abbv = std::make_shared(); + Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_ARRAY)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // size + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits)); + unsigned ArrayAbbrev = Stream.EmitAbbrev(std::move(Abbv)); + + // Emit an entry count so the reader can reserve space. + TypeVals.push_back(TypeList.size()); + Stream.EmitRecord(bitc::TYPE_CODE_NUMENTRY, TypeVals); + TypeVals.clear(); + + // Loop over all of the types, emitting each in turn. 
+ for (Type *T : TypeList) { + int AbbrevToUse = 0; + unsigned Code = 0; + + switch (T->getTypeID()) { + case Type::VoidTyID: Code = bitc::TYPE_CODE_VOID; break; + case Type::HalfTyID: Code = bitc::TYPE_CODE_HALF; break; + case Type::BFloatTyID: Code = bitc::TYPE_CODE_BFLOAT; break; + case Type::FloatTyID: Code = bitc::TYPE_CODE_FLOAT; break; + case Type::DoubleTyID: Code = bitc::TYPE_CODE_DOUBLE; break; + case Type::X86_FP80TyID: Code = bitc::TYPE_CODE_X86_FP80; break; + case Type::FP128TyID: Code = bitc::TYPE_CODE_FP128; break; + case Type::PPC_FP128TyID: Code = bitc::TYPE_CODE_PPC_FP128; break; + case Type::LabelTyID: Code = bitc::TYPE_CODE_LABEL; break; + case Type::MetadataTyID: + Code = bitc::TYPE_CODE_METADATA; + break; + case Type::X86_AMXTyID: Code = bitc::TYPE_CODE_X86_AMX; break; + case Type::TokenTyID: Code = bitc::TYPE_CODE_TOKEN; break; + case Type::IntegerTyID: + // INTEGER: [width] + Code = bitc::TYPE_CODE_INTEGER; + TypeVals.push_back(cast(T)->getBitWidth()); + break; + case Type::TypedPointerTyID: { + TypedPointerType *PTy = cast(T); + // POINTER: [pointee type, address space] + Code = bitc::TYPE_CODE_POINTER; + TypeVals.push_back(VE.getTypeID(PTy->getElementType())); + unsigned AddressSpace = PTy->getAddressSpace(); + TypeVals.push_back(AddressSpace); + if (AddressSpace == 0) + AbbrevToUse = PtrAbbrev; + break; + } + case Type::PointerTyID: { + PointerType *PTy = cast(T); + Code = bitc::TYPE_CODE_POINTER; + // opaque pointers are unsupported, so emit using an opaque element type + auto ET = StructType::get(PTy->getContext()); + TypeVals.push_back(VE.getTypeID(ET)); + unsigned AddressSpace = PTy->getAddressSpace(); + TypeVals.push_back(AddressSpace); + if (AddressSpace == 0) + AbbrevToUse = PtrAbbrev; + break; + } + case Type::FunctionTyID: { + FunctionType *FT = cast(T); + // FUNCTION: [isvararg, retty, paramty x N] + Code = bitc::TYPE_CODE_FUNCTION; + TypeVals.push_back(FT->isVarArg()); + TypeVals.push_back(VE.getTypeID(FT->getReturnType())); + 
for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i) + TypeVals.push_back(VE.getTypeID(FT->getParamType(i))); + AbbrevToUse = FunctionAbbrev; + break; + } + case Type::StructTyID: { + StructType *ST = cast(T); + // STRUCT: [ispacked, eltty x N] + TypeVals.push_back(ST->isPacked()); + // Output all of the element types. + for (Type *ET : ST->elements()) + TypeVals.push_back(VE.getTypeID(ET)); + + if (ST->isLiteral()) { + Code = bitc::TYPE_CODE_STRUCT_ANON; + AbbrevToUse = StructAnonAbbrev; + } else { + if (ST->isOpaque()) { + Code = bitc::TYPE_CODE_OPAQUE; + } else { + Code = bitc::TYPE_CODE_STRUCT_NAMED; + AbbrevToUse = StructNamedAbbrev; + } + + // Emit the name if it is present. + if (!ST->getName().empty()) + writeStringRecord(Stream, bitc::TYPE_CODE_STRUCT_NAME, ST->getName(), + StructNameAbbrev); + } + break; + } + case Type::ArrayTyID: { + ArrayType *AT = cast(T); + // ARRAY: [numelts, eltty] + Code = bitc::TYPE_CODE_ARRAY; + TypeVals.push_back(AT->getNumElements()); + TypeVals.push_back(VE.getTypeID(AT->getElementType())); + AbbrevToUse = ArrayAbbrev; + break; + } + case Type::FixedVectorTyID: + case Type::ScalableVectorTyID: { + VectorType *VT = cast(T); + // VECTOR [numelts, eltty] or + // [numelts, eltty, scalable] + Code = bitc::TYPE_CODE_VECTOR; + TypeVals.push_back(VT->getElementCount().getKnownMinValue()); + TypeVals.push_back(VE.getTypeID(VT->getElementType())); + if (isa(VT)) + TypeVals.push_back(true); + break; + } + case Type::TargetExtTyID: { + TargetExtType *TET = cast(T); + Code = bitc::TYPE_CODE_TARGET_TYPE; + writeStringRecord(Stream, bitc::TYPE_CODE_STRUCT_NAME, TET->getName(), + StructNameAbbrev); + TypeVals.push_back(TET->getNumTypeParameters()); + for (Type *InnerTy : TET->type_params()) + TypeVals.push_back(VE.getTypeID(InnerTy)); + llvm::append_range(TypeVals, TET->int_params()); + break; + } + + } + + // Emit the finished record. 
+ Stream.EmitRecord(Code, TypeVals, AbbrevToUse); + TypeVals.clear(); + } + + Stream.ExitBlock(); + + +} + + +void NonOpaqueTypeModuleWriter::writeFunction(const Function &F, DenseMap &FunctionToBitcodeIndex) { +// Save the bitcode index of the start of this function block for recording +// in the VST. +FunctionToBitcodeIndex[&F] = Stream.GetCurrentBitNo(); + +Stream.EnterSubblock(bitc::FUNCTION_BLOCK_ID, 5); +VE.incorporateFunction(F); + +SmallVector Vals; + +// Emit the number of basic blocks, so the reader can create them ahead of +// time. +Vals.push_back(VE.getBasicBlocks().size()); +Stream.EmitRecord(bitc::FUNC_CODE_DECLAREBLOCKS, Vals); +Vals.clear(); + +// If there are function-local constants, emit them now. +unsigned CstStart, CstEnd; +VE.getFunctionConstantRange(CstStart, CstEnd); +writeConstants(CstStart, CstEnd, false); + +// If there is function-local metadata, emit it now. +writeFunctionMetadata(F); + +// Keep a running idea of what the instruction ID is. +unsigned InstID = CstEnd; + +bool NeedsMetadataAttachment = F.hasMetadata(); + +DILocation *LastDL = nullptr; +SmallSetVector BlockAddressUsers; + +// Finally, emit all the instructions, in order. +for (const BasicBlock &BB : F) { +for (const Instruction &I : BB) { + writeInstruction(I, InstID, Vals); + + if (!I.getType()->isVoidTy()) + ++InstID; + + // If the instruction has metadata, write a metadata attachment later. + NeedsMetadataAttachment |= I.hasMetadataOtherThanDebugLoc(); + + // If the instruction has a debug location, emit it. + if (DILocation *DL = I.getDebugLoc()) { + if (DL == LastDL) { + // Just repeat the same debug loc as last time. 
+ Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC_AGAIN, Vals); + } else { + Vals.push_back(DL->getLine()); + Vals.push_back(DL->getColumn()); + Vals.push_back(VE.getMetadataOrNullID(DL->getScope())); + Vals.push_back(VE.getMetadataOrNullID(DL->getInlinedAt())); + Vals.push_back(DL->isImplicitCode()); + Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC, Vals); + Vals.clear(); + LastDL = DL; + } + } + + // If the instruction has DbgRecords attached to it, emit them. Note that + // they come after the instruction so that it's easy to attach them again + // when reading the bitcode, even though conceptually the debug locations + // start "before" the instruction. + if (I.hasDbgRecords()) { + /// Try to push the value only (unwrapped), otherwise push the + /// metadata wrapped value. Returns true if the value was pushed + /// without the ValueAsMetadata wrapper. + auto PushValueOrMetadata = [&Vals, InstID, + this](Metadata *RawLocation) { + assert(RawLocation && + "RawLocation unexpectedly null in DbgVariableRecord"); + if (ValueAsMetadata *VAM = dyn_cast(RawLocation)) { + SmallVector ValAndType; + // If the value is a fwd-ref the type is also pushed. We don't + // want the type, so fwd-refs are kept wrapped (pushValueAndType + // returns false if the value is pushed without type). + if (!pushValueAndType(VAM->getValue(), InstID, ValAndType)) { + Vals.push_back(ValAndType[0]); + return true; + } + } + // The metadata is a DIArgList, or ValueAsMetadata wrapping a + // fwd-ref. Push the metadata ID. + Vals.push_back(VE.getMetadataID(RawLocation)); + return false; + }; + + // Write out non-instruction debug information attached to this + // instruction. Write it after the instruction so that it's easy to + // re-attach to the instruction reading the records in. 
+ for (DbgRecord &DR : I.DebugMarker->getDbgRecordRange()) { + if (DbgLabelRecord *DLR = dyn_cast(&DR)) { + Vals.push_back(VE.getMetadataID(&*DLR->getDebugLoc())); + Vals.push_back(VE.getMetadataID(DLR->getLabel())); + Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_RECORD_LABEL, Vals); + Vals.clear(); + continue; + } + + // First 3 fields are common to all kinds: + // DILocation, DILocalVariable, DIExpression + // dbg_value (FUNC_CODE_DEBUG_RECORD_VALUE) + // ..., LocationMetadata + // dbg_value (FUNC_CODE_DEBUG_RECORD_VALUE_SIMPLE - abbrev'd) + // ..., Value + // dbg_declare (FUNC_CODE_DEBUG_RECORD_DECLARE) + // ..., LocationMetadata + // dbg_assign (FUNC_CODE_DEBUG_RECORD_ASSIGN) + // ..., LocationMetadata, DIAssignID, DIExpression, LocationMetadata + DbgVariableRecord &DVR = cast(DR); + Vals.push_back(VE.getMetadataID(&*DVR.getDebugLoc())); + Vals.push_back(VE.getMetadataID(DVR.getVariable())); + Vals.push_back(VE.getMetadataID(DVR.getExpression())); + if (DVR.isDbgValue()) { + if (PushValueOrMetadata(DVR.getRawLocation())) + Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_RECORD_VALUE_SIMPLE, Vals, + FUNCTION_DEBUG_RECORD_VALUE_ABBREV); + else + Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_RECORD_VALUE, Vals); + } else if (DVR.isDbgDeclare()) { + Vals.push_back(VE.getMetadataID(DVR.getRawLocation())); + Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_RECORD_DECLARE, Vals); + } else { + assert(DVR.isDbgAssign() && "Unexpected DbgRecord kind"); + Vals.push_back(VE.getMetadataID(DVR.getRawLocation())); + Vals.push_back(VE.getMetadataID(DVR.getAssignID())); + Vals.push_back(VE.getMetadataID(DVR.getAddressExpression())); + Vals.push_back(VE.getMetadataID(DVR.getRawAddress())); + Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_RECORD_ASSIGN, Vals); + } + Vals.clear(); + } + } +} + +if (BlockAddress *BA = BlockAddress::lookup(&BB)) { + SmallVector Worklist{BA}; + SmallPtrSet Visited{BA}; + while (!Worklist.empty()) { + Value *V = Worklist.pop_back_val(); + for (User *U : V->users()) { + if (auto *I = 
dyn_cast(U)) { + Function *P = I->getFunction(); + if (P != &F) + BlockAddressUsers.insert(P); + } else if (isa(U) && !isa(U) && + Visited.insert(U).second) + Worklist.push_back(U); + } + } +} +} + +if (!BlockAddressUsers.empty()) { +Vals.resize(BlockAddressUsers.size()); +for (auto I : llvm::enumerate(BlockAddressUsers)) + Vals[I.index()] = VE.getValueID(I.value()); +Stream.EmitRecord(bitc::FUNC_CODE_BLOCKADDR_USERS, Vals); +Vals.clear(); +} + +// Emit names for all the instructions etc. +if (auto *Symtab = F.getValueSymbolTable()) +writeFunctionLevelValueSymbolTable(*Symtab); + +if (NeedsMetadataAttachment) +writeFunctionMetadataAttachment(F); +if (VE.shouldPreserveUseListOrder()) +writeUseListBlock(&F); +VE.purgeFunction(); +Stream.ExitBlock(); +} + +void NonOpaqueTypeModuleWriter::writeInstruction(const Instruction &I, + unsigned InstID, + SmallVectorImpl &Vals) { + unsigned Code = 0; + unsigned AbbrevToUse = 0; + VE.setInstructionID(&I); + switch (I.getOpcode()) { + default: + if (Instruction::isCast(I.getOpcode())) { + Code = bitc::FUNC_CODE_INST_CAST; + if (!pushValueAndType(I.getOperand(0), InstID, Vals)) + AbbrevToUse = FUNCTION_INST_CAST_ABBREV; + // Use NonOpaqueTypeMap for pointer types if available + Type *DestType = I.getType(); + if (NonOpaqueTypeMap && NonOpaqueTypeMap->count(&I)) { + DestType = (*NonOpaqueTypeMap)[&I]; + } + Vals.push_back(VE.getTypeID(DestType)); + Vals.push_back(getEncodedCastOpcode(I.getOpcode())); + uint64_t Flags = getOptimizationFlags(&I); + if (Flags != 0) { + if (AbbrevToUse == FUNCTION_INST_CAST_ABBREV) + AbbrevToUse = FUNCTION_INST_CAST_FLAGS_ABBREV; + Vals.push_back(Flags); + } + } else { + assert(isa(I) && "Unknown instruction!"); + Code = bitc::FUNC_CODE_INST_BINOP; + if (!pushValueAndType(I.getOperand(0), InstID, Vals)) + AbbrevToUse = FUNCTION_INST_BINOP_ABBREV; + pushValue(I.getOperand(1), InstID, Vals); + Vals.push_back(getEncodedBinaryOpcode(I.getOpcode())); + uint64_t Flags = getOptimizationFlags(&I); + if (Flags 
!= 0) { + if (AbbrevToUse == FUNCTION_INST_BINOP_ABBREV) + AbbrevToUse = FUNCTION_INST_BINOP_FLAGS_ABBREV; + Vals.push_back(Flags); + } + } + break; + case Instruction::FNeg: { + Code = bitc::FUNC_CODE_INST_UNOP; + if (!pushValueAndType(I.getOperand(0), InstID, Vals)) + AbbrevToUse = FUNCTION_INST_UNOP_ABBREV; + Vals.push_back(getEncodedUnaryOpcode(I.getOpcode())); + uint64_t Flags = getOptimizationFlags(&I); + if (Flags != 0) { + if (AbbrevToUse == FUNCTION_INST_UNOP_ABBREV) + AbbrevToUse = FUNCTION_INST_UNOP_FLAGS_ABBREV; + Vals.push_back(Flags); + } + break; + } + case Instruction::GetElementPtr: { + Code = bitc::FUNC_CODE_INST_GEP; + AbbrevToUse = FUNCTION_INST_GEP_ABBREV; + auto &GEPInst = cast(I); + Vals.push_back(getOptimizationFlags(&I)); + Vals.push_back(VE.getTypeID(GEPInst.getSourceElementType())); + for (const Value *Op : I.operands()) + pushValueAndType(Op, InstID, Vals); + break; + } + case Instruction::ExtractValue: { + Code = bitc::FUNC_CODE_INST_EXTRACTVAL; + pushValueAndType(I.getOperand(0), InstID, Vals); + const ExtractValueInst *EVI = cast(&I); + Vals.append(EVI->idx_begin(), EVI->idx_end()); + break; + } + case Instruction::InsertValue: { + Code = bitc::FUNC_CODE_INST_INSERTVAL; + pushValueAndType(I.getOperand(0), InstID, Vals); + pushValueAndType(I.getOperand(1), InstID, Vals); + const InsertValueInst *IVI = cast(&I); + Vals.append(IVI->idx_begin(), IVI->idx_end()); + break; + } + case Instruction::Select: { + Code = bitc::FUNC_CODE_INST_VSELECT; + pushValueAndType(I.getOperand(1), InstID, Vals); + pushValue(I.getOperand(2), InstID, Vals); + pushValueAndType(I.getOperand(0), InstID, Vals); + uint64_t Flags = getOptimizationFlags(&I); + if (Flags != 0) + Vals.push_back(Flags); + break; + } + case Instruction::ExtractElement: + Code = bitc::FUNC_CODE_INST_EXTRACTELT; + pushValueAndType(I.getOperand(0), InstID, Vals); + pushValueAndType(I.getOperand(1), InstID, Vals); + break; + case Instruction::InsertElement: + Code = 
bitc::FUNC_CODE_INST_INSERTELT; + pushValueAndType(I.getOperand(0), InstID, Vals); + pushValue(I.getOperand(1), InstID, Vals); + pushValueAndType(I.getOperand(2), InstID, Vals); + break; + case Instruction::ShuffleVector: + Code = bitc::FUNC_CODE_INST_SHUFFLEVEC; + pushValueAndType(I.getOperand(0), InstID, Vals); + pushValue(I.getOperand(1), InstID, Vals); + pushValue(cast(I).getShuffleMaskForBitcode(), InstID, + Vals); + break; + case Instruction::ICmp: + case Instruction::FCmp: { + // compare returning Int1Ty or vector of Int1Ty + Code = bitc::FUNC_CODE_INST_CMP2; + AbbrevToUse = FUNCTION_INST_CMP_ABBREV; + if (pushValueAndType(I.getOperand(0), InstID, Vals)) + AbbrevToUse = 0; + pushValue(I.getOperand(1), InstID, Vals); + Vals.push_back(cast(I).getPredicate()); + uint64_t Flags = getOptimizationFlags(&I); + if (Flags != 0) { + Vals.push_back(Flags); + if (AbbrevToUse) + AbbrevToUse = FUNCTION_INST_CMP_FLAGS_ABBREV; + } + break; + } + + case Instruction::Ret: + { + Code = bitc::FUNC_CODE_INST_RET; + unsigned NumOperands = I.getNumOperands(); + if (NumOperands == 0) + AbbrevToUse = FUNCTION_INST_RET_VOID_ABBREV; + else if (NumOperands == 1) { + if (!pushValueAndType(I.getOperand(0), InstID, Vals)) + AbbrevToUse = FUNCTION_INST_RET_VAL_ABBREV; + } else { + for (const Value *Op : I.operands()) + pushValueAndType(Op, InstID, Vals); + } + } + break; + case Instruction::Br: + { + Code = bitc::FUNC_CODE_INST_BR; + AbbrevToUse = FUNCTION_INST_BR_UNCOND_ABBREV; + const BranchInst &II = cast(I); + Vals.push_back(VE.getValueID(II.getSuccessor(0))); + if (II.isConditional()) { + Vals.push_back(VE.getValueID(II.getSuccessor(1))); + pushValue(II.getCondition(), InstID, Vals); + AbbrevToUse = FUNCTION_INST_BR_COND_ABBREV; + } + } + break; + case Instruction::Switch: + { + Code = bitc::FUNC_CODE_INST_SWITCH; + const SwitchInst &SI = cast(I); + Vals.push_back(VE.getTypeID(SI.getCondition()->getType())); + pushValue(SI.getCondition(), InstID, Vals); + 
Vals.push_back(VE.getValueID(SI.getDefaultDest())); + for (auto Case : SI.cases()) { + Vals.push_back(VE.getValueID(Case.getCaseValue())); + Vals.push_back(VE.getValueID(Case.getCaseSuccessor())); + } + } + break; + case Instruction::IndirectBr: + Code = bitc::FUNC_CODE_INST_INDIRECTBR; + // Use NonOpaqueTypeMap for operand type if available + if (NonOpaqueTypeMap && NonOpaqueTypeMap->count(I.getOperand(0))) { + Vals.push_back(VE.getTypeID((*NonOpaqueTypeMap)[I.getOperand(0)])); + } else { + Vals.push_back(VE.getTypeID(I.getOperand(0)->getType())); + } + // Encode the address operand as relative, but not the basic blocks. + pushValue(I.getOperand(0), InstID, Vals); + for (const Value *Op : drop_begin(I.operands())) + Vals.push_back(VE.getValueID(Op)); + break; + + case Instruction::Invoke: { + const InvokeInst *II = cast(&I); + const Value *Callee = II->getCalledOperand(); + FunctionType *FTy = II->getFunctionType(); + + if (II->hasOperandBundles()) + writeOperandBundles(*II, InstID); + + Code = bitc::FUNC_CODE_INST_INVOKE; + + Vals.push_back(VE.getAttributeListID(II->getAttributes())); + Vals.push_back(II->getCallingConv() | 1 << 13); + Vals.push_back(VE.getValueID(II->getNormalDest())); + Vals.push_back(VE.getValueID(II->getUnwindDest())); + Vals.push_back(VE.getTypeID(FTy)); + pushValueAndType(Callee, InstID, Vals); + + // Emit value #'s for the fixed parameters. + for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) + pushValue(I.getOperand(i), InstID, Vals); // fixed param. + + // Emit type/value pairs for varargs params. 
+ if (FTy->isVarArg()) { + for (unsigned i = FTy->getNumParams(), e = II->arg_size(); i != e; ++i) + pushValueAndType(I.getOperand(i), InstID, Vals); // vararg + } + break; + } + case Instruction::Resume: + Code = bitc::FUNC_CODE_INST_RESUME; + pushValueAndType(I.getOperand(0), InstID, Vals); + break; + case Instruction::CleanupRet: { + Code = bitc::FUNC_CODE_INST_CLEANUPRET; + const auto &CRI = cast(I); + pushValue(CRI.getCleanupPad(), InstID, Vals); + if (CRI.hasUnwindDest()) + Vals.push_back(VE.getValueID(CRI.getUnwindDest())); + break; + } + case Instruction::CatchRet: { + Code = bitc::FUNC_CODE_INST_CATCHRET; + const auto &CRI = cast(I); + pushValue(CRI.getCatchPad(), InstID, Vals); + Vals.push_back(VE.getValueID(CRI.getSuccessor())); + break; + } + case Instruction::CleanupPad: + case Instruction::CatchPad: { + const auto &FuncletPad = cast(I); + Code = isa(FuncletPad) ? bitc::FUNC_CODE_INST_CATCHPAD + : bitc::FUNC_CODE_INST_CLEANUPPAD; + pushValue(FuncletPad.getParentPad(), InstID, Vals); + + unsigned NumArgOperands = FuncletPad.arg_size(); + Vals.push_back(NumArgOperands); + for (unsigned Op = 0; Op != NumArgOperands; ++Op) + pushValueAndType(FuncletPad.getArgOperand(Op), InstID, Vals); + break; + } + case Instruction::CatchSwitch: { + Code = bitc::FUNC_CODE_INST_CATCHSWITCH; + const auto &CatchSwitch = cast(I); + + pushValue(CatchSwitch.getParentPad(), InstID, Vals); + + unsigned NumHandlers = CatchSwitch.getNumHandlers(); + Vals.push_back(NumHandlers); + for (const BasicBlock *CatchPadBB : CatchSwitch.handlers()) + Vals.push_back(VE.getValueID(CatchPadBB)); + + if (CatchSwitch.hasUnwindDest()) + Vals.push_back(VE.getValueID(CatchSwitch.getUnwindDest())); + break; + } + case Instruction::CallBr: { + const CallBrInst *CBI = cast(&I); + const Value *Callee = CBI->getCalledOperand(); + FunctionType *FTy = CBI->getFunctionType(); + + if (CBI->hasOperandBundles()) + writeOperandBundles(*CBI, InstID); + + Code = bitc::FUNC_CODE_INST_CALLBR; + + 
Vals.push_back(VE.getAttributeListID(CBI->getAttributes())); + + Vals.push_back(CBI->getCallingConv() << bitc::CALL_CCONV | + 1 << bitc::CALL_EXPLICIT_TYPE); + + Vals.push_back(VE.getValueID(CBI->getDefaultDest())); + Vals.push_back(CBI->getNumIndirectDests()); + for (unsigned i = 0, e = CBI->getNumIndirectDests(); i != e; ++i) + Vals.push_back(VE.getValueID(CBI->getIndirectDest(i))); + + Vals.push_back(VE.getTypeID(FTy)); + pushValueAndType(Callee, InstID, Vals); + + // Emit value #'s for the fixed parameters. + for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) + pushValue(I.getOperand(i), InstID, Vals); // fixed param. + + // Emit type/value pairs for varargs params. + if (FTy->isVarArg()) { + for (unsigned i = FTy->getNumParams(), e = CBI->arg_size(); i != e; ++i) + pushValueAndType(I.getOperand(i), InstID, Vals); // vararg + } + break; + } + case Instruction::Unreachable: + Code = bitc::FUNC_CODE_INST_UNREACHABLE; + AbbrevToUse = FUNCTION_INST_UNREACHABLE_ABBREV; + break; + + case Instruction::PHI: { + const PHINode &PN = cast(I); + Code = bitc::FUNC_CODE_INST_PHI; + // With the newer instruction encoding, forward references could give + // negative valued IDs. This is most common for PHIs, so we use + // signed VBRs. + SmallVector Vals64; + // Use NonOpaqueTypeMap for PHI type if available + Type *PHIType = PN.getType(); + if (NonOpaqueTypeMap && NonOpaqueTypeMap->count(&I)) { + PHIType = (*NonOpaqueTypeMap)[&I]; + } + Vals64.push_back(VE.getTypeID(PHIType)); + for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { + pushValueSigned(PN.getIncomingValue(i), InstID, Vals64); + Vals64.push_back(VE.getValueID(PN.getIncomingBlock(i))); + } + + uint64_t Flags = getOptimizationFlags(&I); + if (Flags != 0) + Vals64.push_back(Flags); + + // Emit a Vals64 vector and exit. 
+ Stream.EmitRecord(Code, Vals64, AbbrevToUse); + Vals64.clear(); + return; + } + + case Instruction::LandingPad: { + const LandingPadInst &LP = cast(I); + Code = bitc::FUNC_CODE_INST_LANDINGPAD; + // Use NonOpaqueTypeMap for landing pad type if available + Type *LPType = LP.getType(); + if (NonOpaqueTypeMap && NonOpaqueTypeMap->count(&I)) { + LPType = (*NonOpaqueTypeMap)[&I]; + } + Vals.push_back(VE.getTypeID(LPType)); + Vals.push_back(LP.isCleanup()); + Vals.push_back(LP.getNumClauses()); + for (unsigned I = 0, E = LP.getNumClauses(); I != E; ++I) { + if (LP.isCatch(I)) + Vals.push_back(LandingPadInst::Catch); + else + Vals.push_back(LandingPadInst::Filter); + pushValueAndType(LP.getClause(I), InstID, Vals); + } + break; + } + + case Instruction::Alloca: { + Code = bitc::FUNC_CODE_INST_ALLOCA; + const AllocaInst &AI = cast(I); + Vals.push_back(VE.getTypeID(AI.getAllocatedType())); + Vals.push_back(VE.getTypeID(I.getOperand(0)->getType())); + Vals.push_back(VE.getValueID(I.getOperand(0))); // size. 
+ using APV = AllocaPackedValues; + unsigned Record = 0; + unsigned EncodedAlign = getEncodedAlign(AI.getAlign()); + Bitfield::set( + Record, EncodedAlign & ((1 << APV::AlignLower::Bits) - 1)); + Bitfield::set(Record, + EncodedAlign >> APV::AlignLower::Bits); + Bitfield::set(Record, AI.isUsedWithInAlloca()); + Bitfield::set(Record, true); + Bitfield::set(Record, AI.isSwiftError()); + Vals.push_back(Record); + + unsigned AS = AI.getAddressSpace(); + if (AS != M.getDataLayout().getAllocaAddrSpace()) + Vals.push_back(AS); + break; + } + + case Instruction::Load: { + if (cast(I).isAtomic()) { + Code = bitc::FUNC_CODE_INST_LOADATOMIC; + pushValueAndType(I.getOperand(0), InstID, Vals); + } else { + Code = bitc::FUNC_CODE_INST_LOAD; + if (!pushValueAndType(I.getOperand(0), InstID, Vals)) // ptr + AbbrevToUse = FUNCTION_INST_LOAD_ABBREV; + } + // Use NonOpaqueTypeMap for loaded type if available + Type *LoadedType = I.getType(); + if (NonOpaqueTypeMap && NonOpaqueTypeMap->count(&I)) { + LoadedType = (*NonOpaqueTypeMap)[&I]; + } + Vals.push_back(VE.getTypeID(LoadedType)); + Vals.push_back(getEncodedAlign(cast(I).getAlign())); + Vals.push_back(cast(I).isVolatile()); + if (cast(I).isAtomic()) { + Vals.push_back(getEncodedOrdering(cast(I).getOrdering())); + Vals.push_back(getEncodedSyncScopeID(cast(I).getSyncScopeID())); + } + break; + } + case Instruction::Store: { + if (cast(I).isAtomic()) { + Code = bitc::FUNC_CODE_INST_STOREATOMIC; + } else { + Code = bitc::FUNC_CODE_INST_STORE; + AbbrevToUse = FUNCTION_INST_STORE_ABBREV; + } + if (pushValueAndType(I.getOperand(1), InstID, Vals)) // ptrty + ptr + AbbrevToUse = 0; + if (pushValueAndType(I.getOperand(0), InstID, Vals)) // valty + val + AbbrevToUse = 0; + Vals.push_back(getEncodedAlign(cast(I).getAlign())); + Vals.push_back(cast(I).isVolatile()); + if (cast(I).isAtomic()) { + Vals.push_back(getEncodedOrdering(cast(I).getOrdering())); + Vals.push_back( + getEncodedSyncScopeID(cast(I).getSyncScopeID())); + } + break; + } + case 
Instruction::AtomicCmpXchg: { + Code = bitc::FUNC_CODE_INST_CMPXCHG; + pushValueAndType(I.getOperand(0), InstID, Vals); // ptrty + ptr + pushValueAndType(I.getOperand(1), InstID, Vals); // cmp. + pushValue(I.getOperand(2), InstID, Vals); // newval. + Vals.push_back(cast(I).isVolatile()); + Vals.push_back( + getEncodedOrdering(cast(I).getSuccessOrdering())); + Vals.push_back( + getEncodedSyncScopeID(cast(I).getSyncScopeID())); + Vals.push_back( + getEncodedOrdering(cast(I).getFailureOrdering())); + Vals.push_back(cast(I).isWeak()); + Vals.push_back(getEncodedAlign(cast(I).getAlign())); + break; + } + case Instruction::AtomicRMW: { + Code = bitc::FUNC_CODE_INST_ATOMICRMW; + pushValueAndType(I.getOperand(0), InstID, Vals); // ptrty + ptr + pushValueAndType(I.getOperand(1), InstID, Vals); // valty + val + Vals.push_back( + getEncodedRMWOperation(cast(I).getOperation())); + Vals.push_back(cast(I).isVolatile()); + Vals.push_back(getEncodedOrdering(cast(I).getOrdering())); + Vals.push_back( + getEncodedSyncScopeID(cast(I).getSyncScopeID())); + Vals.push_back(getEncodedAlign(cast(I).getAlign())); + break; + } + case Instruction::Fence: { + Code = bitc::FUNC_CODE_INST_FENCE; + const FenceInst &FI = cast(I); + Vals.push_back(getEncodedOrdering(FI.getOrdering())); + Vals.push_back(getEncodedSyncScopeID(FI.getSyncScopeID())); + break; + } + case Instruction::Call: { + const CallInst &CI = cast(I); + FunctionType *FTy = CI.getFunctionType(); + + if (CI.hasOperandBundles()) + writeOperandBundles(CI, InstID); + + Code = bitc::FUNC_CODE_INST_CALL; + + Vals.push_back(VE.getAttributeListID(CI.getAttributes())); + + unsigned Flags = getOptimizationFlags(&I); + Vals.push_back(CI.getCallingConv() << bitc::CALL_CCONV | + unsigned(CI.isTailCall()) << bitc::CALL_TAIL | + unsigned(CI.isMustTailCall()) << bitc::CALL_MUSTTAIL | + 1 << bitc::CALL_EXPLICIT_TYPE | + unsigned(CI.isNoTailCall()) << bitc::CALL_NOTAIL | + unsigned(Flags != 0) << bitc::CALL_FMF); + if (Flags != 0) + 
Vals.push_back(Flags); + + Vals.push_back(VE.getTypeID(FTy)); + pushValueAndType(CI.getCalledOperand(), InstID, Vals); // Callee + + // Emit value #'s for the fixed parameters. + for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) + pushValue(CI.getArgOperand(i), InstID, Vals); // fixed param. + + // Emit type/value pairs for varargs params. + if (FTy->isVarArg()) { + for (unsigned i = FTy->getNumParams(), e = CI.arg_size(); i != e; ++i) + pushValueAndType(CI.getArgOperand(i), InstID, Vals); // varargs + } + break; + } + case Instruction::VAArg: { + Code = bitc::FUNC_CODE_INST_VAARG; + Vals.push_back(VE.getTypeID(I.getOperand(0)->getType())); // valistty + pushValue(I.getOperand(0), InstID, Vals); // valist. + // Use NonOpaqueTypeMap for VAArg type if available + Type *VAArgType = I.getType(); + if (NonOpaqueTypeMap && NonOpaqueTypeMap->count(&I)) { + VAArgType = (*NonOpaqueTypeMap)[&I]; + } + Vals.push_back(VE.getTypeID(VAArgType)); // restype. + break; + } + case Instruction::Freeze: { + Code = bitc::FUNC_CODE_INST_FREEZE; + pushValueAndType(I.getOperand(0), InstID, Vals); + break; + } + } + + Stream.EmitRecord(Code, Vals, AbbrevToUse); + Vals.clear(); +} + + +void NonOpaqueTypeModuleWriter::write() { + writeIdentificationBlock(Stream); + + Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3); + // We will want to write the module hash at this point. Block any flushing so + // we can have access to the whole underlying data later. + Stream.markAndBlockFlushing(); + + writeModuleVersion(); + + // Emit blockinfo, which defines the standard abbreviations etc. + writeBlockInfo(); + + // Emit information describing all of the types in the module. + NonOpaqueTypeModuleWriter::writeTypeTable(); + + // Emit information about attribute groups. + writeAttributeGroupTable(); + + // Emit information about parameter attributes. 
+ writeAttributeTable(); + + writeComdats(); + + // Emit top-level description of module, including target triple, inline asm, + // descriptors for global variables, and function prototype info. + writeModuleInfo(); + + // Emit constants. + writeModuleConstants(); + + // Emit metadata kind names. + writeModuleMetadataKinds(); + + // Emit metadata. + writeModuleMetadata(); + + // Emit module-level use-lists. + if (VE.shouldPreserveUseListOrder()) + writeUseListBlock(nullptr); + + writeOperandBundleTags(); + writeSyncScopeNames(); + + // Emit function bodies. + DenseMap FunctionToBitcodeIndex; + for (const Function &F : M) + if (!F.isDeclaration()) + writeFunction(F, FunctionToBitcodeIndex); + + // Need to write after the above call to WriteFunction which populates + // the summary information in the index. + if (Index) + writePerModuleGlobalValueSummary(); + + writeGlobalValueSymbolTable(FunctionToBitcodeIndex); + + writeModuleHash(Stream.getMarkedBufferAndResumeFlushing()); + + Stream.ExitBlock(); +} + + +void BitcodeWriter::writeBitcodeWithNonOpaqueTypes(const Module &M, + bool ShouldPreserveUseListOrder, + const ModuleSummaryIndex *Index, + bool GenerateHash, + ModuleHash *ModHash, + bool WriteNonOpaqueTypes, + DenseMap *NonOpaqueTypeMap) { + assert(!WroteStrtab); + + assert(M.isMaterialized()); + Mods.push_back(const_cast(&M)); + NonOpaqueTypeModuleWriter NonOpaqueTypeModuleWriter(M, StrtabBuilder, *Stream, + ShouldPreserveUseListOrder, *Index, + GenerateHash, *ModHash, NonOpaqueTypeMap); + NonOpaqueTypeModuleWriter.write(); +} + /// Write the specified module to the specified output stream. 
void llvm::WriteBitcodeToFile(const Module &M, raw_ostream &Out, bool ShouldPreserveUseListOrder, diff --git a/llvm/lib/CodeGen/BasicBlockPathCloning.cpp b/llvm/lib/CodeGen/BasicBlockPathCloning.cpp index b58c60d1db0a9..fd7df6b872fd9 100644 --- a/llvm/lib/CodeGen/BasicBlockPathCloning.cpp +++ b/llvm/lib/CodeGen/BasicBlockPathCloning.cpp @@ -41,6 +41,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/InitializePasses.h" +#include "llvm/Support/UniqueBBID.h" #include "llvm/Support/WithColor.h" #include "llvm/Target/TargetMachine.h" diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp index 1eedfc4b25912..e317e1c06741f 100644 --- a/llvm/lib/CodeGen/BasicBlockSections.cpp +++ b/llvm/lib/CodeGen/BasicBlockSections.cpp @@ -79,6 +79,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/InitializePasses.h" +#include "llvm/Support/UniqueBBID.h" #include "llvm/Target/TargetMachine.h" #include diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp index fa54640265162..7baeb3fd7bcee 100644 --- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -26,6 +26,7 @@ #include "llvm/Support/LineIterator.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" +#include "llvm/Support/UniqueBBID.h" #include using namespace llvm; diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index ef39fc74554c9..d7280eaba2440 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -2593,6 +2593,9 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, case Intrinsic::reset_fpmode: MIRBuilder.buildResetFPMode(); return true; + case Intrinsic::get_rounding: + 
MIRBuilder.buildGetRounding(getOrCreateVReg(CI)); + return true; case Intrinsic::vscale: { MIRBuilder.buildVScale(getOrCreateVReg(CI), 1); return true; diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index cdf192f9e7e3a..11b3ac82e5136 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -9272,7 +9272,7 @@ LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) { APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf; APInt QNaNBitMask = APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1); - APInt InvertionMask = APInt::getAllOnes(DstTy.getScalarSizeInBits()); + APInt InversionMask = APInt::getAllOnes(DstTy.getScalarSizeInBits()); auto SignBitC = MIRBuilder.buildConstant(IntTy, SignBit); auto ValueMaskC = MIRBuilder.buildConstant(IntTy, ValueMask); @@ -9400,7 +9400,7 @@ LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) { NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, Sign); else if (PartialCheck == fcPosNormal) { auto PosSign = MIRBuilder.buildXor( - DstTy, Sign, MIRBuilder.buildConstant(DstTy, InvertionMask)); + DstTy, Sign, MIRBuilder.buildConstant(DstTy, InversionMask)); NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, PosSign); } appendToRes(NormalRes); diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp index d2c79f64afe64..b38a4d1c55af9 100644 --- a/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -338,6 +338,17 @@ class LoopCarriedOrderDepsTracker { void addLoopCarriedDepenenciesForChunks(const LoadStoreChunk &From, const LoadStoreChunk &To); + /// Add a loop-carried order dependency between \p Src and \p Dst if we + /// cannot prove they are independent. When \p PerformCheapCheck is true, a + /// lightweight dependency test (referred to as "cheap check" below) is + /// performed at first. 
Note that the cheap check is retained to maintain the + /// existing behavior and not expected to be used anymore. + /// + /// TODO: Remove \p PerformCheapCheck and the corresponding cheap check. + void addDependenciesBetweenSUs(const SUnitWithMemInfo &Src, + const SUnitWithMemInfo &Dst, + bool PerformCheapCheck = false); + void computeDependenciesAux(); }; @@ -673,7 +684,7 @@ void SwingSchedulerDAG::schedule() { Topo.InitDAGTopologicalSorting(); changeDependences(); postProcessDAG(); - DDG = std::make_unique(SUnits, &EntrySU, &ExitSU); + DDG = std::make_unique(SUnits, &EntrySU, &ExitSU, LCE); LLVM_DEBUG({ dump(); dbgs() << "===== Loop Carried Edges Begin =====\n"; @@ -958,11 +969,11 @@ bool SUnitWithMemInfo::getUnderlyingObjects() { /// Returns true if there is a loop-carried order dependency from \p Src to \p /// Dst. -static bool hasLoopCarriedMemDep(const SUnitWithMemInfo &Src, - const SUnitWithMemInfo &Dst, - BatchAAResults &BAA, - const TargetInstrInfo *TII, - const TargetRegisterInfo *TRI) { +static bool +hasLoopCarriedMemDep(const SUnitWithMemInfo &Src, const SUnitWithMemInfo &Dst, + BatchAAResults &BAA, const TargetInstrInfo *TII, + const TargetRegisterInfo *TRI, + const SwingSchedulerDAG *SSD, bool PerformCheapCheck) { if (Src.isTriviallyDisjoint(Dst)) return false; if (isSuccOrder(Src.SU, Dst.SU)) @@ -970,24 +981,32 @@ static bool hasLoopCarriedMemDep(const SUnitWithMemInfo &Src, MachineInstr &SrcMI = *Src.SU->getInstr(); MachineInstr &DstMI = *Dst.SU->getInstr(); - // First, perform the cheaper check that compares the base register. - // If they are the same and the load offset is less than the store - // offset, then mark the dependence as loop carried potentially. 
- const MachineOperand *BaseOp1, *BaseOp2; - int64_t Offset1, Offset2; - bool Offset1IsScalable, Offset2IsScalable; - if (TII->getMemOperandWithOffset(SrcMI, BaseOp1, Offset1, Offset1IsScalable, - TRI) && - TII->getMemOperandWithOffset(DstMI, BaseOp2, Offset2, Offset2IsScalable, - TRI)) { - if (BaseOp1->isIdenticalTo(*BaseOp2) && - Offset1IsScalable == Offset2IsScalable && (int)Offset1 < (int)Offset2) { - assert(TII->areMemAccessesTriviallyDisjoint(SrcMI, DstMI) && - "What happened to the chain edge?"); - return true; + if (PerformCheapCheck) { + // First, perform the cheaper check that compares the base register. + // If they are the same and the load offset is less than the store + // offset, then mark the dependence as loop carried potentially. + // + // TODO: This check will be removed. + const MachineOperand *BaseOp1, *BaseOp2; + int64_t Offset1, Offset2; + bool Offset1IsScalable, Offset2IsScalable; + if (TII->getMemOperandWithOffset(SrcMI, BaseOp1, Offset1, Offset1IsScalable, + TRI) && + TII->getMemOperandWithOffset(DstMI, BaseOp2, Offset2, Offset2IsScalable, + TRI)) { + if (BaseOp1->isIdenticalTo(*BaseOp2) && + Offset1IsScalable == Offset2IsScalable && + (int)Offset1 < (int)Offset2) { + assert(TII->areMemAccessesTriviallyDisjoint(SrcMI, DstMI) && + "What happened to the chain edge?"); + return true; + } } } + if (!SSD->mayOverlapInLaterIter(&SrcMI, &DstMI)) + return false; + // Second, the more expensive check that uses alias analysis on the // base registers. If they alias, and the load offset is less than // the store offset, the mark the dependence as loop carried. @@ -1056,20 +1075,34 @@ LoopCarriedOrderDepsTracker::getInstrTag(SUnit *SU) const { return std::nullopt; } +void LoopCarriedOrderDepsTracker::addDependenciesBetweenSUs( + const SUnitWithMemInfo &Src, const SUnitWithMemInfo &Dst, + bool PerformCheapCheck) { + // Avoid self-dependencies. 
+ if (Src.SU == Dst.SU) + return; + + if (hasLoopCarriedMemDep(Src, Dst, *BAA, TII, TRI, DAG, PerformCheapCheck)) + LoopCarried[Src.SU->NodeNum].set(Dst.SU->NodeNum); +} + void LoopCarriedOrderDepsTracker::addLoopCarriedDepenenciesForChunks( const LoadStoreChunk &From, const LoadStoreChunk &To) { - // Add dependencies for load-to-store (WAR) from top to bottom. + // Add load-to-store dependencies (WAR). for (const SUnitWithMemInfo &Src : From.Loads) for (const SUnitWithMemInfo &Dst : To.Stores) - if (Src.SU->NodeNum < Dst.SU->NodeNum && - hasLoopCarriedMemDep(Src, Dst, *BAA, TII, TRI)) - LoopCarried[Src.SU->NodeNum].set(Dst.SU->NodeNum); + // Perform a cheap check first if this is a forward dependency. + addDependenciesBetweenSUs(Src, Dst, Src.SU->NodeNum < Dst.SU->NodeNum); - // TODO: The following dependencies are missed. - // - // - Dependencies for load-to-store from bottom to top. - // - Dependencies for store-to-load (RAW). - // - Dependencies for store-to-store (WAW). + // Add store-to-load dependencies (RAW). + for (const SUnitWithMemInfo &Src : From.Stores) + for (const SUnitWithMemInfo &Dst : To.Loads) + addDependenciesBetweenSUs(Src, Dst); + + // Add store-to-store dependencies (WAW). + for (const SUnitWithMemInfo &Src : From.Stores) + for (const SUnitWithMemInfo &Dst : To.Stores) + addDependenciesBetweenSUs(Src, Dst); } void LoopCarriedOrderDepsTracker::computeDependenciesAux() { @@ -1116,7 +1149,7 @@ LoopCarriedEdges SwingSchedulerDAG::addLoopCarriedDependences() { for (const int Succ : LCODTracker.getLoopCarried(I).set_bits()) LCE.OrderDeps[&SUnits[I]].insert(&SUnits[Succ]); - LCE.modifySUnits(SUnits); + LCE.modifySUnits(SUnits, TII); return LCE; } @@ -2676,6 +2709,11 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) { }); } while (++NI != NE && scheduleFound); + // If a schedule is found, validate it against the validation-only + // dependencies. 
+ if (scheduleFound) + scheduleFound = DDG->isValidSchedule(Schedule); + // If a schedule is found, ensure non-pipelined instructions are in stage 0 if (scheduleFound) scheduleFound = @@ -4118,6 +4156,8 @@ SwingSchedulerDDG::getEdges(const SUnit *SU) const { void SwingSchedulerDDG::addEdge(const SUnit *SU, const SwingSchedulerDDGEdge &Edge) { + assert(!Edge.isValidationOnly() && + "Validation-only edges are not expected here."); auto &Edges = getEdges(SU); if (Edge.getSrc() == SU) Edges.Succs.push_back(Edge); @@ -4127,25 +4167,43 @@ void SwingSchedulerDDG::addEdge(const SUnit *SU, void SwingSchedulerDDG::initEdges(SUnit *SU) { for (const auto &PI : SU->Preds) { - SwingSchedulerDDGEdge Edge(SU, PI, false); + SwingSchedulerDDGEdge Edge(SU, PI, /*IsSucc=*/false, + /*IsValidationOnly=*/false); addEdge(SU, Edge); } for (const auto &SI : SU->Succs) { - SwingSchedulerDDGEdge Edge(SU, SI, true); + SwingSchedulerDDGEdge Edge(SU, SI, /*IsSucc=*/true, + /*IsValidationOnly=*/false); addEdge(SU, Edge); } } SwingSchedulerDDG::SwingSchedulerDDG(std::vector &SUnits, SUnit *EntrySU, - SUnit *ExitSU) + SUnit *ExitSU, const LoopCarriedEdges &LCE) : EntrySU(EntrySU), ExitSU(ExitSU) { EdgesVec.resize(SUnits.size()); + // Add non-loop-carried edges based on the DAG. initEdges(EntrySU); initEdges(ExitSU); for (auto &SU : SUnits) initEdges(&SU); + + // Add loop-carried edges, which are not represented in the DAG. 
+ for (SUnit &SU : SUnits) { + SUnit *Src = &SU; + if (const LoopCarriedEdges::OrderDep *OD = LCE.getOrderDepOrNull(Src)) { + SDep Base(Src, SDep::Barrier); + Base.setLatency(1); + for (SUnit *Dst : *OD) { + SwingSchedulerDDGEdge Edge(Dst, Base, /*IsSucc=*/false, + /*IsValidationOnly=*/true); + Edge.setDistance(1); + ValidationOnlyEdges.push_back(Edge); + } + } + } } const SwingSchedulerDDG::EdgesType & @@ -4158,17 +4216,73 @@ SwingSchedulerDDG::getOutEdges(const SUnit *SU) const { return getEdges(SU).Succs; } -void LoopCarriedEdges::modifySUnits(std::vector &SUnits) { - // Currently this function simply adds all dependencies represented by this - // object. After we properly handle missed dependencies, the logic here will - // be more complex, as currently missed edges should not be added to the DAG. +/// Check if \p Schedule doesn't violate the validation-only dependencies. +bool SwingSchedulerDDG::isValidSchedule(const SMSchedule &Schedule) const { + unsigned II = Schedule.getInitiationInterval(); + + auto ExpandCycle = [&](SUnit *SU) { + int Stage = Schedule.stageScheduled(SU); + int Cycle = Schedule.cycleScheduled(SU); + return Cycle + (Stage * II); + }; + + for (const SwingSchedulerDDGEdge &Edge : ValidationOnlyEdges) { + SUnit *Src = Edge.getSrc(); + SUnit *Dst = Edge.getDst(); + if (!Src->isInstr() || !Dst->isInstr()) + continue; + int CycleSrc = ExpandCycle(Src); + int CycleDst = ExpandCycle(Dst); + int MaxLateStart = CycleDst + Edge.getDistance() * II - Edge.getLatency(); + if (CycleSrc > MaxLateStart) { + LLVM_DEBUG({ + dbgs() << "Validation failed for edge from " << Src->NodeNum << " to " + << Dst->NodeNum << "\n"; + }); + return false; + } + } + return true; +} + +void LoopCarriedEdges::modifySUnits(std::vector &SUnits, + const TargetInstrInfo *TII) { for (SUnit &SU : SUnits) { SUnit *Src = &SU; if (auto *OrderDep = getOrderDepOrNull(Src)) { SDep Dep(Src, SDep::Barrier); Dep.setLatency(1); - for (SUnit *Dst : *OrderDep) - Dst->addPred(Dep); + for 
(SUnit *Dst : *OrderDep) { + SUnit *From = Src; + SUnit *To = Dst; + if (From->NodeNum > To->NodeNum) + std::swap(From, To); + + // Add a forward edge if the following conditions are met: + // + // - The instruction of the source node (FromMI) may read memory. + // - The instruction of the target node (ToMI) may modify memory, but + // does not read it. + // - Neither instruction is a global barrier. + // - The load appears before the store in the original basic block. + // - There are no barrier or store instructions between the two nodes. + // - The target node is unreachable from the source node in the current + // DAG. + // + // TODO: These conditions are inherited from a previous implementation, + // and some may no longer be necessary. For now, we conservatively + // retain all of them to avoid regressions, but the logic could + // potentially be simplified + MachineInstr *FromMI = From->getInstr(); + MachineInstr *ToMI = To->getInstr(); + if (FromMI->mayLoad() && !ToMI->mayLoad() && ToMI->mayStore() && + !TII->isGlobalMemoryObject(FromMI) && + !TII->isGlobalMemoryObject(ToMI) && !isSuccOrder(From, To)) { + SDep Pred = Dep; + Pred.setSUnit(Src); + Dst->addPred(Pred); + } + } } } } diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp index 265a32cf4d127..8de2c48581a1e 100644 --- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp +++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp @@ -25,6 +25,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" +#include "llvm/IR/RuntimeLibcalls.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" #include "llvm/InitializePasses.h" @@ -135,17 +136,22 @@ static CallInst::TailCallKind getOverridingTailCallKind(const Function &F) { return CallInst::TCK_None; } -static bool lowerObjCCall(Function &F, const char *NewFn, +static bool lowerObjCCall(Function &F, RTLIB::LibcallImpl NewFn, bool setNonLazyBind = false) { 
assert(IntrinsicInst::mayLowerToFunctionCall(F.getIntrinsicID()) && "Pre-ISel intrinsics do lower into regular function calls"); if (F.use_empty()) return false; + // FIXME: When RuntimeLibcalls is an analysis, check if the function is really + // supported, and go through RTLIB::Libcall. + const char *NewFnName = RTLIB::RuntimeLibcallsInfo::getLibcallImplName(NewFn); + // If we haven't already looked up this function, check to see if the // program already contains a function with this name. Module *M = F.getParent(); - FunctionCallee FCache = M->getOrInsertFunction(NewFn, F.getFunctionType()); + FunctionCallee FCache = + M->getOrInsertFunction(NewFnName, F.getFunctionType()); if (Function *Fn = dyn_cast(FCache.getCallee())) { Fn->setLinkage(F.getLinkage()); @@ -501,82 +507,83 @@ bool PreISelIntrinsicLowering::lowerIntrinsics(Module &M) const { }); break; case Intrinsic::objc_autorelease: - Changed |= lowerObjCCall(F, "objc_autorelease"); + Changed |= lowerObjCCall(F, RTLIB::objc_autorelease); break; case Intrinsic::objc_autoreleasePoolPop: - Changed |= lowerObjCCall(F, "objc_autoreleasePoolPop"); + Changed |= lowerObjCCall(F, RTLIB::objc_autoreleasePoolPop); break; case Intrinsic::objc_autoreleasePoolPush: - Changed |= lowerObjCCall(F, "objc_autoreleasePoolPush"); + Changed |= lowerObjCCall(F, RTLIB::objc_autoreleasePoolPush); break; case Intrinsic::objc_autoreleaseReturnValue: - Changed |= lowerObjCCall(F, "objc_autoreleaseReturnValue"); + Changed |= lowerObjCCall(F, RTLIB::objc_autoreleaseReturnValue); break; case Intrinsic::objc_copyWeak: - Changed |= lowerObjCCall(F, "objc_copyWeak"); + Changed |= lowerObjCCall(F, RTLIB::objc_copyWeak); break; case Intrinsic::objc_destroyWeak: - Changed |= lowerObjCCall(F, "objc_destroyWeak"); + Changed |= lowerObjCCall(F, RTLIB::objc_destroyWeak); break; case Intrinsic::objc_initWeak: - Changed |= lowerObjCCall(F, "objc_initWeak"); + Changed |= lowerObjCCall(F, RTLIB::objc_initWeak); break; case Intrinsic::objc_loadWeak: - 
Changed |= lowerObjCCall(F, "objc_loadWeak"); + Changed |= lowerObjCCall(F, RTLIB::objc_loadWeak); break; case Intrinsic::objc_loadWeakRetained: - Changed |= lowerObjCCall(F, "objc_loadWeakRetained"); + Changed |= lowerObjCCall(F, RTLIB::objc_loadWeakRetained); break; case Intrinsic::objc_moveWeak: - Changed |= lowerObjCCall(F, "objc_moveWeak"); + Changed |= lowerObjCCall(F, RTLIB::objc_moveWeak); break; case Intrinsic::objc_release: - Changed |= lowerObjCCall(F, "objc_release", true); + Changed |= lowerObjCCall(F, RTLIB::objc_release, true); break; case Intrinsic::objc_retain: - Changed |= lowerObjCCall(F, "objc_retain", true); + Changed |= lowerObjCCall(F, RTLIB::objc_retain, true); break; case Intrinsic::objc_retainAutorelease: - Changed |= lowerObjCCall(F, "objc_retainAutorelease"); + Changed |= lowerObjCCall(F, RTLIB::objc_retainAutorelease); break; case Intrinsic::objc_retainAutoreleaseReturnValue: - Changed |= lowerObjCCall(F, "objc_retainAutoreleaseReturnValue"); + Changed |= lowerObjCCall(F, RTLIB::objc_retainAutoreleaseReturnValue); break; case Intrinsic::objc_retainAutoreleasedReturnValue: - Changed |= lowerObjCCall(F, "objc_retainAutoreleasedReturnValue"); + Changed |= lowerObjCCall(F, RTLIB::objc_retainAutoreleasedReturnValue); break; case Intrinsic::objc_claimAutoreleasedReturnValue: - Changed |= lowerObjCCall(F, "objc_claimAutoreleasedReturnValue"); + Changed |= lowerObjCCall(F, RTLIB::objc_claimAutoreleasedReturnValue); break; case Intrinsic::objc_retainBlock: - Changed |= lowerObjCCall(F, "objc_retainBlock"); + Changed |= lowerObjCCall(F, RTLIB::objc_retainBlock); break; case Intrinsic::objc_storeStrong: - Changed |= lowerObjCCall(F, "objc_storeStrong"); + Changed |= lowerObjCCall(F, RTLIB::objc_storeStrong); break; case Intrinsic::objc_storeWeak: - Changed |= lowerObjCCall(F, "objc_storeWeak"); + Changed |= lowerObjCCall(F, RTLIB::objc_storeWeak); break; case Intrinsic::objc_unsafeClaimAutoreleasedReturnValue: - Changed |= lowerObjCCall(F, 
"objc_unsafeClaimAutoreleasedReturnValue"); + Changed |= + lowerObjCCall(F, RTLIB::objc_unsafeClaimAutoreleasedReturnValue); break; case Intrinsic::objc_retainedObject: - Changed |= lowerObjCCall(F, "objc_retainedObject"); + Changed |= lowerObjCCall(F, RTLIB::objc_retainedObject); break; case Intrinsic::objc_unretainedObject: - Changed |= lowerObjCCall(F, "objc_unretainedObject"); + Changed |= lowerObjCCall(F, RTLIB::objc_unretainedObject); break; case Intrinsic::objc_unretainedPointer: - Changed |= lowerObjCCall(F, "objc_unretainedPointer"); + Changed |= lowerObjCCall(F, RTLIB::objc_unretainedPointer); break; case Intrinsic::objc_retain_autorelease: - Changed |= lowerObjCCall(F, "objc_retain_autorelease"); + Changed |= lowerObjCCall(F, RTLIB::objc_retain_autorelease); break; case Intrinsic::objc_sync_enter: - Changed |= lowerObjCCall(F, "objc_sync_enter"); + Changed |= lowerObjCCall(F, RTLIB::objc_sync_enter); break; case Intrinsic::objc_sync_exit: - Changed |= lowerObjCCall(F, "objc_sync_exit"); + Changed |= lowerObjCCall(F, RTLIB::objc_sync_exit); break; case Intrinsic::exp: case Intrinsic::exp2: diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 9ffdda28f7899..231184587d682 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -13091,10 +13091,10 @@ static SDValue combineVSelectWithAllOnesOrZeros(SDValue Cond, SDValue TVal, EVT CondVT = Cond.getValueType(); assert(CondVT.isVector() && "Vector select expects a vector selector!"); - bool IsTAllZero = ISD::isBuildVectorAllZeros(TVal.getNode()); - bool IsTAllOne = ISD::isBuildVectorAllOnes(TVal.getNode()); - bool IsFAllZero = ISD::isBuildVectorAllZeros(FVal.getNode()); - bool IsFAllOne = ISD::isBuildVectorAllOnes(FVal.getNode()); + bool IsTAllZero = ISD::isConstantSplatVectorAllZeros(TVal.getNode()); + bool IsTAllOne = ISD::isConstantSplatVectorAllOnes(TVal.getNode()); + bool IsFAllZero = 
ISD::isConstantSplatVectorAllZeros(FVal.getNode()); + bool IsFAllOne = ISD::isConstantSplatVectorAllOnes(FVal.getNode()); // no vselect(cond, 0/-1, X) or vselect(cond, X, 0/-1), return if (!IsTAllZero && !IsTAllOne && !IsFAllZero && !IsFAllOne) @@ -13194,8 +13194,9 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { return V; // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1 - if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) - return DAG.getSelect(DL, VT, F, N2, N1); + if (!TLI.isTargetCanonicalSelect(N)) + if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) + return DAG.getSelect(DL, VT, F, N2, N1); // select (sext m), (add X, C), X --> (add X, (and C, (sext m)))) if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N2 && N1->hasOneUse() && diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 5a4cc466d2bce..58be4fb7e8331 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -10459,7 +10459,7 @@ SDValue SelectionDAG::simplifySelect(SDValue Cond, SDValue T, SDValue F) { // select true, T, F --> T // select false, T, F --> F - if (auto C = isBoolConstant(Cond, /*AllowTruncation=*/true)) + if (auto C = isBoolConstant(Cond)) return *C ? 
T : F; // select ?, T, T --> T @@ -13688,13 +13688,14 @@ bool SelectionDAG::isConstantFPBuildVectorOrConstantFP(SDValue N) const { return false; } -std::optional SelectionDAG::isBoolConstant(SDValue N, - bool AllowTruncation) const { - ConstantSDNode *Const = isConstOrConstSplat(N, false, AllowTruncation); +std::optional SelectionDAG::isBoolConstant(SDValue N) const { + ConstantSDNode *Const = + isConstOrConstSplat(N, false, /*AllowTruncation=*/true); if (!Const) return std::nullopt; - const APInt &CVal = Const->getAPIntValue(); + EVT VT = N->getValueType(0); + const APInt CVal = Const->getAPIntValue().trunc(VT.getScalarSizeInBits()); switch (TLI->getBooleanContents(N.getValueType())) { case TargetLowering::ZeroOrOneBooleanContent: if (CVal.isOne()) diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 16a10cf4d0323..e0597988e8907 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -9054,14 +9054,14 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op, APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf; APInt QNaNBitMask = APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1); - APInt InvertionMask = APInt::getAllOnes(ResultVT.getScalarSizeInBits()); + APInt InversionMask = APInt::getAllOnes(ResultVT.getScalarSizeInBits()); SDValue ValueMaskV = DAG.getConstant(ValueMask, DL, IntVT); SDValue SignBitV = DAG.getConstant(SignBit, DL, IntVT); SDValue ExpMaskV = DAG.getConstant(ExpMask, DL, IntVT); SDValue ZeroV = DAG.getConstant(0, DL, IntVT); SDValue InfV = DAG.getConstant(Inf, DL, IntVT); - SDValue ResultInvertionMask = DAG.getConstant(InvertionMask, DL, ResultVT); + SDValue ResultInversionMask = DAG.getConstant(InversionMask, DL, ResultVT); SDValue Res; const auto appendResult = [&](SDValue PartialRes) { @@ -9205,7 +9205,7 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, 
SDValue Op, PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV); else if (PartialCheck == fcPosNormal) { SDValue PosSignV = - DAG.getNode(ISD::XOR, DL, ResultVT, SignV, ResultInvertionMask); + DAG.getNode(ISD::XOR, DL, ResultVT, SignV, ResultInversionMask); PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, PosSignV); } if (IsF80) @@ -9217,7 +9217,7 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op, if (!Res) return DAG.getConstant(IsInverted, DL, ResultVT); if (IsInverted) - Res = DAG.getNode(ISD::XOR, DL, ResultVT, Res, ResultInvertionMask); + Res = DAG.getNode(ISD::XOR, DL, ResultVT, Res, ResultInversionMask); return Res; } diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp index 660a1a4d7ec47..518a9339d8d11 100644 --- a/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -214,6 +214,24 @@ MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr &MI, Reg1.isPhysical() ? MI.getOperand(Idx1).isRenamable() : false; bool Reg2IsRenamable = Reg2.isPhysical() ? MI.getOperand(Idx2).isRenamable() : false; + + // For a case like this: + // %0.sub = INST %0.sub(tied), %1.sub, implicit-def %0 + // we need to update the implicit-def after commuting to result in: + // %1.sub = INST %1.sub(tied), %0.sub, implicit-def %1 + SmallVector UpdateImplicitDefIdx; + if (HasDef && MI.hasImplicitDef()) { + const TargetRegisterInfo *TRI = + MI.getMF()->getSubtarget().getRegisterInfo(); + for (auto [OpNo, MO] : llvm::enumerate(MI.implicit_operands())) { + Register ImplReg = MO.getReg(); + if ((ImplReg.isVirtual() && ImplReg == Reg0) || + (ImplReg.isPhysical() && Reg0.isPhysical() && + TRI->isSubRegisterEq(ImplReg, Reg0))) + UpdateImplicitDefIdx.push_back(OpNo + MI.getNumExplicitOperands()); + } + } + // If destination is tied to either of the commuted source register, then // it must be updated. 
if (HasDef && Reg0 == Reg1 && @@ -238,15 +256,10 @@ MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr &MI, } if (HasDef) { - // Use `substituteRegister` so that for a case like this: - // %0.sub = INST %0.sub(tied), %1.sub, implicit-def %0 - // the implicit-def is also updated, to result in: - // %1.sub = INST %1.sub(tied), %0.sub, implicit-def %1 - const TargetRegisterInfo &TRI = - *MI.getMF()->getSubtarget().getRegisterInfo(); - Register FromReg = CommutedMI->getOperand(0).getReg(); - CommutedMI->substituteRegister(FromReg, Reg0, /*SubRegIdx=*/0, TRI); + CommutedMI->getOperand(0).setReg(Reg0); CommutedMI->getOperand(0).setSubReg(SubReg0); + for (unsigned Idx : UpdateImplicitDefIdx) + CommutedMI->getOperand(Idx).setReg(Reg0); } CommutedMI->getOperand(Idx2).setReg(Reg1); CommutedMI->getOperand(Idx1).setReg(Reg2); diff --git a/llvm/lib/ExecutionEngine/JITLink/aarch32.cpp b/llvm/lib/ExecutionEngine/JITLink/aarch32.cpp index 1e325d76bd515..12d31f809f882 100644 --- a/llvm/lib/ExecutionEngine/JITLink/aarch32.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/aarch32.cpp @@ -239,15 +239,14 @@ Error makeUnexpectedOpcodeError(const LinkGraph &G, const ThumbRelocation &R, Edge::Kind Kind) { return make_error( formatv("Invalid opcode [ {0:x4}, {1:x4} ] for relocation: {2}", - static_cast(R.Hi), static_cast(R.Lo), - G.getEdgeKindName(Kind))); + R.Hi.value(), R.Lo.value(), G.getEdgeKindName(Kind))); } Error makeUnexpectedOpcodeError(const LinkGraph &G, const ArmRelocation &R, Edge::Kind Kind) { return make_error( - formatv("Invalid opcode {0:x8} for relocation: {1}", - static_cast(R.Wd), G.getEdgeKindName(Kind))); + formatv("Invalid opcode {0:x8} for relocation: {1}", R.Wd.value(), + G.getEdgeKindName(Kind))); } template constexpr bool isArm() { diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index db792a3b52d24..170224616ac64 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ 
b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -2617,7 +2617,7 @@ void OpenMPIRBuilder::emitReductionListCopy( Expected OpenMPIRBuilder::emitInterWarpCopyFunction( const LocationDescription &Loc, ArrayRef ReductionInfos, AttributeList FuncAttrs) { - InsertPointTy SavedIP = Builder.saveIP(); + IRBuilder<>::InsertPointGuard IPG(Builder); LLVMContext &Ctx = M.getContext(); FunctionType *FuncTy = FunctionType::get( Builder.getVoidTy(), {Builder.getPtrTy(), Builder.getInt32Ty()}, @@ -2630,6 +2630,7 @@ Expected OpenMPIRBuilder::emitInterWarpCopyFunction( WcFunc->addParamAttr(1, Attribute::NoUndef); BasicBlock *EntryBB = BasicBlock::Create(M.getContext(), "entry", WcFunc); Builder.SetInsertPoint(EntryBB); + Builder.SetCurrentDebugLocation(llvm::DebugLoc()); // ReduceList: thread local Reduce list. // At the stage of the computation when this function is called, partially @@ -2844,7 +2845,6 @@ Expected OpenMPIRBuilder::emitInterWarpCopyFunction( } Builder.CreateRetVoid(); - Builder.restoreIP(SavedIP); return WcFunc; } @@ -2853,6 +2853,7 @@ Function *OpenMPIRBuilder::emitShuffleAndReduceFunction( ArrayRef ReductionInfos, Function *ReduceFn, AttributeList FuncAttrs) { LLVMContext &Ctx = M.getContext(); + IRBuilder<>::InsertPointGuard IPG(Builder); FunctionType *FuncTy = FunctionType::get(Builder.getVoidTy(), {Builder.getPtrTy(), Builder.getInt16Ty(), @@ -2871,6 +2872,7 @@ Function *OpenMPIRBuilder::emitShuffleAndReduceFunction( SarFunc->addParamAttr(3, Attribute::SExt); BasicBlock *EntryBB = BasicBlock::Create(M.getContext(), "entry", SarFunc); Builder.SetInsertPoint(EntryBB); + Builder.SetCurrentDebugLocation(llvm::DebugLoc()); // Thread local Reduce list used to host the values of data to be reduced. 
Argument *ReduceListArg = SarFunc->getArg(0); @@ -3017,7 +3019,7 @@ Function *OpenMPIRBuilder::emitShuffleAndReduceFunction( Function *OpenMPIRBuilder::emitListToGlobalCopyFunction( ArrayRef ReductionInfos, Type *ReductionsBufferTy, AttributeList FuncAttrs) { - OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP(); + IRBuilder<>::InsertPointGuard IPG(Builder); LLVMContext &Ctx = M.getContext(); FunctionType *FuncTy = FunctionType::get( Builder.getVoidTy(), @@ -3033,6 +3035,7 @@ Function *OpenMPIRBuilder::emitListToGlobalCopyFunction( BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "entry", LtGCFunc); Builder.SetInsertPoint(EntryBlock); + Builder.SetCurrentDebugLocation(llvm::DebugLoc()); // Buffer: global reduction buffer. Argument *BufferArg = LtGCFunc->getArg(0); @@ -3120,14 +3123,13 @@ Function *OpenMPIRBuilder::emitListToGlobalCopyFunction( } Builder.CreateRetVoid(); - Builder.restoreIP(OldIP); return LtGCFunc; } Function *OpenMPIRBuilder::emitListToGlobalReduceFunction( ArrayRef ReductionInfos, Function *ReduceFn, Type *ReductionsBufferTy, AttributeList FuncAttrs) { - OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP(); + IRBuilder<>::InsertPointGuard IPG(Builder); LLVMContext &Ctx = M.getContext(); FunctionType *FuncTy = FunctionType::get( Builder.getVoidTy(), @@ -3143,6 +3145,7 @@ Function *OpenMPIRBuilder::emitListToGlobalReduceFunction( BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "entry", LtGRFunc); Builder.SetInsertPoint(EntryBlock); + Builder.SetCurrentDebugLocation(llvm::DebugLoc()); // Buffer: global reduction buffer. 
Argument *BufferArg = LtGRFunc->getArg(0); @@ -3203,14 +3206,13 @@ Function *OpenMPIRBuilder::emitListToGlobalReduceFunction( Builder.CreateCall(ReduceFn, {LocalReduceListAddrCast, ReduceList}) ->addFnAttr(Attribute::NoUnwind); Builder.CreateRetVoid(); - Builder.restoreIP(OldIP); return LtGRFunc; } Function *OpenMPIRBuilder::emitGlobalToListCopyFunction( ArrayRef ReductionInfos, Type *ReductionsBufferTy, AttributeList FuncAttrs) { - OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP(); + IRBuilder<>::InsertPointGuard IPG(Builder); LLVMContext &Ctx = M.getContext(); FunctionType *FuncTy = FunctionType::get( Builder.getVoidTy(), @@ -3226,6 +3228,7 @@ Function *OpenMPIRBuilder::emitGlobalToListCopyFunction( BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "entry", LtGCFunc); Builder.SetInsertPoint(EntryBlock); + Builder.SetCurrentDebugLocation(llvm::DebugLoc()); // Buffer: global reduction buffer. Argument *BufferArg = LtGCFunc->getArg(0); @@ -3311,14 +3314,13 @@ Function *OpenMPIRBuilder::emitGlobalToListCopyFunction( } Builder.CreateRetVoid(); - Builder.restoreIP(OldIP); return LtGCFunc; } Function *OpenMPIRBuilder::emitGlobalToListReduceFunction( ArrayRef ReductionInfos, Function *ReduceFn, Type *ReductionsBufferTy, AttributeList FuncAttrs) { - OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP(); + IRBuilder<>::InsertPointGuard IPG(Builder); LLVMContext &Ctx = M.getContext(); auto *FuncTy = FunctionType::get( Builder.getVoidTy(), @@ -3334,6 +3336,7 @@ Function *OpenMPIRBuilder::emitGlobalToListReduceFunction( BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "entry", LtGRFunc); Builder.SetInsertPoint(EntryBlock); + Builder.SetCurrentDebugLocation(llvm::DebugLoc()); // Buffer: global reduction buffer. 
Argument *BufferArg = LtGRFunc->getArg(0); @@ -3394,7 +3397,6 @@ Function *OpenMPIRBuilder::emitGlobalToListReduceFunction( Builder.CreateCall(ReduceFn, {ReduceList, ReductionList}) ->addFnAttr(Attribute::NoUnwind); Builder.CreateRetVoid(); - Builder.restoreIP(OldIP); return LtGRFunc; } @@ -3407,6 +3409,7 @@ std::string OpenMPIRBuilder::getReductionFuncName(StringRef Name) const { Expected OpenMPIRBuilder::createReductionFunction( StringRef ReducerName, ArrayRef ReductionInfos, ReductionGenCBKind ReductionGenCBKind, AttributeList FuncAttrs) { + IRBuilder<>::InsertPointGuard IPG(Builder); auto *FuncTy = FunctionType::get(Builder.getVoidTy(), {Builder.getPtrTy(), Builder.getPtrTy()}, /* IsVarArg */ false); @@ -3419,6 +3422,7 @@ Expected OpenMPIRBuilder::createReductionFunction( BasicBlock *EntryBB = BasicBlock::Create(M.getContext(), "entry", ReductionFunc); Builder.SetInsertPoint(EntryBB); + Builder.SetCurrentDebugLocation(llvm::DebugLoc()); // Need to alloca memory here and deal with the pointers before getting // LHS/RHS pointers out @@ -3746,10 +3750,12 @@ static Error populateReductionFunction( Function *ReductionFunc, ArrayRef ReductionInfos, IRBuilder<> &Builder, ArrayRef IsByRef, bool IsGPU) { + IRBuilder<>::InsertPointGuard IPG(Builder); Module *Module = ReductionFunc->getParent(); BasicBlock *ReductionFuncBlock = BasicBlock::Create(Module->getContext(), "", ReductionFunc); Builder.SetInsertPoint(ReductionFuncBlock); + Builder.SetCurrentDebugLocation(llvm::DebugLoc()); Value *LHSArrayPtr = nullptr; Value *RHSArrayPtr = nullptr; if (IsGPU) { diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp index 4e09f847627af..84a56058de834 100644 --- a/llvm/lib/IR/DebugInfo.cpp +++ b/llvm/lib/IR/DebugInfo.cpp @@ -1325,6 +1325,63 @@ return wrap(unwrap(Builder)->createEnumerationType( LineNumber, SizeInBits, AlignInBits, Elts, unwrapDI(ClassTy))); } +LLVMMetadataRef LLVMDIBuilderCreateSetType( + LLVMDIBuilderRef Builder, LLVMMetadataRef Scope, const char 
*Name, + size_t NameLen, LLVMMetadataRef File, unsigned LineNumber, + uint64_t SizeInBits, uint32_t AlignInBits, LLVMMetadataRef BaseTy) { + return wrap(unwrap(Builder)->createSetType( + unwrapDI(Scope), {Name, NameLen}, unwrapDI(File), + LineNumber, SizeInBits, AlignInBits, unwrapDI(BaseTy))); +} + +LLVMMetadataRef LLVMDIBuilderCreateSubrangeType( + LLVMDIBuilderRef Builder, LLVMMetadataRef Scope, const char *Name, + size_t NameLen, unsigned LineNo, LLVMMetadataRef File, uint64_t SizeInBits, + uint32_t AlignInBits, LLVMDIFlags Flags, LLVMMetadataRef BaseTy, + LLVMMetadataRef LowerBound, LLVMMetadataRef UpperBound, + LLVMMetadataRef Stride, LLVMMetadataRef Bias) { + return wrap(unwrap(Builder)->createSubrangeType( + {Name, NameLen}, unwrapDI(File), LineNo, unwrapDI(Scope), + SizeInBits, AlignInBits, map_from_llvmDIFlags(Flags), + unwrapDI(BaseTy), unwrap(LowerBound), unwrap(UpperBound), + unwrap(Stride), unwrap(Bias))); +} + +/// MD may be nullptr, a DIExpression or DIVariable. +PointerUnion unwrapExprVar(LLVMMetadataRef MD) { + if (!MD) + return nullptr; + MDNode *MDN = unwrapDI(MD); + if (auto *E = dyn_cast(MDN)) + return E; + assert(isa(MDN) && "Expected DIExpression or DIVariable"); + return cast(MDN); +} + +LLVMMetadataRef LLVMDIBuilderCreateDynamicArrayType( + LLVMDIBuilderRef Builder, LLVMMetadataRef Scope, const char *Name, + size_t NameLen, unsigned LineNo, LLVMMetadataRef File, uint64_t Size, + uint32_t AlignInBits, LLVMMetadataRef Ty, LLVMMetadataRef *Subscripts, + unsigned NumSubscripts, LLVMMetadataRef DataLocation, + LLVMMetadataRef Associated, LLVMMetadataRef Allocated, LLVMMetadataRef Rank, + LLVMMetadataRef BitStride) { + auto Subs = + unwrap(Builder)->getOrCreateArray({unwrap(Subscripts), NumSubscripts}); + return wrap(unwrap(Builder)->createArrayType( + unwrapDI(Scope), {Name, NameLen}, unwrapDI(File), LineNo, + Size, AlignInBits, unwrapDI(Ty), Subs, + unwrapExprVar(DataLocation), unwrapExprVar(Associated), + unwrapExprVar(Allocated), 
unwrapExprVar(Rank), unwrap(BitStride))); +} + +void LLVMReplaceArrays(LLVMDIBuilderRef Builder, LLVMMetadataRef *T, + LLVMMetadataRef *Elements, unsigned NumElements) { + auto CT = unwrap(*T); + auto Elts = + unwrap(Builder)->getOrCreateArray({unwrap(Elements), NumElements}); + unwrap(Builder)->replaceArrays(CT, Elts); +} + LLVMMetadataRef LLVMDIBuilderCreateUnionType( LLVMDIBuilderRef Builder, LLVMMetadataRef Scope, const char *Name, size_t NameLen, LLVMMetadataRef File, unsigned LineNumber, diff --git a/llvm/lib/Support/StringRef.cpp b/llvm/lib/Support/StringRef.cpp index 96829bd062a78..dc758785e40d5 100644 --- a/llvm/lib/Support/StringRef.cpp +++ b/llvm/lib/Support/StringRef.cpp @@ -24,10 +24,10 @@ constexpr size_t StringRef::npos; // strncasecmp() is not available on non-POSIX systems, so define an // alternative function here. -static int ascii_strncasecmp(const char *LHS, const char *RHS, size_t Length) { - for (size_t I = 0; I < Length; ++I) { - unsigned char LHC = toLower(LHS[I]); - unsigned char RHC = toLower(RHS[I]); +static int ascii_strncasecmp(StringRef LHS, StringRef RHS) { + for (auto [LC, RC] : zip_equal(LHS, RHS)) { + unsigned char LHC = toLower(LC); + unsigned char RHC = toLower(RC); if (LHC != RHC) return LHC < RHC ? 
-1 : 1; } @@ -35,8 +35,8 @@ static int ascii_strncasecmp(const char *LHS, const char *RHS, size_t Length) { } int StringRef::compare_insensitive(StringRef RHS) const { - if (int Res = - ascii_strncasecmp(data(), RHS.data(), std::min(size(), RHS.size()))) + size_t Min = std::min(size(), RHS.size()); + if (int Res = ascii_strncasecmp(take_front(Min), RHS.take_front(Min))) return Res; if (size() == RHS.size()) return 0; @@ -45,13 +45,12 @@ int StringRef::compare_insensitive(StringRef RHS) const { bool StringRef::starts_with_insensitive(StringRef Prefix) const { return size() >= Prefix.size() && - ascii_strncasecmp(data(), Prefix.data(), Prefix.size()) == 0; + ascii_strncasecmp(take_front(Prefix.size()), Prefix) == 0; } bool StringRef::ends_with_insensitive(StringRef Suffix) const { return size() >= Suffix.size() && - ascii_strncasecmp(end() - Suffix.size(), Suffix.data(), - Suffix.size()) == 0; + ascii_strncasecmp(take_back(Suffix.size()), Suffix) == 0; } size_t StringRef::find_insensitive(char C, size_t From) const { diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 331c8036e26f1..f7de61f044a7d 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1352,6 +1352,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::VECREDUCE_FADD, VT, Legal); } } + if (Subtarget->hasFullFP16()) + setOperationAction(ISD::VECREDUCE_FADD, MVT::v2f16, Custom); + for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16, MVT::v4i32 }) { setOperationAction(ISD::VECREDUCE_ADD, VT, Custom); @@ -16046,9 +16049,19 @@ static SDValue getVectorBitwiseReduce(unsigned Opcode, SDValue Vec, EVT VT, SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const { SDValue Src = Op.getOperand(0); + EVT SrcVT = Src.getValueType(); + + // Scalarize v2f16 to turn it into a faddp. 
This will be more efficient than + // widening by inserting zeroes. + if (Subtarget->hasFullFP16() && Op.getOpcode() == ISD::VECREDUCE_FADD && + SrcVT == MVT::v2f16) { + SDLoc DL(Op); + return DAG.getNode(ISD::FADD, DL, MVT::f16, + DAG.getExtractVectorElt(DL, MVT::f16, Src, 0), + DAG.getExtractVectorElt(DL, MVT::f16, Src, 1)); + } // Try to lower fixed length reductions to SVE. - EVT SrcVT = Src.getValueType(); bool OverrideNEON = !Subtarget->isNeonAvailable() || Op.getOpcode() == ISD::VECREDUCE_AND || Op.getOpcode() == ISD::VECREDUCE_OR || @@ -17834,17 +17847,19 @@ bool AArch64TargetLowering::shouldConsiderGEPOffsetSplit() const { bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd( const MachineFunction &MF, EVT VT) const { - VT = VT.getScalarType(); + EVT ScalarVT = VT.getScalarType(); - if (!VT.isSimple()) + if (!ScalarVT.isSimple()) return false; - switch (VT.getSimpleVT().SimpleTy) { + switch (ScalarVT.getSimpleVT().SimpleTy) { case MVT::f16: return Subtarget->hasFullFP16(); case MVT::f32: case MVT::f64: return true; + case MVT::bf16: + return VT.isScalableVector() && Subtarget->hasSVEB16B16(); default: break; } diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 8847c62690714..c1474773faa76 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -7868,62 +7868,48 @@ void AArch64InstrInfo::genAlternativeCodeSequence( MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; case AArch64MachineCombinerPattern::MULADDWI_OP1: - case AArch64MachineCombinerPattern::MULADDXI_OP1: { + case AArch64MachineCombinerPattern::MULADDXI_OP1: + case AArch64MachineCombinerPattern::MULSUBWI_OP1: + case AArch64MachineCombinerPattern::MULSUBXI_OP1: { // MUL I=A,B,0 - // ADD R,I,Imm - // ==> MOV V, Imm + // ADD/SUB R,I,Imm + // ==> MOV V, Imm/-Imm // ==> MADD R,A,B,V // --- Create(MADD); - const TargetRegisterClass *OrrRC; - unsigned BitSize, 
OrrOpc, ZeroReg; - if (Pattern == AArch64MachineCombinerPattern::MULADDWI_OP1) { - OrrOpc = AArch64::ORRWri; - OrrRC = &AArch64::GPR32spRegClass; + const TargetRegisterClass *RC; + unsigned BitSize, MovImm; + if (Pattern == AArch64MachineCombinerPattern::MULADDWI_OP1 || + Pattern == AArch64MachineCombinerPattern::MULSUBWI_OP1) { + MovImm = AArch64::MOVi32imm; + RC = &AArch64::GPR32spRegClass; BitSize = 32; - ZeroReg = AArch64::WZR; Opc = AArch64::MADDWrrr; RC = &AArch64::GPR32RegClass; } else { - OrrOpc = AArch64::ORRXri; - OrrRC = &AArch64::GPR64spRegClass; + MovImm = AArch64::MOVi64imm; + RC = &AArch64::GPR64spRegClass; BitSize = 64; - ZeroReg = AArch64::XZR; Opc = AArch64::MADDXrrr; RC = &AArch64::GPR64RegClass; } - Register NewVR = MRI.createVirtualRegister(OrrRC); + Register NewVR = MRI.createVirtualRegister(RC); uint64_t Imm = Root.getOperand(2).getImm(); if (Root.getOperand(3).isImm()) { unsigned Val = Root.getOperand(3).getImm(); Imm = Imm << Val; } - uint64_t UImm = SignExtend64(Imm, BitSize); - // The immediate can be composed via a single instruction. + bool IsSub = Pattern == AArch64MachineCombinerPattern::MULSUBWI_OP1 || + Pattern == AArch64MachineCombinerPattern::MULSUBXI_OP1; + uint64_t UImm = SignExtend64(IsSub ? -Imm : Imm, BitSize); + // Check that the immediate can be composed via a single instruction. SmallVector Insn; AArch64_IMM::expandMOVImm(UImm, BitSize, Insn); if (Insn.size() != 1) return; - auto MovI = Insn.begin(); - MachineInstrBuilder MIB1; - // MOV is an alias for one of three instructions: movz, movn, and orr. 
- if (MovI->Opcode == OrrOpc) - MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(OrrOpc), NewVR) - .addReg(ZeroReg) - .addImm(MovI->Op2); - else { - if (BitSize == 32) - assert((MovI->Opcode == AArch64::MOVNWi || - MovI->Opcode == AArch64::MOVZWi) && - "Expected opcode"); - else - assert((MovI->Opcode == AArch64::MOVNXi || - MovI->Opcode == AArch64::MOVZXi) && - "Expected opcode"); - MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(MovI->Opcode), NewVR) - .addImm(MovI->Op1) - .addImm(MovI->Op2); - } + MachineInstrBuilder MIB1 = + BuildMI(MF, MIMetadata(Root), TII->get(MovImm), NewVR) + .addImm(IsSub ? -Imm : Imm); InsInstrs.push_back(MIB1); InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC); @@ -7977,67 +7963,6 @@ void AArch64InstrInfo::genAlternativeCodeSequence( } MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; - case AArch64MachineCombinerPattern::MULSUBWI_OP1: - case AArch64MachineCombinerPattern::MULSUBXI_OP1: { - // MUL I=A,B,0 - // SUB R,I, Imm - // ==> MOV V, -Imm - // ==> MADD R,A,B,V // = -Imm + A*B - // --- Create(MADD); - const TargetRegisterClass *OrrRC; - unsigned BitSize, OrrOpc, ZeroReg; - if (Pattern == AArch64MachineCombinerPattern::MULSUBWI_OP1) { - OrrOpc = AArch64::ORRWri; - OrrRC = &AArch64::GPR32spRegClass; - BitSize = 32; - ZeroReg = AArch64::WZR; - Opc = AArch64::MADDWrrr; - RC = &AArch64::GPR32RegClass; - } else { - OrrOpc = AArch64::ORRXri; - OrrRC = &AArch64::GPR64spRegClass; - BitSize = 64; - ZeroReg = AArch64::XZR; - Opc = AArch64::MADDXrrr; - RC = &AArch64::GPR64RegClass; - } - Register NewVR = MRI.createVirtualRegister(OrrRC); - uint64_t Imm = Root.getOperand(2).getImm(); - if (Root.getOperand(3).isImm()) { - unsigned Val = Root.getOperand(3).getImm(); - Imm = Imm << Val; - } - uint64_t UImm = SignExtend64(-Imm, BitSize); - // The immediate can be composed via a single instruction. 
- SmallVector Insn; - AArch64_IMM::expandMOVImm(UImm, BitSize, Insn); - if (Insn.size() != 1) - return; - auto MovI = Insn.begin(); - MachineInstrBuilder MIB1; - // MOV is an alias for one of three instructions: movz, movn, and orr. - if (MovI->Opcode == OrrOpc) - MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(OrrOpc), NewVR) - .addReg(ZeroReg) - .addImm(MovI->Op2); - else { - if (BitSize == 32) - assert((MovI->Opcode == AArch64::MOVNWi || - MovI->Opcode == AArch64::MOVZWi) && - "Expected opcode"); - else - assert((MovI->Opcode == AArch64::MOVNXi || - MovI->Opcode == AArch64::MOVZXi) && - "Expected opcode"); - MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(MovI->Opcode), NewVR) - .addImm(MovI->Op1) - .addImm(MovI->Op2); - } - InsInstrs.push_back(MIB1); - InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); - MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC); - break; - } case AArch64MachineCombinerPattern::MULADDv8i8_OP1: Opc = AArch64::MLAv8i8; RC = &AArch64::FPR64RegClass; diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index dde4e7ab0e890..e6b22695761e7 100644 --- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -2529,31 +2529,63 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward( return E; } - for (unsigned Count = 0; MBBI != E && Count < Limit; - MBBI = next_nodbg(MBBI, E)) { - MachineInstr &MI = *MBBI; - - // Don't count transient instructions towards the search limit since there - // may be different numbers of them if e.g. debug information is present. - if (!MI.isTransient()) - ++Count; - - // If we found a match, return it. 
- if (isMatchingUpdateInsn(*I, MI, BaseReg, UnscaledOffset)) - return MBBI; + unsigned Count = 0; + MachineBasicBlock *CurMBB = I->getParent(); + // choice of next block to visit is liveins-based + bool VisitSucc = CurMBB->getParent()->getRegInfo().tracksLiveness(); + + while (true) { + for (MachineBasicBlock::iterator CurEnd = CurMBB->end(); + MBBI != CurEnd && Count < Limit; MBBI = next_nodbg(MBBI, CurEnd)) { + MachineInstr &MI = *MBBI; + + // Don't count transient instructions towards the search limit since there + // may be different numbers of them if e.g. debug information is present. + if (!MI.isTransient()) + ++Count; + + // If we found a match, return it. + if (isMatchingUpdateInsn(*I, MI, BaseReg, UnscaledOffset)) + return MBBI; + + // Update the status of what the instruction clobbered and used. + LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, + TRI); + + // Otherwise, if the base register is used or modified, we have no match, + // so return early. If we are optimizing SP, do not allow instructions + // that may load or store in between the load and the optimized value + // update. + if (!ModifiedRegUnits.available(BaseReg) || + !UsedRegUnits.available(BaseReg) || + (BaseRegSP && MBBI->mayLoadOrStore())) + return E; + } - // Update the status of what the instruction clobbered and used. - LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI); + if (!VisitSucc || Limit <= Count) + break; - // Otherwise, if the base register is used or modified, we have no match, so - // return early. - // If we are optimizing SP, do not allow instructions that may load or store - // in between the load and the optimized value update. 
- if (!ModifiedRegUnits.available(BaseReg) || - !UsedRegUnits.available(BaseReg) || - (BaseRegSP && MBBI->mayLoadOrStore())) - return E; + // Try to go downward to successors along a CF path w/o side enters + // such that BaseReg is alive along it but not at its exits + MachineBasicBlock *SuccToVisit = nullptr; + unsigned LiveSuccCount = 0; + for (MachineBasicBlock *Succ : CurMBB->successors()) { + for (MCRegAliasIterator AI(BaseReg, TRI, true); AI.isValid(); ++AI) { + if (Succ->isLiveIn(*AI)) { + if (LiveSuccCount++) + return E; + if (Succ->pred_size() == 1) + SuccToVisit = Succ; + break; + } + } + } + if (!SuccToVisit) + break; + CurMBB = SuccToVisit; + MBBI = CurMBB->begin(); } + return E; } diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 3b7e5a6c2b1cf..a0320f919e8c5 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -2490,6 +2490,8 @@ multiclass sve_fp_3op_p_zds_a_bfloat opc, string asm, string Ps, SVEPseudo2Instr, SVEInstr2Rev; def : SVE_4_Op_Pat(NAME)>; + def : SVE_4_Op_Pat(NAME)>; + def : SVE_4_Op_Pat(NAME)>; } class sve_fp_3op_p_zds_b sz, bits<2> opc, string asm, diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 91ace4d2b7f16..31420caca0899 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -2488,6 +2488,10 @@ def HasFmaakFmamkF32Insts : Predicate<"Subtarget->hasFmaakFmamkF32Insts()">, AssemblerPredicate<(any_of FeatureGFX10Insts, FeatureGFX940Insts)>; +def HasFmaakFmamkF64Insts : + Predicate<"Subtarget->hasFmaakFmamkF64Insts()">, + AssemblerPredicate<(any_of FeatureGFX1250Insts)>; + def HasImageInsts : Predicate<"Subtarget->hasImageInsts()">, AssemblerPredicate<(all_of FeatureImageInsts)>; diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 92fa66074b26e..6439230b8769f 100644 --- 
a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -336,6 +336,20 @@ class AMDGPUOperand : public MCParsedAsmOperand { return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32); } + bool isRegOrInlineImmWithFP64InputMods() const { + return isRegOrInline(AMDGPU::VS_64RegClassID, MVT::f64); + } + + bool isVRegWithInputMods(unsigned RCID) const { return isRegClass(RCID); } + + bool isVRegWithFP32InputMods() const { + return isVRegWithInputMods(AMDGPU::VGPR_32RegClassID); + } + + bool isVRegWithFP64InputMods() const { + return isVRegWithInputMods(AMDGPU::VReg_64RegClassID); + } + bool isPackedFP16InputMods() const { return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16); } @@ -531,7 +545,7 @@ class AMDGPUOperand : public MCParsedAsmOperand { return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); } - bool isVCSrcB64() const { + bool isVCSrc_b64() const { return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); } @@ -557,7 +571,7 @@ class AMDGPUOperand : public MCParsedAsmOperand { return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); } - bool isVCSrcF64() const { + bool isVCSrc_f64() const { return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); } @@ -605,7 +619,7 @@ class AMDGPUOperand : public MCParsedAsmOperand { return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr(); } - bool isVSrc_b64() const { return isVCSrcF64() || isLiteralImm(MVT::i64); } + bool isVSrc_b64() const { return isVCSrc_f64() || isLiteralImm(MVT::i64); } bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); } @@ -621,15 +635,11 @@ class AMDGPUOperand : public MCParsedAsmOperand { bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); } - bool isVCSrcV2FP32() const { - return isVCSrcF64(); - } + bool isVCSrcV2FP32() const { return isVCSrc_f64(); } bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); } - bool 
isVCSrcV2INT32() const { - return isVCSrcB64(); - } + bool isVCSrc_v2b32() const { return isVCSrc_b64(); } bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); } @@ -637,7 +647,7 @@ class AMDGPUOperand : public MCParsedAsmOperand { return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr(); } - bool isVSrc_f64() const { return isVCSrcF64() || isLiteralImm(MVT::f64); } + bool isVSrc_f64() const { return isVCSrc_f64() || isLiteralImm(MVT::f64); } bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); } @@ -941,6 +951,8 @@ class AMDGPUOperand : public MCParsedAsmOperand { return isLiteralImm(MVT::f16); } + bool isKImmFP64() const { return isLiteralImm(MVT::f64); } + bool isMem() const override { return false; } @@ -1531,6 +1543,8 @@ class AMDGPUAsmParser : public MCTargetAsmParser { bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); } + bool isGFX1250() const { return AMDGPU::isGFX1250(getSTI()); } + bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); } bool isGFX10_BEncoding() const { @@ -1782,8 +1796,13 @@ class AMDGPUAsmParser : public MCTargetAsmParser { bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); bool validateSOPLiteral(const MCInst &Inst) const; bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); - bool validateVOPDRegBankConstraints(const MCInst &Inst, - const OperandVector &Operands); + std::optional checkVOPDRegBankConstraints(const MCInst &Inst, + bool AsVOPD3); + bool validateVOPD(const MCInst &Inst, const OperandVector &Operands); + bool tryVOPD(const MCInst &Inst); + bool tryVOPD3(const MCInst &Inst); + bool tryAnotherVOPDEncoding(const MCInst &Inst); + bool validateIntClampSupported(const MCInst &Inst); bool validateMIMGAtomicDMask(const MCInst &Inst); bool validateMIMGGatherDMask(const MCInst &Inst); @@ -1986,6 +2005,7 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { case 
AMDGPU::OPERAND_REG_INLINE_C_INT64: case AMDGPU::OPERAND_REG_INLINE_C_FP64: case AMDGPU::OPERAND_REG_INLINE_AC_FP64: + case AMDGPU::OPERAND_KIMM64: return &APFloat::IEEEdouble(); case AMDGPU::OPERAND_REG_IMM_FP16: case AMDGPU::OPERAND_REG_INLINE_C_FP16: @@ -2326,6 +2346,11 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo // in predicate methods (isLiteralImm()) llvm_unreachable("fp literal in 64-bit integer instruction."); + case AMDGPU::OPERAND_KIMM64: + Inst.addOperand(MCOperand::createImm(Val)); + setImmKindMandatoryLiteral(); + return; + case AMDGPU::OPERAND_REG_IMM_BF16: case AMDGPU::OPERAND_REG_INLINE_C_BF16: case AMDGPU::OPERAND_REG_INLINE_C_V2BF16: @@ -2531,6 +2556,13 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); setImmKindMandatoryLiteral(); return; + case AMDGPU::OPERAND_KIMM64: + if ((isInt<32>(Val) || isUInt<32>(Val)) && !getModifiers().Lit64) + Val <<= 32; + + Inst.addOperand(MCOperand::createImm(Val)); + setImmKindMandatoryLiteral(); + return; default: llvm_unreachable("invalid operand size"); } @@ -3569,6 +3601,13 @@ unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { } } + // Asm can first try to match VOPD or VOPD3. By failing early here with + // Match_InvalidOperand, the parser will retry parsing as VOPD3 or VOPD. + // Checking later during validateInstruction does not give a chance to retry + // parsing as a different encoding. 
+ if (tryAnotherVOPDEncoding(Inst)) + return Match_InvalidOperand; + return Match_Success; } @@ -3749,8 +3788,10 @@ static OperandIndices getSrcOperandIndices(unsigned Opcode, return {getNamedOperandIdx(Opcode, OpName::src0X), getNamedOperandIdx(Opcode, OpName::vsrc1X), + getNamedOperandIdx(Opcode, OpName::vsrc2X), getNamedOperandIdx(Opcode, OpName::src0Y), getNamedOperandIdx(Opcode, OpName::vsrc1Y), + getNamedOperandIdx(Opcode, OpName::vsrc2Y), ImmXIdx, ImmIdx}; } @@ -3880,12 +3921,12 @@ bool AMDGPUAsmParser::validateConstantBusLimitations( return false; } -bool AMDGPUAsmParser::validateVOPDRegBankConstraints( - const MCInst &Inst, const OperandVector &Operands) { +std::optional +AMDGPUAsmParser::checkVOPDRegBankConstraints(const MCInst &Inst, bool AsVOPD3) { const unsigned Opcode = Inst.getOpcode(); if (!isVOPD(Opcode)) - return true; + return {}; const MCRegisterInfo *TRI = getContext().getRegisterInfo(); @@ -3896,16 +3937,64 @@ bool AMDGPUAsmParser::validateVOPDRegBankConstraints( : MCRegister(); }; - // On GFX12 if both OpX and OpY are V_MOV_B32 then OPY uses SRC2 source-cache. - bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12; + // On GFX12+ if both OpX and OpY are V_MOV_B32 then OPY uses SRC2 + // source-cache. + bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 || + Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 || + Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250; + bool AllowSameVGPR = isGFX1250(); + + if (AsVOPD3) { // Literal constants are not allowed with VOPD3. 
+ for (auto OpName : {OpName::src0X, OpName::src0Y}) { + int I = getNamedOperandIdx(Opcode, OpName); + const MCOperand &Op = Inst.getOperand(I); + if (!Op.isImm()) + continue; + int64_t Imm = Op.getImm(); + if (!AMDGPU::isInlinableLiteral32(Imm, hasInv2PiInlineImm()) && + !AMDGPU::isInlinableLiteral64(Imm, hasInv2PiInlineImm())) + return (unsigned)I; + } + + for (auto OpName : {OpName::vsrc1X, OpName::vsrc1Y, OpName::vsrc2X, + OpName::vsrc2Y, OpName::imm}) { + int I = getNamedOperandIdx(Opcode, OpName); + if (I == -1) + continue; + const MCOperand &Op = Inst.getOperand(I); + if (Op.isImm()) + return (unsigned)I; + } + } const auto &InstInfo = getVOPDInstInfo(Opcode, &MII); - auto InvalidCompOprIdx = - InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc); - if (!InvalidCompOprIdx) + auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex( + getVRegIdx, *TRI, SkipSrc, AllowSameVGPR, AsVOPD3); + + return InvalidCompOprIdx; +} + +bool AMDGPUAsmParser::validateVOPD(const MCInst &Inst, + const OperandVector &Operands) { + + unsigned Opcode = Inst.getOpcode(); + bool AsVOPD3 = MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3; + + if (AsVOPD3) { + for (unsigned I = 0, E = Operands.size(); I != E; ++I) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); + if ((Op.isRegKind() || Op.isImmTy(AMDGPUOperand::ImmTyNone)) && + (Op.getModifiers().getFPModifiersOperand() & SISrcMods::ABS)) + Error(Op.getStartLoc(), "ABS not allowed in VOPD3 instructions"); + } + } + + auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3); + if (!InvalidCompOprIdx.has_value()) return true; auto CompOprIdx = *InvalidCompOprIdx; + const auto &InstInfo = getVOPDInstInfo(Opcode, &MII); auto ParsedIdx = std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx), InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx)); @@ -3913,7 +4002,10 @@ bool AMDGPUAsmParser::validateVOPDRegBankConstraints( auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc(); if (CompOprIdx == 
VOPD::Component::DST) { - Error(Loc, "one dst register must be even and the other odd"); + if (AsVOPD3) + Error(Loc, "dst registers must be distinct"); + else + Error(Loc, "one dst register must be even and the other odd"); } else { auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM; Error(Loc, Twine("src") + Twine(CompSrcIdx) + @@ -3923,6 +4015,75 @@ bool AMDGPUAsmParser::validateVOPDRegBankConstraints( return false; } +// \returns true if \p Inst does not satisfy VOPD constraints, but can be +// potentially used as VOPD3 with the same operands. +bool AMDGPUAsmParser::tryVOPD3(const MCInst &Inst) { + // First check if it fits VOPD + auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, false); + if (!InvalidCompOprIdx.has_value()) + return false; + + // Then if it fits VOPD3 + InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, true); + if (InvalidCompOprIdx.has_value()) { + // If failed operand is dst it is better to show error about VOPD3 + // instruction as it has more capabilities and error message will be + // more informative. If the dst is not legal for VOPD3, then it is not + // legal for VOPD either. + if (*InvalidCompOprIdx == VOPD::Component::DST) + return true; + + // Otherwise prefer VOPD as we may find ourselves in an awkward situation + // with a conflict in tied implicit src2 of fmac and no asm operand + // to point to. + return false; + } + return true; +} + +// \returns true if a VOPD3 instruction can also be represented as a shorter +// VOPD encoding.
+bool AMDGPUAsmParser::tryVOPD(const MCInst &Inst) { + const unsigned Opcode = Inst.getOpcode(); + const auto &II = getVOPDInstInfo(Opcode, &MII); + unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(getSTI()); + if (!getCanBeVOPD(II[VOPD::X].getOpcode(), EncodingFamily, false).X || + !getCanBeVOPD(II[VOPD::Y].getOpcode(), EncodingFamily, false).Y) + return false; + + // This is an awkward exception, VOPD3 variant of V_DUAL_CNDMASK_B32 has + // explicit src2 even if it is vcc_lo. If it was parsed as VOPD3 it cannot + // be parsed as VOPD which does not accept src2. + if (II[VOPD::X].getOpcode() == AMDGPU::V_CNDMASK_B32_e32 || + II[VOPD::Y].getOpcode() == AMDGPU::V_CNDMASK_B32_e32) + return false; + + // If any modifiers are set this cannot be VOPD. + for (auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers, + OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers, + OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) { + int I = getNamedOperandIdx(Opcode, OpName); + if (I == -1) + continue; + if (Inst.getOperand(I).getImm()) + return false; + } + + return !tryVOPD3(Inst); +} + +// VOPD3 has more relaxed register constraints than VOPD. We prefer shorter VOPD +// form but switch to VOPD3 otherwise. 
+bool AMDGPUAsmParser::tryAnotherVOPDEncoding(const MCInst &Inst) { + const unsigned Opcode = Inst.getOpcode(); + if (!isGFX1250() || !isVOPD(Opcode)) + return false; + + if (MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3) + return tryVOPD(Inst); + return tryVOPD3(Inst); +} + bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { const unsigned Opc = Inst.getOpcode(); @@ -4846,7 +5007,7 @@ bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, unsigned NumExprs = 0; unsigned NumLiterals = 0; - uint32_t LiteralValue; + uint64_t LiteralValue; for (int OpIdx : OpIndices) { if (OpIdx == -1) @@ -4860,16 +5021,21 @@ bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { uint64_t Value = static_cast(MO.getImm()); - bool IsFP64 = AMDGPU::isSISrcFPOperand(Desc, OpIdx) && + bool IsForcedFP64 = + Desc.operands()[OpIdx].OperandType == AMDGPU::OPERAND_KIMM64 || + (Desc.operands()[OpIdx].OperandType == AMDGPU::OPERAND_REG_IMM_FP64 && + HasMandatoryLiteral); + bool IsFP64 = (IsForcedFP64 || AMDGPU::isSISrcFPOperand(Desc, OpIdx)) && AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8; bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64); - if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value)) { + if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value) && + !IsForcedFP64 && (!has64BitLiterals() || Desc.getSize() != 4)) { Error(getLitLoc(Operands), "invalid operand for instruction"); return false; } - if (IsFP64 && IsValid32Op) + if (IsFP64 && IsValid32Op && !IsForcedFP64) Value = Hi_32(Value); if (NumLiterals == 0 || LiteralValue != Value) { @@ -5243,7 +5409,7 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, if (!validateConstantBusLimitations(Inst, Operands)) { return false; } - if (!validateVOPDRegBankConstraints(Inst, Operands)) { + if (!validateVOPD(Inst, Operands)) { return false; } if (!validateIntClampSupported(Inst)) { @@ -9244,8 +9410,14 @@ ParseStatus 
AMDGPUAsmParser::parseVOPD(OperandVector &Operands) { // Create VOPD MCInst operands using parsed assembler operands. void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) { + const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); + auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]); + if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { + Op.addRegOrImmWithFPInputModsOperands(Inst, 2); + return; + } if (Op.isReg()) { Op.addRegOperands(Inst, 1); return; @@ -9274,6 +9446,17 @@ void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) { if (CInfo.hasSrc2Acc()) addOp(CInfo.getIndexOfDstInParsedOperands()); } + + int BitOp3Idx = + AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::bitop3); + if (BitOp3Idx != -1) { + OptionalImmIndexMap OptIdx; + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands.back()); + if (Op.isImm()) + OptIdx[Op.getImmTy()] = Operands.size() - 1; + + addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3); + } } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 7b1ea11d58168..98f7e17e9528c 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -353,6 +353,13 @@ static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm, return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm)); } +static DecodeStatus decodeOperand_KImmFP64(MCInst &Inst, uint64_t Imm, + uint64_t Addr, + const MCDisassembler *Decoder) { + const auto *DAsm = static_cast(Decoder); + return addOperand(Inst, DAsm->decodeMandatoryLiteral64Constant(Imm)); +} + static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val, uint64_t Addr, const void *Decoder) { const 
auto *DAsm = static_cast(Decoder); @@ -613,6 +620,15 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, tryDecodeInst(DecoderTableGFX12W6496, MI, DecW, Address, CS)) break; + if (STI.hasFeature(AMDGPU::Feature64BitLiterals)) { + // Return 8 bytes for a potential literal. + Bytes = Bytes_.slice(4, MaxInstBytesNum - 4); + + if (isGFX1250() && + tryDecodeInst(DecoderTableGFX125096, MI, DecW, Address, CS)) + break; + } + // Reinitialize Bytes Bytes = Bytes_.slice(0, MaxInstBytesNum); @@ -1467,6 +1483,17 @@ AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const { return MCOperand::createImm(Literal); } +MCOperand +AMDGPUDisassembler::decodeMandatoryLiteral64Constant(uint64_t Val) const { + if (HasLiteral) { + if (Literal64 != Val) + return errOperand(Val, "More than one unique literal is illegal"); + } + HasLiteral = true; + Literal = Literal64 = Val; + return MCOperand::createImm(Literal64); +} + MCOperand AMDGPUDisassembler::decodeLiteralConstant(bool ExtendFP64) const { // For now all literal constants are supposed to be unsigned integer // ToDo: deal with signed/unsigned 64-bit integer constants diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h index 8927f208fd2af..84041001b6ba7 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -178,6 +178,7 @@ class AMDGPUDisassembler : public MCDisassembler { static MCOperand decodeIntImmed(unsigned Imm); MCOperand decodeMandatoryLiteralConstant(unsigned Imm) const; + MCOperand decodeMandatoryLiteral64Constant(uint64_t Imm) const; MCOperand decodeLiteralConstant(bool ExtendFP64) const; MCOperand decodeLiteral64Constant() const; diff --git a/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp index ccc711a0bcc4e..27f40f1705bb4 100644 --- a/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp +++ 
b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp @@ -42,11 +42,13 @@ class GCNCreateVOPD { class VOPDCombineInfo { public: VOPDCombineInfo() = default; - VOPDCombineInfo(MachineInstr *First, MachineInstr *Second) - : FirstMI(First), SecondMI(Second) {} + VOPDCombineInfo(MachineInstr *First, MachineInstr *Second, + bool VOPD3 = false) + : FirstMI(First), SecondMI(Second), IsVOPD3(VOPD3) {} MachineInstr *FirstMI; MachineInstr *SecondMI; + bool IsVOPD3; }; public: @@ -59,9 +61,9 @@ class GCNCreateVOPD { unsigned Opc2 = SecondMI->getOpcode(); unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(SII->getSubtarget()); - int NewOpcode = - AMDGPU::getVOPDFull(AMDGPU::getVOPDOpcode(Opc1), - AMDGPU::getVOPDOpcode(Opc2), EncodingFamily); + int NewOpcode = AMDGPU::getVOPDFull(AMDGPU::getVOPDOpcode(Opc1, CI.IsVOPD3), + AMDGPU::getVOPDOpcode(Opc2, CI.IsVOPD3), + EncodingFamily, CI.IsVOPD3); assert(NewOpcode != -1 && "Should have previously determined this as a possible VOPD\n"); @@ -79,12 +81,36 @@ class GCNCreateVOPD { VOPDInst.add(MI[CompIdx]->getOperand(MCOprIdx)); } + const AMDGPU::OpName Mods[2][3] = { + {AMDGPU::OpName::src0X_modifiers, AMDGPU::OpName::vsrc1X_modifiers, + AMDGPU::OpName::vsrc2X_modifiers}, + {AMDGPU::OpName::src0Y_modifiers, AMDGPU::OpName::vsrc1Y_modifiers, + AMDGPU::OpName::vsrc2Y_modifiers}}; + const AMDGPU::OpName SrcMods[3] = {AMDGPU::OpName::src0_modifiers, + AMDGPU::OpName::src1_modifiers, + AMDGPU::OpName::src2_modifiers}; + const unsigned VOPDOpc = VOPDInst->getOpcode(); + for (auto CompIdx : VOPD::COMPONENTS) { auto CompSrcOprNum = InstInfo[CompIdx].getCompSrcOperandsNum(); + bool IsVOP3 = SII->isVOP3(*MI[CompIdx]); for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOprNum; ++CompSrcIdx) { - auto MCOprIdx = InstInfo[CompIdx].getIndexOfSrcInMCOperands(CompSrcIdx); + if (AMDGPU::hasNamedOperand(VOPDOpc, Mods[CompIdx][CompSrcIdx])) { + const MachineOperand *Mod = + SII->getNamedOperand(*MI[CompIdx], SrcMods[CompSrcIdx]); + VOPDInst.addImm(Mod ? 
Mod->getImm() : 0); + } + auto MCOprIdx = + InstInfo[CompIdx].getIndexOfSrcInMCOperands(CompSrcIdx, IsVOP3); VOPDInst.add(MI[CompIdx]->getOperand(MCOprIdx)); } + if (MI[CompIdx]->getOpcode() == AMDGPU::V_CNDMASK_B32_e32 && CI.IsVOPD3) + VOPDInst.addReg(AMDGPU::VCC_LO); + } + + if (CI.IsVOPD3) { + if (unsigned BitOp2 = AMDGPU::getBitOp2(Opc2)) + VOPDInst.addImm(BitOp2); } SII->fixImplicitOperands(*VOPDInst); @@ -109,6 +135,8 @@ class GCNCreateVOPD { const SIInstrInfo *SII = ST->getInstrInfo(); bool Changed = false; + unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(*ST); + bool HasVOPD3 = ST->hasVOPD3(); SmallVector ReplaceCandidates; @@ -124,19 +152,27 @@ class GCNCreateVOPD { auto *SecondMI = &*MII; unsigned Opc = FirstMI->getOpcode(); unsigned Opc2 = SecondMI->getOpcode(); - llvm::AMDGPU::CanBeVOPD FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc); - llvm::AMDGPU::CanBeVOPD SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2); VOPDCombineInfo CI; - if (FirstCanBeVOPD.X && SecondCanBeVOPD.Y) - CI = VOPDCombineInfo(FirstMI, SecondMI); - else if (FirstCanBeVOPD.Y && SecondCanBeVOPD.X) - CI = VOPDCombineInfo(SecondMI, FirstMI); - else - continue; - // checkVOPDRegConstraints cares about program order, but doReplace - // cares about X-Y order in the constituted VOPD - if (llvm::checkVOPDRegConstraints(*SII, *FirstMI, *SecondMI)) { + const auto checkVOPD = [&](bool VOPD3) -> bool { + llvm::AMDGPU::CanBeVOPD FirstCanBeVOPD = + AMDGPU::getCanBeVOPD(Opc, EncodingFamily, VOPD3); + llvm::AMDGPU::CanBeVOPD SecondCanBeVOPD = + AMDGPU::getCanBeVOPD(Opc2, EncodingFamily, VOPD3); + + if (FirstCanBeVOPD.X && SecondCanBeVOPD.Y) + CI = VOPDCombineInfo(FirstMI, SecondMI, VOPD3); + else if (FirstCanBeVOPD.Y && SecondCanBeVOPD.X) + CI = VOPDCombineInfo(SecondMI, FirstMI, VOPD3); + else + return false; + // checkVOPDRegConstraints cares about program order, but doReplace + // cares about X-Y order in the constituted VOPD + return llvm::checkVOPDRegConstraints(*SII, *FirstMI, *SecondMI, + 
VOPD3); + }; + + if (checkVOPD(false) || (HasVOPD3 && checkVOPD(true))) { ReplaceCandidates.push_back(CI); ++MII; } diff --git a/llvm/lib/Target/AMDGPU/GCNProcessors.td b/llvm/lib/Target/AMDGPU/GCNProcessors.td index b5ffa64c3a4b4..46b7c2f50780d 100644 --- a/llvm/lib/Target/AMDGPU/GCNProcessors.td +++ b/llvm/lib/Target/AMDGPU/GCNProcessors.td @@ -9,11 +9,11 @@ // The code produced for "generic" is only useful for tests and cannot // reasonably be expected to execute on any particular target. def : ProcessorModel<"generic", NoSchedModel, - [FeatureGDS, FeatureGWS] + [] >; def : ProcessorModel<"generic-hsa", NoSchedModel, - [FeatureGDS, FeatureGWS, FeatureFlatAddressSpace] + [FeatureFlatAddressSpace] >; //===------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index acdd369f17925..e6dd98a104209 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -1100,6 +1100,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, return getGeneration() >= GFX10 || hasGFX940Insts(); } + bool hasFmaakFmamkF64Insts() const { return hasGFX1250Insts(); } + bool hasImageInsts() const { return HasImageInsts; } @@ -1482,6 +1484,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool hasGFX1250Insts() const { return GFX1250Insts; } + bool hasVOPD3() const { return GFX1250Insts; } + // \returns true if target has S_SETPRIO_INC_WG instruction. 
bool hasSetPrioIncWgInst() const { return HasSetPrioIncWgInst; } diff --git a/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp b/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp index 33c208495c500..9e66909e41052 100644 --- a/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp +++ b/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp @@ -36,11 +36,19 @@ using namespace llvm; bool llvm::checkVOPDRegConstraints(const SIInstrInfo &TII, const MachineInstr &FirstMI, - const MachineInstr &SecondMI) { + const MachineInstr &SecondMI, bool IsVOPD3) { namespace VOPD = AMDGPU::VOPD; const MachineFunction *MF = FirstMI.getMF(); const GCNSubtarget &ST = MF->getSubtarget(); + + if (IsVOPD3 && !ST.hasVOPD3()) + return false; + if (!IsVOPD3 && (TII.isVOP3(FirstMI) || TII.isVOP3(SecondMI))) + return false; + if (TII.isDPP(FirstMI) || TII.isDPP(SecondMI)) + return false; + const SIRegisterInfo *TRI = dyn_cast(ST.getRegisterInfo()); const MachineRegisterInfo &MRI = MF->getRegInfo(); // Literals also count against scalar bus limit @@ -80,23 +88,61 @@ bool llvm::checkVOPDRegConstraints(const SIInstrInfo &TII, for (auto CompIdx : VOPD::COMPONENTS) { const MachineInstr &MI = (CompIdx == VOPD::X) ? 
FirstMI : SecondMI; - const MachineOperand &Src0 = MI.getOperand(VOPD::Component::SRC0); + const MachineOperand &Src0 = *TII.getNamedOperand(MI, AMDGPU::OpName::src0); if (Src0.isReg()) { if (!TRI->isVectorRegister(MRI, Src0.getReg())) { if (!is_contained(UniqueScalarRegs, Src0.getReg())) UniqueScalarRegs.push_back(Src0.getReg()); } - } else { - if (!TII.isInlineConstant(MI, VOPD::Component::SRC0)) - addLiteral(Src0); + } else if (!TII.isInlineConstant(Src0)) { + if (IsVOPD3) + return false; + addLiteral(Src0); } if (InstInfo[CompIdx].hasMandatoryLiteral()) { + if (IsVOPD3) + return false; + auto CompOprIdx = InstInfo[CompIdx].getMandatoryLiteralCompOperandIndex(); addLiteral(MI.getOperand(CompOprIdx)); } if (MI.getDesc().hasImplicitUseOfPhysReg(AMDGPU::VCC)) UniqueScalarRegs.push_back(AMDGPU::VCC_LO); + + if (IsVOPD3) { + for (auto OpName : {AMDGPU::OpName::src1, AMDGPU::OpName::src2}) { + const MachineOperand *Src = TII.getNamedOperand(MI, OpName); + if (!Src) + continue; + if (OpName == AMDGPU::OpName::src2) { + if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::bitop3)) + continue; + if (MI.getOpcode() == AMDGPU::V_CNDMASK_B32_e64) { + UniqueScalarRegs.push_back(Src->getReg()); + continue; + } + } + if (!Src->isReg() || !TRI->isVGPR(MRI, Src->getReg())) + return false; + } + + for (auto OpName : {AMDGPU::OpName::clamp, AMDGPU::OpName::omod, + AMDGPU::OpName::op_sel}) { + if (TII.hasModifiersSet(MI, OpName)) + return false; + } + + // Neg is allowed, other modifiers are not. NB: even though sext has the + // same value as neg, there are no combinable instructions with sext. 
+ for (auto OpName : + {AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers, + AMDGPU::OpName::src2_modifiers}) { + const MachineOperand *Mods = TII.getNamedOperand(MI, OpName); + if (Mods && (Mods->getImm() & ~SISrcMods::NEG)) + return false; + } + } } if (UniqueLiterals.size() > 1) @@ -104,14 +150,33 @@ bool llvm::checkVOPDRegConstraints(const SIInstrInfo &TII, if ((UniqueLiterals.size() + UniqueScalarRegs.size()) > 2) return false; - // On GFX12 if both OpX and OpY are V_MOV_B32 then OPY uses SRC2 source-cache. + // On GFX12+ if both OpX and OpY are V_MOV_B32 then OPY uses SRC2 + // source-cache. bool SkipSrc = ST.getGeneration() >= AMDGPUSubtarget::GFX12 && FirstMI.getOpcode() == AMDGPU::V_MOV_B32_e32 && SecondMI.getOpcode() == AMDGPU::V_MOV_B32_e32; + bool AllowSameVGPR = ST.hasGFX1250Insts(); - if (InstInfo.hasInvalidOperand(getVRegIdx, SkipSrc)) + if (InstInfo.hasInvalidOperand(getVRegIdx, *TRI, SkipSrc, AllowSameVGPR, + IsVOPD3)) return false; + if (IsVOPD3) { + // BITOP3 can be converted to DUAL_BITOP2 only if src2 is zero. 
+ if (AMDGPU::hasNamedOperand(SecondMI.getOpcode(), AMDGPU::OpName::bitop3)) { + const MachineOperand &Src2 = + *TII.getNamedOperand(SecondMI, AMDGPU::OpName::src2); + if (!Src2.isImm() || Src2.getImm()) + return false; + } + if (AMDGPU::hasNamedOperand(FirstMI.getOpcode(), AMDGPU::OpName::bitop3)) { + const MachineOperand &Src2 = + *TII.getNamedOperand(FirstMI, AMDGPU::OpName::src2); + if (!Src2.isImm() || Src2.getImm()) + return false; + } + } + LLVM_DEBUG(dbgs() << "VOPD Reg Constraints Passed\n\tX: " << FirstMI << "\n\tY: " << SecondMI << "\n"); return true; @@ -125,21 +190,28 @@ static bool shouldScheduleVOPDAdjacent(const TargetInstrInfo &TII, const MachineInstr *FirstMI, const MachineInstr &SecondMI) { const SIInstrInfo &STII = static_cast(TII); + const GCNSubtarget &ST = STII.getSubtarget(); + unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(ST); unsigned Opc2 = SecondMI.getOpcode(); - auto SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2); - // One instruction case - if (!FirstMI) - return SecondCanBeVOPD.Y; + const auto checkVOPD = [&](bool VOPD3) -> bool { + auto SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2, EncodingFamily, VOPD3); - unsigned Opc = FirstMI->getOpcode(); - auto FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc); + // One instruction case + if (!FirstMI) + return SecondCanBeVOPD.Y || SecondCanBeVOPD.X; - if (!((FirstCanBeVOPD.X && SecondCanBeVOPD.Y) || - (FirstCanBeVOPD.Y && SecondCanBeVOPD.X))) - return false; + unsigned Opc = FirstMI->getOpcode(); + auto FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc, EncodingFamily, VOPD3); + + if (!((FirstCanBeVOPD.X && SecondCanBeVOPD.Y) || + (FirstCanBeVOPD.Y && SecondCanBeVOPD.X))) + return false; + + return checkVOPDRegConstraints(STII, *FirstMI, SecondMI, VOPD3); + }; - return checkVOPDRegConstraints(STII, *FirstMI, SecondMI); + return checkVOPD(false) || (ST.hasVOPD3() && checkVOPD(true)); } namespace { diff --git a/llvm/lib/Target/AMDGPU/GCNVOPDUtils.h b/llvm/lib/Target/AMDGPU/GCNVOPDUtils.h index 
22361b9a1a078..f776ae95e79c4 100644 --- a/llvm/lib/Target/AMDGPU/GCNVOPDUtils.h +++ b/llvm/lib/Target/AMDGPU/GCNVOPDUtils.h @@ -23,7 +23,7 @@ class SIInstrInfo; bool checkVOPDRegConstraints(const SIInstrInfo &TII, const MachineInstr &FirstMI, - const MachineInstr &SecondMI); + const MachineInstr &SecondMI, bool IsVOPD3); std::unique_ptr createVOPDPairingMutation(); diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp index 8ce12dfeda779..cb6319ed627ca 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -76,6 +76,18 @@ void AMDGPUInstPrinter::printU32ImmOperand(const MCInst *MI, unsigned OpNo, O << formatHex(MI->getOperand(OpNo).getImm() & 0xffffffff); } +void AMDGPUInstPrinter::printFP64ImmOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + // KIMM64 + // This part needs to align with AMDGPUInstPrinter::printImmediate64. 
+ uint64_t Imm = MI->getOperand(OpNo).getImm(); + if (STI.hasFeature(AMDGPU::Feature64BitLiterals) && Lo_32(Imm)) + O << "lit64(" << formatHex(static_cast(Imm)) << ')'; + else + O << formatHex(static_cast(Hi_32(Imm))); +} + void AMDGPUInstPrinter::printNamedBit(const MCInst *MI, unsigned OpNo, raw_ostream &O, StringRef BitName) { if (MI->getOperand(OpNo).getImm()) { diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h index 071e0a9d0fee6..fb803b1f81342 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h @@ -42,6 +42,8 @@ class AMDGPUInstPrinter : public MCInstPrinter { void printU16ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU32ImmOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); + void printFP64ImmOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); void printNamedBit(const MCInst *MI, unsigned OpNo, raw_ostream &O, StringRef BitName); void printOffset(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp index f0f655e93f4cc..4bb3942936f04 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp @@ -87,9 +87,10 @@ class AMDGPUMCCodeEmitter : public MCCodeEmitter { const MCSubtargetInfo &STI) const; /// Encode an fp or int literal. 
- std::optional getLitEncoding(const MCOperand &MO, - const MCOperandInfo &OpInfo, - const MCSubtargetInfo &STI) const; + std::optional + getLitEncoding(const MCOperand &MO, const MCOperandInfo &OpInfo, + const MCSubtargetInfo &STI, + bool HasMandatoryLiteral = false) const; void getBinaryCodeForInstr(const MCInst &MI, SmallVectorImpl &Fixups, APInt &Inst, APInt &Scratch, @@ -265,10 +266,9 @@ static uint32_t getLit64Encoding(uint64_t Val, const MCSubtargetInfo &STI, : 255; } -std::optional -AMDGPUMCCodeEmitter::getLitEncoding(const MCOperand &MO, - const MCOperandInfo &OpInfo, - const MCSubtargetInfo &STI) const { +std::optional AMDGPUMCCodeEmitter::getLitEncoding( + const MCOperand &MO, const MCOperandInfo &OpInfo, + const MCSubtargetInfo &STI, bool HasMandatoryLiteral) const { int64_t Imm; if (MO.isExpr()) { if (!MO.getExpr()->evaluateAsAbsolute(Imm)) @@ -303,9 +303,13 @@ AMDGPUMCCodeEmitter::getLitEncoding(const MCOperand &MO, case AMDGPU::OPERAND_REG_INLINE_C_FP64: case AMDGPU::OPERAND_REG_INLINE_AC_FP64: - case AMDGPU::OPERAND_REG_IMM_FP64: return getLit64Encoding(static_cast(Imm), STI, true); + case AMDGPU::OPERAND_REG_IMM_FP64: { + auto Enc = getLit64Encoding(static_cast(Imm), STI, true); + return (HasMandatoryLiteral && Enc == 255) ? 
254 : Enc; + } + case AMDGPU::OPERAND_REG_IMM_INT16: case AMDGPU::OPERAND_REG_INLINE_C_INT16: return getLit16IntEncoding(static_cast(Imm), STI); @@ -339,6 +343,7 @@ AMDGPUMCCodeEmitter::getLitEncoding(const MCOperand &MO, case AMDGPU::OPERAND_KIMM32: case AMDGPU::OPERAND_KIMM16: + case AMDGPU::OPERAND_KIMM64: return MO.getImm(); default: llvm_unreachable("invalid operand size"); @@ -685,7 +690,10 @@ void AMDGPUMCCodeEmitter::getMachineOpValueCommon( const MCInstrDesc &Desc = MCII.get(MI.getOpcode()); if (AMDGPU::isSISrcOperand(Desc, OpNo)) { - if (auto Enc = getLitEncoding(MO, Desc.operands()[OpNo], STI)) { + bool HasMandatoryLiteral = + AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::imm); + if (auto Enc = getLitEncoding(MO, Desc.operands()[OpNo], STI, + HasMandatoryLiteral)) { Op = *Enc; return; } diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h index bd7359a656716..a8649970aa825 100644 --- a/llvm/lib/Target/AMDGPU/SIDefines.h +++ b/llvm/lib/Target/AMDGPU/SIDefines.h @@ -98,6 +98,8 @@ enum : uint64_t { // VINTERP instruction format. VINTERP = 1 << 29, + VOPD3 = 1 << 30, + // High bits - other information. VM_CNT = UINT64_C(1) << 32, EXP_CNT = UINT64_C(1) << 33, @@ -227,6 +229,7 @@ enum OperandType : unsigned { /// Operand with 32-bit immediate that uses the constant bus. OPERAND_KIMM32, OPERAND_KIMM16, + OPERAND_KIMM64, /// Operands with an AccVGPR register or inline constant OPERAND_REG_INLINE_AC_INT32, @@ -252,7 +255,7 @@ enum OperandType : unsigned { OPERAND_SRC_LAST = OPERAND_REG_INLINE_C_LAST, OPERAND_KIMM_FIRST = OPERAND_KIMM32, - OPERAND_KIMM_LAST = OPERAND_KIMM16 + OPERAND_KIMM_LAST = OPERAND_KIMM64 }; } @@ -260,16 +263,16 @@ enum OperandType : unsigned { // Input operand modifiers bit-masks // NEG and SEXT share same bit-mask because they can't be set simultaneously. 
namespace SISrcMods { - enum : unsigned { - NONE = 0, - NEG = 1 << 0, // Floating-point negate modifier - ABS = 1 << 1, // Floating-point absolute modifier - SEXT = 1 << 0, // Integer sign-extend modifier - NEG_HI = ABS, // Floating-point negate high packed component modifier. - OP_SEL_0 = 1 << 2, - OP_SEL_1 = 1 << 3, - DST_OP_SEL = 1 << 3 // VOP3 dst op_sel (share mask with OP_SEL_1) - }; +enum : unsigned { + NONE = 0, + NEG = 1 << 0, // Floating-point negate modifier + ABS = 1 << 1, // Floating-point absolute modifier + SEXT = 1 << 4, // Integer sign-extend modifier + NEG_HI = ABS, // Floating-point negate high packed component modifier. + OP_SEL_0 = 1 << 2, + OP_SEL_1 = 1 << 3, + DST_OP_SEL = 1 << 3 // VOP3 dst op_sel (share mask with OP_SEL_1) +}; } namespace SIOutMods { diff --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td index c27d4e0df6fc5..a368bc5d0b1a1 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td +++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td @@ -55,6 +55,8 @@ class InstSI ; // constant bus. def KImmFP16 : KImmFPOperand; +// 64-bit VALU immediate operand that uses the constant bus. 
+def KImmFP64 : KImmFPOperand { + let DecoderMethod = "decodeOperand_KImmFP64"; + let PrintMethod = "printFP64ImmOperand"; +} + class FPInputModsMatchClass : AsmOperandClass { let Name = "RegOrImmWithFP"#opSize#"InputMods"; let ParserMethod = "parseRegOrImmWithFPInputMods"; @@ -1327,6 +1333,11 @@ class FPVCSrcInputModsMatchClass : FPInputModsMatchClass { let PredicateMethod = "isRegOrInlineImmWithFP"#opSize#"InputMods"; } +class FPVRegSrcInputModsMatchClass : FPInputModsMatchClass { + let Name = "VRegWithFP"#opSize#"InputMods"; + let PredicateMethod = "isVRegWithFP"#opSize#"InputMods"; +} + def FP16InputModsMatchClass : FPInputModsMatchClass<16>; class FPT16InputModsMatchClass : FPInputModsMatchClass<16> { let Name = !if(IsFake16, "RegOrImmWithFPFake16InputMods", @@ -1345,6 +1356,10 @@ class FP16VCSrcInputModsMatchClass !if(IsFake16, "true", "false") # ">"; } def FP32VCSrcInputModsMatchClass : FPVCSrcInputModsMatchClass<32>; +def FP64VCSrcInputModsMatchClass : FPVCSrcInputModsMatchClass<64>; + +def FP32VRegSrcInputModsMatchClass : FPVRegSrcInputModsMatchClass<32>; +def FP64VRegSrcInputModsMatchClass : FPVRegSrcInputModsMatchClass<64>; class InputMods : Operand { let OperandNamespace = "AMDGPU"; @@ -1371,6 +1386,10 @@ class FPT16VCSrcInputMods let EncoderMethod = "getMachineOpValueT16"; } def FP32VCSrcInputMods : FPInputMods; +def FP64VCSrcInputMods : FPInputMods; + +def FP32VRegSrcInputMods : FPInputMods; +def FP64VRegSrcInputMods : FPInputMods; class IntInputModsMatchClass : AsmOperandClass { let Name = "RegOrImmWithInt"#opSize#"InputMods"; @@ -1782,6 +1801,32 @@ class getVOPSrc0ForVT { 1 : VSrc_b32); } +// Returns the register class to use for source VGPR, SGPR or inline constant +// for the given VT. 
+class getVCSrcForVT { + RegisterOperand ret = + !if(VT.isFP, + !if(!eq(VT.Size, 64), + VCSrc_f64, + !cond(!eq(VT.Value, f16.Value) : VCSrc_f16, + !eq(VT.Value, bf16.Value) : VCSrc_bf16, + !eq(VT.Value, v2f16.Value) : VCSrc_v2f16, + !eq(VT.Value, v2bf16.Value) : VCSrc_v2bf16, + 1 : VCSrc_f32) + ), + !if(!eq(VT.Size, 64), + VCSrc_b64, + !if(!eq(VT.Value, i16.Value), + VCSrc_b16, + !if(!eq(VT.Value, v2i16.Value), + VCSrc_v2b16, + VCSrc_b32 + ) + ) + ) + ); +} + class getSOPSrcForVT { RegisterOperand ret = !if(!eq(VT.Size, 64), SSrc_b64, SSrc_b32); } @@ -1922,6 +1967,20 @@ class getSrcModDPP_t16 { IntT16_Lo128VRegInputMods, IntVRegInputMods)); } +// Return type of input modifiers operand for specified input operand for DPP +// or VOPD3. +class getSrcModVOP3VC { + Operand ret = + !if (VT.isFP, + !if (!or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value)), + FPT16VCSrcInputMods, + !if (!eq(VT.Value, f64.Value), FP64VCSrcInputMods, + FP32VCSrcInputMods)), + !if (!eq(VT.Value, i16.Value), + IntT16VCSrcInputMods, + Int32VCSrcInputMods)); +} + // Return type of input modifiers operand for specified input operand for DPP // True16: If the destination is a 16-bit value, the src0 modifier must hold // dst's opsel bit. Use a dummy value for DstVT if getting the mod for a src operand besides 0. @@ -1943,16 +2002,12 @@ class getSrc0ModVOP3DPP { Operand ret = !if(!and(!not(IsFake16), !eq(DstVT.Size, 16)), T16Dst, Normal); } -// GFX11 only supports VGPR src1, but the restriction is done in AsmParser -// and GCNDPPCombine. -class getSrcModVOP3DPP { +// Return type of input modifiers operand for specified input operand for VGPR +// only operands (VOPD3 vsrc1 and vsrc2). 
+class getSrcModVOP3V { Operand ret = - !if (VT.isFP, - !if (!or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value)), - FPT16VCSrcInputMods, FP32VCSrcInputMods), - !if (!eq(VT.Value, i16.Value), - IntT16VCSrcInputMods, - Int32VCSrcInputMods)); + !if (!eq(VT.Value, f64.Value), FP64VRegSrcInputMods, + FP32VRegSrcInputMods); } // Return type of input modifiers operand specified input operand for SDWA @@ -2185,6 +2240,27 @@ class getInsSDWA { + dag Src0 = !if(HasModifiers, + !if(IsCompY, (ins Src0Mod:$src0Y_modifiers, Src0VOPD3:$src0Y), + (ins Src0Mod:$src0X_modifiers, Src0VOPD3:$src0X)), + !if(IsCompY, (ins Src0VOPD3:$src0Y), (ins Src0VOPD3:$src0X))); + dag Src1 = !if(HasModifiers, + !if(IsCompY, (ins Src1Mod:$vsrc1Y_modifiers, Src1VOPD3:$vsrc1Y), + (ins Src1Mod:$vsrc1X_modifiers, Src1VOPD3:$vsrc1X)), + !if(IsCompY, (ins Src1VOPD3:$vsrc1Y), (ins Src1VOPD3:$vsrc1X))); + dag Src2 = !if(HasModifiers, + !if(IsCompY, (ins Src2Mod:$vsrc2Y_modifiers, Src2VOPD3:$vsrc2Y), + (ins Src2Mod:$vsrc2X_modifiers, Src2VOPD3:$vsrc2X)), + !if(IsCompY, (ins Src2VOPD3:$vsrc2Y), (ins Src2VOPD3:$vsrc2X))); + dag ret = !con(Src0, + !if(HasSrc1, Src1, (ins)), + !if(HasSrc2, Src2, (ins))); +} + // Outs for DPP class getOutsDPP { dag ret = !if(HasDst, @@ -2216,13 +2292,16 @@ class getAsm32 { !if(!eq(NumSrcArgs, 3), src0#src1#src2, ""); } -class getAsmVOPDPart { +class getAsmVOPDPart { + string mods = !if(HasModifiers, "_modifiers", ""); string dst = "$vdst" # XorY; - string src0 = ", $src0" # XorY; - string src1 = ", $vsrc1" # XorY; + string src0 = ", $src0" # XorY # mods; + string src1 = ", $vsrc1" # XorY # mods; + string src2 = ", $vsrc2" # XorY # mods; string ret = dst # !if(!ge(NumSrcArgs, 1), src0, "") # - !if(!ge(NumSrcArgs, 2), src1, ""); + !if(!ge(NumSrcArgs, 2), src1, "") # + !if(HasVOPD3Src2, src2, ""); } // Returns the assembly string for the inputs and outputs of a VOP3P @@ -2515,10 +2594,16 @@ class VOPProfile _ArgVT, bit _EnableClamp = 0> { field Operand Src1ModDPP = 
getSrcModDPP.ret; field Operand Src2ModDPP = getSrcModDPP.ret; field Operand Src0ModVOP3DPP = getSrc0ModVOP3DPP.ret; - field Operand Src1ModVOP3DPP = getSrcModVOP3DPP.ret; - field Operand Src2ModVOP3DPP = getSrcModVOP3DPP.ret; + field Operand Src1ModVOP3DPP = getSrcModVOP3VC.ret; + field Operand Src2ModVOP3DPP = getSrcModVOP3VC.ret; field Operand Src0ModSDWA = getSrcModSDWA.ret; field Operand Src1ModSDWA = getSrcModSDWA.ret; + field RegisterOperand Src0VOPD3 = getVCSrcForVT.ret; + field RegisterOperand Src1VOPD3 = getVregSrcForVT.ret; + field RegisterOperand Src2VOPD3 = getVregSrcForVT.ret; + field Operand Src0ModVOPD3 = getSrcModVOP3VC.ret; + field Operand Src1ModVOPD3 = getSrcModVOP3V.ret; + field Operand Src2ModVOPD3 = getSrcModVOP3V.ret; field bit IsMAI = 0; @@ -2642,6 +2727,13 @@ class VOPProfile _ArgVT, bit _EnableClamp = 0> { // component is FMAAK or FMAMK field dag InsVOPDX_immX = (ins !if(!eq(Src0VT.Size, 32), VSrc_f32, VSrc_f16):$src0X, VGPR_32:$vsrc1X); field dag InsVOPDY = (ins Src0RC32:$src0Y, Src1RC32:$vsrc1Y); + field bit HasVOPD3Src2 = HasSrc2; + field dag InsVOPD3X = getInsVOPD3.ret; + field dag InsVOPD3Y = getInsVOPD3.ret; field string Asm32 = getAsm32.ret; field string AsmDPP = !if(HasExtDPP, @@ -2662,6 +2754,8 @@ class VOPProfile _ArgVT, bit _EnableClamp = 0> { field string AsmSDWA9 = getAsmSDWA9.ret; field string AsmVOPDX = getAsmVOPDPart.ret; field string AsmVOPDY = getAsmVOPDPart.ret; + field string AsmVOPD3X = getAsmVOPDPart.ret; + field string AsmVOPD3Y = getAsmVOPDPart.ret; field string TieRegDPP = "$old"; field bit IsSMFMAC = false; field bit HasAbid = !and(IsMAI, HasSrc1); @@ -2705,8 +2799,8 @@ class VOPProfile_True16 : VOPProfile { let Src1VOP3DPP = getVOP3DPPSrcForVT.ret; let Src2VOP3DPP = getVOP3DPPSrcForVT.ret; let Src0ModVOP3DPP = getSrc0ModVOP3DPP.ret; - let Src1ModVOP3DPP = getSrcModVOP3DPP.ret; - let Src2ModVOP3DPP = getSrcModVOP3DPP.ret; + let Src1ModVOP3DPP = getSrcModVOP3VC.ret; + let Src2ModVOP3DPP = getSrcModVOP3VC.ret; let 
DstRC64 = getVALUDstForVT.ret; let Src0RC64 = getVOP3SrcForVT.ret; @@ -2735,8 +2829,8 @@ class VOPProfile_Fake16 : VOPProfile { let Src1VOP3DPP = getVOP3DPPSrcForVT.ret; let Src2VOP3DPP = getVOP3DPPSrcForVT.ret; let Src0ModVOP3DPP = getSrc0ModVOP3DPP.ret; - let Src1ModVOP3DPP = getSrcModVOP3DPP.ret; - let Src2ModVOP3DPP = getSrcModVOP3DPP.ret; + let Src1ModVOP3DPP = getSrcModVOP3VC.ret; + let Src2ModVOP3DPP = getSrcModVOP3VC.ret; } def VOP_F16_F16 : VOPProfile<[f16, f16, untyped, untyped]>; @@ -3224,7 +3318,7 @@ def FP4FP8DstByteSelTable : GenericTable { def VOPDComponentTable : GenericTable { let FilterClass = "VOPD_Component"; let CppTypeName = "VOPDComponentInfo"; - let Fields = ["BaseVOP", "VOPDOp", "CanBeVOPDX"]; + let Fields = ["BaseVOP", "VOPDOp", "CanBeVOPDX", "CanBeVOPD3X"]; let PrimaryKey = ["BaseVOP"]; let PrimaryKeyName = "getVOPDComponentHelper"; } @@ -3237,14 +3331,14 @@ def getVOPDBaseFromComponent : SearchIndex { def VOPDPairs : GenericTable { let FilterClass = "VOPD_Base"; let CppTypeName = "VOPDInfo"; - let Fields = ["Opcode", "OpX", "OpY", "SubTgt"]; + let Fields = ["Opcode", "OpX", "OpY", "SubTgt", "VOPD3"]; let PrimaryKey = ["Opcode"]; let PrimaryKeyName = "getVOPDOpcodeHelper"; } def getVOPDInfoFromComponentOpcodes : SearchIndex { let Table = VOPDPairs; - let Key = ["OpX", "OpY", "SubTgt"]; + let Key = ["OpX", "OpY", "SubTgt", "VOPD3"]; } include "SIInstructions.td" diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td index a6c7b164c8b2c..d24c301fc1e51 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -1267,6 +1267,8 @@ def VCSrc_bf16 : SrcRegOrImm9 ; def VCSrc_f16 : SrcRegOrImm9 ; def VCSrc_b32 : SrcRegOrImm9 ; def VCSrc_f32 : SrcRegOrImm9 ; +def VCSrc_b64 : SrcRegOrImm9 ; +def VCSrc_f64 : SrcRegOrImm9 ; def VCSrc_v2b16 : SrcRegOrImm9 ; def VCSrc_v2bf16: SrcRegOrImm9 ; def VCSrc_v2f16 : SrcRegOrImm9 ; diff --git 
a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td index c7c4276e0e252..2472b76fcf02c 100644 --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -1764,6 +1764,27 @@ let OtherPredicates = [HasExportInsts] in [(int_amdgcn_s_wait_kmcnt timm:$simm16)]>; } // End SubtargetPredicate = isGFX12Plus, hasSideEffects = 1 +let SubtargetPredicate = isGFX1250Plus, hasSideEffects = 1 in { + def S_WAIT_ASYNCCNT : + SOPP_Pseudo<"s_wait_asynccnt", (ins s16imm:$simm16), "$simm16", + [(int_amdgcn_s_wait_asynccnt timm:$simm16)]> { + let mayLoad = 1; + let mayStore = 1; + let maybeAtomic = 0; + let Uses = [ASYNCcnt]; + let Defs = [ASYNCcnt]; + } + def S_WAIT_TENSORCNT : + SOPP_Pseudo<"s_wait_tensorcnt", (ins s16imm:$simm16), "$simm16", + [(int_amdgcn_s_wait_tensorcnt timm:$simm16)]> { + let mayLoad = 1; + let mayStore = 1; + let maybeAtomic = 0; + let Uses = [TENSORcnt]; + let Defs = [TENSORcnt]; + } +} // End SubtargetPredicate = isGFX1250Plus, hasSideEffects = 1 + let SubtargetPredicate = HasWaitXcnt, hasSideEffects = 1 in { def S_WAIT_XCNT : SOPP_Pseudo<"s_wait_xcnt", (ins s16imm:$simm16), "$simm16">; @@ -2609,6 +2630,8 @@ defm S_WAIT_STORECNT_DSCNT : SOPP_Real_32_gfx12<0x049>; //===----------------------------------------------------------------------===// defm S_SETPRIO_INC_WG : SOPP_Real_32_gfx12<0x03e>; defm S_WAIT_XCNT : SOPP_Real_32_gfx12<0x045>; +defm S_WAIT_ASYNCCNT : SOPP_Real_32_gfx12<0x04a>; +defm S_WAIT_TENSORCNT : SOPP_Real_32_gfx12<0x04b>; //===----------------------------------------------------------------------===// // SOPP - GFX11, GFX12. 
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 10d80756943f5..a32078cc403e7 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -392,6 +392,7 @@ struct VOPDComponentInfo { uint16_t BaseVOP; uint16_t VOPDOp; bool CanBeVOPDX; + bool CanBeVOPD3X; }; struct VOPDInfo { @@ -399,6 +400,7 @@ struct VOPDInfo { uint16_t OpX; uint16_t OpY; uint16_t Subtarget; + bool VOPD3; }; struct VOPTrue16Info { @@ -591,6 +593,8 @@ const MFMA_F8F6F4_Info *getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ, } unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST) { + if (ST.hasFeature(AMDGPU::FeatureGFX1250Insts)) + return SIEncodingFamily::GFX1250; if (ST.hasFeature(AMDGPU::FeatureGFX12Insts)) return SIEncodingFamily::GFX12; if (ST.hasFeature(AMDGPU::FeatureGFX11Insts)) @@ -598,14 +602,27 @@ unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST) { llvm_unreachable("Subtarget generation does not support VOPD!"); } -CanBeVOPD getCanBeVOPD(unsigned Opc) { +CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3) { + bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(Opc) : 0; + Opc = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : Opc; const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc); - if (Info) - return {Info->CanBeVOPDX, true}; + if (Info) { + // Check that Opc can be used as VOPDY for this encoding. V_MOV_B32 as a + // VOPDX is just a placeholder here, it is supported on all encodings. + // TODO: This can be optimized by creating tables of supported VOPDY + // opcodes per encoding. + unsigned VOPDMov = AMDGPU::getVOPDOpcode(AMDGPU::V_MOV_B32_e32, VOPD3); + bool CanBeVOPDY = getVOPDFull(VOPDMov, AMDGPU::getVOPDOpcode(Opc, VOPD3), + EncodingFamily, VOPD3) != -1; + return {VOPD3 ? 
Info->CanBeVOPD3X : Info->CanBeVOPDX, CanBeVOPDY}; + } + return {false, false}; } -unsigned getVOPDOpcode(unsigned Opc) { +unsigned getVOPDOpcode(unsigned Opc, bool VOPD3) { + bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(Opc) : 0; + Opc = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : Opc; const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc); return Info ? Info->VOPDOp : ~0u; } @@ -742,9 +759,27 @@ int getMCOpcode(uint16_t Opcode, unsigned Gen) { return getMCOpcodeGen(Opcode, static_cast(Gen)); } -int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily) { +unsigned getBitOp2(unsigned Opc) { + switch (Opc) { + default: + return 0; + case AMDGPU::V_AND_B32_e32: + return 0x40; + case AMDGPU::V_OR_B32_e32: + return 0x54; + case AMDGPU::V_XOR_B32_e32: + return 0x14; + case AMDGPU::V_XNOR_B32_e32: + return 0x41; + } +} + +int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily, + bool VOPD3) { + bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(OpY) : 0; + OpY = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : OpY; const VOPDInfo *Info = - getVOPDInfoFromComponentOpcodes(OpX, OpY, EncodingFamily); + getVOPDInfoFromComponentOpcodes(OpX, OpY, EncodingFamily, VOPD3); return Info ? 
Info->Opcode : -1; } @@ -759,7 +794,7 @@ std::pair getVOPDComponents(unsigned VOPDOpcode) { namespace VOPD { -ComponentProps::ComponentProps(const MCInstrDesc &OpDesc) { +ComponentProps::ComponentProps(const MCInstrDesc &OpDesc, bool VOP3Layout) { assert(OpDesc.getNumDefs() == Component::DST_NUM); assert(OpDesc.getOperandConstraint(Component::SRC0, MCOI::TIED_TO) == -1); @@ -767,10 +802,34 @@ ComponentProps::ComponentProps(const MCInstrDesc &OpDesc) { auto TiedIdx = OpDesc.getOperandConstraint(Component::SRC2, MCOI::TIED_TO); assert(TiedIdx == -1 || TiedIdx == Component::DST); HasSrc2Acc = TiedIdx != -1; + Opcode = OpDesc.getOpcode(); - SrcOperandsNum = OpDesc.getNumOperands() - OpDesc.getNumDefs(); + IsVOP3 = VOP3Layout || (OpDesc.TSFlags & SIInstrFlags::VOP3); + SrcOperandsNum = AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2) ? 3 + : AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm) ? 3 + : AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src1) ? 2 + : 1; assert(SrcOperandsNum <= Component::MAX_SRC_NUM); + if (Opcode == AMDGPU::V_CNDMASK_B32_e32 || + Opcode == AMDGPU::V_CNDMASK_B32_e64) { + // CNDMASK is an awkward exception, it has FP modifiers, but not FP + // operands. + NumVOPD3Mods = 2; + if (IsVOP3) + SrcOperandsNum = 3; + } else if (isSISrcFPOperand(OpDesc, + getNamedOperandIdx(Opcode, OpName::src0))) { + // All FP VOPD instructions have Neg modifiers for all operands except + // for tied src2. 
+ NumVOPD3Mods = SrcOperandsNum; + if (HasSrc2Acc) + --NumVOPD3Mods; + } + + if (OpDesc.TSFlags & SIInstrFlags::VOP3) + return; + auto OperandsNum = OpDesc.getNumOperands(); unsigned CompOprIdx; for (CompOprIdx = Component::SRC1; CompOprIdx < OperandsNum; ++CompOprIdx) { @@ -781,6 +840,10 @@ ComponentProps::ComponentProps(const MCInstrDesc &OpDesc) { } } +int ComponentProps::getBitOp3OperandIdx() const { + return getNamedOperandIdx(Opcode, OpName::bitop3); +} + unsigned ComponentInfo::getIndexInParsedOperands(unsigned CompOprIdx) const { assert(CompOprIdx < Component::MAX_OPR_NUM); @@ -796,19 +859,58 @@ unsigned ComponentInfo::getIndexInParsedOperands(unsigned CompOprIdx) const { } std::optional InstInfo::getInvalidCompOperandIndex( - std::function GetRegIdx, bool SkipSrc) const { + std::function GetRegIdx, + const MCRegisterInfo &MRI, bool SkipSrc, bool AllowSameVGPR, + bool VOPD3) const { + + auto OpXRegs = getRegIndices(ComponentIndex::X, GetRegIdx, + CompInfo[ComponentIndex::X].isVOP3()); + auto OpYRegs = getRegIndices(ComponentIndex::Y, GetRegIdx, + CompInfo[ComponentIndex::Y].isVOP3()); + + const auto banksOverlap = [&MRI](MCRegister X, MCRegister Y, + unsigned BanksMask) -> bool { + MCRegister BaseX = MRI.getSubReg(X, AMDGPU::sub0); + MCRegister BaseY = MRI.getSubReg(Y, AMDGPU::sub0); + if (!BaseX) + BaseX = X; + if (!BaseY) + BaseY = Y; + if ((BaseX & BanksMask) == (BaseY & BanksMask)) + return true; + if (BaseX != X /* This is 64-bit register */ && + ((BaseX + 1) & BanksMask) == (BaseY & BanksMask)) + return true; + if (BaseY != Y && (BaseX & BanksMask) == ((BaseY + 1) & BanksMask)) + return true; - auto OpXRegs = getRegIndices(ComponentIndex::X, GetRegIdx); - auto OpYRegs = getRegIndices(ComponentIndex::Y, GetRegIdx); + // If both are 64-bit bank conflict will be detected yet while checking + // the first subreg. + return false; + }; - const unsigned CompOprNum = - SkipSrc ? 
Component::DST_NUM : Component::MAX_OPR_NUM; unsigned CompOprIdx; - for (CompOprIdx = 0; CompOprIdx < CompOprNum; ++CompOprIdx) { - unsigned BanksMasks = VOPD_VGPR_BANK_MASKS[CompOprIdx]; - if (OpXRegs[CompOprIdx] && OpYRegs[CompOprIdx] && - ((OpXRegs[CompOprIdx] & BanksMasks) == - (OpYRegs[CompOprIdx] & BanksMasks))) + for (CompOprIdx = 0; CompOprIdx < Component::MAX_OPR_NUM; ++CompOprIdx) { + unsigned BanksMasks = VOPD3 ? VOPD3_VGPR_BANK_MASKS[CompOprIdx] + : VOPD_VGPR_BANK_MASKS[CompOprIdx]; + if (!OpXRegs[CompOprIdx] || !OpYRegs[CompOprIdx]) + continue; + + if (SkipSrc && CompOprIdx >= Component::DST_NUM) + continue; + + if (CompOprIdx < Component::DST_NUM) { + // Even if we do not check vdst parity, vdst operands still shall not + // overlap. + if (MRI.regsOverlap(OpXRegs[CompOprIdx], OpYRegs[CompOprIdx])) + return CompOprIdx; + if (VOPD3) // No need to check dst parity. + continue; + } + + if (banksOverlap(OpXRegs[CompOprIdx], OpYRegs[CompOprIdx], BanksMasks) && + (!AllowSameVGPR || CompOprIdx < Component::DST_NUM || + OpXRegs[CompOprIdx] != OpYRegs[CompOprIdx])) return CompOprIdx; } @@ -822,9 +924,10 @@ std::optional InstInfo::getInvalidCompOperandIndex( // GetRegIdx(Component, MCOperandIdx) must return a VGPR register index // for the specified component and MC operand. The callback must return 0 // if the operand is not a register or not a VGPR. -InstInfo::RegIndices InstInfo::getRegIndices( - unsigned CompIdx, - std::function GetRegIdx) const { +InstInfo::RegIndices +InstInfo::getRegIndices(unsigned CompIdx, + std::function GetRegIdx, + bool VOPD3) const { assert(CompIdx < COMPONENTS_NUM); const auto &Comp = CompInfo[CompIdx]; @@ -836,7 +939,8 @@ InstInfo::RegIndices InstInfo::getRegIndices( unsigned CompSrcIdx = CompOprIdx - DST_NUM; RegIndices[CompOprIdx] = Comp.hasRegSrcOperand(CompSrcIdx) - ? GetRegIdx(CompIdx, Comp.getIndexOfSrcInMCOperands(CompSrcIdx)) + ? 
GetRegIdx(CompIdx, + Comp.getIndexOfSrcInMCOperands(CompSrcIdx, VOPD3)) : 0; } return RegIndices; @@ -853,8 +957,9 @@ VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode, auto [OpX, OpY] = getVOPDComponents(VOPDOpcode); const auto &OpXDesc = InstrInfo->get(OpX); const auto &OpYDesc = InstrInfo->get(OpY); - VOPD::ComponentInfo OpXInfo(OpXDesc, VOPD::ComponentKind::COMPONENT_X); - VOPD::ComponentInfo OpYInfo(OpYDesc, OpXInfo); + bool VOPD3 = InstrInfo->get(VOPDOpcode).TSFlags & SIInstrFlags::VOPD3; + VOPD::ComponentInfo OpXInfo(OpXDesc, VOPD::ComponentKind::COMPONENT_X, VOPD3); + VOPD::ComponentInfo OpYInfo(OpYDesc, OpXInfo, VOPD3); return VOPD::InstInfo(OpXInfo, OpYInfo); } diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 4f7d18170d586..6708e0a3f4549 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -593,6 +593,11 @@ bool getMAIIsDGEMM(unsigned Opc); LLVM_READONLY bool getMAIIsGFX940XDL(unsigned Opc); +// Get an equivalent BitOp3 for a binary logical \p Opc. +// \returns BitOp3 modifier for the logical operation or zero. +// Used in VOPD3 conversion. 
+unsigned getBitOp2(unsigned Opc); + struct CanBeVOPD { bool X; bool Y; @@ -603,7 +608,7 @@ LLVM_READONLY unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST); LLVM_READONLY -CanBeVOPD getCanBeVOPD(unsigned Opc); +CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3); LLVM_READNONE uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal); @@ -626,10 +631,11 @@ LLVM_READONLY int getMCOpcode(uint16_t Opcode, unsigned Gen); LLVM_READONLY -unsigned getVOPDOpcode(unsigned Opc); +unsigned getVOPDOpcode(unsigned Opc, bool VOPD3); LLVM_READONLY -int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily); +int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily, + bool VOPD3); LLVM_READONLY bool isVOPD(unsigned Opc); @@ -662,6 +668,7 @@ enum Component : unsigned { // LSB mask for VGPR banks per VOPD component operand. // 4 banks result in a mask 3, setting 2 lower bits. constexpr unsigned VOPD_VGPR_BANK_MASKS[] = {1, 3, 3, 1}; +constexpr unsigned VOPD3_VGPR_BANK_MASKS[] = {1, 3, 3, 3}; enum ComponentIndex : unsigned { X = 0, Y = 1 }; constexpr unsigned COMPONENTS[] = {ComponentIndex::X, ComponentIndex::Y}; @@ -673,10 +680,13 @@ class ComponentProps { unsigned SrcOperandsNum = 0; unsigned MandatoryLiteralIdx = ~0u; bool HasSrc2Acc = false; + unsigned NumVOPD3Mods = 0; + unsigned Opcode = 0; + bool IsVOP3 = false; public: ComponentProps() = default; - ComponentProps(const MCInstrDesc &OpDesc); + ComponentProps(const MCInstrDesc &OpDesc, bool VOP3Layout = false); // Return the total number of src operands this component has. unsigned getCompSrcOperandsNum() const { return SrcOperandsNum; } @@ -706,6 +716,18 @@ class ComponentProps { // Return true iif this component has tied src2. bool hasSrc2Acc() const { return HasSrc2Acc; } + // Return a number of source modifiers if instruction is used in VOPD3. + unsigned getCompVOPD3ModsNum() const { return NumVOPD3Mods; } + + // Return opcode of the component. 
+ unsigned getOpcode() const { return Opcode; } + + // Returns if component opcode is in VOP3 encoding. + unsigned isVOP3() const { return IsVOP3; } + + // Return index of BitOp3 operand or -1. + int getBitOp3OperandIdx() const; + private: bool hasMandatoryLiteralAt(unsigned CompSrcIdx) const { assert(CompSrcIdx < Component::MAX_SRC_NUM); @@ -758,7 +780,15 @@ class ComponentLayout { // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands] // Each ComponentKind has operand indices defined below. static constexpr unsigned MC_DST_IDX[] = {0, 0, 1}; - static constexpr unsigned FIRST_MC_SRC_IDX[] = {1, 2, 2 /* + OpX.MCSrcNum */}; + + // VOPD3 instructions may have 2 or 3 source modifiers, src2 modifier is not + // used if there is tied accumulator. Indexing of this array: + // MC_SRC_IDX[VOPD3ModsNum][SrcNo]. This returns an index for a SINGLE + // instruction layout, add 1 for COMPONENT_X or COMPONENT_Y. For the second + // component add OpX.MCSrcNum + OpX.VOPD3ModsNum. + // For VOPD1/VOPD2 use column with zero modifiers. + static constexpr unsigned SINGLE_MC_SRC_IDX[4][3] = { + {1, 2, 3}, {2, 3, 4}, {2, 4, 5}, {2, 4, 6}}; // Parsed operands of regular instructions are ordered as follows: // Mnemo dst src0 [vsrc1 ...] @@ -774,25 +804,40 @@ class ComponentLayout { private: const ComponentKind Kind; const ComponentProps PrevComp; + const unsigned VOPD3ModsNum; + const int BitOp3Idx; // Index of bitop3 operand or -1 public: // Create layout for COMPONENT_X or SINGLE component. - ComponentLayout(ComponentKind Kind) : Kind(Kind) { + ComponentLayout(ComponentKind Kind, unsigned VOPD3ModsNum, int BitOp3Idx) + : Kind(Kind), VOPD3ModsNum(VOPD3ModsNum), BitOp3Idx(BitOp3Idx) { assert(Kind == ComponentKind::SINGLE || Kind == ComponentKind::COMPONENT_X); } // Create layout for COMPONENT_Y which depends on COMPONENT_X layout. 
- ComponentLayout(const ComponentProps &OpXProps) - : Kind(ComponentKind::COMPONENT_Y), PrevComp(OpXProps) {} + ComponentLayout(const ComponentProps &OpXProps, unsigned VOPD3ModsNum, + int BitOp3Idx) + : Kind(ComponentKind::COMPONENT_Y), PrevComp(OpXProps), + VOPD3ModsNum(VOPD3ModsNum), BitOp3Idx(BitOp3Idx) {} public: // Return the index of dst operand in MCInst operands. unsigned getIndexOfDstInMCOperands() const { return MC_DST_IDX[Kind]; } // Return the index of the specified src operand in MCInst operands. - unsigned getIndexOfSrcInMCOperands(unsigned CompSrcIdx) const { + unsigned getIndexOfSrcInMCOperands(unsigned CompSrcIdx, bool VOPD3) const { assert(CompSrcIdx < Component::MAX_SRC_NUM); - return FIRST_MC_SRC_IDX[Kind] + getPrevCompSrcNum() + CompSrcIdx; + + if (Kind == SINGLE && CompSrcIdx == 2 && BitOp3Idx != -1) + return BitOp3Idx; + + if (VOPD3) { + return SINGLE_MC_SRC_IDX[VOPD3ModsNum][CompSrcIdx] + getPrevCompSrcNum() + + getPrevCompVOPD3ModsNum() + (Kind != SINGLE ? 1 : 0); + } + + return SINGLE_MC_SRC_IDX[0][CompSrcIdx] + getPrevCompSrcNum() + + (Kind != SINGLE ? 1 : 0); } // Return the index of dst operand in the parsed operands array. @@ -813,19 +858,27 @@ class ComponentLayout { unsigned getPrevCompParsedSrcNum() const { return PrevComp.getCompParsedSrcOperandsNum(); } + unsigned getPrevCompVOPD3ModsNum() const { + return PrevComp.getCompVOPD3ModsNum(); + } }; // Layout and properties of VOPD components. -class ComponentInfo : public ComponentLayout, public ComponentProps { +class ComponentInfo : public ComponentProps, public ComponentLayout { public: // Create ComponentInfo for COMPONENT_X or SINGLE component. 
ComponentInfo(const MCInstrDesc &OpDesc, - ComponentKind Kind = ComponentKind::SINGLE) - : ComponentLayout(Kind), ComponentProps(OpDesc) {} + ComponentKind Kind = ComponentKind::SINGLE, + bool VOP3Layout = false) + : ComponentProps(OpDesc, VOP3Layout), + ComponentLayout(Kind, getCompVOPD3ModsNum(), getBitOp3OperandIdx()) {} // Create ComponentInfo for COMPONENT_Y which depends on COMPONENT_X layout. - ComponentInfo(const MCInstrDesc &OpDesc, const ComponentProps &OpXProps) - : ComponentLayout(OpXProps), ComponentProps(OpDesc) {} + ComponentInfo(const MCInstrDesc &OpDesc, const ComponentProps &OpXProps, + bool VOP3Layout = false) + : ComponentProps(OpDesc, VOP3Layout), + ComponentLayout(OpXProps, getCompVOPD3ModsNum(), + getBitOp3OperandIdx()) {} // Map component operand index to parsed operand index. // Return 0 if the specified operand does not exist. @@ -857,23 +910,36 @@ class InstInfo { // if the operand is not a register or not a VGPR. // If \p SkipSrc is set to true then constraints for source operands are not // checked. + // If \p AllowSameVGPR is set then same VGPRs are allowed for X and Y sources + // even though it violates requirement to be from different banks. + // If \p VOPD3 is set to true both dst registers allowed to be either odd + // or even and instruction may have real src2 as opposed to tied accumulator. bool hasInvalidOperand(std::function GetRegIdx, - bool SkipSrc = false) const { - return getInvalidCompOperandIndex(GetRegIdx, SkipSrc).has_value(); + const MCRegisterInfo &MRI, bool SkipSrc = false, + bool AllowSameVGPR = false, bool VOPD3 = false) const { + return getInvalidCompOperandIndex(GetRegIdx, MRI, SkipSrc, AllowSameVGPR, + VOPD3) + .has_value(); } // Check VOPD operands constraints. // Return the index of an invalid component operand, if any. // If \p SkipSrc is set to true then constraints for source operands are not - // checked. + // checked except for being from the same halves of VGPR file on gfx1250. 
+ // If \p AllowSameVGPR is set then same VGPRs are allowed for X and Y sources + // even though it violates requirement to be from different banks. + // If \p VOPD3 is set to true both dst registers allowed to be either odd + // or even and instruction may have real src2 as opposed to tied accumulator. std::optional getInvalidCompOperandIndex( std::function GetRegIdx, - bool SkipSrc = false) const; + const MCRegisterInfo &MRI, bool SkipSrc = false, + bool AllowSameVGPR = false, bool VOPD3 = false) const; private: RegIndices getRegIndices(unsigned ComponentIdx, - std::function GetRegIdx) const; + std::function GetRegIdx, + bool VOPD3) const; }; } // namespace VOPD @@ -1537,6 +1603,7 @@ inline unsigned getOperandSize(const MCOperandInfo &OpInfo) { case AMDGPU::OPERAND_REG_INLINE_C_INT64: case AMDGPU::OPERAND_REG_INLINE_C_FP64: case AMDGPU::OPERAND_REG_INLINE_AC_FP64: + case AMDGPU::OPERAND_KIMM64: return 8; case AMDGPU::OPERAND_REG_IMM_INT16: diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 2c0871347ebb9..211112e5262a3 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -892,6 +892,8 @@ class VOP1_DPP16_Gen op, VOP1_DPP_Pseudo ps, GFXGen Gen, VOPProfile p = VOP1_DPP16 { let AssemblerPredicate = Gen.AssemblerPredicate; let DecoderNamespace = Gen.DecoderNamespace; + let OtherPredicates = !listconcat(ps.OtherPredicates, + !if(p.HasExt64BitDPP, [HasDPALU_DPP], [])); } class VOP1_DPP8 op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl> : @@ -967,7 +969,8 @@ multiclass VOP1_Real_dpp_with_name op, string opName, multiclass VOP1_Real_dpp8 op, string opName = NAME> { defvar ps = !cast(opName#"_e32"); - def _dpp8#Gen.Suffix : VOP1_DPP8_Gen; + if !not(ps.Pfl.HasExt64BitDPP) then + def _dpp8#Gen.Suffix : VOP1_DPP8_Gen; } multiclass VOP1_Real_dpp8_with_name op, string opName, @@ -976,7 +979,8 @@ multiclass VOP1_Real_dpp8_with_name op, string opName, let AsmString = asmName # 
ps.Pfl.AsmDPP8, DecoderNamespace = Gen.DecoderNamespace # !if(ps.Pfl.IsRealTrue16, "", "_FAKE16") in { - defm NAME : VOP1_Real_dpp8; + if !not(ps.Pfl.HasExt64BitDPP) then + defm NAME : VOP1_Real_dpp8; } } diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index 859d5bae3d460..25c6cbc3e1ab5 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -36,6 +36,20 @@ class VOP2_MADKe op, VOPProfile P> : Enc64 { let Inst{63-32} = imm; } +class VOP2_MADK64e op, VOPProfile P> : Enc96 { + bits<8> vdst; + bits<9> src0; + bits<8> src1; + bits<64> imm; + + let Inst{8-0} = !if(P.HasSrc0, src0, 0); + let Inst{16-9} = !if(P.HasSrc1, src1, 0); + let Inst{24-17} = !if(P.EmitDst, vdst, 0); + let Inst{30-25} = op; + let Inst{31} = 0x0; // encoding + let Inst{95-32} = imm; +} + class VOP2_SDWAe op, VOPProfile P> : VOP_SDWAe

{ bits<8> vdst; bits<8> src1; @@ -148,7 +162,7 @@ multiclass VOP2Inst_e32; } multiclass - VOP2Inst_e32_VOPD VOPDOp, + VOP2Inst_e32_VOPD VOPDOp, string VOPDName, SDPatternOperator node = null_frag, string revOp = opName> { defm NAME : VOP2Inst_e32, @@ -167,6 +181,15 @@ multiclass VOP2Inst_e64 VOPDOp, + string VOPDName, + SDPatternOperator node = null_frag, + string revOp = opName> { + defm NAME: VOP2Inst_e64, + VOPD_Component; +} + multiclass VOP2Inst_sdwa { @@ -227,12 +250,12 @@ multiclass VOP2Inst_e64_t16 VOPDOp, + bits<6> VOPDOp, string VOPDName, SDPatternOperator node = null_frag, string revOp = opName> : VOP2Inst_e32_VOPD, - VOP2Inst_e64, + VOP2Inst_e64_VOPD, VOP2Inst_sdwa { if P.HasExtDPP then def _dpp : VOP2_DPP_Pseudo ; @@ -288,7 +311,7 @@ multiclass VOP2bInstAliases { } multiclass - VOP2eInst_Base VOPDOp, string VOPDName, + VOP2eInst_Base VOPDOp, string VOPDName, SDPatternOperator node, string revOp, bit useSGPRInput> { let SchedRW = [Write32Bit] in { @@ -310,9 +333,14 @@ multiclass def _dpp : VOP2_DPP_Pseudo ; } - def _e64 : VOP3InstBase , - Commutable_REV { - let isReMaterializable = 1; + let isReMaterializable = 1 in { + if !empty(VOPDName) then + def _e64 : VOP3InstBase , + Commutable_REV; + else + def _e64 : VOP3InstBase , + Commutable_REV, + VOPD_Component; } let SubtargetPredicate = isGFX11Plus in { @@ -328,7 +356,7 @@ multiclass : VOP2eInst_Base; multiclass - VOP2eInst_VOPD VOPDOp, string VOPDName, + VOP2eInst_VOPD VOPDOp, string VOPDName, SDPatternOperator node = null_frag, string revOp = opName, bit useSGPRInput = !eq(P.NumSrcArgs, 3)> : VOP2eInst_Base; @@ -361,10 +389,14 @@ class VOP_MADK_Base : VOPProfile <[vt, vt, vt, vt]> { } class VOP_MADAK : VOP_MADK_Base { - field Operand ImmOpType = !if(!eq(vt.Size, 32), KImmFP32, KImmFP16); + field Operand ImmOpType = !if(!eq(vt.Size, 32), KImmFP32, + !if(!eq(vt.Size, 64), KImmFP64, + KImmFP16)); field dag Ins32 = !if(!eq(vt.Size, 32), (ins VSrc_f32:$src0, VGPR_32:$src1, ImmOpType:$imm), - (ins 
VSrc_f16:$src0, VGPR_32:$src1, ImmOpType:$imm)); + !if(!eq(vt.Size, 64), + (ins VSrc_f64:$src0, VReg_64:$src1, ImmOpType:$imm), + (ins VSrc_f16:$src0, VGPR_32:$src1, ImmOpType:$imm))); field dag InsVOPDX = (ins VSrc_f32:$src0X, VGPR_32:$vsrc1X, ImmOpType:$imm); let InsVOPDX_immX = (ins VSrc_f32:$src0X, VGPR_32:$vsrc1X, ImmOpType:$immX); field dag InsVOPDY = (ins VSrc_f32:$src0Y, VGPR_32:$vsrc1Y, ImmOpType:$imm); @@ -390,12 +422,17 @@ def VOP_MADAK_F16_fake16 : VOP_MADAK { let Ins32 = (ins VSrcFake16_f16_Lo128:$src0, VGPRSrc_32_Lo128:$src1, ImmOpType:$imm); } def VOP_MADAK_F32 : VOP_MADAK ; +def VOP_MADAK_F64 : VOP_MADAK ; class VOP_MADMK : VOP_MADK_Base { - field Operand ImmOpType = !if(!eq(vt.Size, 32), KImmFP32, KImmFP16); + field Operand ImmOpType = !if(!eq(vt.Size, 32), KImmFP32, + !if(!eq(vt.Size, 64), KImmFP64, + KImmFP16)); field dag Ins32 = !if(!eq(vt.Size, 32), (ins VSrc_f32:$src0, ImmOpType:$imm, VGPR_32:$src1), - (ins VSrc_f16:$src0, ImmOpType:$imm, VGPR_32:$src1)); + !if(!eq(vt.Size, 64), + (ins VSrc_f64:$src0, ImmOpType:$imm, VReg_64:$src1), + (ins VSrc_f16:$src0, ImmOpType:$imm, VGPR_32:$src1))); field dag InsVOPDX = (ins VSrc_f32:$src0X, ImmOpType:$imm, VGPR_32:$vsrc1X); let InsVOPDX_immX = (ins VSrc_f32:$src0X, ImmOpType:$immX, VGPR_32:$vsrc1X); field dag InsVOPDY = (ins VSrc_f32:$src0Y, ImmOpType:$imm, VGPR_32:$vsrc1Y); @@ -421,6 +458,7 @@ def VOP_MADMK_F16_fake16 : VOP_MADMK { let Ins32 = (ins VSrcFake16_f16_Lo128:$src0, ImmOpType:$imm, VGPRSrc_32_Lo128:$src1); } def VOP_MADMK_F32 : VOP_MADMK ; +def VOP_MADMK_F64 : VOP_MADMK ; // Returns the vreg register class to use for sources of VOP3 instructions for the // given VT. 
@@ -458,6 +496,12 @@ class VOP_MAC : VOPProfile <[vt0, vt1, vt1, v // We need a dummy src2 tied to dst to track the use of that register for s_delay_alu let InsVOPDX = (ins Src0RC32:$src0X, Src1RC32:$vsrc1X, VGPRSrc_32:$src2X); let InsVOPDY = (ins Src0RC32:$src0Y, Src1RC32:$vsrc1Y, VGPRSrc_32:$src2Y); + let InsVOPD3X = (ins Src0ModVOPD3:$src0X_modifiers, Src0VOPD3:$src0X, + Src1ModVOPD3:$vsrc1X_modifiers, Src1RC32:$vsrc1X, + VGPRSrc_32:$src2X); + let InsVOPD3Y = (ins Src0ModVOPD3:$src0Y_modifiers, Src0VOPD3:$src0Y, + Src1ModVOPD3:$vsrc1Y_modifiers, Src1RC32:$vsrc1Y, + VGPRSrc_32:$src2Y); let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0, Src1ModDPP:$src1_modifiers, Src1DPP:$src1, @@ -482,6 +526,7 @@ class VOP_MAC : VOPProfile <[vt0, vt1, vt1, v 0 /*Src2HasMods*/, DstVT>.ret; let HasSrc2 = 0; let HasSrc2Mods = 0; + let HasVOPD3Src2 = 0; let HasExt = 1; let HasExtDPP = 1; @@ -522,8 +567,8 @@ def VOP_MAC_F16_t16 : VOP_MAC { let Src1VOP3DPP = getVOP3DPPSrcForVT.ret; let Src2VOP3DPP = getVOP3DPPSrcForVT.ret; let Src0ModVOP3DPP = getSrc0ModVOP3DPP.ret; - let Src1ModVOP3DPP = getSrcModVOP3DPP.ret; - let Src2ModVOP3DPP = getSrcModVOP3DPP.ret; + let Src1ModVOP3DPP = getSrcModVOP3VC.ret; + let Src2ModVOP3DPP = getSrcModVOP3VC.ret; let Src0Mod = getSrc0Mod.ret; let Src1Mod = getSrcMod.ret; let Src2Mod = getSrcMod.ret; @@ -554,8 +599,8 @@ def VOP_MAC_F16_fake16 : VOP_MAC { let Src1VOP3DPP = getVOP3DPPSrcForVT.ret; let Src2VOP3DPP = getVOP3DPPSrcForVT.ret; let Src0ModVOP3DPP = getSrc0ModVOP3DPP.ret; - let Src1ModVOP3DPP = getSrcModVOP3DPP.ret; - let Src2ModVOP3DPP = getSrcModVOP3DPP.ret; + let Src1ModVOP3DPP = getSrcModVOP3VC.ret; + let Src2ModVOP3DPP = getSrcModVOP3VC.ret; let Src0Mod = getSrc0Mod.ret; let Src1Mod = getSrcMod.ret; let Src2Mod = getSrcMod.ret; @@ -724,7 +769,14 @@ class VOP2e_SGPR ArgVT> : VOPProfile { let HasExtSDWA9 = 1; } -def VOP2e_I32_I32_I32_I1 : VOP2e_SGPR<[i32, i32, i32, i1]>; +def VOP2e_I32_I32_I32_I1 : VOP2e_SGPR<[i32, i32, i32, i1]> { + 
let Src2VOPD3 = SSrc_i1; + let InsVOPD3X = (ins FP32VCSrcInputMods:$src0X_modifiers, Src0VOPD3:$src0X, FP32VRegSrcInputMods:$vsrc1X_modifiers, Src1VOPD3:$vsrc1X, Src2VOPD3:$vsrc2X); + let InsVOPD3Y = (ins FP32VCSrcInputMods:$src0Y_modifiers, Src0VOPD3:$src0Y, FP32VRegSrcInputMods:$vsrc1Y_modifiers, Src1VOPD3:$vsrc1Y, Src2VOPD3:$vsrc2Y); + let AsmVOPD3X = "$vdstX, $src0X_modifiers, $vsrc1X_modifiers, $vsrc2X"; + let AsmVOPD3Y = "$vdstY, $src0Y_modifiers, $vsrc1Y_modifiers, $vsrc2Y"; + let HasVOPD3Src2 = 0; +} def VOP2e_I16_I16_I16_I1 : VOP2e_SGPR<[i16, i16, i16, i1]>; // V_CNDMASK_B16 is VOP3 only def VOP2e_I16_I16_I16_I1_true16 : VOP2e_SGPR<[i16, i16, i16, i1]> { @@ -745,7 +797,7 @@ def VOP2e_I16_I16_I16_I1_true16 : VOP2e_SGPR<[i16, i16, i16, i1]> { let Src0VOP3DPP = VGPRSrc_16; let Src1VOP3DPP = getVOP3DPPSrcForVT.ret; let Src0ModVOP3DPP = getSrc0ModVOP3DPP.ret; - let Src1ModVOP3DPP = getSrcModVOP3DPP.ret; + let Src1ModVOP3DPP = getSrcModVOP3VC.ret; } def VOP2e_I16_I16_I16_I1_fake16 : VOP2e_SGPR<[i16, i16, i16, i1]> { let IsTrue16 = 1; @@ -757,7 +809,7 @@ def VOP2e_I16_I16_I16_I1_fake16 : VOP2e_SGPR<[i16, i16, i16, i1]> { let Src0VOP3DPP = VGPRSrc_32; let Src1VOP3DPP = getVOP3DPPSrcForVT.ret; let Src0ModVOP3DPP = getSrc0ModVOP3DPP.ret; - let Src1ModVOP3DPP = getSrcModVOP3DPP.ret; + let Src1ModVOP3DPP = getSrcModVOP3VC.ret; } def VOP_READLANE : VOPProfile<[i32, i32, i32, untyped]> { @@ -819,12 +871,12 @@ defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_I32_I32_I32_ARITH, AMDGPUmul defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_I32_I32_I32, AMDGPUmulhi_u24>; defm V_MIN_F32 : VOP2Inst_VOPD <"v_min_f32", VOP_F32_F32_F32, 0xb, "v_min_f32", fminnum_like>; defm V_MAX_F32 : VOP2Inst_VOPD <"v_max_f32", VOP_F32_F32_F32, 0xa, "v_max_f32", fmaxnum_like>; -defm V_MIN_I32 : VOP2Inst <"v_min_i32", VOP_PAT_GEN, smin>; -defm V_MAX_I32 : VOP2Inst <"v_max_i32", VOP_PAT_GEN, smax>; +defm V_MIN_I32 : VOP2Inst_VOPD <"v_min_i32", VOP_PAT_GEN, 0x18, "v_min_i32", smin>; 
+defm V_MAX_I32 : VOP2Inst_VOPD <"v_max_i32", VOP_PAT_GEN, 0x17, "v_max_i32", smax>; defm V_MIN_U32 : VOP2Inst <"v_min_u32", VOP_PAT_GEN, umin>; defm V_MAX_U32 : VOP2Inst <"v_max_u32", VOP_PAT_GEN, umax>; -defm V_LSHRREV_B32 : VOP2Inst <"v_lshrrev_b32", VOP_I32_I32_I32, clshr_rev_32, "v_lshr_b32">; -defm V_ASHRREV_I32 : VOP2Inst <"v_ashrrev_i32", VOP_I32_I32_I32, cashr_rev_32, "v_ashr_i32">; +defm V_LSHRREV_B32 : VOP2Inst_VOPD <"v_lshrrev_b32", VOP_I32_I32_I32, 0x15, "v_lshrrev_b32", clshr_rev_32, "v_lshr_b32">; +defm V_ASHRREV_I32 : VOP2Inst_VOPD <"v_ashrrev_i32", VOP_I32_I32_I32, 0x16, "v_ashrrev_i32", cashr_rev_32, "v_ashr_i32">; defm V_LSHLREV_B32 : VOP2Inst_VOPD <"v_lshlrev_b32", VOP_I32_I32_I32, 0x11, "v_lshlrev_b32", clshl_rev_32, "v_lshl_b32">; defm V_AND_B32 : VOP2Inst_VOPD <"v_and_b32", VOP_PAT_GEN, 0x12, "v_and_b32", and>; defm V_OR_B32 : VOP2Inst <"v_or_b32", VOP_PAT_GEN, or>; @@ -856,7 +908,7 @@ defm V_SUBBREV_U32 : VOP2bInst <"v_subbrev_u32", VOP2b_I32_I1_I32_I32_I1, null_f let SubtargetPredicate = HasAddNoCarryInsts, isReMaterializable = 1 in { - defm V_SUB_U32 : VOP2Inst <"v_sub_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32">; + defm V_SUB_U32 : VOP2Inst_VOPD <"v_sub_u32", VOP_I32_I32_I32_ARITH, 0x14, "v_sub_nc_u32", null_frag, "v_sub_u32">; defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32">; } @@ -1261,12 +1313,20 @@ let AddedComplexity = 30 in { } } // End AddedComplexity = 30 -let SubtargetPredicate = HasFmaakFmamkF32Insts, isReMaterializable = 1, FixedSize = 1 in { +let SubtargetPredicate = HasFmaakFmamkF32Insts, isReMaterializable = 1, CanBeVOPD3X = 0, FixedSize = 1 in { def V_FMAMK_F32 : VOP2_Pseudo<"v_fmamk_f32", VOP_MADMK_F32, [], "">, VOPD_Component<0x2, "v_fmamk_f32">; let isCommutable = 1 in def V_FMAAK_F32 : VOP2_Pseudo<"v_fmaak_f32", VOP_MADAK_F32, [], "">, VOPD_Component<0x1, "v_fmaak_f32">; -} // End SubtargetPredicate = HasFmaakFmamkF32Insts, isReMaterializable = 1, FixedSize = 1 +} // 
End SubtargetPredicate = HasFmaakFmamkF32Insts, isReMaterializable = 1, CanBeVOPD3X = 0, FixedSize = 1 + +let SubtargetPredicate = HasFmaakFmamkF64Insts, isReMaterializable = 1, + FixedSize = 1, Size = 12, SchedRW = [Write64Bit] in { +def V_FMAMK_F64 : VOP2_Pseudo<"v_fmamk_f64", VOP_MADMK_F64, [], "">; + +let isCommutable = 1 in +def V_FMAAK_F64 : VOP2_Pseudo<"v_fmaak_f64", VOP_MADAK_F64, [], "">; +} // End SubtargetPredicate = HasFmaakFmamkF64Insts, isReMaterializable = 1, FixedSize = 1, Size = 12, SchedRW = [Write64Bit] let SubtargetPredicate = HasPkFmacF16Inst in { defm V_PK_FMAC_F16 : VOP2Inst<"v_pk_fmac_f16", VOP_V2F16_V2F16_V2F16>; @@ -1390,12 +1450,10 @@ def : VOPBinOpClampPat; } let SubtargetPredicate = isGFX12Plus, isReMaterializable = 1 in { - let SchedRW = [WriteDoubleAdd], isCommutable = 1 in { - let FPDPRounding = 1 in { - defm V_ADD_F64_pseudo : VOP2Inst <"v_add_f64_pseudo", VOP_F64_F64_F64, any_fadd>; - defm V_MUL_F64_pseudo : VOP2Inst <"v_mul_f64_pseudo", VOP_F64_F64_F64, fmul>; - } // End FPDPRounding = 1 - } // End SchedRW = [WriteDoubleAdd], isCommutable = 1 + let SchedRW = [WriteDoubleAdd], isCommutable = 1, FPDPRounding = 1 in { + defm V_ADD_F64_pseudo : VOP2Inst_VOPD <"v_add_f64_pseudo", VOP_F64_F64_F64, 0x21, "v_add_f64", any_fadd>; + defm V_MUL_F64_pseudo : VOP2Inst_VOPD <"v_mul_f64_pseudo", VOP_F64_F64_F64, 0x22, "v_mul_f64", fmul>; + } // End SchedRW = [WriteDoubleAdd], isCommutable = 1, FPDPRounding = 1 let SchedRW = [Write64Bit] in { defm V_LSHLREV_B64_pseudo : VOP2Inst <"v_lshlrev_b64_pseudo", VOP_I64_I32_I64, clshl_rev_64>; } // End SchedRW = [Write64Bit] @@ -1403,8 +1461,8 @@ let SubtargetPredicate = isGFX12Plus, isReMaterializable = 1 in { let SubtargetPredicate = HasIEEEMinimumMaximumInsts, isReMaterializable = 1, SchedRW = [WriteDoubleAdd], isCommutable = 1 in { - defm V_MIN_NUM_F64 : VOP2Inst <"v_min_num_f64", VOP_F64_F64_F64, fminnum_like>; - defm V_MAX_NUM_F64 : VOP2Inst <"v_max_num_f64", VOP_F64_F64_F64, fmaxnum_like>; + defm 
V_MIN_NUM_F64 : VOP2Inst_VOPD <"v_min_num_f64", VOP_F64_F64_F64, 0x24, "v_min_num_f64", fminnum_like>; + defm V_MAX_NUM_F64 : VOP2Inst_VOPD <"v_max_num_f64", VOP_F64_F64_F64, 0x23, "v_max_num_f64", fmaxnum_like>; } //===----------------------------------------------------------------------===// @@ -1492,6 +1550,14 @@ multiclass VOP2Only_Real_MADK op> { VOP2_MADKe(NAME).Pfl>; } +multiclass VOP2Only_Real_MADK64 op> { + def Gen.Suffix : + VOP2_Real_Gen(NAME), Gen>, + VOP2_MADK64e(NAME).Pfl> { + let DecoderNamespace = Gen.DecoderNamespace; + } +} + multiclass VOP2Only_Real_MADK_with_name op, string asmName, string opName = NAME> { def Gen.Suffix : @@ -1766,6 +1832,9 @@ let SubtargetPredicate = isGFX12Plus in { V_SUBBREV_U32_e32, V_SUBREV_CO_CI_U32_e32_gfx12, "v_subrev_co_ci_u32">; } // End SubtargetPredicate = isGFX12Plus +defm V_FMAMK_F64 : VOP2Only_Real_MADK64; +defm V_FMAAK_F64 : VOP2Only_Real_MADK64; + //===----------------------------------------------------------------------===// // GFX11. //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index e7ebc109b5dd5..75c531913ded1 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -51,6 +51,8 @@ class V_MUL_PROF : VOP3_Profile

{ def V_LSHL_ADD_U64_PROF : VOP3_Profile; +def VOP_F64_F64_F64_F64_DPP_PROF : VOP3_Profile; + def DIV_FIXUP_F32_PROF : VOP3_Profile { let HasExtVOP3DPP = 0; let HasExtDPP = 0; @@ -147,12 +149,12 @@ defm V_FMA_LEGACY_F32 : VOP3Inst <"v_fma_legacy_f32", defm V_MAD_I32_I24 : VOP3Inst <"v_mad_i32_i24", VOP3_Profile>; defm V_MAD_U32_U24 : VOP3Inst <"v_mad_u32_u24", VOP3_Profile>; -defm V_FMA_F32 : VOP3Inst <"v_fma_f32", VOP3_Profile, any_fma>; +defm V_FMA_F32 : VOP3Inst <"v_fma_f32", VOP3_Profile, any_fma>, VOPD_Component<0x13, "v_fma_f32">; defm V_LERP_U8 : VOP3Inst <"v_lerp_u8", VOP3_Profile, int_amdgcn_lerp>; let SchedRW = [WriteDoubleAdd] in { let FPDPRounding = 1 in { -defm V_FMA_F64 : VOP3Inst <"v_fma_f64", VOP3_Profile, any_fma>; +defm V_FMA_F64 : VOP3Inst <"v_fma_f64", VOP_F64_F64_F64_F64_DPP_PROF, any_fma>, VOPD_Component<0x20, "v_fma_f64">; let SubtargetPredicate = isNotGFX12Plus in { defm V_ADD_F64 : VOP3Inst <"v_add_f64", VOP3_Profile, any_fadd>; defm V_MUL_F64 : VOP3Inst <"v_mul_f64", VOP3_Profile, any_fmul>; @@ -1033,7 +1035,11 @@ class VOP3_BITOP3_Profile : VOP3_Profile let HasClamp = 0; let HasOMod = 0; let HasModifiers = 0; + let HasVOPD3Src2 = 0; let HasBitOp3 = 1; + + let InsVOPD3Y = (ins Src0VOPD3:$src0Y, Src1VOPD3:$vsrc1Y, bitop3_0:$bitop3); + let AsmVOPD3Y = getAsmVOPDPart.ret # "$bitop3"; } class VOP3_CVT_SCALE_F1632_FP8BF8_Profile : VOP3_Profile, @@ -1416,7 +1422,8 @@ let SubtargetPredicate = HasBitOp3Insts in { defm V_BITOP3_B16 : VOP3Inst <"v_bitop3_b16", VOP3_BITOP3_Profile, VOP3_OPSEL>>; defm V_BITOP3_B32 : VOP3Inst <"v_bitop3_b32", - VOP3_BITOP3_Profile, VOP3_REGULAR>>; + VOP3_BITOP3_Profile, VOP3_REGULAR>>, + VOPD_Component<0x12, "v_bitop2_b32">; } def : GCNPat< (i32 (int_amdgcn_bitop3 i32:$src0, i32:$src1, i32:$src2, i32:$bitop3)), diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td index 9e84f6aed0176..2c1193509da9b 100644 --- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td +++ 
b/llvm/lib/Target/AMDGPU/VOPCInstructions.td @@ -112,8 +112,8 @@ multiclass VOPC_Profile_t16 sched, ValueType vt0, ValueType let Src1Mod = getSrcMod.ret; let Src2Mod = getSrcMod.ret; let Src0ModVOP3DPP = getSrc0ModVOP3DPP.ret; - let Src1ModVOP3DPP = getSrcModVOP3DPP.ret; - let Src2ModVOP3DPP = getSrcModVOP3DPP.ret; + let Src1ModVOP3DPP = getSrcModVOP3VC.ret; + let Src2ModVOP3DPP = getSrcModVOP3VC.ret; } def _fake16: VOPC_Profile { let IsTrue16 = 1; @@ -138,8 +138,8 @@ multiclass VOPC_Profile_t16 sched, ValueType vt0, ValueType let Src1Mod = getSrcMod.ret; let Src2Mod = getSrcMod.ret; let Src0ModVOP3DPP = getSrc0ModVOP3DPP.ret; - let Src1ModVOP3DPP = getSrcModVOP3DPP.ret; - let Src2ModVOP3DPP = getSrcModVOP3DPP.ret; + let Src1ModVOP3DPP = getSrcModVOP3VC.ret; + let Src2ModVOP3DPP = getSrcModVOP3VC.ret; } } @@ -184,8 +184,8 @@ multiclass VOPC_NoSdst_Profile_t16 sched, ValueType vt0, Va let Src1Mod = getSrcMod.ret; let Src2Mod = getSrcMod.ret; let Src0ModVOP3DPP = getSrc0ModVOP3DPP.ret; - let Src1ModVOP3DPP = getSrcModVOP3DPP.ret; - let Src2ModVOP3DPP = getSrcModVOP3DPP.ret; + let Src1ModVOP3DPP = getSrcModVOP3VC.ret; + let Src2ModVOP3DPP = getSrcModVOP3VC.ret; } def _fake16 : VOPC_NoSdst_Profile { let IsTrue16 = 1; @@ -208,8 +208,8 @@ multiclass VOPC_NoSdst_Profile_t16 sched, ValueType vt0, Va let Src1Mod = getSrcMod.ret; let Src2Mod = getSrcMod.ret; let Src0ModVOP3DPP = getSrc0ModVOP3DPP.ret; - let Src1ModVOP3DPP = getSrcModVOP3DPP.ret; - let Src2ModVOP3DPP = getSrcModVOP3DPP.ret; + let Src1ModVOP3DPP = getSrcModVOP3VC.ret; + let Src2ModVOP3DPP = getSrcModVOP3VC.ret; } } @@ -929,8 +929,8 @@ multiclass VOPC_Class_Profile_t16 sched> { let Src1Mod = getSrcMod.ret; let Src2Mod = getSrcMod.ret; let Src0ModVOP3DPP = getSrc0ModVOP3DPP.ret; - let Src1ModVOP3DPP = getSrcModVOP3DPP.ret; - let Src2ModVOP3DPP = getSrcModVOP3DPP.ret; + let Src1ModVOP3DPP = getSrcModVOP3VC.ret; + let Src2ModVOP3DPP = getSrcModVOP3VC.ret; } def _fake16 : VOPC_Class_Profile_Base { let IsTrue16 = 1; 
@@ -955,8 +955,8 @@ multiclass VOPC_Class_Profile_t16 sched> { let Src1Mod = getSrcMod.ret; let Src2Mod = getSrcMod.ret; let Src0ModVOP3DPP = getSrc0ModVOP3DPP.ret; - let Src1ModVOP3DPP = getSrcModVOP3DPP.ret; - let Src2ModVOP3DPP = getSrcModVOP3DPP.ret; + let Src1ModVOP3DPP = getSrcModVOP3VC.ret; + let Src2ModVOP3DPP = getSrcModVOP3VC.ret; } } @@ -998,8 +998,8 @@ multiclass VOPC_Class_NoSdst_Profile_t16 sched> { let Src1Mod = getSrcMod.ret; let Src2Mod = getSrcMod.ret; let Src0ModVOP3DPP = getSrc0ModVOP3DPP.ret; - let Src1ModVOP3DPP = getSrcModVOP3DPP.ret; - let Src2ModVOP3DPP = getSrcModVOP3DPP.ret; + let Src1ModVOP3DPP = getSrcModVOP3VC.ret; + let Src2ModVOP3DPP = getSrcModVOP3VC.ret; } def _fake16 : VOPC_Class_NoSdst_Profile { let IsTrue16 = 1; @@ -1022,8 +1022,8 @@ multiclass VOPC_Class_NoSdst_Profile_t16 sched> { let Src1Mod = getSrcMod.ret; let Src2Mod = getSrcMod.ret; let Src0ModVOP3DPP = getSrc0ModVOP3DPP.ret; - let Src1ModVOP3DPP = getSrcModVOP3DPP.ret; - let Src2ModVOP3DPP = getSrcModVOP3DPP.ret; + let Src1ModVOP3DPP = getSrcModVOP3VC.ret; + let Src2ModVOP3DPP = getSrcModVOP3VC.ret; } } diff --git a/llvm/lib/Target/AMDGPU/VOPDInstructions.td b/llvm/lib/Target/AMDGPU/VOPDInstructions.td index 4054002c41478..3e7af12f6b600 100644 --- a/llvm/lib/Target/AMDGPU/VOPDInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPDInstructions.td @@ -50,6 +50,47 @@ class VOPD_MADKe opX, bits<5> opY> : Enc96 { let Inst{95-64} = imm; } +class VOPD3e opX, bits<6> opY, VOP_Pseudo VDX, VOP_Pseudo VDY> : Enc96 { + bits<9> src0X; + bits<8> vsrc1X; + bits<8> vsrc2X; + bits<8> vdstX; + bits<9> src0Y; + bits<8> vsrc1Y; + bits<8> vsrc2Y; + bits<8> vdstY; + // neg modifiers + bit src0X_modifiers; + bit src0Y_modifiers; + bit vsrc1X_modifiers; + bit vsrc1Y_modifiers; + bit vsrc2X_modifiers; + bit vsrc2Y_modifiers; + bits<8> bitop3; + + let Inst{8-0} = src0X; + let Inst{17-12} = opY; + let Inst{23-18} = opX; + let Inst{31-24} = 0xcf; // encoding + let Inst{40-32} = src0Y; + let Inst{41} = 
!if(VDX.Pfl.HasModifiers, src0X_modifiers, 0); + let Inst{42} = !if(!and(VDX.Pfl.HasSrc1, VDX.Pfl.HasModifiers), vsrc1X_modifiers, 0); + let Inst{43} = !if(!and(VDX.Pfl.HasVOPD3Src2, VDX.Pfl.HasModifiers), vsrc2X_modifiers, 0); + let Inst{44} = !if(VDY.Pfl.HasModifiers, src0Y_modifiers, 0); + let Inst{45} = !if(!and(VDY.Pfl.HasSrc1, VDY.Pfl.HasModifiers), vsrc1Y_modifiers, 0); + let Inst{46} = !if(!and(VDY.Pfl.HasVOPD3Src2, VDY.Pfl.HasModifiers), vsrc2Y_modifiers, 0); + let Inst{55-48} = !if(!eq(!find(VDX.Pfl.AsmVOPD3X, "$vsrc1X"), -1), 0, vsrc1X); + + // Despite the vsrc operand name, SGPRs can be used for vsrc2X for + // V_DUAL_CNDMASK_B32 + let Inst{63-56} = !if(!eq(!find(VDX.Pfl.AsmVOPD3X, "$vsrc2X"), -1), 0, vsrc2X); + let Inst{71-64} = vdstX; + let Inst{79-72} = !if(!eq(!find(VDY.Pfl.AsmVOPD3Y, "$vsrc1Y"), -1), 0, vsrc1Y); + let Inst{87-80} = !if(!ne(!find(VDY.Pfl.AsmVOPD3Y, "bitop"), -1), bitop3, + !if(!eq(!find(VDY.Pfl.AsmVOPD3Y, "$vsrc2Y"), -1), 0, vsrc2Y)); + let Inst{95-88} = vdstY; +} + //===----------------------------------------------------------------------===// // VOPD classes //===----------------------------------------------------------------------===// @@ -71,8 +112,8 @@ class VOPD_Base { // Fields for table indexing Instruction Opcode = !cast(NAME); - bits<5> OpX = XasVC.VOPDOp; - bits<5> OpY = YasVC.VOPDOp; + bits<6> OpX = XasVC.VOPDOp; + bits<6> OpY = YasVC.VOPDOp; bits<4> SubTgt = Gen.Subtarget; let VALU = 1; @@ -110,7 +151,7 @@ class VOPD_Base : VOPD_Base, - VOPDe { + VOPDe { let Inst{16-9} = !if (!eq(VDX.Mnemonic, "v_mov_b32"), 0x0, vsrc1X); let Inst{48-41} = !if (!eq(VDY.Mnemonic, "v_mov_b32"), 0x0, vsrc1Y); } @@ -118,29 +159,48 @@ class VOPD : VOPD_Base, - VOPD_MADKe { + VOPD_MADKe { let Inst{16-9} = !if (!eq(VDX.Mnemonic, "v_mov_b32"), 0x0, vsrc1X); let Inst{48-41} = !if (!eq(VDY.Mnemonic, "v_mov_b32"), 0x0, vsrc1Y); let Size = 12; let FixedSize = 1; } +class VOPD3 + : VOPD_Base, + VOPD3e { + let VOPD3 = 1; + let Size = 12; + // VOPD3 
uses promoted form of VOP2 instructions, so V_CNDMASK_B32 is not + // limited to VCC src2 only, and a real SGPR will be used as an operand + // instead. + defvar UsesX = !if(!eq(VDX, V_CNDMASK_B32_e32), !filter(x, VDX.Uses, !ne(x, VCC)), VDX.Uses); + defvar UsesY = !if(!eq(VDY, V_CNDMASK_B32_e32), !filter(x, VDY.Uses, !ne(x, VCC)), VDY.Uses); + let Uses = RegListUnion.ret; +} + defvar VOPDPseudosCommon = [ "V_FMAC_F32_e32", "V_FMAAK_F32", "V_FMAMK_F32", "V_MUL_F32_e32", "V_ADD_F32_e32", "V_SUB_F32_e32", "V_SUBREV_F32_e32", "V_MUL_LEGACY_F32_e32", "V_MOV_B32_e32", "V_CNDMASK_B32_e32", "V_MAX_F32_e32", "V_MIN_F32_e32", "V_DOT2C_F32_F16_e32", "V_DOT2C_F32_BF16_e32" ]; -defvar VOPDYOnlyPseudosCommon = ["V_ADD_U32_e32", "V_LSHLREV_B32_e32", - "V_AND_B32_e32"]; +defvar VOPDYOnlyPseudosCommon = ["V_ADD_U32_e32", "V_LSHLREV_B32_e32"]; +defvar VOPDYOnlyPseudosGFX11_12 = ["V_AND_B32_e32"]; +defvar VOPDYOnlyPseudosGFX1250 = ["V_MAX_I32_e32", "V_MIN_I32_e32", + "V_SUB_U32_e32", "V_LSHRREV_B32_e32", + "V_ASHRREV_I32_e32"]; defvar VOPDXPseudosGFX11 = VOPDPseudosCommon; defvar VOPDXPseudosGFX12 = VOPDPseudosCommon; -defvar VOPDYPseudosGFX11 = !listconcat(VOPDXPseudosGFX11, VOPDYOnlyPseudosCommon); -defvar VOPDYPseudosGFX12 = !listconcat(VOPDXPseudosGFX12, VOPDYOnlyPseudosCommon); +defvar VOPDYPseudosGFX11 = !listconcat(VOPDXPseudosGFX11, VOPDYOnlyPseudosCommon, VOPDYOnlyPseudosGFX11_12); +defvar VOPDYPseudosGFX12 = !listconcat(VOPDXPseudosGFX12, VOPDYOnlyPseudosCommon, VOPDYOnlyPseudosGFX11_12); +defvar VOPDYPseudosGFX1250 = !listconcat(VOPDXPseudosGFX12, VOPDYOnlyPseudosCommon, VOPDYOnlyPseudosGFX1250); def GFX11GenD : GFXGenD; -def GFX12GenD : GFXGenD; +def GFX12GenD : GFXGenD; +def GFX1250GenD : GFXGenD; def VOPDDstYOperand : RegisterOperand { @@ -148,16 +208,13 @@ def VOPDDstYOperand : RegisterOperand { } class getRenamed { - string ret = !if(!eq(Gen.Subtarget, GFX12Gen.Subtarget), - !if(!eq(VOPDName, "v_dual_max_f32"), - "v_dual_max_num_f32", - !if(!eq(VOPDName, 
"v_dual_min_f32"), - "v_dual_min_num_f32", - VOPDName)), - VOPDName); + string ret = !cond(!eq(Gen.Subtarget, GFX11Gen.Subtarget) : VOPDName, + !eq(VOPDName, "v_dual_max_f32") : "v_dual_max_num_f32", + !eq(VOPDName, "v_dual_min_f32") : "v_dual_min_num_f32", + true : VOPDName); } -foreach Gen = [GFX11GenD, GFX12GenD] in { +foreach Gen = [GFX11GenD, GFX12GenD, GFX1250GenD] in { foreach x = Gen.VOPDXPseudos in { foreach y = Gen.VOPDYPseudos in { defvar xInst = !cast(x); @@ -192,3 +249,41 @@ foreach Gen = [GFX11GenD, GFX12GenD] in { } } +defvar VOPD3XPseudosExtra = ["V_ADD_U32_e32", "V_LSHLREV_B32_e32", "V_FMA_F32_e64", "V_SUB_U32_e32", + "V_LSHRREV_B32_e32", "V_ASHRREV_I32_e32", "V_FMA_F64_e64", "V_ADD_F64_pseudo_e32", + "V_MUL_F64_pseudo_e32", "V_MAX_NUM_F64_e32", "V_MIN_NUM_F64_e32"]; +defvar VOPD3XPseudosGFX1250 = !listconcat( + !filter(x, VOPDXPseudosGFX12, !and(!eq(!find(x, "FMAAK"), -1), + !eq(!find(x, "FMAMK"), -1))), + VOPD3XPseudosExtra); +defvar VOPD3YPseudosExtra = ["V_BITOP3_B32_e64", "V_FMA_F32_e64"]; +defvar VOPD3YPseudosGFX1250 = !listconcat( + !filter(x, VOPDYPseudosGFX1250, !and(!eq(!find(x, "FMAAK"), -1), + !eq(!find(x, "FMAMK"), -1))), + VOPD3YPseudosExtra); + +def GFX1250GenD3 : GFXGenD; + +class getOpcMap { + defvar BaseName = !substr(OPName,2); + string ret = !cond(!eq(BaseName, "BITOP3_B32_e64") : "BITOP2_B32_e64", + 1 : BaseName); +} + +foreach Gen = [GFX1250GenD3] in { + foreach x = Gen.VOPDXPseudos in { + foreach y = Gen.VOPDYPseudos in { + defvar xInst = !cast(x); + defvar yInst = !cast(y); + defvar XasVC = !cast(x); + defvar YasVC = !cast(y); + defvar xAsmName = getRenamed.ret; + defvar yAsmName = getRenamed.ret; + defvar OpName = "V_DUAL_" # getOpcMap.ret # "_X_" # getOpcMap.ret # "_e96" # Gen.Suffix; + defvar asm = xAsmName # xInst.Pfl.AsmVOPD3X #" :: "# yAsmName #" "# yInst.Pfl.AsmVOPD3Y; + defvar ins = !con(xInst.Pfl.InsVOPD3X, yInst.Pfl.InsVOPD3Y); + defvar outs = (outs xInst.Pfl.DstRC:$vdstX, yInst.Pfl.DstRC:$vdstY); + def OpName : 
VOPD3; + } + } +} diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index 3e01f8cd044e2..df215d23f7f40 100644 --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -35,12 +35,17 @@ class VOP { // First 13 insts from VOPDY are also VOPDX. DOT2ACC_F32_BF16 is omitted defvar VOPDX_Max_Index = 12; +defvar VOPD3X_Max_Index = 36; -class VOPD_Component OpIn, string vOPDName> { +class VOPD_Component OpIn, string vOPDName> { Instruction BaseVOP = !cast(NAME); string VOPDName = "v_dual_" # !substr(vOPDName, 2); - bits<5> VOPDOp = OpIn; + bits<6> VOPDOp = OpIn; bit CanBeVOPDX = !le(VOPDOp, VOPDX_Max_Index); + bit CanBeVOPD3X = !and(!le(VOPDOp, VOPD3X_Max_Index), + !and(!ne(vOPDName, "v_bitop2_b32"), + !and(!ne(vOPDName, "v_max_i32"), + !ne(vOPDName, "v_min_i32")))); } class VOPAnyCommon pattern> : @@ -627,9 +632,9 @@ def SDWA { class VOP_SDWAe : Enc64 { bits<8> src0; bits<3> src0_sel; - bits<2> src0_modifiers; // float: {abs,neg}, int {sext} + bits<5> src0_modifiers; // float: {abs,neg}, int {sext} bits<3> src1_sel; - bits<2> src1_modifiers; + bits<5> src1_modifiers; bits<3> dst_sel; bits<2> dst_unused; bits<1> clamp; @@ -639,10 +644,10 @@ class VOP_SDWAe : Enc64 { let Inst{44-43} = !if(P.EmitDstSel, dst_unused{1-0}, ?); let Inst{45} = !if(P.HasSDWAClamp, clamp{0}, 0); let Inst{50-48} = !if(P.HasSrc0, src0_sel{2-0}, 0); - let Inst{51} = !if(P.HasSrc0IntMods, src0_modifiers{0}, 0); + let Inst{51} = !if(P.HasSrc0IntMods, src0_modifiers{4}, 0); let Inst{53-52} = !if(P.HasSrc0FloatMods, src0_modifiers{1-0}, 0); let Inst{58-56} = !if(P.HasSrc1, src1_sel{2-0}, 0); - let Inst{59} = !if(P.HasSrc1IntMods, src1_modifiers{0}, 0); + let Inst{59} = !if(P.HasSrc1IntMods, src1_modifiers{4}, 0); let Inst{61-60} = !if(P.HasSrc1FloatMods, src1_modifiers{1-0}, 0); } @@ -663,18 +668,18 @@ class VOP_SDWAe : Enc64 { class VOP_SDWA9e : Enc64 { bits<9> src0; // {src0_sgpr{0}, src0{7-0}} bits<3> src0_sel; - 
bits<2> src0_modifiers; // float: {abs,neg}, int {sext} + bits<5> src0_modifiers; // float: {abs,neg}, int {sext} bits<3> src1_sel; - bits<2> src1_modifiers; + bits<5> src1_modifiers; bits<1> src1_sgpr; let Inst{39-32} = !if(P.HasSrc0, src0{7-0}, 0); let Inst{50-48} = !if(P.HasSrc0, src0_sel{2-0}, 0); - let Inst{51} = !if(P.HasSrc0IntMods, src0_modifiers{0}, 0); + let Inst{51} = !if(P.HasSrc0IntMods, src0_modifiers{4}, 0); let Inst{53-52} = !if(P.HasSrc0FloatMods, src0_modifiers{1-0}, 0); let Inst{55} = !if(P.HasSrc0, src0{8}, 0); let Inst{58-56} = !if(P.HasSrc1, src1_sel{2-0}, 0); - let Inst{59} = !if(P.HasSrc1IntMods, src1_modifiers{0}, 0); + let Inst{59} = !if(P.HasSrc1IntMods, src1_modifiers{4}, 0); let Inst{61-60} = !if(P.HasSrc1FloatMods, src1_modifiers{1-0}, 0); let Inst{63} = 0; // src1_sgpr - should be specified in subclass } diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index cbd43cde78548..fb72bab03e750 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -115,7 +115,6 @@ #include using namespace llvm; -using namespace llvm::PatternMatch; #define DEBUG_TYPE "arm-isel" @@ -5519,6 +5518,24 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { DAG.getConstant(VT.getSizeInBits() - 1, dl, VT)); return DAG.getNode(ISD::OR, dl, VT, Shift, DAG.getConstant(1, dl, VT)); } + + // Check for SMAX(lhs, 0) and SMIN(lhs, 0) patterns. + // (SELECT_CC setgt, lhs, 0, lhs, 0) -> (BIC lhs, (SRA lhs, typesize-1)) + // (SELECT_CC setlt, lhs, 0, lhs, 0) -> (AND lhs, (SRA lhs, typesize-1)) + // Both require less instructions than compare and conditional select. 
+ if ((CC == ISD::SETGT || CC == ISD::SETLT) && LHS == TrueVal && RHSC && + RHSC->isZero() && CFVal && CFVal->isZero() && + LHS.getValueType() == RHS.getValueType()) { + EVT VT = LHS.getValueType(); + SDValue Shift = + DAG.getNode(ISD::SRA, dl, VT, LHS, + DAG.getConstant(VT.getSizeInBits() - 1, dl, VT)); + + if (CC == ISD::SETGT) + Shift = DAG.getNOT(dl, Shift, VT); + + return DAG.getNode(ISD::AND, dl, VT, LHS, Shift); + } } if (Subtarget->hasV8_1MMainlineOps() && CFVal && CTVal && diff --git a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp index 9c38901f6821f..b6e8ce7d78b23 100644 --- a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp +++ b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp @@ -160,18 +160,18 @@ void DXContainerGlobals::addRootSignature(Module &M, assert(MMI.EntryPropertyVec.size() == 1); - auto &RSA = getAnalysis(); + auto &RSA = getAnalysis().getRSInfo(); const Function *EntryFunction = MMI.EntryPropertyVec[0].Entry; - const auto &FuncRs = RSA.find(EntryFunction); + const std::optional &RS = + RSA.getDescForFunction(EntryFunction); - if (FuncRs == RSA.end()) + if (!RS) return; - const RootSignatureDesc &RS = FuncRs->second; SmallString<256> Data; raw_svector_ostream OS(Data); - RS.write(OS); + RS->write(OS); Constant *Constant = ConstantDataArray::getString(M.getContext(), Data, /*AddNull*/ false); diff --git a/llvm/lib/Target/DirectX/DXILPrepare.cpp b/llvm/lib/Target/DirectX/DXILPrepare.cpp index c8866bfefdfc5..703a9e56626c8 100644 --- a/llvm/lib/Target/DirectX/DXILPrepare.cpp +++ b/llvm/lib/Target/DirectX/DXILPrepare.cpp @@ -24,6 +24,7 @@ #include "llvm/IR/AttributeMask.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" @@ -239,6 +240,11 @@ class DXILPrepareModule : public ModulePass { for (size_t Idx = 0, End = F.arg_size(); Idx < End; ++Idx) 
F.removeParamAttrs(Idx, AttrMask); + // Lifetime intrinsics in LLVM 3.7 do not have the memory FnAttr + if (Intrinsic::ID IID = F.getIntrinsicID(); + IID == Intrinsic::lifetime_start || IID == Intrinsic::lifetime_end) + F.removeFnAttr(Attribute::Memory); + for (auto &BB : F) { IRBuilder<> Builder(&BB); for (auto &I : make_early_inc_range(BB)) { @@ -247,7 +253,7 @@ class DXILPrepareModule : public ModulePass { // Emtting NoOp bitcast instructions allows the ValueEnumerator to be // unmodified as it reserves instruction IDs during contruction. - if (auto LI = dyn_cast(&I)) { + if (auto *LI = dyn_cast(&I)) { if (Value *NoOpBitcast = maybeGenerateBitcast( Builder, PointerTypes, I, LI->getPointerOperand(), LI->getType())) { @@ -257,7 +263,7 @@ class DXILPrepareModule : public ModulePass { } continue; } - if (auto SI = dyn_cast(&I)) { + if (auto *SI = dyn_cast(&I)) { if (Value *NoOpBitcast = maybeGenerateBitcast( Builder, PointerTypes, I, SI->getPointerOperand(), SI->getValueOperand()->getType())) { @@ -268,7 +274,7 @@ class DXILPrepareModule : public ModulePass { } continue; } - if (auto GEP = dyn_cast(&I)) { + if (auto *GEP = dyn_cast(&I)) { if (Value *NoOpBitcast = maybeGenerateBitcast( Builder, PointerTypes, I, GEP->getPointerOperand(), GEP->getSourceElementType())) @@ -280,6 +286,17 @@ class DXILPrepareModule : public ModulePass { CB->removeRetAttrs(AttrMask); for (size_t Idx = 0, End = CB->arg_size(); Idx < End; ++Idx) CB->removeParamAttrs(Idx, AttrMask); + // LLVM 3.7 Lifetime intrinics require an i8* pointer operand, so we + // insert a bitcast here to ensure that is the case + if (isa(CB)) { + Value *PtrOperand = CB->getArgOperand(1); + Builder.SetInsertPoint(CB); + PointerType *PtrTy = cast(PtrOperand->getType()); + Value *NoOpBitcast = Builder.Insert( + CastInst::Create(Instruction::BitCast, PtrOperand, + Builder.getPtrTy(PtrTy->getAddressSpace()))); + CB->setArgOperand(1, NoOpBitcast); + } continue; } } diff --git 
a/llvm/lib/Target/DirectX/DXILRootSignature.cpp b/llvm/lib/Target/DirectX/DXILRootSignature.cpp index cfd4107b8a3de..dfc81626da01f 100644 --- a/llvm/lib/Target/DirectX/DXILRootSignature.cpp +++ b/llvm/lib/Target/DirectX/DXILRootSignature.cpp @@ -596,9 +596,9 @@ analyzeModule(Module &M) { AnalysisKey RootSignatureAnalysis::Key; -SmallDenseMap +RootSignatureAnalysis::Result RootSignatureAnalysis::run(Module &M, ModuleAnalysisManager &AM) { - return analyzeModule(M); + return RootSignatureBindingInfo(analyzeModule(M)); } //===----------------------------------------------------------------------===// @@ -606,8 +606,7 @@ RootSignatureAnalysis::run(Module &M, ModuleAnalysisManager &AM) { PreservedAnalyses RootSignatureAnalysisPrinter::run(Module &M, ModuleAnalysisManager &AM) { - SmallDenseMap &RSDMap = - AM.getResult(M); + RootSignatureBindingInfo &RSDMap = AM.getResult(M); OS << "Root Signature Definitions" << "\n"; @@ -678,13 +677,14 @@ PreservedAnalyses RootSignatureAnalysisPrinter::run(Module &M, //===----------------------------------------------------------------------===// bool RootSignatureAnalysisWrapper::runOnModule(Module &M) { - FuncToRsMap = analyzeModule(M); + FuncToRsMap = std::make_unique( + RootSignatureBindingInfo(analyzeModule(M))); return false; } void RootSignatureAnalysisWrapper::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); - AU.addRequired(); + AU.addPreserved(); } char RootSignatureAnalysisWrapper::ID = 0; diff --git a/llvm/lib/Target/DirectX/DXILRootSignature.h b/llvm/lib/Target/DirectX/DXILRootSignature.h index be5cc78bc6bdf..fc39b38258df8 100644 --- a/llvm/lib/Target/DirectX/DXILRootSignature.h +++ b/llvm/lib/Target/DirectX/DXILRootSignature.h @@ -10,6 +10,8 @@ /// Root Signatures. 
/// //===----------------------------------------------------------------------===// +#ifndef LLVM_LIB_TARGET_DIRECTX_DXILROOTSIGNATURE_H +#define LLVM_LIB_TARGET_DIRECTX_DXILROOTSIGNATURE_H #include "llvm/ADT/DenseMap.h" #include "llvm/Analysis/DXILMetadataAnalysis.h" @@ -34,6 +36,34 @@ enum class RootSignatureElementKind { DescriptorTable = 6, StaticSamplers = 7 }; + +class RootSignatureBindingInfo { +private: + SmallDenseMap FuncToRsMap; + +public: + using iterator = + SmallDenseMap::iterator; + + RootSignatureBindingInfo() = default; + RootSignatureBindingInfo( + SmallDenseMap Map) + : FuncToRsMap(Map) {}; + + iterator find(const Function *F) { return FuncToRsMap.find(F); } + + iterator end() { return FuncToRsMap.end(); } + + std::optional + getDescForFunction(const Function *F) { + const auto FuncRs = find(F); + if (FuncRs == end()) + return std::nullopt; + + return FuncRs->second; + } +}; + class RootSignatureAnalysis : public AnalysisInfoMixin { friend AnalysisInfoMixin; static AnalysisKey Key; @@ -41,10 +71,9 @@ class RootSignatureAnalysis : public AnalysisInfoMixin { public: RootSignatureAnalysis() = default; - using Result = SmallDenseMap; + using Result = RootSignatureBindingInfo; - SmallDenseMap - run(Module &M, ModuleAnalysisManager &AM); + Result run(Module &M, ModuleAnalysisManager &AM); }; /// Wrapper pass for the legacy pass manager. @@ -53,19 +82,13 @@ class RootSignatureAnalysis : public AnalysisInfoMixin { /// passes which run through the legacy pass manager. 
class RootSignatureAnalysisWrapper : public ModulePass { private: - SmallDenseMap FuncToRsMap; + std::unique_ptr FuncToRsMap; public: static char ID; - RootSignatureAnalysisWrapper() : ModulePass(ID) {} - using iterator = - SmallDenseMap::iterator; - - iterator find(const Function *F) { return FuncToRsMap.find(F); } - - iterator end() { return FuncToRsMap.end(); } + RootSignatureBindingInfo &getRSInfo() { return *FuncToRsMap; } bool runOnModule(Module &M) override; @@ -84,3 +107,4 @@ class RootSignatureAnalysisPrinter } // namespace dxil } // namespace llvm +#endif diff --git a/llvm/lib/Target/DirectX/DXILShaderFlags.cpp b/llvm/lib/Target/DirectX/DXILShaderFlags.cpp index bd3349d2e18c5..eb4adfea5aed6 100644 --- a/llvm/lib/Target/DirectX/DXILShaderFlags.cpp +++ b/llvm/lib/Target/DirectX/DXILShaderFlags.cpp @@ -152,7 +152,7 @@ void ModuleShaderFlags::updateFunctionFlags(ComputedShaderFlags &CSF, if (!CSF.Int64Ops) CSF.Int64Ops = I.getType()->isIntegerTy(64); - if (!CSF.Int64Ops) { + if (!CSF.Int64Ops && !isa(&I)) { for (const Value *Op : I.operands()) { if (Op->getType()->isIntegerTy(64)) { CSF.Int64Ops = true; diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp index 1d79c3018439e..46d5d7177c198 100644 --- a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp +++ b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp @@ -2545,6 +2545,25 @@ void DXILBitcodeWriter::writeInstruction(const Instruction &I, unsigned InstID, Vals.clear(); } +// HLSL Change +namespace { +struct ValueNameCreator { + MallocAllocator Allocator; + SmallVector + ValueNames; // SmallVector N = 2 because we currently only expect this + // to hold ValueNames for Lifetime intrinsics + ~ValueNameCreator() { + for (auto *VN : ValueNames) + VN->Destroy(Allocator); + } + ValueName *create(StringRef Name, Value *V) { + ValueName *VN = ValueName::create(Name, Allocator, V); + ValueNames.push_back(VN); + return VN; + } 
+}; +} // anonymous namespace + // Emit names for globals/functions etc. void DXILBitcodeWriter::writeFunctionLevelValueSymbolTable( const ValueSymbolTable &VST) { @@ -2559,9 +2578,24 @@ void DXILBitcodeWriter::writeFunctionLevelValueSymbolTable( // to ensure the binary is the same no matter what values ever existed. SmallVector SortedTable; + // HLSL Change + ValueNameCreator VNC; for (auto &VI : VST) { - SortedTable.push_back(VI.second->getValueName()); + ValueName *VN = VI.second->getValueName(); + // Clang mangles lifetime intrinsic names by appending '.p0' to the end, + // making them invalid lifetime intrinsics in LLVM 3.7. We can't + // demangle in dxil-prepare because it would result in invalid IR. + // Therefore we have to do this in the bitcode writer while writing its + // name to the symbol table. + if (const Function *Fn = dyn_cast(VI.getValue()); + Fn && Fn->isIntrinsic()) { + Intrinsic::ID IID = Fn->getIntrinsicID(); + if (IID == Intrinsic::lifetime_start || IID == Intrinsic::lifetime_end) + VN = VNC.create(Intrinsic::getBaseName(IID), VI.second); + } + SortedTable.push_back(VN); } + // The keys are unique, so there shouldn't be stability issues. llvm::sort(SortedTable, [](const ValueName *A, const ValueName *B) { return A->first() < B->first(); diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp index acd5b58c48785..ec73e58ce5d44 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -1762,6 +1762,9 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SRL, VT, Custom); } + setOperationAction(ISD::SADDSAT, MVT::i32, Legal); + setOperationAction(ISD::SADDSAT, MVT::i64, Legal); + // Extending loads from (native) vectors of i8 into (native) vectors of i16 // are legal. 
setLoadExtAction(ISD::EXTLOAD, MVT::v2i16, MVT::v2i8, Legal); diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index 0e13dd3214da6..f1fa40c1b9036 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -117,6 +117,8 @@ HexagonTargetLowering::initializeHVXLowering() { setOperationAction(ISD::VECTOR_SHUFFLE, ByteW, Legal); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + if (Subtarget.useHVX128BOps()) + setOperationAction(ISD::BITCAST, MVT::v32i1, Custom); if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() && Subtarget.useHVXFloatingPoint()) { @@ -204,6 +206,8 @@ HexagonTargetLowering::initializeHVXLowering() { setOperationAction(ISD::CTLZ, T, Legal); setOperationAction(ISD::SELECT, T, Legal); setOperationAction(ISD::SPLAT_VECTOR, T, Legal); + setOperationAction(ISD::UADDSAT, T, Legal); + setOperationAction(ISD::SADDSAT, T, Legal); if (T != ByteV) { setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Legal); setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Legal); @@ -295,6 +299,8 @@ HexagonTargetLowering::initializeHVXLowering() { setOperationAction(ISD::CTPOP, T, Custom); setOperationAction(ISD::ADD, T, Legal); + setOperationAction(ISD::UADDSAT, T, Legal); + setOperationAction(ISD::SADDSAT, T, Legal); setOperationAction(ISD::SUB, T, Legal); setOperationAction(ISD::MUL, T, Custom); setOperationAction(ISD::MULHS, T, Custom); @@ -2001,6 +2007,28 @@ HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::BUILD_PAIR, dl, ResTy, Combines); } + + // Handle bitcast from i32, v2i16, and v4i8 to v32i1. + // Splat the input into a 32-element i32 vector, then AND each element + // with a unique bitmask to isolate individual bits. 
+ if (ResTy == MVT::v32i1 && + (ValTy == MVT::i32 || ValTy == MVT::v2i16 || ValTy == MVT::v4i8) && + Subtarget.useHVX128BOps()) { + SDValue Val32 = Val; + if (ValTy == MVT::v2i16 || ValTy == MVT::v4i8) + Val32 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Val); + + MVT VecTy = MVT::getVectorVT(MVT::i32, 32); + SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Val32); + SmallVector Mask; + for (unsigned i = 0; i < 32; ++i) + Mask.push_back(DAG.getConstant(1ull << i, dl, MVT::i32)); + + SDValue MaskVec = DAG.getBuildVector(VecTy, dl, Mask); + SDValue Anded = DAG.getNode(ISD::AND, dl, VecTy, Splat, MaskVec); + return DAG.getNode(HexagonISD::V2Q, dl, ResTy, Anded); + } + if (isHvxBoolTy(ResTy) && ValTy.isScalarInteger()) { // Handle bitcast from i128 -> v128i1 and i64 -> v64i1. unsigned BitWidth = ValTy.getSizeInBits(); diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td index 2a991bafbf148..82d999ad820ed 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatterns.td +++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td @@ -582,6 +582,13 @@ def: Pat<(v8i1 (trunc V8I8:$Rs)), (A4_vcmpbeqi (Combinew (A2_andir (HiReg $Rs), (i32 0x01010101)), (A2_andir (LoReg $Rs), (i32 0x01010101))), (i32 1))>; +def : Pat<(v4i1 (trunc V4I8:$Rs)), + (A4_vcmpheqi (Combinew (A2_andir (HiReg (S2_vzxtbh $Rs)), 0x00010001), + (A2_andir (LoReg (S2_vzxtbh $Rs)), 0x00010001)), + (i32 1))>; +def: Pat<(v2i1 (trunc V2I16:$Rs)), + (A4_vcmpweqi (A2_andp (S2_vzxthw $Rs), (A2_combineii (i32 1), (i32 1))), + (i32 1))>; // Saturation: @@ -1517,6 +1524,14 @@ def: Pat<(or I32:$Rs, anyimm:$s10), (A2_orir I32:$Rs, imm:$s10)>; def: Pat<(and I32:$Rs, anyimm:$s10), (A2_andir I32:$Rs, imm:$s10)>; def: Pat<(sub anyimm:$s10, I32:$Rs), (A2_subri imm:$s10, I32:$Rs)>; +class OpR_RR_pat_sat + : Pat<(ResType (Op RxPred:$Rs, RxPred:$Rt)), + (MI RxPred:$Rs, RxPred:$Rt)>; + +def: OpR_RR_pat_sat; +def: OpR_RR_pat_sat; + def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; diff --git 
a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td index ba449eaeed34c..fb2ef59d99ef1 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td +++ b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td @@ -426,6 +426,21 @@ let Predicates = [UseHVX] in { (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>; } +let Predicates = [UseHVX] in { + def: OpR_RR_pat_sat; + def: OpR_RR_pat_sat; + def: OpR_RR_pat_sat; + def: OpR_RR_pat_sat; + def: OpR_RR_pat_sat; + def: OpR_RR_pat_sat; + def: OpR_RR_pat_sat; + def: OpR_RR_pat_sat; + def: OpR_RR_pat_sat; + def: OpR_RR_pat_sat; + def: OpR_RR_pat_sat; + def: OpR_RR_pat_sat; +} + // For now, we always deal with vector floating point in SF mode. class OpR_RR_pat_conv diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 72dbb44815657..c47987fbf683b 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -291,6 +291,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SETCC, VT, Legal); setOperationAction(ISD::VSELECT, VT, Legal); setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal); } for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) { setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal); @@ -352,7 +353,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::BUILD_VECTOR, VT, Custom); - setOperationAction(ISD::CONCAT_VECTORS, VT, Legal); + setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal); setOperationAction(ISD::SETCC, VT, Legal); setOperationAction(ISD::VSELECT, VT, Legal); @@ -499,6 +501,8 @@ SDValue 
LoongArchTargetLowering::LowerOperation(SDValue Op, return lowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::BUILD_VECTOR: return lowerBUILD_VECTOR(Op, DAG); + case ISD::CONCAT_VECTORS: + return lowerCONCAT_VECTORS(Op, DAG); case ISD::VECTOR_SHUFFLE: return lowerVECTOR_SHUFFLE(Op, DAG); case ISD::BITREVERSE: @@ -2522,6 +2526,72 @@ SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op, return SDValue(); } +SDValue LoongArchTargetLowering::lowerCONCAT_VECTORS(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + MVT ResVT = Op.getSimpleValueType(); + assert(ResVT.is256BitVector() && Op.getNumOperands() == 2); + + unsigned NumOperands = Op.getNumOperands(); + unsigned NumFreezeUndef = 0; + unsigned NumZero = 0; + unsigned NumNonZero = 0; + unsigned NonZeros = 0; + SmallSet Undefs; + for (unsigned i = 0; i != NumOperands; ++i) { + SDValue SubVec = Op.getOperand(i); + if (SubVec.isUndef()) + continue; + if (ISD::isFreezeUndef(SubVec.getNode())) { + // If the freeze(undef) has multiple uses then we must fold to zero. + if (SubVec.hasOneUse()) { + ++NumFreezeUndef; + } else { + ++NumZero; + Undefs.insert(SubVec); + } + } else if (ISD::isBuildVectorAllZeros(SubVec.getNode())) + ++NumZero; + else { + assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range. + NonZeros |= 1 << i; + ++NumNonZero; + } + } + + // If we have more than 2 non-zeros, build each half separately. + if (NumNonZero > 2) { + MVT HalfVT = ResVT.getHalfNumVectorElementsVT(); + ArrayRef Ops = Op->ops(); + SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT, + Ops.slice(0, NumOperands / 2)); + SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT, + Ops.slice(NumOperands / 2)); + return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi); + } + + // Otherwise, build it up through insert_subvectors. + SDValue Vec = NumZero ? DAG.getConstant(0, DL, ResVT) + : (NumFreezeUndef ? 
DAG.getFreeze(DAG.getUNDEF(ResVT)) + : DAG.getUNDEF(ResVT)); + + // Replace Undef operands with ZeroVector. + for (SDValue U : Undefs) + DAG.ReplaceAllUsesWith(U, DAG.getConstant(0, DL, U.getSimpleValueType())); + + MVT SubVT = Op.getOperand(0).getSimpleValueType(); + unsigned NumSubElems = SubVT.getVectorNumElements(); + for (unsigned i = 0; i != NumOperands; ++i) { + if ((NonZeros & (1 << i)) == 0) + continue; + + Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResVT, Vec, Op.getOperand(i), + DAG.getVectorIdxConstant(i * NumSubElems, DL)); + } + + return Vec; +} + SDValue LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h index 60dc2b385a75c..6b49a98f3ae46 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h @@ -376,6 +376,7 @@ class LoongArchTargetLowering : public TargetLowering { SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerBITREVERSE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td index ff7b0f2ae3f25..95e9fd49d1c0d 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td @@ -1860,12 +1860,6 @@ def : Pat<(v4i32(fp_to_uint v4f64:$vj)), (XVFTINTRZ_LU_D v4f64:$vj)), sub_128)>; -// XVPERMI_Q -foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in -def : Pat<(vt (concat_vectors LSX128:$vd, LSX128:$vj)), - 
(XVPERMI_Q (SUBREG_TO_REG (i64 0), LSX128:$vd, sub_128), - (SUBREG_TO_REG (i64 0), LSX128:$vj, sub_128), 2)>; - // XVABSD_{B/H/W/D}[U] defm : PatXrXr; defm : PatXrXrU; @@ -1879,6 +1873,35 @@ def : Pat<(loongarch_xvmskgez (v32i8 LASX256:$vj)), (PseudoXVMSKGEZ_B LASX256:$v def : Pat<(loongarch_xvmskeqz (v32i8 LASX256:$vj)), (PseudoXVMSKEQZ_B LASX256:$vj)>; def : Pat<(loongarch_xvmsknez (v32i8 LASX256:$vj)), (PseudoXVMSKNEZ_B LASX256:$vj)>; +// Subvector tricks +// Patterns for insert_subvector/extract_subvector +multiclass subvector_subreg_lowering { + // A 128-bit subvector extract from the first 256-bit vector position is a + // subregister copy that needs no instruction. Likewise, a 128-bit subvector + // insert to the first 256-bit vector position is a subregister copy that needs + // no instruction. + def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))), + (subVT (EXTRACT_SUBREG RC:$src, subIdx))>; + def : Pat<(VT (insert_subvector undef_or_freeze_undef, subRC:$src, (iPTR 0))), + (VT (INSERT_SUBREG (IMPLICIT_DEF), subRC:$src, subIdx))>; + + def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR hiIdx))), + (subVT (EXTRACT_SUBREG (XVPERMI_Q (IMPLICIT_DEF), RC:$src, 1), subIdx))>; + def : Pat<(VT (insert_subvector RC:$vd, subRC:$vj, (iPTR 0))), + (VT (XVPERMI_Q RC:$vd, (INSERT_SUBREG (IMPLICIT_DEF), subRC:$vj, subIdx), 48))>; + def : Pat<(VT (insert_subvector RC:$vd, subRC:$vj, (iPTR hiIdx))), + (VT (XVPERMI_Q RC:$vd, (INSERT_SUBREG (IMPLICIT_DEF), subRC:$vj, subIdx), 2))>; +} + +defm : subvector_subreg_lowering; +defm : subvector_subreg_lowering; +defm : subvector_subreg_lowering; +defm : subvector_subreg_lowering; +defm : subvector_subreg_lowering; +defm : subvector_subreg_lowering; + } // Predicates = [HasExtLASX] /// Intrinsic pattern diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 429d52fb6f230..ae73d8da79f8e 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ 
b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -446,11 +446,18 @@ bool NVPTXDAGToDAGISel::tryUNPACK_VECTOR(SDNode *N) { bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(SDNode *N) { SDValue Vector = N->getOperand(0); - // We only care about 16x2 as it's the only real vector type we - // need to deal with. MVT VT = Vector.getSimpleValueType(); - if (!Isv2x16VT(VT)) + if (!(NVPTX::isPackedVectorTy(VT) && VT.getVectorNumElements() == 2)) return false; + + unsigned Opcode; + if (VT.is32BitVector()) + Opcode = NVPTX::I32toV2I16; + else if (VT.is64BitVector()) + Opcode = NVPTX::I64toV2I32; + else + llvm_unreachable("Unhandled packed type"); + // Find and record all uses of this vector that extract element 0 or 1. SmallVector E0, E1; for (auto *U : Vector.getNode()->users()) { @@ -474,11 +481,11 @@ bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(SDNode *N) { if (E0.empty() || E1.empty()) return false; - // Merge (f16 extractelt(V, 0), f16 extractelt(V,1)) - // into f16,f16 SplitF16x2(V) + // Merge (EltTy extractelt(V, 0), EltTy extractelt(V,1)) + // into EltTy,EltTy Split[EltTy]x2(V) MVT EltVT = VT.getVectorElementType(); SDNode *ScatterOp = - CurDAG->getMachineNode(NVPTX::I32toV2I16, SDLoc(N), EltVT, EltVT, Vector); + CurDAG->getMachineNode(Opcode, SDLoc(N), EltVT, EltVT, Vector); for (auto *Node : E0) ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 0)); for (auto *Node : E1) @@ -994,6 +1001,7 @@ pickOpcodeForVT(MVT::SimpleValueType VT, std::optional Opcode_i8, case MVT::i32: case MVT::f32: return Opcode_i32; + case MVT::v2f32: case MVT::i64: case MVT::f64: return Opcode_i64; @@ -2147,16 +2155,9 @@ bool NVPTXScopes::empty() const { return Scopes.size() == 0; } ? NVPTX::CP_ASYNC_BULK_TENSOR_##dir##_##dim##_SHARED32_##mode##suffix \ : NVPTX::CP_ASYNC_BULK_TENSOR_##dir##_##dim##_##mode##suffix) -#define CP_ASYNC_BULK_TENSOR_OPCODE_S2G_IMPL(op, dim, mode, is_ch, is_s32) \ - (is_ch ? 
(CP_ASYNC_BULK_TENSOR_OPCODE(op, dim, mode, is_s32, _CH)) \ - : (CP_ASYNC_BULK_TENSOR_OPCODE(op, dim, mode, is_s32, ))) - -#define GET_CP_ASYNC_BULK_TENSOR_OPCODE_S2G(dim, mode, is_reduce, is_ch, \ - is_s32) \ - (is_reduce \ - ? (CP_ASYNC_BULK_TENSOR_OPCODE_S2G_IMPL(RED, dim, mode, is_ch, is_s32)) \ - : (CP_ASYNC_BULK_TENSOR_OPCODE_S2G_IMPL(S2G, dim, mode, is_ch, \ - is_s32))) +#define GET_CP_ASYNC_BULK_TENSOR_OPCODE_S2G_RED(dim, mode, is_ch, is_s32) \ + (is_ch ? (CP_ASYNC_BULK_TENSOR_OPCODE(RED, dim, mode, is_s32, _CH)) \ + : (CP_ASYNC_BULK_TENSOR_OPCODE(RED, dim, mode, is_s32, ))) #define GET_CP_ASYNC_BULK_TENSOR_OPCODE_G2S(dim, mode, is_mc, is_ch, is_s32) \ [&]() -> auto { \ @@ -2169,48 +2170,45 @@ bool NVPTXScopes::empty() const { return Scopes.size() == 0; } return CP_ASYNC_BULK_TENSOR_OPCODE(G2S, dim, mode, is_s32, ); \ }() -#define GET_CP_ASYNC_BULK_TENSOR_OPCODE_PREFETCH(dim, mode, is_ch) \ - (is_ch ? NVPTX::CP_ASYNC_BULK_TENSOR_PREFETCH_##dim##_##mode##_CH \ - : NVPTX::CP_ASYNC_BULK_TENSOR_PREFETCH_##dim##_##mode) - -static unsigned GetCpAsyncBulkTensorS2GOpcode(size_t Dim, bool IsShared32, - bool IsCacheHint, bool IsIm2Col, - bool IsReduce = false) { +static unsigned GetCpAsyncBulkTensorS2GReductionOpcode(size_t Dim, + bool IsShared32, + bool IsCacheHint, + bool IsIm2Col) { if (IsIm2Col) { switch (Dim) { case 3: - return GET_CP_ASYNC_BULK_TENSOR_OPCODE_S2G(3D, IM2COL, IsReduce, - IsCacheHint, IsShared32); + return GET_CP_ASYNC_BULK_TENSOR_OPCODE_S2G_RED(3D, IM2COL, IsCacheHint, + IsShared32); case 4: - return GET_CP_ASYNC_BULK_TENSOR_OPCODE_S2G(4D, IM2COL, IsReduce, - IsCacheHint, IsShared32); + return GET_CP_ASYNC_BULK_TENSOR_OPCODE_S2G_RED(4D, IM2COL, IsCacheHint, + IsShared32); case 5: - return GET_CP_ASYNC_BULK_TENSOR_OPCODE_S2G(5D, IM2COL, IsReduce, - IsCacheHint, IsShared32); + return GET_CP_ASYNC_BULK_TENSOR_OPCODE_S2G_RED(5D, IM2COL, IsCacheHint, + IsShared32); default: llvm_unreachable("Invalid Dimension in im2col mode for " - 
"GetCpAsyncBulkTensorS2GOpcode."); + "GetCpAsyncBulkTensorS2GReductionOpcode."); } } else { switch (Dim) { case 1: - return GET_CP_ASYNC_BULK_TENSOR_OPCODE_S2G(1D, TILE, IsReduce, - IsCacheHint, IsShared32); + return GET_CP_ASYNC_BULK_TENSOR_OPCODE_S2G_RED(1D, TILE, IsCacheHint, + IsShared32); case 2: - return GET_CP_ASYNC_BULK_TENSOR_OPCODE_S2G(2D, TILE, IsReduce, - IsCacheHint, IsShared32); + return GET_CP_ASYNC_BULK_TENSOR_OPCODE_S2G_RED(2D, TILE, IsCacheHint, + IsShared32); case 3: - return GET_CP_ASYNC_BULK_TENSOR_OPCODE_S2G(3D, TILE, IsReduce, - IsCacheHint, IsShared32); + return GET_CP_ASYNC_BULK_TENSOR_OPCODE_S2G_RED(3D, TILE, IsCacheHint, + IsShared32); case 4: - return GET_CP_ASYNC_BULK_TENSOR_OPCODE_S2G(4D, TILE, IsReduce, - IsCacheHint, IsShared32); + return GET_CP_ASYNC_BULK_TENSOR_OPCODE_S2G_RED(4D, TILE, IsCacheHint, + IsShared32); case 5: - return GET_CP_ASYNC_BULK_TENSOR_OPCODE_S2G(5D, TILE, IsReduce, - IsCacheHint, IsShared32); + return GET_CP_ASYNC_BULK_TENSOR_OPCODE_S2G_RED(5D, TILE, IsCacheHint, + IsShared32); default: - llvm_unreachable( - "Invalid Dimension in tile mode for GetCpAsyncBulkTensorS2GOpcode."); + llvm_unreachable("Invalid Dimension in tile mode for " + "GetCpAsyncBulkTensorS2GReductionOpcode."); } } } @@ -2257,39 +2255,6 @@ static unsigned GetCpAsyncBulkTensorG2SOpcode(size_t Dim, bool IsShared32, } } -static unsigned GetCpAsyncBulkTensorPrefetchOpcode(size_t Dim, bool IsCacheHint, - bool IsIm2Col) { - if (IsIm2Col) { - switch (Dim) { - case 3: - return GET_CP_ASYNC_BULK_TENSOR_OPCODE_PREFETCH(3D, IM2COL, IsCacheHint); - case 4: - return GET_CP_ASYNC_BULK_TENSOR_OPCODE_PREFETCH(4D, IM2COL, IsCacheHint); - case 5: - return GET_CP_ASYNC_BULK_TENSOR_OPCODE_PREFETCH(5D, IM2COL, IsCacheHint); - default: - llvm_unreachable("Invalid Dimension in im2col mode for " - "GetCpAsyncBulkTensorPrefetchOpcode."); - } - } else { - switch (Dim) { - case 1: - return GET_CP_ASYNC_BULK_TENSOR_OPCODE_PREFETCH(1D, TILE, IsCacheHint); - case 2: - return 
GET_CP_ASYNC_BULK_TENSOR_OPCODE_PREFETCH(2D, TILE, IsCacheHint); - case 3: - return GET_CP_ASYNC_BULK_TENSOR_OPCODE_PREFETCH(3D, TILE, IsCacheHint); - case 4: - return GET_CP_ASYNC_BULK_TENSOR_OPCODE_PREFETCH(4D, TILE, IsCacheHint); - case 5: - return GET_CP_ASYNC_BULK_TENSOR_OPCODE_PREFETCH(5D, TILE, IsCacheHint); - default: - llvm_unreachable("Invalid Dimension in tile mode for " - "GetCpAsyncBulkTensorPrefetchOpcode."); - } - } -} - static size_t GetDimsFromIntrinsic(unsigned IID) { switch (IID) { case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d: @@ -2354,52 +2319,6 @@ void NVPTXDAGToDAGISel::SelectCpAsyncBulkTensorG2SCommon(SDNode *N, ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops)); } -void NVPTXDAGToDAGISel::SelectCpAsyncBulkTensorS2GCommon(SDNode *N, - bool IsIm2Col) { - // We have {Chain, Intrinsic-ID} followed by the actual intrisic args: - // src, dst, dims{d0...dN}, cache_hint, cache_hint_flag - // NumOperands = {Chain, IID} + {Actual intrinsic args} - // = {2} + {4 + dims} - size_t NumOps = N->getNumOperands(); - size_t NumDims = NumOps - 6; - bool IsCacheHint = N->getConstantOperandVal(NumOps - 1) == 1; - size_t NumArgs = NumDims + (IsCacheHint ? 
3 : 2); // src, dst, cache_hint - - SDLoc DL(N); - SmallVector Ops(N->ops().slice(2, NumArgs)); - Ops.push_back(N->getOperand(0)); // Chain operand - - bool IsShared32 = - CurDAG->getDataLayout().getPointerSizeInBits(ADDRESS_SPACE_SHARED) == 32; - unsigned Opcode = - GetCpAsyncBulkTensorS2GOpcode(NumDims, IsShared32, IsCacheHint, IsIm2Col); - ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops)); -} - -void NVPTXDAGToDAGISel::SelectCpAsyncBulkTensorPrefetchCommon(SDNode *N, - bool IsIm2Col) { - // We have {Chain, Intrinsic-ID} followed by the actual intrisic args: - // {src, dims{d0...dN}, im2col_offsets{dims-2} - // cache_hint, cache_hint_flag} - // NumOperands = {Chain, IID} + {Actual intrinsic args} - // = {2} + {3 + dims + im2col_offsets} - size_t NumOps = N->getNumOperands(); - size_t NumDims = IsIm2Col ? GetDimsFromIntrinsic(N->getConstantOperandVal(1)) - : (NumOps - 5); - // Offsets is always 'NumDims - 2' and only for im2col mode - size_t NumOffsets = IsIm2Col ? (NumDims - 2) : 0; - bool IsCacheHint = N->getConstantOperandVal(NumOps - 1) == 1; - size_t NumArgs = NumDims + NumOffsets + (IsCacheHint ? 
2 : 1); - - SDLoc DL(N); - SmallVector Ops(N->ops().slice(2, NumArgs)); - Ops.push_back(N->getOperand(0)); // Chain operand - - unsigned Opcode = - GetCpAsyncBulkTensorPrefetchOpcode(NumDims, IsCacheHint, IsIm2Col); - ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops)); -} - void NVPTXDAGToDAGISel::SelectCpAsyncBulkTensorReduceCommon(SDNode *N, unsigned RedOp, bool IsIm2Col) { @@ -2419,8 +2338,8 @@ void NVPTXDAGToDAGISel::SelectCpAsyncBulkTensorReduceCommon(SDNode *N, bool IsShared32 = CurDAG->getDataLayout().getPointerSizeInBits(ADDRESS_SPACE_SHARED) == 32; - unsigned Opcode = GetCpAsyncBulkTensorS2GOpcode( - NumDims, IsShared32, IsCacheHint, IsIm2Col, /*IsReduce=*/true); + unsigned Opcode = GetCpAsyncBulkTensorS2GReductionOpcode( + NumDims, IsShared32, IsCacheHint, IsIm2Col); ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops)); } @@ -2540,18 +2459,6 @@ bool NVPTXDAGToDAGISel::tryIntrinsicVoid(SDNode *N) { switch (IID) { default: return false; - case Intrinsic::nvvm_cp_async_bulk_tensor_s2g_tile_1d: - case Intrinsic::nvvm_cp_async_bulk_tensor_s2g_tile_2d: - case Intrinsic::nvvm_cp_async_bulk_tensor_s2g_tile_3d: - case Intrinsic::nvvm_cp_async_bulk_tensor_s2g_tile_4d: - case Intrinsic::nvvm_cp_async_bulk_tensor_s2g_tile_5d: - SelectCpAsyncBulkTensorS2GCommon(N); - return true; - case Intrinsic::nvvm_cp_async_bulk_tensor_s2g_im2col_3d: - case Intrinsic::nvvm_cp_async_bulk_tensor_s2g_im2col_4d: - case Intrinsic::nvvm_cp_async_bulk_tensor_s2g_im2col_5d: - SelectCpAsyncBulkTensorS2GCommon(N, /*IsIm2Col=*/true); - return true; case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d: case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d: case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d: @@ -2564,18 +2471,6 @@ bool NVPTXDAGToDAGISel::tryIntrinsicVoid(SDNode *N) { case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d: SelectCpAsyncBulkTensorG2SCommon(N, /*IsIm2Col=*/true); return true; - case 
Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_tile_1d: - case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_tile_2d: - case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_tile_3d: - case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_tile_4d: - case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_tile_5d: - SelectCpAsyncBulkTensorPrefetchCommon(N); - return true; - case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_im2col_3d: - case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_im2col_4d: - case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_im2col_5d: - SelectCpAsyncBulkTensorPrefetchCommon(N, /*IsIm2Col=*/true); - return true; case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_1d: case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_2d: case Intrinsic::nvvm_cp_async_bulk_tensor_reduce_add_tile_3d: diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h index b314c4ccefe8b..88e5328ff69c5 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h @@ -92,8 +92,6 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel { void SelectV2I64toI128(SDNode *N); void SelectI128toV2I64(SDNode *N); void SelectCpAsyncBulkTensorG2SCommon(SDNode *N, bool IsIm2Col = false); - void SelectCpAsyncBulkTensorS2GCommon(SDNode *N, bool IsIm2Col = false); - void SelectCpAsyncBulkTensorPrefetchCommon(SDNode *N, bool IsIm2Col = false); void SelectCpAsyncBulkTensorReduceCommon(SDNode *N, unsigned RedOp, bool IsIm2Col = false); void SelectTcgen05Ld(SDNode *N, bool hasOffset = false); diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index bb0aeb493ed48..3d010e04824c5 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -196,11 +196,6 @@ static bool IsPTXVectorType(MVT VT) { } } -static bool Is16bitsType(MVT VT) { - return (VT.SimpleTy == MVT::f16 || VT.SimpleTy == 
MVT::bf16 || - VT.SimpleTy == MVT::i16); -} - // When legalizing vector loads/stores, this function is called, which does two // things: // 1. Determines Whether the vector is something we want to custom lower, @@ -223,6 +218,9 @@ getVectorLoweringShape(EVT VectorEVT, bool CanLowerTo256Bit) { const MVT EltVT = VectorVT.getVectorElementType(); const unsigned NumElts = VectorVT.getVectorNumElements(); + // The size of the PTX virtual register that holds a packed type. + unsigned PackRegSize; + // We only handle "native" vector sizes for now, e.g. <4 x double> is not // legal. We can (and should) split that into 2 stores of <2 x double> here // but I'm leaving that as a TODO for now. @@ -232,7 +230,6 @@ getVectorLoweringShape(EVT VectorEVT, bool CanLowerTo256Bit) { case MVT::v4i64: case MVT::v4f64: case MVT::v8i32: - case MVT::v8f32: // This is a "native" vector type iff the address space is global // and the target supports 256-bit loads/stores if (!CanLowerTo256Bit) @@ -241,10 +238,8 @@ getVectorLoweringShape(EVT VectorEVT, bool CanLowerTo256Bit) { case MVT::v2i8: case MVT::v2i32: case MVT::v2i64: - case MVT::v2f32: case MVT::v2f64: case MVT::v4i32: - case MVT::v4f32: // This is a "native" vector type return std::pair(NumElts, EltVT); case MVT::v16f16: // <8 x f16x2> @@ -268,22 +263,26 @@ getVectorLoweringShape(EVT VectorEVT, bool CanLowerTo256Bit) { case MVT::v8bf16: // <4 x bf16x2> case MVT::v8i16: // <4 x i16x2> case MVT::v16i8: // <4 x i8x4> - // This can be upsized into a "native" vector type. - // Despite vectors like v8i8, v16i8, v8i16 being within the bit-limit for - // total load/store size, PTX syntax only supports v2/v4. Thus, we can't use - // vectorized loads/stores with the actual element type for i8/i16 as that - // would require v8/v16 variants that do not exist. - // In order to load/store such vectors efficiently, here in Type - // Legalization, we split the vector into word-sized chunks (v2x16/v4i8). 
- // Later, we will lower to PTX as vectors of b32. + PackRegSize = 32; + break; + case MVT::v8f32: // <4 x f32x2> + if (!CanLowerTo256Bit) + return std::nullopt; + LLVM_FALLTHROUGH; + case MVT::v2f32: // <1 x f32x2> + case MVT::v4f32: // <2 x f32x2> + PackRegSize = 64; + break; + } - // Number of elements to pack in one word. - const unsigned NPerWord = 32 / EltVT.getSizeInBits(); + // If we reach here, then we can pack 2 or more elements into a single 32-bit + // or 64-bit PTX register and treat the vector as a new vector containing + // packed elements. - return std::pair(NumElts / NPerWord, MVT::getVectorVT(EltVT, NPerWord)); - } + // Number of elements to pack in one word. + const unsigned NPerReg = PackRegSize / EltVT.getSizeInBits(); - llvm_unreachable("All cases in switch should return."); + return std::pair(NumElts / NPerReg, MVT::getVectorVT(EltVT, NPerReg)); } /// ComputePTXValueVTs - For the given Type \p Ty, returns the set of primitive @@ -330,53 +329,49 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL, return; } + // Will split structs and arrays into member types, but will not split vector + // types. We do that manually below. ComputeValueVTs(TLI, DL, Ty, TempVTs, &TempOffsets, StartingOffset); - for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) { - EVT VT = TempVTs[i]; - uint64_t Off = TempOffsets[i]; - // Split vectors into individual elements, except for v2f16, which - // we will pass as a single scalar. + + for (auto [VT, Off] : zip(TempVTs, TempOffsets)) { + // Split vectors into individual elements that fit into registers. if (VT.isVector()) { unsigned NumElts = VT.getVectorNumElements(); EVT EltVT = VT.getVectorElementType(); - // We require power-of-2 sized vectors because + // Below we must maintain power-of-2 sized vectors because // TargetLoweringBase::getVectorTypeBreakdown() which is invoked in // ComputePTXValueVTs() cannot currently break down non-power-of-2 sized // vectors. 
- if ((Is16bitsType(EltVT.getSimpleVT())) && NumElts % 2 == 0 && - isPowerOf2_32(NumElts)) { - // Vectors with an even number of f16 elements will be passed to - // us as an array of v2f16/v2bf16 elements. We must match this so we - // stay in sync with Ins/Outs. - switch (EltVT.getSimpleVT().SimpleTy) { - case MVT::f16: - EltVT = MVT::v2f16; - break; - case MVT::bf16: - EltVT = MVT::v2bf16; - break; - case MVT::i16: - EltVT = MVT::v2i16; - break; - default: - llvm_unreachable("Unexpected type"); - } - NumElts /= 2; - } else if (EltVT.getSimpleVT() == MVT::i8 && - ((NumElts % 4 == 0 && isPowerOf2_32(NumElts)) || - NumElts == 3)) { - // v*i8 are formally lowered as v4i8 + + // If the element type belongs to one of the supported packed vector types + // then we can pack multiples of this element into a single register. + if (VT == MVT::v2i8) { + // We can pack 2 i8s into a single 16-bit register. We only do this for + // loads and stores, which is why we have a separate case for it. + EltVT = MVT::v2i8; + NumElts = 1; + } else if (VT == MVT::v3i8) { + // We can also pack 3 i8s into 32-bit register, leaving the 4th + // element undefined. EltVT = MVT::v4i8; - NumElts = (NumElts + 3) / 4; - } else if (EltVT.getSimpleVT() == MVT::i8 && NumElts == 2) { - // v2i8 is promoted to v2i16 NumElts = 1; - EltVT = MVT::v2i8; + } else if (NumElts > 1 && isPowerOf2_32(NumElts)) { + // Handle default packed types. 
+ for (MVT PackedVT : NVPTX::packed_types()) { + const auto NumEltsPerReg = PackedVT.getVectorNumElements(); + if (NumElts % NumEltsPerReg == 0 && + EltVT == PackedVT.getVectorElementType()) { + EltVT = PackedVT; + NumElts /= NumEltsPerReg; + break; + } + } } - for (unsigned j = 0; j != NumElts; ++j) { + + for (unsigned J : seq(NumElts)) { ValueVTs.push_back(EltVT); if (Offsets) - Offsets->push_back(Off + j * EltVT.getStoreSize()); + Offsets->push_back(Off + J * EltVT.getStoreSize()); } } else { ValueVTs.push_back(VT); @@ -594,6 +589,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, addRegisterClass(MVT::v2f16, &NVPTX::B32RegClass); addRegisterClass(MVT::bf16, &NVPTX::B16RegClass); addRegisterClass(MVT::v2bf16, &NVPTX::B32RegClass); + addRegisterClass(MVT::v2f32, &NVPTX::B64RegClass); // Conversion to/from FP16/FP16x2 is always legal. setOperationAction(ISD::BUILD_VECTOR, MVT::v2f16, Custom); @@ -630,6 +626,10 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i8, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i8, Custom); + // No support for these operations with v2f32. + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Expand); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f32, Expand); + // Custom conversions to/from v2i8. setOperationAction(ISD::BITCAST, MVT::v2i8, Custom); @@ -655,12 +655,16 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, // Operations not directly supported by NVPTX. for (MVT VT : {MVT::bf16, MVT::f16, MVT::v2bf16, MVT::v2f16, MVT::f32, - MVT::f64, MVT::i1, MVT::i8, MVT::i16, MVT::v2i16, MVT::v4i8, - MVT::i32, MVT::i64}) { + MVT::v2f32, MVT::f64, MVT::i1, MVT::i8, MVT::i16, MVT::v2i16, + MVT::v4i8, MVT::i32, MVT::i64}) { setOperationAction(ISD::SELECT_CC, VT, Expand); setOperationAction(ISD::BR_CC, VT, Expand); } + // Not directly supported. 
TLI would attempt to expand operations like + // FMINIMUM(v2f32) using invalid SETCC and VSELECT nodes. + setOperationAction(ISD::VSELECT, MVT::v2f32, Expand); + // Some SIGN_EXTEND_INREG can be done using cvt instruction. // For others we will expand to a SHL/SRA pair. setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Legal); @@ -857,6 +861,8 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, setBF16OperationAction(Op, MVT::bf16, Legal, Promote); if (getOperationAction(Op, MVT::bf16) == Promote) AddPromotedToType(Op, MVT::bf16, MVT::f32); + setOperationAction(Op, MVT::v2f32, + STI.hasF32x2Instructions() ? Legal : Expand); } // On SM80, we select add/mul/sub as fma to avoid promotion to float @@ -878,6 +884,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, setBF16OperationAction(ISD::FNEG, MVT::bf16, Legal, Expand); setBF16OperationAction(ISD::FNEG, MVT::v2bf16, Legal, Expand); + setOperationAction(ISD::FNEG, MVT::v2f32, Expand); // (would be) Library functions. // These map to conversion instructions for scalar FP types. @@ -888,6 +895,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, setOperationAction(Op, MVT::f64, Legal); setOperationAction(Op, MVT::v2f16, Expand); setOperationAction(Op, MVT::v2bf16, Expand); + setOperationAction(Op, MVT::v2f32, Expand); setBF16OperationAction(Op, MVT::bf16, Legal, Promote); if (getOperationAction(Op, MVT::bf16) == Promote) AddPromotedToType(Op, MVT::bf16, MVT::f32); @@ -903,6 +911,11 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, } } + // Expand v2f32 = fp_extend + setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Expand); + // Expand v2[b]f16 = fp_round v2f32 + setOperationAction(ISD::FP_ROUND, {MVT::v2bf16, MVT::v2f16}, Expand); + // sm_80 only has conversions between f32 and bf16. Custom lower all other // bf16 conversions. 
if (STI.getSmVersion() < 90 || STI.getPTXVersion() < 78) { @@ -940,14 +953,14 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, setOperationAction(Op, MVT::f16, Promote); setOperationAction(Op, MVT::f32, Legal); setOperationAction(Op, MVT::f64, Legal); - setOperationAction(Op, MVT::v2f16, Expand); - setOperationAction(Op, MVT::v2bf16, Expand); + setOperationAction(Op, {MVT::v2f16, MVT::v2bf16, MVT::v2f32}, Expand); setOperationAction(Op, MVT::bf16, Promote); AddPromotedToType(Op, MVT::bf16, MVT::f32); } setOperationAction(ISD::FREM, {MVT::f32, MVT::f64}, Custom); setOperationAction(ISD::FABS, {MVT::f32, MVT::f64}, Legal); + setOperationAction(ISD::FABS, MVT::v2f32, Expand); if (STI.getPTXVersion() >= 65) { setFP16OperationAction(ISD::FABS, MVT::f16, Legal, Promote); setFP16OperationAction(ISD::FABS, MVT::v2f16, Legal, Expand); @@ -969,6 +982,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, setBF16OperationAction(Op, MVT::bf16, Legal, Promote); if (getOperationAction(Op, MVT::bf16) == Promote) AddPromotedToType(Op, MVT::bf16, MVT::f32); + setOperationAction(Op, MVT::v2f32, Expand); } bool SupportsF32MinMaxNaN = STI.getSmVersion() >= 80 && STI.getPTXVersion() >= 70; @@ -978,6 +992,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, setFP16OperationAction(Op, MVT::v2f16, Legal, Expand); setBF16OperationAction(Op, MVT::bf16, Legal, Expand); setBF16OperationAction(Op, MVT::v2bf16, Legal, Expand); + setOperationAction(Op, MVT::v2f32, Expand); } // Custom lowering for inline asm with 128-bit operands @@ -990,6 +1005,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, // - bf16/bf16x2 (sm_90+, PTX 7.8+) // When f16/bf16 types aren't supported, they are promoted/expanded to f32. 
setOperationAction(ISD::FEXP2, MVT::f32, Legal); + setOperationAction(ISD::FEXP2, MVT::v2f32, Expand); setFP16OperationAction(ISD::FEXP2, MVT::f16, Legal, Promote); setFP16OperationAction(ISD::FEXP2, MVT::v2f16, Legal, Expand); setBF16OperationAction(ISD::FEXP2, MVT::bf16, Legal, Promote); @@ -1001,7 +1017,8 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, setOperationAction(ISD::FLOG2, MVT::f32, Legal); setOperationPromotedToType(ISD::FLOG2, MVT::f16, MVT::f32); setOperationPromotedToType(ISD::FLOG2, MVT::bf16, MVT::f32); - setOperationAction(ISD::FLOG2, {MVT::v2f16, MVT::v2bf16}, Expand); + setOperationAction(ISD::FLOG2, {MVT::v2f16, MVT::v2bf16, MVT::v2f32}, + Expand); } setOperationAction(ISD::ADDRSPACECAST, {MVT::i32, MVT::i64}, Custom); @@ -2074,7 +2091,7 @@ SDValue NVPTXTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const { SDValue NVPTXTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op->getValueType(0); - if (!(Isv2x16VT(VT) || VT == MVT::v4i8)) + if (!(NVPTX::isPackedVectorTy(VT) && VT.is32BitVector())) return Op; SDLoc DL(Op); @@ -2124,15 +2141,26 @@ SDValue NVPTXTargetLowering::LowerBUILD_VECTOR(SDValue Op, Value = Value.trunc(8); return Value.zext(32); }; - APInt Value; - if (Isv2x16VT(VT)) { - Value = GetOperand(Op, 0) | GetOperand(Op, 1).shl(16); - } else if (VT == MVT::v4i8) { - Value = GetOperand(Op, 0) | GetOperand(Op, 1).shl(8) | - GetOperand(Op, 2).shl(16) | GetOperand(Op, 3).shl(24); - } else { - llvm_unreachable("Unsupported type"); - } + + // Construct a 32-bit constant by shifting into place smaller values + // (elements of the vector type VT). 
+ // For example, if VT has 2 elements, then N == 2: + // ShiftAmount = 32 / N = 16 + // Value |= Op0 (b16) << 0 + // Value |= Op1 (b16) << 16 + // If N == 4: + // ShiftAmount = 32 / N = 8 + // Value |= Op0 (b8) << 0 + // Value |= Op1 (b8) << 8 + // Value |= Op2 (b8) << 16 + // Value |= Op3 (b8) << 24 + // ...etc + APInt Value(32, 0); + const unsigned NumElements = VT.getVectorNumElements(); + assert(32 % NumElements == 0 && "must evenly divide bit length"); + const unsigned ShiftAmount = 32 / NumElements; + for (unsigned ElementNo : seq(NumElements)) + Value |= GetOperand(Op, ElementNo).shl(ElementNo * ShiftAmount); SDValue Const = DAG.getConstant(Value, DL, MVT::i32); return DAG.getNode(ISD::BITCAST, DL, Op->getValueType(0), Const); } @@ -2160,7 +2188,8 @@ SDValue NVPTXTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, return Op; // Extract individual elements and select one of them. - assert(Isv2x16VT(VectorVT) && "Unexpected vector type."); + assert(NVPTX::isPackedVectorTy(VectorVT) && + VectorVT.getVectorNumElements() == 2 && "Unexpected vector type."); EVT EltVT = VectorVT.getVectorElementType(); SDLoc dl(Op.getNode()); @@ -3069,14 +3098,19 @@ SDValue NVPTXTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { MachinePointerInfo(SV)); } +static void replaceLoadVector(SDNode *N, SelectionDAG &DAG, + SmallVectorImpl &Results, + const NVPTXSubtarget &STI); + SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { if (Op.getValueType() == MVT::i1) return LowerLOADi1(Op, DAG); - // v2f16/v2bf16/v2i16/v4i8 are legal, so we can't rely on legalizer to handle - // unaligned loads and have to handle it here. EVT VT = Op.getValueType(); - if (Isv2x16VT(VT) || VT == MVT::v4i8) { + + if (NVPTX::isPackedVectorTy(VT)) { + // v2f32/v2f16/v2bf16/v2i16/v4i8 are legal, so we can't rely on legalizer to + // handle unaligned loads and have to handle it here. 
LoadSDNode *Load = cast(Op); EVT MemVT = Load->getMemoryVT(); if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), @@ -3120,17 +3154,19 @@ SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { if (VT == MVT::i1) return LowerSTOREi1(Op, DAG); - // v2f16 is legal, so we can't rely on legalizer to handle unaligned - // stores and have to handle it here. - if ((Isv2x16VT(VT) || VT == MVT::v4i8) && + // v2f32/v2f16/v2bf16/v2i16/v4i8 are legal, so we can't rely on legalizer to + // handle unaligned stores and have to handle it here. + if (NVPTX::isPackedVectorTy(VT) && !allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), VT, *Store->getMemOperand())) return expandUnalignedStore(Store, DAG); - // v2f16, v2bf16 and v2i16 don't need special handling. - if (Isv2x16VT(VT) || VT == MVT::v4i8) + // v2f16/v2bf16/v2i16 don't need special handling. + if (NVPTX::isPackedVectorTy(VT) && VT.is32BitVector()) return SDValue(); + // Lower store of any other vector type, including v2f32 as we want to break + // it apart since this is not a widely-supported type. return LowerSTOREVector(Op, DAG); } @@ -4920,7 +4956,7 @@ PerformFADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, return SDValue(); } -/// Fold extractelts into a load by increasing the number of return values. +/// Fold unpacking movs into a load by increasing the number of return values. /// /// ex: /// L: v2f16,ch = load

@@ -4929,6 +4965,7 @@ PerformFADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, /// use(a, b) /// /// ...is turned into... +/// /// L: f16,f16,ch = LoadV2

/// use(L:0, L:1) static SDValue @@ -4937,10 +4974,13 @@ combineUnpackingMovIntoLoad(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { if (!DCI.isAfterLegalizeDAG()) return SDValue(); - EVT ElemVT = N->getValueType(0); - if (!Isv2x16VT(ElemVT)) + EVT ElementVT = N->getValueType(0); + // Avoid non-packed types and v4i8 + if (!NVPTX::isPackedVectorTy(ElementVT) || ElementVT == MVT::v4i8) return SDValue(); + SmallVector DeadCopyToRegs; + // Check whether all outputs are either used by an extractelt or are // glue/chain nodes if (!all_of(N->uses(), [&](SDUse &U) { @@ -4968,6 +5008,12 @@ combineUnpackingMovIntoLoad(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { return !U.getUser()->use_empty(); } + // Handle CopyToReg nodes that will become dead after our replacement + if (U.getUser()->getOpcode() == ISD::CopyToReg) { + DeadCopyToRegs.push_back(U.getUser()); + return true; + } + // Otherwise, this use prevents us from splitting a value. return false; })) @@ -5000,6 +5046,13 @@ combineUnpackingMovIntoLoad(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { Opcode = NVPTXISD::LoadV4; break; case NVPTXISD::LoadV4: + // V8 is only supported for f32. Don't forget, we're not changing the load + // size here. This is already a 256-bit load. 
+ if (ElementVT != MVT::v2f32) + return SDValue(); + OldNumOutputs = 4; + Opcode = NVPTXISD::LoadV8; + break; case NVPTXISD::LoadV8: // PTX doesn't support the next doubling of outputs return SDValue(); @@ -5007,7 +5060,7 @@ combineUnpackingMovIntoLoad(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { // the non-glue, non-chain outputs in the new load const unsigned NewNumOutputs = OldNumOutputs * 2; - SmallVector NewVTs(NewNumOutputs, ElemVT.getVectorElementType()); + SmallVector NewVTs(NewNumOutputs, ElementVT.getVectorElementType()); // add remaining chain and glue values NewVTs.append(LD->value_begin() + OldNumOutputs, LD->value_end()); @@ -5022,23 +5075,28 @@ combineUnpackingMovIntoLoad(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { SmallVector Results; for (unsigned I : seq(OldNumOutputs)) Results.push_back(DCI.DAG.getBuildVector( - ElemVT, DL, {NewLoad.getValue(I * 2), NewLoad.getValue(I * 2 + 1)})); + ElementVT, DL, {NewLoad.getValue(I * 2), NewLoad.getValue(I * 2 + 1)})); // Add remaining chain and glue nodes for (unsigned I : seq(NewLoad->getNumValues() - NewNumOutputs)) Results.push_back(NewLoad.getValue(NewNumOutputs + I)); + // Remove dead CopyToReg nodes by folding them into the chain they reference + for (SDNode *CTR : DeadCopyToRegs) + DCI.CombineTo(CTR, CTR->getOperand(0)); + return DCI.DAG.getMergeValues(Results, DL); } -/// Fold a packing mov into a store. +/// Fold packing movs into a store. /// /// ex: -/// v: v2f16 = BUILD_VECTOR a:f16, b:f16 -/// StoreRetval v +/// v1: v2f16 = BUILD_VECTOR a:f16, b:f16 +/// v2: v2f16 = BUILD_VECTOR c:f16, d:f16 +/// StoreV2 v1, v2 /// /// ...is turned into... /// -/// StoreRetvalV2 a:f16, b:f16 +/// StoreV4 a, b, c, d static SDValue combinePackingMovIntoStore(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, unsigned Front, unsigned Back) { @@ -5050,7 +5108,8 @@ static SDValue combinePackingMovIntoStore(SDNode *N, // Get the type of the operands being stored. 
EVT ElementVT = N->getOperand(Front).getValueType(); - if (!Isv2x16VT(ElementVT)) + // Avoid non-packed types and v4i8 + if (!NVPTX::isPackedVectorTy(ElementVT) || ElementVT == MVT::v4i8) return SDValue(); auto *ST = cast(N); @@ -5077,6 +5136,12 @@ static SDValue combinePackingMovIntoStore(SDNode *N, Opcode = NVPTXISD::StoreV4; break; case NVPTXISD::StoreV4: + // V8 is only supported for f32. Don't forget, we're not changing the store + // size here. This is already a 256-bit store. + if (ElementVT != MVT::v2f32) + return SDValue(); + Opcode = NVPTXISD::StoreV8; + break; case NVPTXISD::StoreParamV4: case NVPTXISD::StoreV8: // PTX doesn't support the next doubling of operands @@ -5606,10 +5671,10 @@ static SDValue PerformEXTRACTCombine(SDNode *N, IsPTXVectorType(VectorVT.getSimpleVT())) return SDValue(); // Native vector loads already combine nicely w/ // extract_vector_elt. - // Don't mess with singletons or v2*16, v4i8 and v8i8 types, we already - // handle them OK. - if (VectorVT.getVectorNumElements() == 1 || Isv2x16VT(VectorVT) || - VectorVT == MVT::v4i8 || VectorVT == MVT::v8i8) + // Don't mess with singletons or packed types (v2f32, v2*16, v4i8 and v8i8), + // we already handle them OK. + if (VectorVT.getVectorNumElements() == 1 || + NVPTX::isPackedVectorTy(VectorVT) || VectorVT == MVT::v8i8) return SDValue(); // Don't mess with undef values as sra may be simplified to 0, not undef. 
@@ -5682,7 +5747,10 @@ static SDValue PerformVSELECTCombine(SDNode *N, static SDValue PerformBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { auto VT = N->getValueType(0); - if (!DCI.isAfterLegalizeDAG() || !Isv2x16VT(VT)) + if (!DCI.isAfterLegalizeDAG() || + // only process v2*16 types + !(NVPTX::isPackedVectorTy(VT) && VT.is32BitVector() && + VT.getVectorNumElements() == 2)) return SDValue(); auto Op0 = N->getOperand(0); @@ -5822,7 +5890,7 @@ static void ReplaceBITCAST(SDNode *Node, SelectionDAG &DAG, } /// ReplaceVectorLoad - Convert vector loads into multi-output scalar loads. -static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, +static void replaceLoadVector(SDNode *N, SelectionDAG &DAG, SmallVectorImpl &Results, const NVPTXSubtarget &STI) { LoadSDNode *LD = cast(N); @@ -6146,7 +6214,7 @@ void NVPTXTargetLowering::ReplaceNodeResults( ReplaceBITCAST(N, DAG, Results); return; case ISD::LOAD: - ReplaceLoadVector(N, DAG, Results, STI); + replaceLoadVector(N, DAG, Results, STI); return; case ISD::INTRINSIC_W_CHAIN: ReplaceINTRINSIC_W_CHAIN(N, DAG, Results); diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index dcdebb81e3c86..db6b411509e93 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -131,6 +131,7 @@ def hasHWROT32 : Predicate<"Subtarget->hasHWROT32()">; def noHWROT32 : Predicate<"!Subtarget->hasHWROT32()">; def hasDotInstructions : Predicate<"Subtarget->hasDotInstructions()">; def hasTcgen05Instructions : Predicate<"Subtarget->hasTcgen05Instructions()">; +def hasF32x2Instructions : Predicate<"Subtarget->hasF32x2Instructions()">; class hasPTX: Predicate<"Subtarget->getPTXVersion() >= " # version>; class hasSM: Predicate<"Subtarget->getSmVersion() >= " # version>; @@ -199,6 +200,7 @@ def BF16RT : RegTyInfo; def F16X2RT : RegTyInfo; def BF16X2RT : RegTyInfo; +def F32X2RT : RegTyInfo; // This class provides a basic wrapper around an 
NVPTXInst that abstracts the @@ -395,6 +397,13 @@ multiclass F3 { op_str # "$ftz.f16", [(set f16:$dst, (op_pat f16:$a, f16:$b))]>, Requires<[useFP16Math]>; + def f32x2rr : + BasicFlagsNVPTXInst<(outs B64:$dst), + (ins B64:$a, B64:$b), + (ins FTZFlag:$ftz), + op_str # "$ftz.f32x2", + [(set v2f32:$dst, (op_pat v2f32:$a, v2f32:$b))]>, + Requires<[hasF32x2Instructions]>; def f16x2rr : BasicFlagsNVPTXInst<(outs B32:$dst), (ins B32:$a, B32:$b), @@ -747,6 +756,9 @@ def : Pat<(vt (select i1:$p, vt:$a, vt:$b)), (SELP_b32rr $a, $b, $p)>; } +def : Pat<(v2f32 (select i1:$p, v2f32:$a, v2f32:$b)), + (SELP_b64rr $a, $b, $p)>; + //----------------------------------- // Test Instructions //----------------------------------- @@ -1218,6 +1230,7 @@ defm FMA_F16x2 : FMA; defm FMA_BF16 : FMA; defm FMA_BF16x2 : FMA; defm FMA_F32 : FMA; +defm FMA_F32x2 : FMA; defm FMA_F64 : FMA; // sin/cos @@ -2302,6 +2315,7 @@ def : Pat<(i32 (trunc (sra i64:$s, (i32 32)))), (I64toI32H $s)>; def: Pat<(i32 (sext (extractelt v2i16:$src, 0))), (CVT_INREG_s32_s16 $src)>; +// Handle extracting one element from the pair (32-bit types) foreach vt = [v2f16, v2bf16, v2i16] in { def : Pat<(extractelt vt:$src, 0), (I32toI16L_Sink $src)>, Requires<[hasPTX<71>]>; def : Pat<(extractelt vt:$src, 1), (I32toI16H_Sink $src)>, Requires<[hasPTX<71>]>; @@ -2313,10 +2327,21 @@ foreach vt = [v2f16, v2bf16, v2i16] in { (V2I16toI32 $a, $b)>; } +// Same thing for the 64-bit type v2f32. 
+foreach vt = [v2f32] in { + def : Pat<(extractelt vt:$src, 0), (I64toI32L_Sink $src)>, Requires<[hasPTX<71>]>; + def : Pat<(extractelt vt:$src, 1), (I64toI32H_Sink $src)>, Requires<[hasPTX<71>]>; + + def : Pat<(extractelt vt:$src, 0), (I64toI32L $src)>; + def : Pat<(extractelt vt:$src, 1), (I64toI32H $src)>; + + def : Pat<(vt (build_vector vt.ElementType:$a, vt.ElementType:$b)), + (V2I32toI64 $a, $b)>; +} + def: Pat<(v2i16 (scalar_to_vector i16:$a)), (CVT_u32_u16 $a, CvtNONE)>; - def nvptx_build_vector : SDNode<"NVPTXISD::BUILD_VECTOR", SDTypeProfile<1, 2, []>, []>; def : Pat<(i64 (nvptx_build_vector i32:$a, i32:$b)), @@ -2467,7 +2492,7 @@ def : Pat<(brcond i32:$a, bb:$target), // SelectionDAGBuilder::visitSWitchCase() will invert the condition of a // conditional branch if the target block is the next block so that the code -// can fall through to the target block. The invertion is done by 'xor +// can fall through to the target block. The inversion is done by 'xor // condition, 1', which will be translated to (setne condition, -1). Since ptx // supports '@!pred bra target', we should use it. 
def : Pat<(brcond (i1 (setne i1:$a, -1)), bb:$target), @@ -2707,4 +2732,4 @@ let Predicates = [useFP16Math, hasPTX<70>, hasSM<80>] in { let Predicates = [hasBF16Math, hasPTX<70>, hasSM<80>] in { def FMARELU_BF16 : FMARELUInst; def FMARELU_BF16X2 : FMARELUInst; -} \ No newline at end of file +} diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index d840324ce8238..93827be5c2811 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -560,6 +560,30 @@ defm CP_ASYNC_BULK_PREFETCH_CH : CP_ASYNC_BULK_PREFETCH_INTR; // TMA Async Bulk Tensor Copy Functions //------------------------------------- +class TMA_DIMS_UTIL { + // For example, when 'dim' is 3, this generates: + // an ins_dag: B32:$d0, B32:$d1, B32:$d2 + // with base_str: $d0, $d1, $d2 + dag ins_dag = !dag(ins, !listsplat(B32, dim), !foreach(i, !range(dim), "d" # i)); + string base_str = !interleave(!foreach(i, !range(dim), "$d" # i), ", "); +} + +class TMA_IM2COL_UTIL { + // For im2col_w/w_128 modes, number of offsets is always 2. + // For im2col mode, offsets is (dim - 2). + // For non-im2col modes (i.e. tile) there are no offsets. 
+ int offsets = !cond( + !eq(mode, "im2col") : !sub(dim, 2), + !eq(mode, "im2col_w") : 2, + !eq(mode, "im2col_w_128") : 2, + true : 0); // for all other modes + + dag ins_dag = !if(!gt(offsets, 0), + !dag(ins, !listsplat(B16, offsets), !foreach(i, !range(offsets), "im2col" # i)), + (ins)); + string base_str = !interleave(!foreach(i, !range(offsets), "$im2col" # i), ", "); +} + // From Global to Shared memory (G2S) class G2S_STRINGS { string prefix = "cp.async.bulk.tensor"; @@ -583,8 +607,8 @@ def CTAGroupFlags : Operand { } multiclass CP_ASYNC_BULK_TENSOR_G2S_INTR { - defvar dims_dag = !dag(ins, !listsplat(B32, dim), !foreach(i, !range(dim), "d" # i)); - defvar dims_str = !interleave(!foreach(i, !range(dim), "$d" # i), ", "); + defvar dims_dag = TMA_DIMS_UTIL.ins_dag; + defvar dims_str = TMA_DIMS_UTIL.base_str; defvar asm_str_default = "$cg [$dst], [$tmap, {{" # dims_str # "}}], [$mbar]"; defvar rc = !if(is_shared32, B32, B64); @@ -628,39 +652,46 @@ foreach dim = [1, 2, 3, 4, 5] in { } } -// From Shared to Global memory (S2G) -class S2G_STRINGS { - string dir = "global.shared::cta"; - string completion = "bulk_group"; - string inst_name = !if(is_reduce, "cp.reduce", "cp") - # ".async.bulk.tensor" - # "." # dim # "d" - # "." # dir - # "." # mode - # "." 
# completion - # !if(ch, ".L2::cache_hint", ""); - string intr_name = "CP_ASYNC_BULK_TENSOR_" - # !if(is_reduce, "RED_", "S2G_") - # dim # "D" - # !if(is_shared32, "_SHARED32", "") - # !if(!eq(mode, "tile"), "_TILE", "_IM2COL"); -} - -multiclass CP_ASYNC_BULK_TENSOR_S2G_INTR { - defvar dims_dag = !dag(ins, !listsplat(B32, dim), !foreach(i, !range(dim), "d" # i)); - defvar dims_str = !interleave(!foreach(i, !range(dim), "$d" # i), ", "); +multiclass TMA_TENSOR_S2G_INTR pred = [hasPTX<80>, hasSM<90>]> { + defvar dims_dag = TMA_DIMS_UTIL.ins_dag; + defvar dims_str = TMA_DIMS_UTIL.base_str; defvar asm_str = " [$tmap, {{" # dims_str # "}}], [$src]"; - defvar rc = !if(shared32, B32, B64); + + defvar intr = !cast( + "int_nvvm_cp_async_bulk_tensor_s2g_" # mode # "_" # dim # d); + defvar intr_dag = !con((intr addr:$src, B64:$tmap), + !setdagop(dims_dag, intr), + (intr B64:$ch, 0)); + defvar intr_dag_with_ch = !con((intr addr:$src, B64:$tmap), + !setdagop(dims_dag, intr), + (intr B64:$ch, -1)); + + // For im2col mode, the actual asm_str is "im2col_no_offs" + defvar mode_asm_str = !if(!eq(mode, "im2col"), + "im2col_no_offs", mode); + defvar prefix = "cp.async.bulk.tensor" + # "." # dim # "d" + # ".global.shared::cta" + # "." 
# mode_asm_str + # ".bulk_group"; def "" : NVPTXInst<(outs), - !con((ins rc:$src, B64:$tmap), dims_dag), - !strconcat(S2G_STRINGS.inst_name, asm_str, ";"), []>, - Requires<[hasPTX<80>, hasSM<90>]>; + !con((ins ADDR:$src, B64:$tmap), dims_dag, (ins B64:$ch)), + prefix # asm_str # ";", + [intr_dag]>, + Requires; def _CH : NVPTXInst<(outs), - !con((ins rc:$src, B64:$tmap), dims_dag, (ins B64:$ch)), - !strconcat(S2G_STRINGS.inst_name, asm_str, ", $ch;"), []>, - Requires<[hasPTX<80>, hasSM<90>]>; + !con((ins ADDR:$src, B64:$tmap), dims_dag, (ins B64:$ch)), + prefix # ".L2::cache_hint" # asm_str # ", $ch;", + [intr_dag_with_ch]>, + Requires; +} +foreach dim = 1...5 in { + foreach mode = !if(!ge(dim, 3), ["tile", "im2col"], ["tile"]) in { + defvar suffix = !toupper(mode) # "_" # dim # D; + defm TMA_TENSOR_S2G_ # suffix : TMA_TENSOR_S2G_INTR; + } } def TMAReductionFlags : Operand { @@ -669,13 +700,16 @@ def TMAReductionFlags : Operand { // TMA Copy from Shared to Global memory with Reduction multiclass CP_ASYNC_BULK_TENSOR_REDUCE_INTR { - defvar dims_dag = !dag(ins, !listsplat(B32, dim), !foreach(i, !range(dim), "d" # i)); - defvar dims_str = !interleave(!foreach(i, !range(dim), "$d" # i), ", "); + defvar dims_dag = TMA_DIMS_UTIL.ins_dag; + defvar dims_str = TMA_DIMS_UTIL.base_str; defvar asm_str = " [$tmap, {{" # dims_str # "}}], [$src]"; defvar rc = !if(shared32, B32, B64); + // For im2col mode, the actual asm_str is "im2col_no_offs" + defvar mode_asm_str = !if(!eq(mode, "im2col"), + "im2col_no_offs", mode); defvar prefix = "cp.reduce.async.bulk.tensor" # "." # dim # "d" # ".global.shared::cta"; - defvar suffix = "." # mode # ".bulk_group"; + defvar suffix = "." 
# mode_asm_str # ".bulk_group"; def "" : NVPTXInst<(outs), !con((ins rc:$src, B64:$tmap), dims_dag, (ins TMAReductionFlags:$red_op)), @@ -689,58 +723,63 @@ multiclass CP_ASYNC_BULK_TENSOR_REDUCE_INTR foreach dim = [1, 2, 3, 4, 5] in { foreach shared32 = [true, false] in { - foreach mode = !if(!ge(dim, 3), ["tile", "im2col_no_offs"], ["tile"]) in { - defm S2G_STRINGS.intr_name : - CP_ASYNC_BULK_TENSOR_S2G_INTR; - defm S2G_STRINGS.intr_name : + foreach mode = !if(!ge(dim, 3), ["tile", "im2col"], ["tile"]) in { + defvar suffix = dim # "D" + # !if(shared32, "_SHARED32", "") + # "_" # !toupper(mode); + defm CP_ASYNC_BULK_TENSOR_RED_ # suffix : CP_ASYNC_BULK_TENSOR_REDUCE_INTR; } } } // TMA Prefetch from Global memory to L2 cache -class PREFETCH_STRINGS { - string prefix = "cp.async.bulk.prefetch.tensor"; - string dir = "L2.global"; - string inst_name = prefix +multiclass TMA_TENSOR_PREFETCH_INTR pred = [hasPTX<80>, hasSM<90>]> { + defvar dims_dag = TMA_DIMS_UTIL.ins_dag; + defvar dims_str = TMA_DIMS_UTIL.base_str; + defvar asm_str_base = " [$tmap, {{" # dims_str # "}}]"; + + defvar im2col_dag = TMA_IM2COL_UTIL.ins_dag; + defvar im2col_str = TMA_IM2COL_UTIL.base_str; + defvar asm_str = !if(!empty(im2col_str), + asm_str_base, + asm_str_base # ", {{" # im2col_str # "}}"); + + defvar inst_name = "cp.async.bulk.prefetch.tensor" # "." # dim # "d" - # "." # dir - # "." 
# mode - # !if(ch, ".L2::cache_hint", ""); - string intr_name = "CP_ASYNC_BULK_TENSOR_PREFETCH_" - # dim # "D" - # !if(!eq(mode, "tile"), "_TILE", "_IM2COL"); -} - -multiclass CP_ASYNC_BULK_TENSOR_PREFETCH_INTR { - defvar dims_dag = !dag(ins, !listsplat(B32, dim), !foreach(i, !range(dim), "d" # i)); - defvar dims_str = !interleave(!foreach(i, !range(dim), "$d" # i), ", "); - defvar asm_str_default = " [$tmap, {{" # dims_str # "}}]"; - - defvar num_im2col = !if(!ge(dim, 3), !add(dim, -2), 0); - defvar im2col_dag = !if(!eq(mode, "im2col"), - !dag(ins, !listsplat(B16, num_im2col), !foreach(i, !range(num_im2col), "im2col" # i)), - (ins)); - defvar im2col_str = !interleave(!foreach(i, !range(num_im2col), "$im2col" # i), ", "); - defvar im2col_asm_str = ", {{" # im2col_str # "}}"; - - defvar asm_str = !if(!eq(mode, "im2col"), - !strconcat(asm_str_default, im2col_asm_str), asm_str_default); - - def "" : NVPTXInst<(outs), - !con((ins B64:$tmap), dims_dag, im2col_dag), - !strconcat(PREFETCH_STRINGS.inst_name, asm_str, ";"), []>, - Requires<[hasPTX<80>, hasSM<90>]>; - def _CH : NVPTXInst<(outs), - !con((ins B64:$tmap), dims_dag, im2col_dag, (ins B64:$ch)), - !strconcat(PREFETCH_STRINGS.inst_name, asm_str, ", $ch;"), []>, - Requires<[hasPTX<80>, hasSM<90>]>; -} - -foreach dim = [1, 2, 3, 4, 5] in { + # "." # "L2.global" + # "." 
# mode; + + defvar intr = !cast( + "int_nvvm_cp_async_bulk_tensor_prefetch_" # mode # "_" # dim # d); + + defvar ins_dag = !con((ins B64:$tmap), + dims_dag, + im2col_dag, + (ins B64:$ch)); + defvar intr_dag = !con((intr B64:$tmap), + !setdagop(dims_dag, intr), + !setdagop(im2col_dag, intr), + (intr B64:$ch, 0)); + defvar intr_dag_with_ch = !con((intr B64:$tmap), + !setdagop(dims_dag, intr), + !setdagop(im2col_dag, intr), + (intr B64:$ch, -1)); + + def "" : NVPTXInst<(outs), ins_dag, + inst_name # asm_str # ";", + [intr_dag]>, + Requires; + def _CH : NVPTXInst<(outs), ins_dag, + inst_name # ".L2::cache_hint" # asm_str # ", $ch;", + [intr_dag_with_ch]>, + Requires; +} +foreach dim = 1...5 in { foreach mode = !if(!ge(dim, 3), ["tile", "im2col"], ["tile"]) in { - defm PREFETCH_STRINGS.intr_name : - CP_ASYNC_BULK_TENSOR_PREFETCH_INTR; + defvar suffix = !toupper(mode) # "_" # dim # D; + defm TMA_TENSOR_PF_ # suffix : TMA_TENSOR_PREFETCH_INTR; } } diff --git a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td index 9fac97d97c609..d40886a56d6a4 100644 --- a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td @@ -53,14 +53,17 @@ foreach i = 0...31 in { } //===----------------------------------------------------------------------===// -// Register classes +// Register classes. +// NOTE: if you add new vector types for a register, you must update +// NVPTX::packed_types() in NVPTXUtilities.h accordingly! 
//===----------------------------------------------------------------------===// def B1 : NVPTXRegClass<[i1], 8, (add (sequence "P%u", 0, 4))>; def B16 : NVPTXRegClass<[i16, f16, bf16], 16, (add (sequence "RS%u", 0, 4))>; def B32 : NVPTXRegClass<[i32, v2f16, v2bf16, v2i16, v4i8, f32], 32, (add (sequence "R%u", 0, 4), VRFrame32, VRFrameLocal32)>; -def B64 : NVPTXRegClass<[i64, f64], 64, (add (sequence "RL%u", 0, 4), VRFrame64, VRFrameLocal64)>; +def B64 : NVPTXRegClass<[i64, v2f32, f64], 64, (add (sequence "RL%u", 0, 4), + VRFrame64, VRFrameLocal64)>; // 128-bit regs are not defined as general regs in NVPTX. They are used for inlineASM only. def B128 : NVPTXRegClass<[i128], 128, (add (sequence "RQ%u", 0, 4))>; diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h index 8810feaee297a..81af55edccadb 100644 --- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h +++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h @@ -116,6 +116,10 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo { return HasTcgen05 && PTXVersion >= 86; } + // f32x2 instructions in Blackwell family + bool hasF32x2Instructions() const { + return SmVersion >= 100 && PTXVersion >= 86; + } // TMA G2S copy with cta_group::1/2 support bool hasCpAsyncBulkTensorCTAGroupSupport() const { diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h index aa7850acbd64a..9a6e261c811a0 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h +++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h @@ -129,8 +129,9 @@ class NVPTXTTIImpl final : public BasicTTIImplBase { Insert = false; } } - if (Insert && Isv2x16VT(VT)) { - // Can be built in a single mov + if (Insert && NVPTX::isPackedVectorTy(VT) && VT.is32BitVector()) { + // Can be built in a single 32-bit mov (64-bit regs are emulated in SASS + // with 2x 32-bit regs) Cost += 1; Insert = false; } diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.h 
b/llvm/lib/Target/NVPTX/NVPTXUtilities.h index e792e441e49e6..88d3eefcc521e 100644 --- a/llvm/lib/Target/NVPTX/NVPTXUtilities.h +++ b/llvm/lib/Target/NVPTX/NVPTXUtilities.h @@ -85,16 +85,32 @@ inline unsigned promoteScalarArgumentSize(unsigned size) { bool shouldEmitPTXNoReturn(const Value *V, const TargetMachine &TM); -inline bool Isv2x16VT(EVT VT) { - return (VT == MVT::v2f16 || VT == MVT::v2bf16 || VT == MVT::v2i16); -} - inline bool shouldPassAsArray(Type *Ty) { return Ty->isAggregateType() || Ty->isVectorTy() || Ty->getScalarSizeInBits() == 128 || Ty->isHalfTy() || Ty->isBFloatTy(); } namespace NVPTX { +// Returns a list of vector types that we prefer to fit into a single PTX +// register. NOTE: This must be kept in sync with the register classes +// defined in NVPTXRegisterInfo.td. +inline auto packed_types() { + static const auto PackedTypes = {MVT::v4i8, MVT::v2f16, MVT::v2bf16, + MVT::v2i16, MVT::v2f32}; + return PackedTypes; +} + +// Checks if the type VT can fit into a single register. +inline bool isPackedVectorTy(EVT VT) { + return any_of(packed_types(), [VT](EVT OVT) { return OVT == VT; }); +} + +// Checks if two or more of the type ET can fit into a single register. 
+inline bool isPackedElementTy(EVT ET) { + return any_of(packed_types(), + [ET](EVT OVT) { return OVT.getVectorElementType() == ET; }); +} + inline std::string getValidPTXIdentifier(StringRef Name) { std::string ValidName; ValidName.reserve(Name.size() + 4); diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt index e32d6eab3b977..47329b2c2f4d2 100644 --- a/llvm/lib/Target/RISCV/CMakeLists.txt +++ b/llvm/lib/Target/RISCV/CMakeLists.txt @@ -45,6 +45,7 @@ add_llvm_target(RISCVCodeGen RISCVInsertVSETVLI.cpp RISCVInsertWriteVXRM.cpp RISCVInstrInfo.cpp + RISCVInterleavedAccess.cpp RISCVISelDAGToDAG.cpp RISCVISelLowering.cpp RISCVLandingPadSetup.cpp diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 7e83abe013063..6f31e889a2555 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -438,7 +438,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, } if (!Subtarget.useCCMovInsn() && !Subtarget.hasVendorXTHeadCondMov() && - !Subtarget.hasVendorXqcicm()) + !Subtarget.hasVendorXqcicm() && !Subtarget.hasVendorXqcics()) setOperationAction(ISD::SELECT, XLenVT, Custom); if (Subtarget.hasVendorXqcia() && !Subtarget.is64Bit()) { @@ -5098,12 +5098,13 @@ static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT, return convertFromScalableVector(VT, Vec, DAG, Subtarget); } -// Match a mask which "spreads" the leading elements of a vector evenly -// across the result. Factor is the spread amount, and Index is the -// offset applied. (on success, Index < Factor) This is the inverse -// of a deinterleave with the same Factor and Index. This is analogous -// to an interleave, except that all but one lane is undef. -static bool isSpreadMask(ArrayRef Mask, unsigned Factor, unsigned &Index) { +/// Match a mask which "spreads" the leading elements of a vector evenly +/// across the result. 
Factor is the spread amount, and Index is the +/// offset applied. (on success, Index < Factor) This is the inverse +/// of a deinterleave with the same Factor and Index. This is analogous +/// to an interleave, except that all but one lane is undef. +bool RISCVTargetLowering::isSpreadMask(ArrayRef Mask, unsigned Factor, + unsigned &Index) { SmallVector LaneIsUndef(Factor, true); for (unsigned i = 0; i < Mask.size(); i++) LaneIsUndef[i % Factor] &= (Mask[i] == -1); @@ -6082,7 +6083,7 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8); for (unsigned Factor = 4; Factor <= MaxFactor; Factor <<= 1) { unsigned Index; - if (isSpreadMask(Mask, Factor, Index)) { + if (RISCVTargetLowering::isSpreadMask(Mask, Factor, Index)) { MVT NarrowVT = MVT::getVectorVT(VT.getVectorElementType(), NumElts / Factor); SDValue Src = DAG.getExtractSubvector(DL, NarrowVT, V1, 0); @@ -15993,6 +15994,10 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG, return SDValue(); uint64_t MulAmt = CNode->getZExtValue(); + // Don't do this if the Xqciac extension is enabled and the MulAmt in simm12. + if (Subtarget.hasVendorXqciac() && isInt<12>(MulAmt)) + return SDValue(); + const bool HasShlAdd = Subtarget.hasStdExtZba() || Subtarget.hasVendorXTHeadBa() || Subtarget.hasVendorXAndesPerf(); @@ -23752,6 +23757,10 @@ bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT, auto *ConstNode = cast(C); const APInt &Imm = ConstNode->getAPIntValue(); + // Don't do this if the Xqciac extension is enabled and the Imm in simm12. + if (Subtarget.hasVendorXqciac() && Imm.isSignedIntN(12)) + return false; + // Break the MUL to a SLLI and an ADD/SUB. 
if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() || (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2()) @@ -24080,39 +24089,6 @@ Value *RISCVTargetLowering::getIRStackGuard(IRBuilderBase &IRB) const { return TargetLowering::getIRStackGuard(IRB); } -bool RISCVTargetLowering::isLegalInterleavedAccessType( - VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace, - const DataLayout &DL) const { - EVT VT = getValueType(DL, VTy); - // Don't lower vlseg/vsseg for vector types that can't be split. - if (!isTypeLegal(VT)) - return false; - - if (!isLegalElementTypeForRVV(VT.getScalarType()) || - !allowsMemoryAccessForAlignment(VTy->getContext(), DL, VT, AddrSpace, - Alignment)) - return false; - - MVT ContainerVT = VT.getSimpleVT(); - - if (auto *FVTy = dyn_cast(VTy)) { - if (!Subtarget.useRVVForFixedLengthVectors()) - return false; - // Sometimes the interleaved access pass picks up splats as interleaves of - // one element. Don't lower these. - if (FVTy->getNumElements() < 2) - return false; - - ContainerVT = getContainerForFixedLengthVector(VT.getSimpleVT()); - } - - // Need to make sure that EMUL * NFIELDS ≤ 8 - auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(getLMUL(ContainerVT)); - if (Fractional) - return true; - return Factor * LMUL <= 8; -} - bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType, Align Alignment) const { if (!Subtarget.hasVInstructions()) @@ -24133,545 +24109,6 @@ bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType, return true; } -static const Intrinsic::ID FixedVlsegIntrIds[] = { - Intrinsic::riscv_seg2_load_mask, Intrinsic::riscv_seg3_load_mask, - Intrinsic::riscv_seg4_load_mask, Intrinsic::riscv_seg5_load_mask, - Intrinsic::riscv_seg6_load_mask, Intrinsic::riscv_seg7_load_mask, - Intrinsic::riscv_seg8_load_mask}; - -static const Intrinsic::ID ScalableVlsegIntrIds[] = { - Intrinsic::riscv_vlseg2_mask, Intrinsic::riscv_vlseg3_mask, - Intrinsic::riscv_vlseg4_mask, Intrinsic::riscv_vlseg5_mask, - 
Intrinsic::riscv_vlseg6_mask, Intrinsic::riscv_vlseg7_mask, - Intrinsic::riscv_vlseg8_mask}; - -/// Lower an interleaved load into a vlsegN intrinsic. -/// -/// E.g. Lower an interleaved load (Factor = 2): -/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr -/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements -/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements -/// -/// Into: -/// %ld2 = { <4 x i32>, <4 x i32> } call llvm.riscv.seg2.load.v4i32.p0.i64( -/// %ptr, i64 4) -/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0 -/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1 -bool RISCVTargetLowering::lowerInterleavedLoad( - LoadInst *LI, ArrayRef Shuffles, - ArrayRef Indices, unsigned Factor) const { - assert(Indices.size() == Shuffles.size()); - - IRBuilder<> Builder(LI); - - const DataLayout &DL = LI->getDataLayout(); - - auto *VTy = cast(Shuffles[0]->getType()); - if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(), - LI->getPointerAddressSpace(), DL)) - return false; - - auto *PtrTy = LI->getPointerOperandType(); - auto *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen()); - - // If the segment load is going to be performed segment at a time anyways - // and there's only one element used, use a strided load instead. This - // will be equally fast, and create less vector register pressure. 
- if (Indices.size() == 1 && !Subtarget.hasOptimizedSegmentLoadStore(Factor)) { - unsigned ScalarSizeInBytes = DL.getTypeStoreSize(VTy->getElementType()); - Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes); - Value *Offset = ConstantInt::get(XLenTy, Indices[0] * ScalarSizeInBytes); - Value *BasePtr = Builder.CreatePtrAdd(LI->getPointerOperand(), Offset); - Value *Mask = Builder.getAllOnesMask(VTy->getElementCount()); - Value *VL = Builder.getInt32(VTy->getNumElements()); - - CallInst *CI = - Builder.CreateIntrinsic(Intrinsic::experimental_vp_strided_load, - {VTy, BasePtr->getType(), Stride->getType()}, - {BasePtr, Stride, Mask, VL}); - CI->addParamAttr( - 0, Attribute::getWithAlignment(CI->getContext(), LI->getAlign())); - Shuffles[0]->replaceAllUsesWith(CI); - return true; - }; - - Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements()); - Value *Mask = Builder.getAllOnesMask(VTy->getElementCount()); - CallInst *VlsegN = Builder.CreateIntrinsic( - FixedVlsegIntrIds[Factor - 2], {VTy, PtrTy, XLenTy}, - {LI->getPointerOperand(), Mask, VL}); - - for (unsigned i = 0; i < Shuffles.size(); i++) { - Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[i]); - Shuffles[i]->replaceAllUsesWith(SubVec); - } - - return true; -} - -static const Intrinsic::ID FixedVssegIntrIds[] = { - Intrinsic::riscv_seg2_store_mask, Intrinsic::riscv_seg3_store_mask, - Intrinsic::riscv_seg4_store_mask, Intrinsic::riscv_seg5_store_mask, - Intrinsic::riscv_seg6_store_mask, Intrinsic::riscv_seg7_store_mask, - Intrinsic::riscv_seg8_store_mask}; - -static const Intrinsic::ID ScalableVssegIntrIds[] = { - Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask, - Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask, - Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask, - Intrinsic::riscv_vsseg8_mask}; - -/// Lower an interleaved store into a vssegN intrinsic. -/// -/// E.g. 
Lower an interleaved store (Factor = 3): -/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1, -/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> -/// store <12 x i32> %i.vec, <12 x i32>* %ptr -/// -/// Into: -/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3> -/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7> -/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11> -/// call void llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2, -/// %ptr, i32 4) -/// -/// Note that the new shufflevectors will be removed and we'll only generate one -/// vsseg3 instruction in CodeGen. -bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI, - ShuffleVectorInst *SVI, - unsigned Factor) const { - IRBuilder<> Builder(SI); - const DataLayout &DL = SI->getDataLayout(); - auto Mask = SVI->getShuffleMask(); - auto *ShuffleVTy = cast(SVI->getType()); - // Given SVI : , then VTy : - auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(), - ShuffleVTy->getNumElements() / Factor); - if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(), - SI->getPointerAddressSpace(), DL)) - return false; - - auto *PtrTy = SI->getPointerOperandType(); - auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen()); - - unsigned Index; - // If the segment store only has one active lane (i.e. the interleave is - // just a spread shuffle), we can use a strided store instead. This will - // be equally fast, and create less vector register pressure. 
- if (!Subtarget.hasOptimizedSegmentLoadStore(Factor) && - isSpreadMask(Mask, Factor, Index)) { - unsigned ScalarSizeInBytes = - DL.getTypeStoreSize(ShuffleVTy->getElementType()); - Value *Data = SVI->getOperand(0); - auto *DataVTy = cast(Data->getType()); - Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes); - Value *Offset = ConstantInt::get(XLenTy, Index * ScalarSizeInBytes); - Value *BasePtr = Builder.CreatePtrAdd(SI->getPointerOperand(), Offset); - Value *Mask = Builder.getAllOnesMask(DataVTy->getElementCount()); - Value *VL = Builder.getInt32(VTy->getNumElements()); - - CallInst *CI = Builder.CreateIntrinsic( - Intrinsic::experimental_vp_strided_store, - {Data->getType(), BasePtr->getType(), Stride->getType()}, - {Data, BasePtr, Stride, Mask, VL}); - CI->addParamAttr( - 1, Attribute::getWithAlignment(CI->getContext(), SI->getAlign())); - - return true; - } - - Function *VssegNFunc = Intrinsic::getOrInsertDeclaration( - SI->getModule(), FixedVssegIntrIds[Factor - 2], {VTy, PtrTy, XLenTy}); - - SmallVector Ops; - SmallVector NewShuffleMask; - - for (unsigned i = 0; i < Factor; i++) { - // Collect shuffle mask for this lane. 
- for (unsigned j = 0; j < VTy->getNumElements(); j++) - NewShuffleMask.push_back(Mask[i + Factor * j]); - - Value *Shuffle = Builder.CreateShuffleVector( - SVI->getOperand(0), SVI->getOperand(1), NewShuffleMask); - Ops.push_back(Shuffle); - - NewShuffleMask.clear(); - } - // This VL should be OK (should be executable in one vsseg instruction, - // potentially under larger LMULs) because we checked that the fixed vector - // type fits in isLegalInterleavedAccessType - Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements()); - Value *StoreMask = Builder.getAllOnesMask(VTy->getElementCount()); - Ops.append({SI->getPointerOperand(), StoreMask, VL}); - - Builder.CreateCall(VssegNFunc, Ops); - - return true; -} - -bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad( - LoadInst *LI, ArrayRef DeinterleaveValues) const { - const unsigned Factor = DeinterleaveValues.size(); - if (Factor > 8) - return false; - - assert(LI->isSimple()); - IRBuilder<> Builder(LI); - - Value *FirstActive = - *llvm::find_if(DeinterleaveValues, [](Value *V) { return V != nullptr; }); - VectorType *ResVTy = cast(FirstActive->getType()); - - const DataLayout &DL = LI->getDataLayout(); - - if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(), - LI->getPointerAddressSpace(), DL)) - return false; - - Value *Return; - Type *PtrTy = LI->getPointerOperandType(); - Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen()); - - if (auto *FVTy = dyn_cast(ResVTy)) { - Value *VL = ConstantInt::get(XLenTy, FVTy->getNumElements()); - Value *Mask = Builder.getAllOnesMask(FVTy->getElementCount()); - Return = Builder.CreateIntrinsic(FixedVlsegIntrIds[Factor - 2], - {ResVTy, PtrTy, XLenTy}, - {LI->getPointerOperand(), Mask, VL}); - } else { - static const Intrinsic::ID IntrIds[] = { - Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3, - Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5, - Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7, - Intrinsic::riscv_vlseg8}; - - unsigned SEW 
= DL.getTypeSizeInBits(ResVTy->getElementType()); - unsigned NumElts = ResVTy->getElementCount().getKnownMinValue(); - Type *VecTupTy = TargetExtType::get( - LI->getContext(), "riscv.vector.tuple", - ScalableVectorType::get(Type::getInt8Ty(LI->getContext()), - NumElts * SEW / 8), - Factor); - - Value *VL = Constant::getAllOnesValue(XLenTy); - - Value *Vlseg = Builder.CreateIntrinsic( - IntrIds[Factor - 2], {VecTupTy, PtrTy, XLenTy}, - {PoisonValue::get(VecTupTy), LI->getPointerOperand(), VL, - ConstantInt::get(XLenTy, Log2_64(SEW))}); - - SmallVector AggrTypes{Factor, ResVTy}; - Return = PoisonValue::get(StructType::get(LI->getContext(), AggrTypes)); - for (unsigned i = 0; i < Factor; ++i) { - Value *VecExtract = Builder.CreateIntrinsic( - Intrinsic::riscv_tuple_extract, {ResVTy, VecTupTy}, - {Vlseg, Builder.getInt32(i)}); - Return = Builder.CreateInsertValue(Return, VecExtract, i); - } - } - - for (auto [Idx, DIV] : enumerate(DeinterleaveValues)) { - if (!DIV) - continue; - // We have to create a brand new ExtractValue to replace each - // of these old ExtractValue instructions. 
- Value *NewEV = - Builder.CreateExtractValue(Return, {static_cast(Idx)}); - DIV->replaceAllUsesWith(NewEV); - } - - return true; -} - -bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore( - StoreInst *SI, ArrayRef InterleaveValues) const { - unsigned Factor = InterleaveValues.size(); - if (Factor > 8) - return false; - - assert(SI->isSimple()); - IRBuilder<> Builder(SI); - - auto *InVTy = cast(InterleaveValues[0]->getType()); - auto *PtrTy = SI->getPointerOperandType(); - const DataLayout &DL = SI->getDataLayout(); - - if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(), - SI->getPointerAddressSpace(), DL)) - return false; - - Type *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen()); - - if (auto *FVTy = dyn_cast(InVTy)) { - Function *VssegNFunc = Intrinsic::getOrInsertDeclaration( - SI->getModule(), FixedVssegIntrIds[Factor - 2], {InVTy, PtrTy, XLenTy}); - - SmallVector Ops(InterleaveValues); - Value *VL = ConstantInt::get(XLenTy, FVTy->getNumElements()); - Value *Mask = Builder.getAllOnesMask(FVTy->getElementCount()); - Ops.append({SI->getPointerOperand(), Mask, VL}); - - Builder.CreateCall(VssegNFunc, Ops); - } else { - static const Intrinsic::ID IntrIds[] = { - Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3, - Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5, - Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7, - Intrinsic::riscv_vsseg8}; - - unsigned SEW = DL.getTypeSizeInBits(InVTy->getElementType()); - unsigned NumElts = InVTy->getElementCount().getKnownMinValue(); - Type *VecTupTy = TargetExtType::get( - SI->getContext(), "riscv.vector.tuple", - ScalableVectorType::get(Type::getInt8Ty(SI->getContext()), - NumElts * SEW / 8), - Factor); - - Function *VssegNFunc = Intrinsic::getOrInsertDeclaration( - SI->getModule(), IntrIds[Factor - 2], {VecTupTy, PtrTy, XLenTy}); - - Value *VL = Constant::getAllOnesValue(XLenTy); - - Value *StoredVal = PoisonValue::get(VecTupTy); - for (unsigned i = 0; i < Factor; ++i) - StoredVal = 
Builder.CreateIntrinsic( - Intrinsic::riscv_tuple_insert, {VecTupTy, InVTy}, - {StoredVal, InterleaveValues[i], Builder.getInt32(i)}); - - Builder.CreateCall(VssegNFunc, {StoredVal, SI->getPointerOperand(), VL, - ConstantInt::get(XLenTy, Log2_64(SEW))}); - } - - return true; -} - -static bool isMultipleOfN(const Value *V, const DataLayout &DL, unsigned N) { - assert(N); - if (N == 1) - return true; - - using namespace PatternMatch; - // Right now we're only recognizing the simplest pattern. - uint64_t C; - if (match(V, m_CombineOr(m_ConstantInt(C), - m_c_Mul(m_Value(), m_ConstantInt(C)))) && - C && C % N == 0) - return true; - - if (isPowerOf2_32(N)) { - KnownBits KB = llvm::computeKnownBits(V, DL); - return KB.countMinTrailingZeros() >= Log2_32(N); - } - - return false; -} - -/// Lower an interleaved vp.load into a vlsegN intrinsic. -/// -/// E.g. Lower an interleaved vp.load (Factor = 2): -/// %l = call @llvm.vp.load.nxv64i8.p0(ptr %ptr, -/// %mask, -/// i32 %wide.rvl) -/// %dl = tail call { , } -/// @llvm.vector.deinterleave2.nxv64i8( -/// %l) -/// %r0 = extractvalue { , } %dl, 0 -/// %r1 = extractvalue { , } %dl, 1 -/// -/// Into: -/// %rvl = udiv %wide.rvl, 2 -/// %sl = call { , } -/// @llvm.riscv.vlseg2.mask.nxv32i8.i64( undef, -/// undef, -/// ptr %ptr, -/// %mask, -/// i64 %rvl, -/// i64 1) -/// %r0 = extractvalue { , } %sl, 0 -/// %r1 = extractvalue { , } %sl, 1 -/// -/// NOTE: the deinterleave2 intrinsic won't be touched and is expected to be -/// removed by the caller -/// TODO: We probably can loosen the dependency on matching extractvalue when -/// dealing with factor of 2 (extractvalue is still required for most of other -/// factors though). 
-bool RISCVTargetLowering::lowerInterleavedVPLoad( - VPIntrinsic *Load, Value *Mask, - ArrayRef DeinterleaveResults) const { - const unsigned Factor = DeinterleaveResults.size(); - assert(Mask && "Expect a valid mask"); - assert(Load->getIntrinsicID() == Intrinsic::vp_load && - "Unexpected intrinsic"); - - Value *FirstActive = *llvm::find_if(DeinterleaveResults, - [](Value *V) { return V != nullptr; }); - VectorType *VTy = cast(FirstActive->getType()); - - auto &DL = Load->getModule()->getDataLayout(); - Align Alignment = Load->getParamAlign(0).value_or( - DL.getABITypeAlign(VTy->getElementType())); - if (!isLegalInterleavedAccessType( - VTy, Factor, Alignment, - Load->getArgOperand(0)->getType()->getPointerAddressSpace(), DL)) - return false; - - IRBuilder<> Builder(Load); - - Value *WideEVL = Load->getVectorLengthParam(); - // Conservatively check if EVL is a multiple of factor, otherwise some - // (trailing) elements might be lost after the transformation. - if (!isMultipleOfN(WideEVL, Load->getDataLayout(), Factor)) - return false; - - auto *PtrTy = Load->getArgOperand(0)->getType(); - auto *XLenTy = Type::getIntNTy(Load->getContext(), Subtarget.getXLen()); - Value *EVL = Builder.CreateZExt( - Builder.CreateUDiv(WideEVL, ConstantInt::get(WideEVL->getType(), Factor)), - XLenTy); - - Value *Return = nullptr; - if (auto *FVTy = dyn_cast(VTy)) { - Return = Builder.CreateIntrinsic(FixedVlsegIntrIds[Factor - 2], - {FVTy, PtrTy, XLenTy}, - {Load->getArgOperand(0), Mask, EVL}); - } else { - unsigned SEW = DL.getTypeSizeInBits(VTy->getElementType()); - unsigned NumElts = VTy->getElementCount().getKnownMinValue(); - Type *VecTupTy = TargetExtType::get( - Load->getContext(), "riscv.vector.tuple", - ScalableVectorType::get(Type::getInt8Ty(Load->getContext()), - NumElts * SEW / 8), - Factor); - - Value *PoisonVal = PoisonValue::get(VecTupTy); - - Function *VlsegNFunc = Intrinsic::getOrInsertDeclaration( - Load->getModule(), ScalableVlsegIntrIds[Factor - 2], - {VecTupTy, 
PtrTy, Mask->getType(), EVL->getType()}); - - Value *Operands[] = { - PoisonVal, - Load->getArgOperand(0), - Mask, - EVL, - ConstantInt::get(XLenTy, - RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC), - ConstantInt::get(XLenTy, Log2_64(SEW))}; - - CallInst *VlsegN = Builder.CreateCall(VlsegNFunc, Operands); - - SmallVector AggrTypes{Factor, VTy}; - Return = PoisonValue::get(StructType::get(Load->getContext(), AggrTypes)); - Function *VecExtractFunc = Intrinsic::getOrInsertDeclaration( - Load->getModule(), Intrinsic::riscv_tuple_extract, {VTy, VecTupTy}); - for (unsigned i = 0; i < Factor; ++i) { - Value *VecExtract = - Builder.CreateCall(VecExtractFunc, {VlsegN, Builder.getInt32(i)}); - Return = Builder.CreateInsertValue(Return, VecExtract, i); - } - } - - for (auto [Idx, DIO] : enumerate(DeinterleaveResults)) { - if (!DIO) - continue; - // We have to create a brand new ExtractValue to replace each - // of these old ExtractValue instructions. - Value *NewEV = - Builder.CreateExtractValue(Return, {static_cast(Idx)}); - DIO->replaceAllUsesWith(NewEV); - } - - return true; -} - -/// Lower an interleaved vp.store into a vssegN intrinsic. -/// -/// E.g. 
Lower an interleaved vp.store (Factor = 2): -/// -/// %is = tail call -/// @llvm.vector.interleave2.nxv64i8( -/// %load0, -/// %load1 -/// %wide.rvl = shl nuw nsw i32 %rvl, 1 -/// tail call void @llvm.vp.store.nxv64i8.p0( -/// %is, ptr %ptr, -/// %mask, -/// i32 %wide.rvl) -/// -/// Into: -/// call void @llvm.riscv.vsseg2.mask.nxv32i8.i64( -/// %load1, -/// %load2, ptr %ptr, -/// %mask, -/// i64 %rvl) -bool RISCVTargetLowering::lowerInterleavedVPStore( - VPIntrinsic *Store, Value *Mask, - ArrayRef InterleaveOperands) const { - assert(Mask && "Expect a valid mask"); - assert(Store->getIntrinsicID() == Intrinsic::vp_store && - "Unexpected intrinsic"); - - const unsigned Factor = InterleaveOperands.size(); - - auto *VTy = dyn_cast(InterleaveOperands[0]->getType()); - if (!VTy) - return false; - - const DataLayout &DL = Store->getDataLayout(); - Align Alignment = Store->getParamAlign(1).value_or( - DL.getABITypeAlign(VTy->getElementType())); - if (!isLegalInterleavedAccessType( - VTy, Factor, Alignment, - Store->getArgOperand(1)->getType()->getPointerAddressSpace(), DL)) - return false; - - IRBuilder<> Builder(Store); - Value *WideEVL = Store->getArgOperand(3); - // Conservatively check if EVL is a multiple of factor, otherwise some - // (trailing) elements might be lost after the transformation. 
- if (!isMultipleOfN(WideEVL, Store->getDataLayout(), Factor)) - return false; - - auto *PtrTy = Store->getArgOperand(1)->getType(); - auto *XLenTy = Type::getIntNTy(Store->getContext(), Subtarget.getXLen()); - Value *EVL = Builder.CreateZExt( - Builder.CreateUDiv(WideEVL, ConstantInt::get(WideEVL->getType(), Factor)), - XLenTy); - - if (auto *FVTy = dyn_cast(VTy)) { - SmallVector Operands(InterleaveOperands); - Operands.append({Store->getArgOperand(1), Mask, EVL}); - Builder.CreateIntrinsic(FixedVssegIntrIds[Factor - 2], - {FVTy, PtrTy, XLenTy}, Operands); - return true; - } - - unsigned SEW = DL.getTypeSizeInBits(VTy->getElementType()); - unsigned NumElts = VTy->getElementCount().getKnownMinValue(); - Type *VecTupTy = TargetExtType::get( - Store->getContext(), "riscv.vector.tuple", - ScalableVectorType::get(Type::getInt8Ty(Store->getContext()), - NumElts * SEW / 8), - Factor); - - Function *VecInsertFunc = Intrinsic::getOrInsertDeclaration( - Store->getModule(), Intrinsic::riscv_tuple_insert, {VecTupTy, VTy}); - Value *StoredVal = PoisonValue::get(VecTupTy); - for (unsigned i = 0; i < Factor; ++i) - StoredVal = Builder.CreateCall( - VecInsertFunc, {StoredVal, InterleaveOperands[i], Builder.getInt32(i)}); - - Function *VssegNFunc = Intrinsic::getOrInsertDeclaration( - Store->getModule(), ScalableVssegIntrIds[Factor - 2], - {VecTupTy, PtrTy, Mask->getType(), EVL->getType()}); - - Value *Operands[] = {StoredVal, Store->getArgOperand(1), Mask, EVL, - ConstantInt::get(XLenTy, Log2_64(SEW))}; - - Builder.CreateCall(VssegNFunc, Operands); - return true; -} - MachineInstr * RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI, diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index bcbda30342b80..00e969056df7d 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -468,6 +468,12 @@ class RISCVTargetLowering : public 
TargetLowering { ArrayRef getRoundingControlRegisters() const override; + /// Match a mask which "spreads" the leading elements of a vector evenly + /// across the result. Factor is the spread amount, and Index is the + /// offset applied. + static bool isSpreadMask(ArrayRef Mask, unsigned Factor, + unsigned &Index); + private: void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo, const SmallVectorImpl &Ins, bool IsRet, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td index b2bf09028bc40..9e3eb1c03fb37 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td @@ -1312,6 +1312,30 @@ class QCIMVCCIPat : Pat<(select (XLenVT (setcc (XLenVT GPRNoX0:$rs1), InTyImm:$imm, Cond)), (XLenVT GPRNoX0:$rs3), (XLenVT GPRNoX0:$rd)), (Inst GPRNoX0:$rd, GPRNoX0:$rs1, InTyImm:$imm, GPRNoX0:$rs3)>; +class QCISELECTCCIPat + : Pat<(select (XLenVT (setcc (XLenVT GPRNoX0:$rd), simm5:$imm, Cond)), (XLenVT GPRNoX0:$rs2), (XLenVT GPRNoX0:$rs3)), + (Inst GPRNoX0:$rd, simm5:$imm, GPRNoX0:$rs2, GPRNoX0:$rs3)>; + +class QCISELECTICCIPat + : Pat<(select (XLenVT (setcc (XLenVT GPRNoX0:$rd), simm5:$imm, Cond)), (XLenVT GPRNoX0:$rs2), simm5:$simm2), + (Inst GPRNoX0:$rd, simm5:$imm, GPRNoX0:$rs2, simm5:$simm2)>; + +class QCISELECTICCIPatInv + : Pat<(select (XLenVT (setcc (XLenVT GPRNoX0:$rd), simm5:$imm, Cond)), simm5:$simm2, (XLenVT GPRNoX0:$rs2)), + (Inst GPRNoX0:$rd, simm5:$imm, GPRNoX0:$rs2, simm5:$simm2)>; + +class QCISELECTICCPat + : Pat<(select (XLenVT (setcc (XLenVT GPRNoX0:$rd), (XLenVT GPRNoX0:$rs1), Cond)), (XLenVT GPRNoX0:$rs2), simm5:$simm2), + (Inst GPRNoX0:$rd, GPRNoX0:$rs1, GPRNoX0:$rs2, simm5:$simm2)>; + +class QCISELECTICCPatInv + : Pat<(select (XLenVT (setcc (XLenVT GPRNoX0:$rd), (XLenVT GPRNoX0:$rs1), Cond)), simm5:$simm2, (XLenVT GPRNoX0:$rs2)), + (Inst GPRNoX0:$rd, GPRNoX0:$rs1, GPRNoX0:$rs2, simm5:$simm2)>; + +class QCISELECTIICCPat + : Pat<(select (XLenVT (setcc (XLenVT 
GPRNoX0:$rd), (XLenVT GPRNoX0:$rs1), Cond)), simm5:$simm1, simm5:$simm2), + (Inst GPRNoX0:$rd, GPRNoX0:$rs1, simm5:$simm1, simm5:$simm2)>; + // Match `riscv_brcc` and lower to the appropriate XQCIBI branch instruction. class BcciPat : Pat<(riscv_brcc (XLenVT GPRNoX0:$rs1), InTyImm:$rs2, Cond, bb:$imm12), @@ -1332,6 +1356,11 @@ class SelectQCbi (OpNode GPRNoX0:$lhs, InTyImm:$Constant, (IntCCtoRISCVCC $cc), GPRNoX0:$truev, GPRNoX0:$falsev)>; +let Predicates = [HasVendorXqciac, IsRV32] in { +def : Pat<(XLenVT (add GPRNoX0:$rd, (mul GPRNoX0:$rs1, simm12:$imm12))), + (QC_MULIADD GPRNoX0:$rd, GPRNoX0:$rs1, simm12:$imm12)>; +} // Predicates = [HasVendorXqciac, IsRV32] + /// Simple arithmetic operations let Predicates = [HasVendorXqcilia, IsRV32] in { @@ -1461,6 +1490,37 @@ def : QCIMVCCIPat ; def : QCIMVCCIPat ; } +let Predicates = [HasVendorXqcics, IsRV32] in { +def : Pat<(select (XLenVT GPRNoX0:$rd), (XLenVT GPRNoX0:$rs2),(XLenVT GPRNoX0:$rs3)), + (QC_SELECTNEI GPRNoX0:$rd, (XLenVT 0), GPRNoX0:$rs2, GPRNoX0:$rs3)>; +def : Pat<(select (XLenVT GPRNoX0:$rd), (XLenVT GPRNoX0:$rs2), simm5:$simm2), + (QC_SELECTINEI GPRNoX0:$rd, (XLenVT 0), GPRNoX0:$rs2, simm5:$simm2)>; +def : Pat<(select (XLenVT GPRNoX0:$rd), simm5:$simm2,(XLenVT GPRNoX0:$rs2)), + (QC_SELECTIEQI GPRNoX0:$rd, (XLenVT 0), GPRNoX0:$rs2, simm5:$simm2)>; + +// Below AddedComplexity is added to prefer these conditional select instructions over +// conditional move instructions +let AddedComplexity = 1 in { +def : QCISELECTCCIPat ; +def : QCISELECTCCIPat ; +} + +def : QCISELECTICCIPat ; +def : QCISELECTICCIPat ; + +def : QCISELECTICCIPatInv ; +def : QCISELECTICCIPatInv ; + +def : QCISELECTICCPat ; +def : QCISELECTICCPat ; + +def : QCISELECTICCPatInv ; +def : QCISELECTICCPatInv ; + +def : QCISELECTIICCPat ; +def : QCISELECTIICCPat ; +} // Predicates = [HasVendorXqcics, IsRV32] + //===----------------------------------------------------------------------===/i // Compress Instruction tablegen backend. 
//===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp new file mode 100644 index 0000000000000..a6ff22c4b391f --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp @@ -0,0 +1,596 @@ +//===-- RISCVInterleavedAccess.cpp - RISC-V Interleaved Access Transform --===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Functions and callbacks related to the InterleavedAccessPass. +// +//===----------------------------------------------------------------------===// + +#include "RISCV.h" +#include "RISCVISelLowering.h" +#include "RISCVSubtarget.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicsRISCV.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PatternMatch.h" + +using namespace llvm; + +bool RISCVTargetLowering::isLegalInterleavedAccessType( + VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace, + const DataLayout &DL) const { + EVT VT = getValueType(DL, VTy); + // Don't lower vlseg/vsseg for vector types that can't be split. + if (!isTypeLegal(VT)) + return false; + + if (!isLegalElementTypeForRVV(VT.getScalarType()) || + !allowsMemoryAccessForAlignment(VTy->getContext(), DL, VT, AddrSpace, + Alignment)) + return false; + + MVT ContainerVT = VT.getSimpleVT(); + + if (auto *FVTy = dyn_cast(VTy)) { + if (!Subtarget.useRVVForFixedLengthVectors()) + return false; + // Sometimes the interleaved access pass picks up splats as interleaves of + // one element. Don't lower these. 
+ if (FVTy->getNumElements() < 2) + return false; + + ContainerVT = getContainerForFixedLengthVector(VT.getSimpleVT()); + } + + // Need to make sure that EMUL * NFIELDS ≤ 8 + auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(getLMUL(ContainerVT)); + if (Fractional) + return true; + return Factor * LMUL <= 8; +} + +static const Intrinsic::ID FixedVlsegIntrIds[] = { + Intrinsic::riscv_seg2_load_mask, Intrinsic::riscv_seg3_load_mask, + Intrinsic::riscv_seg4_load_mask, Intrinsic::riscv_seg5_load_mask, + Intrinsic::riscv_seg6_load_mask, Intrinsic::riscv_seg7_load_mask, + Intrinsic::riscv_seg8_load_mask}; + +static const Intrinsic::ID ScalableVlsegIntrIds[] = { + Intrinsic::riscv_vlseg2_mask, Intrinsic::riscv_vlseg3_mask, + Intrinsic::riscv_vlseg4_mask, Intrinsic::riscv_vlseg5_mask, + Intrinsic::riscv_vlseg6_mask, Intrinsic::riscv_vlseg7_mask, + Intrinsic::riscv_vlseg8_mask}; + +/// Lower an interleaved load into a vlsegN intrinsic. +/// +/// E.g. Lower an interleaved load (Factor = 2): +/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr +/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements +/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements +/// +/// Into: +/// %ld2 = { <4 x i32>, <4 x i32> } call llvm.riscv.seg2.load.v4i32.p0.i64( +/// %ptr, i64 4) +/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0 +/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1 +bool RISCVTargetLowering::lowerInterleavedLoad( + LoadInst *LI, ArrayRef Shuffles, + ArrayRef Indices, unsigned Factor) const { + assert(Indices.size() == Shuffles.size()); + + IRBuilder<> Builder(LI); + + const DataLayout &DL = LI->getDataLayout(); + + auto *VTy = cast(Shuffles[0]->getType()); + if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(), + LI->getPointerAddressSpace(), DL)) + return false; + + auto *PtrTy = LI->getPointerOperandType(); + auto *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen()); + + // If the segment load is 
going to be performed segment at a time anyways + // and there's only one element used, use a strided load instead. This + // will be equally fast, and create less vector register pressure. + if (Indices.size() == 1 && !Subtarget.hasOptimizedSegmentLoadStore(Factor)) { + unsigned ScalarSizeInBytes = DL.getTypeStoreSize(VTy->getElementType()); + Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes); + Value *Offset = ConstantInt::get(XLenTy, Indices[0] * ScalarSizeInBytes); + Value *BasePtr = Builder.CreatePtrAdd(LI->getPointerOperand(), Offset); + Value *Mask = Builder.getAllOnesMask(VTy->getElementCount()); + Value *VL = Builder.getInt32(VTy->getNumElements()); + + CallInst *CI = + Builder.CreateIntrinsic(Intrinsic::experimental_vp_strided_load, + {VTy, BasePtr->getType(), Stride->getType()}, + {BasePtr, Stride, Mask, VL}); + CI->addParamAttr( + 0, Attribute::getWithAlignment(CI->getContext(), LI->getAlign())); + Shuffles[0]->replaceAllUsesWith(CI); + return true; + }; + + Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements()); + Value *Mask = Builder.getAllOnesMask(VTy->getElementCount()); + CallInst *VlsegN = Builder.CreateIntrinsic( + FixedVlsegIntrIds[Factor - 2], {VTy, PtrTy, XLenTy}, + {LI->getPointerOperand(), Mask, VL}); + + for (unsigned i = 0; i < Shuffles.size(); i++) { + Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[i]); + Shuffles[i]->replaceAllUsesWith(SubVec); + } + + return true; +} + +static const Intrinsic::ID FixedVssegIntrIds[] = { + Intrinsic::riscv_seg2_store_mask, Intrinsic::riscv_seg3_store_mask, + Intrinsic::riscv_seg4_store_mask, Intrinsic::riscv_seg5_store_mask, + Intrinsic::riscv_seg6_store_mask, Intrinsic::riscv_seg7_store_mask, + Intrinsic::riscv_seg8_store_mask}; + +static const Intrinsic::ID ScalableVssegIntrIds[] = { + Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask, + Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask, + Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask, 
+ Intrinsic::riscv_vsseg8_mask}; + +/// Lower an interleaved store into a vssegN intrinsic. +/// +/// E.g. Lower an interleaved store (Factor = 3): +/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1, +/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> +/// store <12 x i32> %i.vec, <12 x i32>* %ptr +/// +/// Into: +/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3> +/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7> +/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11> +/// call void llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2, +/// %ptr, i32 4) +/// +/// Note that the new shufflevectors will be removed and we'll only generate one +/// vsseg3 instruction in CodeGen. +bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI, + ShuffleVectorInst *SVI, + unsigned Factor) const { + IRBuilder<> Builder(SI); + const DataLayout &DL = SI->getDataLayout(); + auto Mask = SVI->getShuffleMask(); + auto *ShuffleVTy = cast(SVI->getType()); + // Given SVI : , then VTy : + auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(), + ShuffleVTy->getNumElements() / Factor); + if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(), + SI->getPointerAddressSpace(), DL)) + return false; + + auto *PtrTy = SI->getPointerOperandType(); + auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen()); + + unsigned Index; + // If the segment store only has one active lane (i.e. the interleave is + // just a spread shuffle), we can use a strided store instead. This will + // be equally fast, and create less vector register pressure. 
+ if (!Subtarget.hasOptimizedSegmentLoadStore(Factor) && + isSpreadMask(Mask, Factor, Index)) { + unsigned ScalarSizeInBytes = + DL.getTypeStoreSize(ShuffleVTy->getElementType()); + Value *Data = SVI->getOperand(0); + auto *DataVTy = cast(Data->getType()); + Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes); + Value *Offset = ConstantInt::get(XLenTy, Index * ScalarSizeInBytes); + Value *BasePtr = Builder.CreatePtrAdd(SI->getPointerOperand(), Offset); + Value *Mask = Builder.getAllOnesMask(DataVTy->getElementCount()); + Value *VL = Builder.getInt32(VTy->getNumElements()); + + CallInst *CI = Builder.CreateIntrinsic( + Intrinsic::experimental_vp_strided_store, + {Data->getType(), BasePtr->getType(), Stride->getType()}, + {Data, BasePtr, Stride, Mask, VL}); + CI->addParamAttr( + 1, Attribute::getWithAlignment(CI->getContext(), SI->getAlign())); + + return true; + } + + Function *VssegNFunc = Intrinsic::getOrInsertDeclaration( + SI->getModule(), FixedVssegIntrIds[Factor - 2], {VTy, PtrTy, XLenTy}); + + SmallVector Ops; + SmallVector NewShuffleMask; + + for (unsigned i = 0; i < Factor; i++) { + // Collect shuffle mask for this lane. 
+ for (unsigned j = 0; j < VTy->getNumElements(); j++) + NewShuffleMask.push_back(Mask[i + Factor * j]); + + Value *Shuffle = Builder.CreateShuffleVector( + SVI->getOperand(0), SVI->getOperand(1), NewShuffleMask); + Ops.push_back(Shuffle); + + NewShuffleMask.clear(); + } + // This VL should be OK (should be executable in one vsseg instruction, + // potentially under larger LMULs) because we checked that the fixed vector + // type fits in isLegalInterleavedAccessType + Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements()); + Value *StoreMask = Builder.getAllOnesMask(VTy->getElementCount()); + Ops.append({SI->getPointerOperand(), StoreMask, VL}); + + Builder.CreateCall(VssegNFunc, Ops); + + return true; +} + +bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad( + LoadInst *LI, ArrayRef DeinterleaveValues) const { + const unsigned Factor = DeinterleaveValues.size(); + if (Factor > 8) + return false; + + assert(LI->isSimple()); + IRBuilder<> Builder(LI); + + Value *FirstActive = + *llvm::find_if(DeinterleaveValues, [](Value *V) { return V != nullptr; }); + VectorType *ResVTy = cast(FirstActive->getType()); + + const DataLayout &DL = LI->getDataLayout(); + + if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(), + LI->getPointerAddressSpace(), DL)) + return false; + + Value *Return; + Type *PtrTy = LI->getPointerOperandType(); + Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen()); + + if (auto *FVTy = dyn_cast(ResVTy)) { + Value *VL = ConstantInt::get(XLenTy, FVTy->getNumElements()); + Value *Mask = Builder.getAllOnesMask(FVTy->getElementCount()); + Return = Builder.CreateIntrinsic(FixedVlsegIntrIds[Factor - 2], + {ResVTy, PtrTy, XLenTy}, + {LI->getPointerOperand(), Mask, VL}); + } else { + static const Intrinsic::ID IntrIds[] = { + Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3, + Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5, + Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7, + Intrinsic::riscv_vlseg8}; + + unsigned SEW 
= DL.getTypeSizeInBits(ResVTy->getElementType()); + unsigned NumElts = ResVTy->getElementCount().getKnownMinValue(); + Type *VecTupTy = TargetExtType::get( + LI->getContext(), "riscv.vector.tuple", + ScalableVectorType::get(Type::getInt8Ty(LI->getContext()), + NumElts * SEW / 8), + Factor); + + Value *VL = Constant::getAllOnesValue(XLenTy); + + Value *Vlseg = Builder.CreateIntrinsic( + IntrIds[Factor - 2], {VecTupTy, PtrTy, XLenTy}, + {PoisonValue::get(VecTupTy), LI->getPointerOperand(), VL, + ConstantInt::get(XLenTy, Log2_64(SEW))}); + + SmallVector AggrTypes{Factor, ResVTy}; + Return = PoisonValue::get(StructType::get(LI->getContext(), AggrTypes)); + for (unsigned i = 0; i < Factor; ++i) { + Value *VecExtract = Builder.CreateIntrinsic( + Intrinsic::riscv_tuple_extract, {ResVTy, VecTupTy}, + {Vlseg, Builder.getInt32(i)}); + Return = Builder.CreateInsertValue(Return, VecExtract, i); + } + } + + for (auto [Idx, DIV] : enumerate(DeinterleaveValues)) { + if (!DIV) + continue; + // We have to create a brand new ExtractValue to replace each + // of these old ExtractValue instructions. 
+ Value *NewEV = + Builder.CreateExtractValue(Return, {static_cast(Idx)}); + DIV->replaceAllUsesWith(NewEV); + } + + return true; +} + +bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore( + StoreInst *SI, ArrayRef InterleaveValues) const { + unsigned Factor = InterleaveValues.size(); + if (Factor > 8) + return false; + + assert(SI->isSimple()); + IRBuilder<> Builder(SI); + + auto *InVTy = cast(InterleaveValues[0]->getType()); + auto *PtrTy = SI->getPointerOperandType(); + const DataLayout &DL = SI->getDataLayout(); + + if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(), + SI->getPointerAddressSpace(), DL)) + return false; + + Type *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen()); + + if (auto *FVTy = dyn_cast(InVTy)) { + Function *VssegNFunc = Intrinsic::getOrInsertDeclaration( + SI->getModule(), FixedVssegIntrIds[Factor - 2], {InVTy, PtrTy, XLenTy}); + + SmallVector Ops(InterleaveValues); + Value *VL = ConstantInt::get(XLenTy, FVTy->getNumElements()); + Value *Mask = Builder.getAllOnesMask(FVTy->getElementCount()); + Ops.append({SI->getPointerOperand(), Mask, VL}); + + Builder.CreateCall(VssegNFunc, Ops); + } else { + static const Intrinsic::ID IntrIds[] = { + Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3, + Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5, + Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7, + Intrinsic::riscv_vsseg8}; + + unsigned SEW = DL.getTypeSizeInBits(InVTy->getElementType()); + unsigned NumElts = InVTy->getElementCount().getKnownMinValue(); + Type *VecTupTy = TargetExtType::get( + SI->getContext(), "riscv.vector.tuple", + ScalableVectorType::get(Type::getInt8Ty(SI->getContext()), + NumElts * SEW / 8), + Factor); + + Function *VssegNFunc = Intrinsic::getOrInsertDeclaration( + SI->getModule(), IntrIds[Factor - 2], {VecTupTy, PtrTy, XLenTy}); + + Value *VL = Constant::getAllOnesValue(XLenTy); + + Value *StoredVal = PoisonValue::get(VecTupTy); + for (unsigned i = 0; i < Factor; ++i) + StoredVal = 
Builder.CreateIntrinsic( + Intrinsic::riscv_tuple_insert, {VecTupTy, InVTy}, + {StoredVal, InterleaveValues[i], Builder.getInt32(i)}); + + Builder.CreateCall(VssegNFunc, {StoredVal, SI->getPointerOperand(), VL, + ConstantInt::get(XLenTy, Log2_64(SEW))}); + } + + return true; +} + +static bool isMultipleOfN(const Value *V, const DataLayout &DL, unsigned N) { + assert(N); + if (N == 1) + return true; + + using namespace PatternMatch; + // Right now we're only recognizing the simplest pattern. + uint64_t C; + if (match(V, m_CombineOr(m_ConstantInt(C), + m_c_Mul(m_Value(), m_ConstantInt(C)))) && + C && C % N == 0) + return true; + + if (isPowerOf2_32(N)) { + KnownBits KB = llvm::computeKnownBits(V, DL); + return KB.countMinTrailingZeros() >= Log2_32(N); + } + + return false; +} + +/// Lower an interleaved vp.load into a vlsegN intrinsic. +/// +/// E.g. Lower an interleaved vp.load (Factor = 2): +/// %l = call @llvm.vp.load.nxv64i8.p0(ptr %ptr, +/// %mask, +/// i32 %wide.rvl) +/// %dl = tail call { , } +/// @llvm.vector.deinterleave2.nxv64i8( +/// %l) +/// %r0 = extractvalue { , } %dl, 0 +/// %r1 = extractvalue { , } %dl, 1 +/// +/// Into: +/// %rvl = udiv %wide.rvl, 2 +/// %sl = call { , } +/// @llvm.riscv.vlseg2.mask.nxv32i8.i64( undef, +/// undef, +/// ptr %ptr, +/// %mask, +/// i64 %rvl, +/// i64 1) +/// %r0 = extractvalue { , } %sl, 0 +/// %r1 = extractvalue { , } %sl, 1 +/// +/// NOTE: the deinterleave2 intrinsic won't be touched and is expected to be +/// removed by the caller +/// TODO: We probably can loosen the dependency on matching extractvalue when +/// dealing with factor of 2 (extractvalue is still required for most of other +/// factors though). 
+bool RISCVTargetLowering::lowerInterleavedVPLoad( + VPIntrinsic *Load, Value *Mask, + ArrayRef DeinterleaveResults) const { + const unsigned Factor = DeinterleaveResults.size(); + assert(Mask && "Expect a valid mask"); + assert(Load->getIntrinsicID() == Intrinsic::vp_load && + "Unexpected intrinsic"); + + Value *FirstActive = *llvm::find_if(DeinterleaveResults, + [](Value *V) { return V != nullptr; }); + VectorType *VTy = cast(FirstActive->getType()); + + auto &DL = Load->getModule()->getDataLayout(); + Align Alignment = Load->getParamAlign(0).value_or( + DL.getABITypeAlign(VTy->getElementType())); + if (!isLegalInterleavedAccessType( + VTy, Factor, Alignment, + Load->getArgOperand(0)->getType()->getPointerAddressSpace(), DL)) + return false; + + IRBuilder<> Builder(Load); + + Value *WideEVL = Load->getVectorLengthParam(); + // Conservatively check if EVL is a multiple of factor, otherwise some + // (trailing) elements might be lost after the transformation. + if (!isMultipleOfN(WideEVL, Load->getDataLayout(), Factor)) + return false; + + auto *PtrTy = Load->getArgOperand(0)->getType(); + auto *XLenTy = Type::getIntNTy(Load->getContext(), Subtarget.getXLen()); + Value *EVL = Builder.CreateZExt( + Builder.CreateUDiv(WideEVL, ConstantInt::get(WideEVL->getType(), Factor)), + XLenTy); + + Value *Return = nullptr; + if (auto *FVTy = dyn_cast(VTy)) { + Return = Builder.CreateIntrinsic(FixedVlsegIntrIds[Factor - 2], + {FVTy, PtrTy, XLenTy}, + {Load->getArgOperand(0), Mask, EVL}); + } else { + unsigned SEW = DL.getTypeSizeInBits(VTy->getElementType()); + unsigned NumElts = VTy->getElementCount().getKnownMinValue(); + Type *VecTupTy = TargetExtType::get( + Load->getContext(), "riscv.vector.tuple", + ScalableVectorType::get(Type::getInt8Ty(Load->getContext()), + NumElts * SEW / 8), + Factor); + + Value *PoisonVal = PoisonValue::get(VecTupTy); + + Function *VlsegNFunc = Intrinsic::getOrInsertDeclaration( + Load->getModule(), ScalableVlsegIntrIds[Factor - 2], + {VecTupTy, 
PtrTy, Mask->getType(), EVL->getType()}); + + Value *Operands[] = { + PoisonVal, + Load->getArgOperand(0), + Mask, + EVL, + ConstantInt::get(XLenTy, + RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC), + ConstantInt::get(XLenTy, Log2_64(SEW))}; + + CallInst *VlsegN = Builder.CreateCall(VlsegNFunc, Operands); + + SmallVector AggrTypes{Factor, VTy}; + Return = PoisonValue::get(StructType::get(Load->getContext(), AggrTypes)); + Function *VecExtractFunc = Intrinsic::getOrInsertDeclaration( + Load->getModule(), Intrinsic::riscv_tuple_extract, {VTy, VecTupTy}); + for (unsigned i = 0; i < Factor; ++i) { + Value *VecExtract = + Builder.CreateCall(VecExtractFunc, {VlsegN, Builder.getInt32(i)}); + Return = Builder.CreateInsertValue(Return, VecExtract, i); + } + } + + for (auto [Idx, DIO] : enumerate(DeinterleaveResults)) { + if (!DIO) + continue; + // We have to create a brand new ExtractValue to replace each + // of these old ExtractValue instructions. + Value *NewEV = + Builder.CreateExtractValue(Return, {static_cast(Idx)}); + DIO->replaceAllUsesWith(NewEV); + } + + return true; +} + +/// Lower an interleaved vp.store into a vssegN intrinsic. +/// +/// E.g. 
Lower an interleaved vp.store (Factor = 2): +/// +/// %is = tail call +/// @llvm.vector.interleave2.nxv64i8( +/// %load0, +/// %load1 +/// %wide.rvl = shl nuw nsw i32 %rvl, 1 +/// tail call void @llvm.vp.store.nxv64i8.p0( +/// %is, ptr %ptr, +/// %mask, +/// i32 %wide.rvl) +/// +/// Into: +/// call void @llvm.riscv.vsseg2.mask.nxv32i8.i64( +/// %load1, +/// %load2, ptr %ptr, +/// %mask, +/// i64 %rvl) +bool RISCVTargetLowering::lowerInterleavedVPStore( + VPIntrinsic *Store, Value *Mask, + ArrayRef InterleaveOperands) const { + assert(Mask && "Expect a valid mask"); + assert(Store->getIntrinsicID() == Intrinsic::vp_store && + "Unexpected intrinsic"); + + const unsigned Factor = InterleaveOperands.size(); + + auto *VTy = dyn_cast(InterleaveOperands[0]->getType()); + if (!VTy) + return false; + + const DataLayout &DL = Store->getDataLayout(); + Align Alignment = Store->getParamAlign(1).value_or( + DL.getABITypeAlign(VTy->getElementType())); + if (!isLegalInterleavedAccessType( + VTy, Factor, Alignment, + Store->getArgOperand(1)->getType()->getPointerAddressSpace(), DL)) + return false; + + IRBuilder<> Builder(Store); + Value *WideEVL = Store->getArgOperand(3); + // Conservatively check if EVL is a multiple of factor, otherwise some + // (trailing) elements might be lost after the transformation. 
+ if (!isMultipleOfN(WideEVL, Store->getDataLayout(), Factor)) + return false; + + auto *PtrTy = Store->getArgOperand(1)->getType(); + auto *XLenTy = Type::getIntNTy(Store->getContext(), Subtarget.getXLen()); + Value *EVL = Builder.CreateZExt( + Builder.CreateUDiv(WideEVL, ConstantInt::get(WideEVL->getType(), Factor)), + XLenTy); + + if (auto *FVTy = dyn_cast(VTy)) { + SmallVector Operands(InterleaveOperands); + Operands.append({Store->getArgOperand(1), Mask, EVL}); + Builder.CreateIntrinsic(FixedVssegIntrIds[Factor - 2], + {FVTy, PtrTy, XLenTy}, Operands); + return true; + } + + unsigned SEW = DL.getTypeSizeInBits(VTy->getElementType()); + unsigned NumElts = VTy->getElementCount().getKnownMinValue(); + Type *VecTupTy = TargetExtType::get( + Store->getContext(), "riscv.vector.tuple", + ScalableVectorType::get(Type::getInt8Ty(Store->getContext()), + NumElts * SEW / 8), + Factor); + + Function *VecInsertFunc = Intrinsic::getOrInsertDeclaration( + Store->getModule(), Intrinsic::riscv_tuple_insert, {VecTupTy, VTy}); + Value *StoredVal = PoisonValue::get(VecTupTy); + for (unsigned i = 0; i < Factor; ++i) + StoredVal = Builder.CreateCall( + VecInsertFunc, {StoredVal, InterleaveOperands[i], Builder.getInt32(i)}); + + Function *VssegNFunc = Intrinsic::getOrInsertDeclaration( + Store->getModule(), ScalableVssegIntrIds[Factor - 2], + {VecTupTy, PtrTy, Mask->getType(), EVL->getType()}); + + Value *Operands[] = {StoredVal, Store->getArgOperand(1), Mask, EVL, + ConstantInt::get(XLenTy, Log2_64(SEW))}; + + Builder.CreateCall(VssegNFunc, Operands); + return true; +} diff --git a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp index 24c05a2f807d0..d257f56cf4129 100644 --- a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp +++ b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp @@ -323,6 +323,12 @@ static bool hasAllNBitUsers(const MachineInstr &OrigMI, Worklist.push_back(std::make_pair(UserMI, Bits)); break; + case RISCV::BREV8: + case RISCV::ORC_B: + // 
BREV8 and ORC_B work on bytes. Round Bits down to the nearest byte. + Worklist.push_back(std::make_pair(UserMI, alignDown(Bits, 8))); + break; + case RISCV::PseudoCCMOVGPR: case RISCV::PseudoCCMOVGPRNoX0: // Either operand 4 or operand 5 is returned by this instruction. If diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp index 07907298386c3..84ef53985484f 100644 --- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp +++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp @@ -521,16 +521,23 @@ bool RISCVVectorPeephole::convertToUnmasked(MachineInstr &MI) const { /// Check if it's safe to move From down to To, checking that no physical /// registers are clobbered. static bool isSafeToMove(const MachineInstr &From, const MachineInstr &To) { - assert(From.getParent() == To.getParent() && !From.hasImplicitDef()); - SmallVector PhysUses; + assert(From.getParent() == To.getParent()); + SmallVector PhysUses, PhysDefs; for (const MachineOperand &MO : From.all_uses()) if (MO.getReg().isPhysical()) PhysUses.push_back(MO.getReg()); + for (const MachineOperand &MO : From.all_defs()) + if (MO.getReg().isPhysical()) + PhysDefs.push_back(MO.getReg()); bool SawStore = false; - for (auto II = From.getIterator(); II != To.getIterator(); II++) { + for (auto II = std::next(From.getIterator()); II != To.getIterator(); II++) { for (Register PhysReg : PhysUses) if (II->definesRegister(PhysReg, nullptr)) return false; + for (Register PhysReg : PhysDefs) + if (II->definesRegister(PhysReg, nullptr) || + II->readsRegister(PhysReg, nullptr)) + return false; if (II->mayStore()) { SawStore = true; break; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp index b2ea784057780..ec95e86e4fe3d 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp @@ -35,10 +35,8 @@ #include "llvm/IR/GlobalVariable.h" #include 
"llvm/IR/Instructions.h" #include "llvm/IR/Operator.h" -#include "llvm/IR/PatternMatch.h" using namespace llvm; -using namespace PatternMatch; #define DEBUG_TYPE "wasm-fastisel" diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp index 8e304c07ed5cb..7fe58539cd4ec 100644 --- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp +++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/DerivedTypes.h" @@ -108,6 +109,8 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI, .legalFor(HasSSE2 || UseX87, {s64}) .legalFor(UseX87, {s80}); + getActionDefinitionsBuilder(G_GET_ROUNDING).customFor({s32}); + // merge/unmerge for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) { unsigned BigTyIdx = Op == G_MERGE_VALUES ? 
0 : 1; @@ -611,6 +614,8 @@ bool X86LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, return legalizeSITOFP(MI, MRI, Helper); case TargetOpcode::G_FPTOSI: return legalizeFPTOSI(MI, MRI, Helper); + case TargetOpcode::G_GET_ROUNDING: + return legalizeGETROUNDING(MI, MRI, Helper); } llvm_unreachable("expected switch to return"); } @@ -777,6 +782,82 @@ bool X86LegalizerInfo::legalizeNarrowingStore(MachineInstr &MI, return true; } +bool X86LegalizerInfo::legalizeGETROUNDING(MachineInstr &MI, + MachineRegisterInfo &MRI, + LegalizerHelper &Helper) const { + /* + The rounding mode is in bits 11:10 of FPSR, and has the following + settings: + 00 Round to nearest + 01 Round to -inf + 10 Round to +inf + 11 Round to 0 + + GET_ROUNDING, on the other hand, expects the following: + -1 Undefined + 0 Round to 0 + 1 Round to nearest + 2 Round to +inf + 3 Round to -inf + + To perform the conversion, we use a packed lookup table of the four 2-bit + values that we can index by FPSP[11:10] + 0x2d --> (0b00,10,11,01) --> (0,2,3,1) >> FPSR[11:10] + + (0x2d >> ((FPSR >> 9) & 6)) & 3 + */ + + MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; + MachineFunction &MF = MIRBuilder.getMF(); + Register Dst = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(Dst); + const LLT s8 = LLT::scalar(8); + const LLT s16 = LLT::scalar(16); + const LLT s32 = LLT::scalar(32); + + // Save FP Control Word to stack slot + int MemSize = 2; + Align Alignment = Align(2); + MachinePointerInfo PtrInfo; + auto StackTemp = Helper.createStackTemporary(TypeSize::getFixed(MemSize), + Alignment, PtrInfo); + Register StackPtr = StackTemp.getReg(0); + + auto StoreMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, + MemSize, Alignment); + + // Store FP Control Word to stack slot using G_FNSTCW16 + MIRBuilder.buildInstr(X86::G_FNSTCW16) + .addUse(StackPtr) + .addMemOperand(StoreMMO); + + // Load FP Control Word from stack slot + auto LoadMMO = MF.getMachineMemOperand(PtrInfo, 
MachineMemOperand::MOLoad, + MemSize, Alignment); + + auto CWD32 = + MIRBuilder.buildZExt(s32, MIRBuilder.buildLoad(s16, StackPtr, *LoadMMO)); + auto Shifted8 = MIRBuilder.buildTrunc( + s8, MIRBuilder.buildLShr(s32, CWD32, MIRBuilder.buildConstant(s8, 9))); + auto Masked32 = MIRBuilder.buildZExt( + s32, MIRBuilder.buildAnd(s8, Shifted8, MIRBuilder.buildConstant(s8, 6))); + + // LUT is a packed lookup table (0x2d) used to map the 2-bit x87 FPU rounding + // mode (from bits 11:10 of the control word) to the values expected by + // GET_ROUNDING. The mapping is performed by shifting LUT right by the + // extracted rounding mode and masking the result with 3 to obtain the final + // result. + auto LUT = MIRBuilder.buildConstant(s32, 0x2d); + auto LUTShifted = MIRBuilder.buildLShr(s32, LUT, Masked32); + auto RetVal = + MIRBuilder.buildAnd(s32, LUTShifted, MIRBuilder.buildConstant(s32, 3)); + auto RetValTrunc = MIRBuilder.buildZExtOrTrunc(DstTy, RetVal); + + MIRBuilder.buildCopy(Dst, RetValTrunc); + + MI.eraseFromParent(); + return true; +} + bool X86LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const { return true; diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h index 1ba82674ed4c6..0003552d70ee0 100644 --- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h +++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h @@ -54,6 +54,9 @@ class X86LegalizerInfo : public LegalizerInfo { bool legalizeFPTOSI(MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const; + + bool legalizeGETROUNDING(MachineInstr &MI, MachineRegisterInfo &MRI, + LegalizerHelper &Helper) const; }; } // namespace llvm #endif diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 772e48efb8607..990b381341f07 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -1331,7 +1331,7 @@ def ProcessorFeatures { // Pantherlake list PTLAdditionalFeatures = [FeaturePREFETCHI]; list PTLFeatures = 
!listconcat(ARLSFeatures, PTLAdditionalFeatures); + !listremove(!listconcat(ARLSFeatures, PTLAdditionalFeatures), [FeatureWIDEKL]); // Clearwaterforest @@ -1342,7 +1342,7 @@ def ProcessorFeatures { FeatureSM4, FeatureUSERMSR]; list CWFFeatures = - !listconcat(SRFFeatures, CWFAdditionalFeatures); + !listremove(!listconcat(SRFFeatures, CWFAdditionalFeatures), [FeatureWIDEKL]); // Knights Landing list KNLFeatures = [FeatureX87, diff --git a/llvm/lib/Target/X86/X86AsmPrinter.cpp b/llvm/lib/Target/X86/X86AsmPrinter.cpp index 50c20fcde49ce..d406277e440bb 100644 --- a/llvm/lib/Target/X86/X86AsmPrinter.cpp +++ b/llvm/lib/Target/X86/X86AsmPrinter.cpp @@ -192,9 +192,9 @@ void X86AsmPrinter::emitKCFITypeId(const MachineFunction &MF) { unsigned DestReg = X86::EAX; if (F.getParent()->getModuleFlag("kcfi-arity")) { - // The ArityToRegMap assumes the 64-bit Linux kernel ABI + // The ArityToRegMap assumes the 64-bit SysV ABI. [[maybe_unused]] const auto &Triple = MF.getTarget().getTargetTriple(); - assert(Triple.isArch64Bit() && Triple.isOSLinux()); + assert(Triple.isArch64Bit() && !Triple.isOSWindows()); // Determine the function's arity (i.e., the number of arguments) at the ABI // level by counting the number of parameters that are passed diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td index 215906d9df8b3..823e0caa02262 100644 --- a/llvm/lib/Target/X86/X86CallingConv.td +++ b/llvm/lib/Target/X86/X86CallingConv.td @@ -832,6 +832,7 @@ def CC_X86_32_Common : CallingConv<[ CCIfType<[f64], CCAssignToStack<8, 4>>, // Long doubles get slots whose size and alignment depends on the subtarget. + CCIfSubtarget<"isTargetDarwin()", CCIfType<[f80], CCAssignToStack<0, 4>>>, CCIfType<[f80], CCAssignToStack<0, 0>>, // Boolean vectors of AVX-512 are passed in SIMD registers. 
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 1ad1b47a94d28..5e35d5630d667 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -4975,6 +4975,16 @@ X86TargetLowering::getTargetConstantFromLoad(LoadSDNode *LD) const { return getTargetConstantFromNode(LD); } +bool X86TargetLowering::isTargetCanonicalSelect(SDNode *N) const { + // Do not fold (vselect not(C), X, 0s) to (vselect C, Os, X) + SDValue Cond = N->getOperand(0); + SDValue RHS = N->getOperand(2); + EVT CondVT = Cond.getValueType(); + return N->getOpcode() == ISD::VSELECT && Subtarget.hasAVX512() && + CondVT.getVectorElementType() == MVT::i1 && + ISD::isBuildVectorAllZeros(RHS.getNode()); +} + // Extract raw constant bits from constant pools. static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits, APInt &UndefElts, diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 3039b7eeb38ff..6bcb7a36e91b5 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1356,6 +1356,8 @@ namespace llvm { TargetLowering::isTargetCanonicalConstantNode(Op); } + bool isTargetCanonicalSelect(SDNode *N) const override; + const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override; SDValue unwrapAddress(SDValue N) const override; diff --git a/llvm/lib/Target/X86/X86InstrGISel.td b/llvm/lib/Target/X86/X86InstrGISel.td index f4fa33807cd9a..39198214037a3 100644 --- a/llvm/lib/Target/X86/X86InstrGISel.td +++ b/llvm/lib/Target/X86/X86InstrGISel.td @@ -27,5 +27,13 @@ def G_FIST : X86GenericInstruction { let mayStore = true; } +def G_FNSTCW16 : X86GenericInstruction { + let OutOperandList = (outs); + let InOperandList = (ins ptype0:$dst); + let hasSideEffects = true; + let mayStore = true; +} + def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp 
b/llvm/lib/TargetParser/X86TargetParser.cpp index 94812e4e60c3d..57fbc71fa22ee 100644 --- a/llvm/lib/TargetParser/X86TargetParser.cpp +++ b/llvm/lib/TargetParser/X86TargetParser.cpp @@ -176,10 +176,10 @@ constexpr FeatureBitset FeaturesArrowlakeS = FeaturesArrowlake | FeatureAVXVNNIINT16 | FeatureSHA512 | FeatureSM3 | FeatureSM4; constexpr FeatureBitset FeaturesPantherlake = - FeaturesArrowlakeS | FeaturePREFETCHI; + FeaturesArrowlakeS ^ FeatureWIDEKL | FeaturePREFETCHI; constexpr FeatureBitset FeaturesClearwaterforest = - FeaturesSierraforest | FeatureAVXVNNIINT16 | FeatureSHA512 | FeatureSM3 | - FeatureSM4 | FeaturePREFETCHI | FeatureUSERMSR; + FeaturesSierraforest ^ FeatureWIDEKL | FeatureAVXVNNIINT16 | FeatureSHA512 | + FeatureSM3 | FeatureSM4 | FeaturePREFETCHI | FeatureUSERMSR; // Geode Processor. constexpr FeatureBitset FeaturesGeode = diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp index 7224a56cd7b8a..fe30c6dc6abe4 100644 --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -639,10 +639,10 @@ static DIType *solveDIType(DIBuilder &Builder, Type *Ty, SmallVector Elements; for (unsigned I = 0; I < StructTy->getNumElements(); I++) { DIType *DITy = solveDIType(Builder, StructTy->getElementType(I), Layout, - Scope, LineNum, DITypeCache); + DIStruct, LineNum, DITypeCache); assert(DITy); Elements.push_back(Builder.createMemberType( - Scope, DITy->getName(), Scope->getFile(), LineNum, + DIStruct, DITy->getName(), DIStruct->getFile(), LineNum, DITy->getSizeInBits(), DITy->getAlignInBits(), Layout.getStructLayout(StructTy)->getElementOffsetInBits(I), llvm::DINode::FlagArtificial, DITy)); diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index c0f84456d2b27..469f435374793 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ 
b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -95,6 +95,8 @@ STATISTIC(NewMergedNodes, "Number of new nodes created during merging"); STATISTIC(NonNewMergedNodes, "Number of non new nodes used during merging"); STATISTIC(MissingAllocForContextId, "Number of missing alloc nodes for context ids"); +STATISTIC(SkippedCallsCloning, + "Number of calls skipped during cloning due to unexpected operand"); static cl::opt DotFilePathPrefix( "memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, @@ -181,6 +183,12 @@ static cl::opt AllowRecursiveContexts( "memprof-allow-recursive-contexts", cl::init(true), cl::Hidden, cl::desc("Allow cloning of contexts having recursive cycles")); +// Set the minimum absolute count threshold for allowing inlining of indirect +// calls promoted during cloning. +static cl::opt MemProfICPNoInlineThreshold( + "memprof-icp-noinline-threshold", cl::init(2), cl::Hidden, + cl::desc("Minimum absolute count for promoted target to be inlinable")); + namespace llvm { cl::opt EnableMemProfContextDisambiguation( "enable-memprof-context-disambiguation", cl::init(false), cl::Hidden, @@ -5155,6 +5163,19 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { assert(!isMemProfClone(*CalledFunction)); + // Because we update the cloned calls by calling setCalledOperand (see + // comment below), out of an abundance of caution make sure the called + // function was actually the called operand (or its aliasee). We also + // strip pointer casts when looking for calls (to match behavior during + // summary generation), however, with opaque pointers in theory this + // should not be an issue. Note we still clone the current function + // (containing this call) above, as that could be needed for its callers. + auto *GA = dyn_cast_or_null(CB->getCalledOperand()); + if (CalledFunction != CB->getCalledOperand() && + (!GA || CalledFunction != GA->getAliaseeObject())) { + SkippedCallsCloning++; + return; + } // Update the calls per the summary info. 
// Save orig name since it gets updated in the first iteration // below. @@ -5173,7 +5194,13 @@ bool MemProfContextDisambiguation::applyImport(Module &M) { CBClone = CB; else CBClone = cast((*VMaps[J - 1])[CB]); - CBClone->setCalledFunction(NewF); + // Set the called operand directly instead of calling setCalledFunction, + // as the latter mutates the function type on the call. In rare cases + // we may have a slightly different type on a callee function + // declaration due to it being imported from a different module with + // incomplete types. We really just want to change the name of the + // function to the clone, and not make any type changes. + CBClone->setCalledOperand(NewF.getCallee()); ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CBClone) << ore::NV("Call", CBClone) << " in clone " << ore::NV("Caller", CBClone->getFunction()) @@ -5573,6 +5600,15 @@ void MemProfContextDisambiguation::performICP( .getCallee()); } DirectCall.setCalledFunction(TargetToUse); + // During matching we generate synthetic VP metadata for indirect calls + // not already having any, from the memprof profile's callee GUIDs. If + // we subsequently promote and inline those callees, we currently lose + // the ability to generate this synthetic VP metadata. Optionally apply + // a noinline attribute to promoted direct calls, where the threshold is + // set to capture synthetic VP metadata targets which get a count of 1. 
+ if (MemProfICPNoInlineThreshold && + Candidate.Count < MemProfICPNoInlineThreshold) + DirectCall.setIsNoInline(); ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CBClone) << ore::NV("Call", CBClone) << " in clone " << ore::NV("Caller", CBClone->getFunction()) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 706cb828acc63..3beda6bc5ba38 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -3717,6 +3717,30 @@ Value *InstCombinerImpl::reassociateDisjointOr(Value *LHS, Value *RHS) { return nullptr; } +/// Fold Res, Overflow = (umul.with.overflow x c1); (or Overflow (ugt Res c2)) +/// --> (ugt x (c2/c1)). This code checks whether a multiplication of two +/// unsigned numbers (one is a constant) is mathematically greater than a +/// second constant. +static Value *foldOrUnsignedUMulOverflowICmp(BinaryOperator &I, + InstCombiner::BuilderTy &Builder, + const DataLayout &DL) { + Value *WOV, *X; + const APInt *C1, *C2; + if (match(&I, + m_c_Or(m_ExtractValue<1>( + m_CombineAnd(m_Intrinsic( + m_Value(X), m_APInt(C1)), + m_Value(WOV))), + m_OneUse(m_SpecificCmp(ICmpInst::ICMP_UGT, + m_ExtractValue<0>(m_Deferred(WOV)), + m_APInt(C2))))) && + !C1->isZero()) { + Constant *NewC = ConstantInt::get(X->getType(), C2->udiv(*C1)); + return Builder.CreateICmp(ICmpInst::ICMP_UGT, X, NewC); + } + return nullptr; +} + // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches // here. We should standardize that construct where it is needed or choose some // other way to ensure that commutated variants of patterns are not missed. @@ -4150,6 +4174,11 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) { } } + // Try to fold the pattern "Overflow | icmp pred Res, C2" into a single + // comparison instruction for umul.with.overflow. 
+ if (Value *R = foldOrUnsignedUMulOverflowICmp(I, Builder, DL)) + return replaceInstUsesWith(I, R); + // (~x) | y --> ~(x & (~y)) iff that gets rid of inversions if (sinkNotIntoOtherHandOfLogicalOp(I)) return &I; diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 666f3e51cb30f..5f5200b2c9e62 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -4569,16 +4569,37 @@ struct MemorySanitizerVisitor : public InstVisitor { SC.Done(&I); } - // Instrument abs intrinsic. - // handleUnknownIntrinsic can't handle it because of the last - // is_int_min_poison argument which does not match the result type. + // Instrument @llvm.abs intrinsic. + // + // e.g., i32 @llvm.abs.i32 (i32 , i1 ) + // <4 x i32> @llvm.abs.v4i32(<4 x i32> , i1 ) void handleAbsIntrinsic(IntrinsicInst &I) { + assert(I.arg_size() == 2); + Value *Src = I.getArgOperand(0); + Value *IsIntMinPoison = I.getArgOperand(1); + assert(I.getType()->isIntOrIntVectorTy()); - assert(I.getArgOperand(0)->getType() == I.getType()); - // FIXME: Handle is_int_min_poison. 
+ assert(Src->getType() == I.getType()); + + assert(IsIntMinPoison->getType()->isIntegerTy()); + assert(IsIntMinPoison->getType()->getIntegerBitWidth() == 1); + IRBuilder<> IRB(&I); - setShadow(&I, getShadow(&I, 0)); + Value *SrcShadow = getShadow(Src); + + APInt MinVal = + APInt::getSignedMinValue(Src->getType()->getScalarSizeInBits()); + Value *MinValVec = ConstantInt::get(Src->getType(), MinVal); + Value *SrcIsMin = IRB.CreateICmp(CmpInst::ICMP_EQ, Src, MinValVec); + + Value *PoisonedShadow = getPoisonedShadow(Src); + Value *PoisonedIfIntMinShadow = + IRB.CreateSelect(SrcIsMin, PoisonedShadow, SrcShadow); + Value *Shadow = + IRB.CreateSelect(IsIntMinPoison, PoisonedIfIntMinShadow, SrcShadow); + + setShadow(&I, Shadow); setOrigin(&I, getOrigin(&I, 0)); } diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp index 5b2ca8c5915ff..a69d64956d6d9 100644 --- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -19,7 +19,6 @@ #include "llvm/Analysis/RegionInfo.h" #include "llvm/Analysis/RegionIterator.h" #include "llvm/Analysis/RegionPass.h" -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/UniformityAnalysis.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" @@ -129,7 +128,6 @@ struct PredInfo { using BBPredicates = DenseMap; using PredMap = DenseMap; using BB2BBMap = DenseMap; -using Val2BBMap = DenseMap; // A traits type that is intended to be used in graph algorithms. 
The graph // traits starts at an entry node, and traverses the RegionNodes that are in @@ -281,7 +279,7 @@ class StructurizeCFG { ConstantInt *BoolTrue; ConstantInt *BoolFalse; Value *BoolPoison; - const TargetTransformInfo *TTI; + Function *Func; Region *ParentRegion; @@ -303,12 +301,8 @@ class StructurizeCFG { PredMap LoopPreds; BranchVector LoopConds; - Val2BBMap HoistedValues; - RegionNode *PrevNode; - void hoistZeroCostElseBlockPhiValues(BasicBlock *ElseBB, BasicBlock *ThenBB); - void orderNodes(); void analyzeLoops(RegionNode *N); @@ -338,8 +332,6 @@ class StructurizeCFG { void simplifyAffectedPhis(); - void simplifyHoistedPhis(); - DebugLoc killTerminator(BasicBlock *BB); void changeExit(RegionNode *Node, BasicBlock *NewExit, @@ -367,7 +359,7 @@ class StructurizeCFG { public: void init(Region *R); - bool run(Region *R, DominatorTree *DT, const TargetTransformInfo *TTI); + bool run(Region *R, DominatorTree *DT); bool makeUniformRegion(Region *R, UniformityInfo &UA); }; @@ -393,11 +385,8 @@ class StructurizeCFGLegacyPass : public RegionPass { if (SCFG.makeUniformRegion(R, UA)) return false; } - Function *F = R->getEntry()->getParent(); - const TargetTransformInfo *TTI = - &getAnalysis().getTTI(*F); DominatorTree *DT = &getAnalysis().getDomTree(); - return SCFG.run(R, DT, TTI); + return SCFG.run(R, DT); } StringRef getPassName() const override { return "Structurize control flow"; } @@ -405,9 +394,7 @@ class StructurizeCFGLegacyPass : public RegionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { if (SkipUniformRegions) AU.addRequired(); - AU.addRequired(); AU.addRequired(); - AU.addRequired(); AU.addPreserved(); RegionPass::getAnalysisUsage(AU); @@ -416,34 +403,6 @@ class StructurizeCFGLegacyPass : public RegionPass { } // end anonymous namespace -/// Checks whether an instruction is zero cost instruction and checks if the -/// operands are from different BB. If so, this instruction can be coalesced -/// if its hoisted to predecessor block. 
So, this returns true. -static bool isHoistableInstruction(Instruction *I, BasicBlock *BB, - const TargetTransformInfo *TTI) { - if (I->getParent() != BB) - return false; - - // If the instruction is not a zero cost instruction, return false. - auto Cost = TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency); - InstructionCost::CostType CostVal = - Cost.isValid() - ? Cost.getValue() - : (InstructionCost::CostType)TargetTransformInfo::TCC_Expensive; - if (CostVal != 0) - return false; - - // Check if any operands are instructions defined in the same block. - for (auto &Op : I->operands()) { - if (auto *OpI = dyn_cast(Op)) { - if (OpI->getParent() == BB) - return false; - } - } - - return true; -} - char StructurizeCFGLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg", @@ -454,39 +413,6 @@ INITIALIZE_PASS_DEPENDENCY(RegionInfoPass) INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg", "Structurize the CFG", false, false) -/// Structurization can introduce unnecessary VGPR copies due to register -/// coalescing interference. For example, if the Else block has a zero-cost -/// instruction and the Then block modifies the VGPR value, only one value is -/// live at a time in merge block before structurization. After structurization, -/// the coalescer may incorrectly treat the Then value as live in the Else block -/// (via the path Then → Flow → Else), leading to unnecessary VGPR copies. -/// -/// This function examines phi nodes whose incoming values are zero-cost -/// instructions in the Else block. It identifies such values that can be safely -/// hoisted and moves them to the nearest common dominator of Then and Else -/// blocks. A follow-up function after setting PhiNodes assigns the hoisted -/// value to poison phi nodes along the if→flow edge, aiding register coalescing -/// and minimizing unnecessary live ranges. 
-void StructurizeCFG::hoistZeroCostElseBlockPhiValues(BasicBlock *ElseBB, - BasicBlock *ThenBB) { - - BasicBlock *ElseSucc = ElseBB->getSingleSuccessor(); - BasicBlock *CommonDominator = DT->findNearestCommonDominator(ElseBB, ThenBB); - - if (!ElseSucc || !CommonDominator) - return; - Instruction *Term = CommonDominator->getTerminator(); - for (PHINode &Phi : ElseSucc->phis()) { - Value *ElseVal = Phi.getIncomingValueForBlock(ElseBB); - auto *Inst = dyn_cast(ElseVal); - if (!Inst || !isHoistableInstruction(Inst, ElseBB, TTI)) - continue; - Inst->removeFromParent(); - Inst->insertInto(CommonDominator, Term->getIterator()); - HoistedValues[Inst] = CommonDominator; - } -} - /// Build up the general order of nodes, by performing a topological sort of the /// parent region's nodes, while ensuring that there is no outer cycle node /// between any two inner cycle nodes. @@ -609,7 +535,7 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) { BasicBlock *Other = Term->getSuccessor(!i); if (Visited.count(Other) && !Loops.count(Other) && !Pred.count(Other) && !Pred.count(P)) { - hoistZeroCostElseBlockPhiValues(Succ, Other); + Pred[Other] = {BoolFalse, std::nullopt}; Pred[P] = {BoolTrue, std::nullopt}; continue; @@ -965,44 +891,6 @@ void StructurizeCFG::setPhiValues() { AffectedPhis.append(InsertedPhis.begin(), InsertedPhis.end()); } -/// Updates PHI nodes after hoisted zero cost instructions by replacing poison -/// entries on Flow nodes with the appropriate hoisted values -void StructurizeCFG::simplifyHoistedPhis() { - for (WeakVH VH : AffectedPhis) { - PHINode *Phi = dyn_cast_or_null(VH); - if (!Phi || Phi->getNumIncomingValues() != 2) - continue; - - for (int i = 0; i < 2; i++) { - Value *V = Phi->getIncomingValue(i); - auto BBIt = HoistedValues.find(V); - - if (BBIt == HoistedValues.end()) - continue; - - Value *OtherV = Phi->getIncomingValue(!i); - PHINode *OtherPhi = dyn_cast(OtherV); - if (!OtherPhi) - continue; - - int PoisonValBBIdx = -1; - for (size_t i = 0; i < 
OtherPhi->getNumIncomingValues(); i++) { - if (!isa(OtherPhi->getIncomingValue(i))) - continue; - PoisonValBBIdx = i; - break; - } - if (PoisonValBBIdx == -1 || - !DT->dominates(BBIt->second, - OtherPhi->getIncomingBlock(PoisonValBBIdx))) - continue; - - OtherPhi->setIncomingValue(PoisonValBBIdx, V); - Phi->setIncomingValue(i, OtherV); - } - } -} - void StructurizeCFG::simplifyAffectedPhis() { bool Changed; do { @@ -1395,13 +1283,12 @@ bool StructurizeCFG::makeUniformRegion(Region *R, UniformityInfo &UA) { } /// Run the transformation for each region found -bool StructurizeCFG::run(Region *R, DominatorTree *DT, - const TargetTransformInfo *TTI) { +bool StructurizeCFG::run(Region *R, DominatorTree *DT) { if (R->isTopLevelRegion()) return false; this->DT = DT; - this->TTI = TTI; + Func = R->getEntry()->getParent(); assert(hasOnlySimpleTerminator(*Func) && "Unsupported block terminator."); @@ -1413,7 +1300,6 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT, insertConditions(false); insertConditions(true); setPhiValues(); - simplifyHoistedPhis(); simplifyConditions(); simplifyAffectedPhis(); rebuildSSA(); @@ -1463,7 +1349,7 @@ PreservedAnalyses StructurizeCFGPass::run(Function &F, bool Changed = false; DominatorTree *DT = &AM.getResult(F); auto &RI = AM.getResult(F); - TargetTransformInfo *TTI = &AM.getResult(F); + UniformityInfo *UI = nullptr; if (SkipUniformRegions) UI = &AM.getResult(F); @@ -1482,7 +1368,7 @@ PreservedAnalyses StructurizeCFGPass::run(Function &F, continue; } - Changed |= SCFG.run(R, DT, TTI); + Changed |= SCFG.run(R, DT); } if (!Changed) return PreservedAnalyses::all(); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 992f98cec0010..fb6640d5cfcf8 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -175,6 +175,7 @@ const char LLVMLoopVectorizeFollowupEpilogue[] = STATISTIC(LoopsVectorized, "Number of loops 
vectorized"); STATISTIC(LoopsAnalyzed, "Number of loops analyzed for vectorization"); STATISTIC(LoopsEpilogueVectorized, "Number of epilogues vectorized"); +STATISTIC(LoopsEarlyExitVectorized, "Number of early exit loops vectorized"); static cl::opt EnableEpilogueVectorization( "enable-epilogue-vectorization", cl::init(true), cl::Hidden, @@ -7205,6 +7206,8 @@ DenseMap LoopVectorizationPlanner::executePlan( "Trying to execute plan with unsupported VF"); assert(BestVPlan.hasUF(BestUF) && "Trying to execute plan with unsupported UF"); + if (BestVPlan.hasEarlyExit()) + ++LoopsEarlyExitVectorized; // TODO: Move to VPlan transform stage once the transition to the VPlan-based // cost model is complete for better cost estimates. VPlanTransforms::runPass(VPlanTransforms::unrollByUF, BestVPlan, BestUF, @@ -10061,8 +10064,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { // Get user vectorization factor and interleave count. ElementCount UserVF = Hints.getWidth(); unsigned UserIC = Hints.getInterleave(); - if (LVL.hasUncountableEarlyExit() && UserIC != 1 && - !VectorizerParams::isInterleaveForced()) { + if (LVL.hasUncountableEarlyExit() && UserIC != 1) { UserIC = 1; reportVectorizationInfo("Interleaving not supported for loops " "with uncountable early exits", diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index d3761ff43f437..c61e1135524b6 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3898,7 +3898,7 @@ class BoUpSLP { /// When ReuseReorderShuffleIndices is empty it just returns position of \p /// V within vector of Scalars. Otherwise, try to remap on its reuse index. 
- int findLaneForValue(Value *V) const { + unsigned findLaneForValue(Value *V) const { unsigned FoundLane = getVectorFactor(); for (auto *It = find(Scalars, V), *End = Scalars.end(); It != End; std::advance(It, 1)) { @@ -4344,7 +4344,7 @@ class BoUpSLP { /// This POD struct describes one external user in the vectorized tree. struct ExternalUser { - ExternalUser(Value *S, llvm::User *U, const TreeEntry &E, int L) + ExternalUser(Value *S, llvm::User *U, const TreeEntry &E, unsigned L) : Scalar(S), User(U), E(E), Lane(L) {} /// Which scalar in our function. @@ -4357,7 +4357,7 @@ class BoUpSLP { const TreeEntry &E; /// Which lane does the scalar belong to. - int Lane; + unsigned Lane; }; using UserList = SmallVector; @@ -5809,48 +5809,40 @@ static InstructionCost getExtractWithExtendCost( return TTI.getExtractWithExtendCost(Opcode, Dst, VecTy, Index, CostKind); } -/// Correctly creates insert_subvector, checking that the index is multiple of -/// the subvectors length. Otherwise, generates shuffle using \p Generator or +/// Creates subvector insert. Generates shuffle using \p Generator or /// using default shuffle. static Value *createInsertVector( IRBuilderBase &Builder, Value *Vec, Value *V, unsigned Index, function_ref)> Generator = {}) { + if (isa(Vec) && isa(V)) + return Vec; const unsigned SubVecVF = getNumElements(V->getType()); - if (Index % SubVecVF == 0) { - Vec = Builder.CreateInsertVector(Vec->getType(), Vec, V, Index); - } else { - // Create shuffle, insertvector requires that index is multiple of - // the subvector length. - const unsigned VecVF = getNumElements(Vec->getType()); - SmallVector Mask(VecVF, PoisonMaskElem); - std::iota(Mask.begin(), Mask.end(), 0); - for (unsigned I : seq(SubVecVF)) - Mask[I + Index] = I + VecVF; - if (Generator) { - Vec = Generator(Vec, V, Mask); - } else { - // 1. Resize V to the size of Vec. 
- SmallVector ResizeMask(VecVF, PoisonMaskElem); - std::iota(ResizeMask.begin(), std::next(ResizeMask.begin(), SubVecVF), 0); - V = Builder.CreateShuffleVector(V, ResizeMask); - Vec = Builder.CreateShuffleVector(Vec, V, Mask); - } + // Create shuffle, insertvector requires that index is multiple of + // the subvector length. + const unsigned VecVF = getNumElements(Vec->getType()); + SmallVector Mask(VecVF, PoisonMaskElem); + if (isa(Vec)) { + auto *Begin = std::next(Mask.begin(), Index); + std::iota(Begin, std::next(Begin, SubVecVF), 0); + Vec = Builder.CreateShuffleVector(V, Mask); + return Vec; } - return Vec; + std::iota(Mask.begin(), Mask.end(), 0); + std::iota(std::next(Mask.begin(), Index), + std::next(Mask.begin(), Index + SubVecVF), VecVF); + if (Generator) + return Generator(Vec, V, Mask); + // 1. Resize V to the size of Vec. + SmallVector ResizeMask(VecVF, PoisonMaskElem); + std::iota(ResizeMask.begin(), std::next(ResizeMask.begin(), SubVecVF), 0); + V = Builder.CreateShuffleVector(V, ResizeMask); + // 2. Insert V into Vec. + return Builder.CreateShuffleVector(Vec, V, Mask); } -/// Correctly creates extract_subvector, checking that the index is multiple of -/// the subvectors length. Otherwise, generates shuffle using \p Generator or -/// using default shuffle. +/// Generates subvector extract using \p Generator or using default shuffle. static Value *createExtractVector(IRBuilderBase &Builder, Value *Vec, unsigned SubVecVF, unsigned Index) { - if (Index % SubVecVF == 0) { - VectorType *SubVecTy = - getWidenedType(Vec->getType()->getScalarType(), SubVecVF); - return Builder.CreateExtractVector(SubVecTy, Vec, Index); - } - // Create shuffle, extract_subvector requires that index is multiple of - // the subvector length. 
SmallVector Mask(SubVecVF, PoisonMaskElem); std::iota(Mask.begin(), Mask.end(), Index); return Builder.CreateShuffleVector(Vec, Mask); @@ -7901,7 +7893,7 @@ void BoUpSLP::buildExternalUses( // Check if the scalar is externally used as an extra arg. const auto ExtI = ExternallyUsedValues.find(Scalar); if (ExtI != ExternallyUsedValues.end()) { - int FoundLane = Entry->findLaneForValue(Scalar); + unsigned FoundLane = Entry->findLaneForValue(Scalar); LLVM_DEBUG(dbgs() << "SLP: Need to extract: Extra arg from lane " << FoundLane << " from " << *Scalar << ".\n"); ScalarToExtUses.try_emplace(Scalar, ExternalUses.size()); @@ -7949,7 +7941,7 @@ void BoUpSLP::buildExternalUses( if (U && Scalar->hasNUsesOrMore(UsesLimit)) U = nullptr; - int FoundLane = Entry->findLaneForValue(Scalar); + unsigned FoundLane = Entry->findLaneForValue(Scalar); LLVM_DEBUG(dbgs() << "SLP: Need to extract:" << *UserInst << " from lane " << FoundLane << " from " << *Scalar << ".\n"); @@ -16275,8 +16267,8 @@ Value *BoUpSLP::gather( assert(SLPReVec && "FixedVectorType is not expected."); Vec = createInsertVector(Builder, Vec, Scalar, Pos * getNumElements(VecTy)); - auto *II = dyn_cast(Vec); - if (!II || II->getIntrinsicID() != Intrinsic::vector_insert) + auto *II = dyn_cast(Vec); + if (!II) return Vec; InsElt = II; } else { @@ -16296,6 +16288,28 @@ Value *BoUpSLP::gather( if (auto *SI = dyn_cast(Scalar)) UserOp = SI; } else { + if (V->getType()->isVectorTy()) { + if (auto *SV = dyn_cast(InsElt); + SV && SV->getOperand(0) != V && SV->getOperand(1) != V) { + // Find shufflevector, caused by resize. 
+ auto FindOperand = [](Value *Vec, Value *V) -> Instruction * { + if (auto *SV = dyn_cast(Vec)) { + if (SV->getOperand(0) == V) + return SV; + if (SV->getOperand(1) == V) + return SV; + } + return nullptr; + }; + InsElt = nullptr; + if (Instruction *User = FindOperand(SV->getOperand(0), V)) + InsElt = User; + else if (Instruction *User = FindOperand(SV->getOperand(1), V)) + InsElt = User; + assert(InsElt && + "Failed to find shufflevector, caused by resize."); + } + } UserOp = InsElt; } if (UserOp) { @@ -16864,10 +16878,18 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis { V, SimplifyQuery(*R.DL)); })); unsigned InsertionIndex = Idx * getNumElements(ScalarTy); + // Use scalar version of the SCalarType to correctly handle shuffles + // for revectorization. The revectorization mode operates by the + // vectors, but here we need to operate on the scalars, because the + // masks were already transformed for the vector elements and we don't + // need doing this transformation again. + Type *OrigScalarTy = ScalarTy; + ScalarTy = ScalarTy->getScalarType(); Vec = createInsertVector( Builder, Vec, V, InsertionIndex, std::bind(&ShuffleInstructionBuilder::createShuffle, this, _1, _2, _3)); + ScalarTy = OrigScalarTy; if (!CommonMask.empty()) { std::iota(std::next(CommonMask.begin(), Idx), std::next(CommonMask.begin(), Idx + E->getVectorFactor()), @@ -21722,6 +21744,8 @@ class HorizontalReduction { /// Checks if the optimization of original scalar identity operations on /// matched horizontal reductions is enabled and allowed. bool IsSupportedHorRdxIdentityOp = false; + /// The minimum number of the reduced values. + const unsigned ReductionLimit = VectorizeNonPowerOf2 ? 3 : 4; /// Contains vector values for reduction including their scale factor and /// signedness. SmallVector> VectorValuesAndScales; @@ -21740,7 +21764,8 @@ class HorizontalReduction { } /// Checks if instruction is associative and can be vectorized. 
- static bool isVectorizable(RecurKind Kind, Instruction *I) { + static bool isVectorizable(RecurKind Kind, Instruction *I, + bool TwoElementReduction = false) { if (Kind == RecurKind::None) return false; @@ -21749,6 +21774,10 @@ class HorizontalReduction { isBoolLogicOp(I)) return true; + // No need to check for associativity, if 2 reduced values. + if (TwoElementReduction) + return true; + if (Kind == RecurKind::FMax || Kind == RecurKind::FMin) { // FP min/max are associative except for NaN and -0.0. We do not // have to rule out -0.0 here because the intrinsic semantics do not @@ -22020,6 +22049,27 @@ class HorizontalReduction { public: HorizontalReduction() = default; + HorizontalReduction(Instruction *I, ArrayRef Ops) + : ReductionRoot(I), ReductionLimit(2) { + RdxKind = HorizontalReduction::getRdxKind(I); + ReductionOps.emplace_back().push_back(I); + ReducedVals.emplace_back().assign(Ops.begin(), Ops.end()); + for (Value *V : Ops) + ReducedValsToOps[V].push_back(I); + } + + bool matchReductionForOperands() const { + // Analyze "regular" integer/FP types for reductions - no target-specific + // types or pointers. + assert(ReductionRoot && "Reduction root is not set!"); + if (!isVectorizable(RdxKind, cast(ReductionRoot), + all_of(ReducedVals, [](ArrayRef Ops) { + return Ops.size() == 2; + }))) + return false; + + return true; + } /// Try to find a reduction tree. bool matchAssociativeReduction(BoUpSLP &R, Instruction *Root, @@ -22187,7 +22237,6 @@ class HorizontalReduction { /// Attempt to vectorize the tree found by matchAssociativeReduction. Value *tryToReduce(BoUpSLP &V, const DataLayout &DL, TargetTransformInfo *TTI, const TargetLibraryInfo &TLI, AssumptionCache *AC) { - const unsigned ReductionLimit = VectorizeNonPowerOf2 ? 
3 : 4; constexpr unsigned RegMaxNumber = 4; constexpr unsigned RedValsMaxNumber = 128; // If there are a sufficient number of reduction values, reduce @@ -22521,8 +22570,10 @@ class HorizontalReduction { continue; } V.reorderTopToBottom(); - // No need to reorder the root node at all. - V.reorderBottomToTop(/*IgnoreReorder=*/true); + // No need to reorder the root node at all for reassociative reduction. + V.reorderBottomToTop(/*IgnoreReorder=*/RdxFMF.allowReassoc() || + VL.front()->getType()->isIntOrIntVectorTy() || + ReductionLimit > 2); // Keep extracted other reduction values, if they are used in the // vectorization trees. BoUpSLP::ExtraValueToDebugLocsMap LocalExternallyUsedValues( @@ -23736,15 +23787,61 @@ bool SLPVectorizerPass::tryToVectorize(Instruction *I, BoUpSLP &R) { Candidates.emplace_back(A1, B); } + auto TryToReduce = [this, &R, &TTI = *TTI](Instruction *Inst, + ArrayRef Ops) { + if (!isReductionCandidate(Inst)) + return false; + Type *Ty = Inst->getType(); + if (!isValidElementType(Ty) || Ty->isPointerTy()) + return false; + HorizontalReduction HorRdx(Inst, Ops); + if (!HorRdx.matchReductionForOperands()) + return false; + // Check the cost of operations. 
+ VectorType *VecTy = getWidenedType(Ty, Ops.size()); + constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; + InstructionCost ScalarCost = + TTI.getScalarizationOverhead( + VecTy, APInt::getAllOnes(getNumElements(VecTy)), /*Insert=*/false, + /*Extract=*/true, CostKind) + + TTI.getInstructionCost(Inst, CostKind); + InstructionCost RedCost; + switch (::getRdxKind(Inst)) { + case RecurKind::Add: + case RecurKind::Mul: + case RecurKind::Or: + case RecurKind::And: + case RecurKind::Xor: + case RecurKind::FAdd: + case RecurKind::FMul: { + FastMathFlags FMF; + if (auto *FPCI = dyn_cast(Inst)) + FMF = FPCI->getFastMathFlags(); + RedCost = TTI.getArithmeticReductionCost(Inst->getOpcode(), VecTy, FMF, + CostKind); + break; + } + default: + return false; + } + if (RedCost >= ScalarCost) + return false; + + return HorRdx.tryToReduce(R, *DL, &TTI, *TLI, AC) != nullptr; + }; if (Candidates.size() == 1) - return tryToVectorizeList({Op0, Op1}, R); + return TryToReduce(I, {Op0, Op1}) || tryToVectorizeList({Op0, Op1}, R); // We have multiple options. Try to pick the single best. std::optional BestCandidate = R.findBestRootPair(Candidates); if (!BestCandidate) return false; - return tryToVectorizeList( - {Candidates[*BestCandidate].first, Candidates[*BestCandidate].second}, R); + return (*BestCandidate == 0 && + TryToReduce(I, {Candidates[*BestCandidate].first, + Candidates[*BestCandidate].second})) || + tryToVectorizeList({Candidates[*BestCandidate].first, + Candidates[*BestCandidate].second}, + R); } bool SLPVectorizerPass::vectorizeRootInstruction(PHINode *P, Instruction *Root, diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 9a6e4b36397b3..85741b977bb77 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -4183,7 +4183,8 @@ class VPlan { /// block with multiple predecessors (one for the exit via the latch and one /// via the other early exit). 
bool hasEarlyExit() const { - return ExitBlocks.size() > 1 || ExitBlocks[0]->getNumPredecessors() > 1; + return ExitBlocks.size() > 1 || + (ExitBlocks.size() == 1 && ExitBlocks[0]->getNumPredecessors() > 1); } /// Returns true if the scalar tail may execute after the vector loop. Note diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 75ade13b09d9c..3c367664a0988 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -3055,8 +3055,7 @@ void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent, InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF, VPCostContext &Ctx) const { Type *Ty = toVectorTy(getLoadStoreType(&Ingredient), VF); - const Align Alignment = - getLoadStoreAlignment(const_cast(&Ingredient)); + const Align Alignment = getLoadStoreAlignment(&Ingredient); unsigned AS = cast(Ctx.Types.inferScalarType(getAddr())) ->getAddressSpace(); unsigned Opcode = isa(this) @@ -3196,10 +3195,8 @@ InstructionCost VPWidenLoadEVLRecipe::computeCost(ElementCount VF, // TODO: Using getMemoryOpCost() instead of getMaskedMemoryOpCost when we // don't need to compare to the legacy cost model. Type *Ty = toVectorTy(getLoadStoreType(&Ingredient), VF); - const Align Alignment = - getLoadStoreAlignment(const_cast(&Ingredient)); - unsigned AS = - getLoadStoreAddressSpace(const_cast(&Ingredient)); + const Align Alignment = getLoadStoreAlignment(&Ingredient); + unsigned AS = getLoadStoreAddressSpace(&Ingredient); InstructionCost Cost = Ctx.TTI.getMaskedMemoryOpCost( Instruction::Load, Ty, Alignment, AS, Ctx.CostKind); if (!Reverse) @@ -3309,10 +3306,8 @@ InstructionCost VPWidenStoreEVLRecipe::computeCost(ElementCount VF, // TODO: Using getMemoryOpCost() instead of getMaskedMemoryOpCost when we // don't need to compare to the legacy cost model. 
Type *Ty = toVectorTy(getLoadStoreType(&Ingredient), VF); - const Align Alignment = - getLoadStoreAlignment(const_cast(&Ingredient)); - unsigned AS = - getLoadStoreAddressSpace(const_cast(&Ingredient)); + const Align Alignment = getLoadStoreAlignment(&Ingredient); + unsigned AS = getLoadStoreAddressSpace(&Ingredient); InstructionCost Cost = Ctx.TTI.getMaskedMemoryOpCost( Instruction::Store, Ty, Alignment, AS, Ctx.CostKind); if (!Reverse) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 02cea8620d271..6a3b3e6e41955 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -3083,16 +3083,15 @@ void VPlanTransforms::materializeBroadcasts(VPlan &Plan) { } /// Returns true if \p V is VPWidenLoadRecipe or VPInterleaveRecipe that can be -/// converted to a narrower recipe. \p V is used by a wide recipe \p WideMember -/// that feeds a store interleave group at index \p Idx, \p WideMember0 is the -/// recipe feeding the same interleave group at index 0. A VPWidenLoadRecipe can -/// be narrowed to an index-independent load if it feeds all wide ops at all -/// indices (\p OpV must be the operand at index \p OpIdx for both the recipe at -/// lane 0, \p WideMember0, and \p WideMember). A VPInterleaveRecipe can be -/// narrowed to a wide load, if \p V is defined at \p Idx of a load interleave -/// group. -static bool canNarrowLoad(VPWidenRecipe *WideMember0, VPWidenRecipe *WideMember, - unsigned OpIdx, VPValue *OpV, unsigned Idx) { +/// converted to a narrower recipe. \p V is used by a wide recipe that feeds a +/// store interleave group at index \p Idx, \p WideMember0 is the recipe feeding +/// the same interleave group at index 0. A VPWidenLoadRecipe can be narrowed to +/// an index-independent load if it feeds all wide ops at all indices (\p OpV +/// must be the operand at index \p OpIdx for both the recipe at lane 0, \p +/// WideMember0). 
A VPInterleaveRecipe can be narrowed to a wide load, if \p V +/// is defined at \p Idx of a load interleave group. +static bool canNarrowLoad(VPWidenRecipe *WideMember0, unsigned OpIdx, + VPValue *OpV, unsigned Idx) { auto *DefR = OpV->getDefiningRecipe(); if (!DefR) return WideMember0->getOperand(OpIdx) == OpV; @@ -3165,6 +3164,10 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF, match(&R, m_BranchOnCount(m_VPValue(), m_VPValue()))) continue; + if (isa(&R) && + vputils::onlyFirstLaneUsed(cast(&R))) + continue; + // Bail out on recipes not supported at the moment: // * phi recipes other than the canonical induction // * recipes writing to memory except interleave groups @@ -3236,9 +3239,9 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF, R->getNumOperands() > 2) return; if (any_of(enumerate(R->operands()), - [WideMember0, Idx = I, R](const auto &P) { + [WideMember0, Idx = I](const auto &P) { const auto &[OpIdx, OpV] = P; - return !canNarrowLoad(WideMember0, R, OpIdx, OpV, Idx); + return !canNarrowLoad(WideMember0, OpIdx, OpV, Idx); })) return; } diff --git a/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll b/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll index 39debd8e3dddc..4bb4818cc53ef 100644 --- a/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll +++ b/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll @@ -978,6 +978,122 @@ define void @store() { ret void } +define void @gather() { +; ARGBASED-LABEL: 'gather' +; ARGBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = call <2 x i8> @llvm.vp.gather.v2i8.v2p0(<2 x ptr> poison, <2 x i1> poison, i32 poison) +; ARGBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = call <4 x i8> @llvm.vp.gather.v4i8.v4p0(<4 x ptr> poison, <4 x i1> poison, i32 poison) +; ARGBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %3 = call <8 x i8> @llvm.vp.gather.v8i8.v8p0(<8 x ptr> poison, <8 x i1> poison, i32 
poison) +; ARGBASED-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %4 = call <16 x i8> @llvm.vp.gather.v16i8.v16p0(<16 x ptr> poison, <16 x i1> poison, i32 poison) +; ARGBASED-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %5 = call <2 x i64> @llvm.vp.gather.v2i64.v2p0(<2 x ptr> poison, <2 x i1> poison, i32 poison) +; ARGBASED-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %6 = call <4 x i64> @llvm.vp.gather.v4i64.v4p0(<4 x ptr> poison, <4 x i1> poison, i32 poison) +; ARGBASED-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %7 = call <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr> poison, <8 x i1> poison, i32 poison) +; ARGBASED-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %8 = call <16 x i64> @llvm.vp.gather.v16i64.v16p0(<16 x ptr> poison, <16 x i1> poison, i32 poison) +; ARGBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %9 = call @llvm.vp.gather.nxv2i8.nxv2p0( poison, poison, i32 poison) +; ARGBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %10 = call @llvm.vp.gather.nxv4i8.nxv4p0( poison, poison, i32 poison) +; ARGBASED-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %11 = call @llvm.vp.gather.nxv8i8.nxv8p0( poison, poison, i32 poison) +; ARGBASED-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %12 = call @llvm.vp.gather.nxv16i8.nxv16p0( poison, poison, i32 poison) +; ARGBASED-NEXT: Cost Model: Invalid cost for instruction: %13 = call @llvm.vp.gather.nxv2i64.nxv2p0( poison, poison, i32 poison) +; ARGBASED-NEXT: Cost Model: Invalid cost for instruction: %14 = call @llvm.vp.gather.nxv4i64.nxv4p0( poison, poison, i32 poison) +; ARGBASED-NEXT: Cost Model: Invalid cost for instruction: %15 = call @llvm.vp.gather.nxv8i64.nxv8p0( poison, poison, i32 poison) +; ARGBASED-NEXT: Cost Model: Invalid cost for instruction: %16 = call @llvm.vp.gather.nxv16i64.nxv16p0( poison, poison, i32 poison) +; 
ARGBASED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; TYPEBASED-LABEL: 'gather' +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %1 = call <2 x i8> @llvm.vp.gather.v2i8.v2p0(<2 x ptr> poison, <2 x i1> poison, i32 poison) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %2 = call <4 x i8> @llvm.vp.gather.v4i8.v4p0(<4 x ptr> poison, <4 x i1> poison, i32 poison) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %3 = call <8 x i8> @llvm.vp.gather.v8i8.v8p0(<8 x ptr> poison, <8 x i1> poison, i32 poison) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %4 = call <16 x i8> @llvm.vp.gather.v16i8.v16p0(<16 x ptr> poison, <16 x i1> poison, i32 poison) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %5 = call <2 x i64> @llvm.vp.gather.v2i64.v2p0(<2 x ptr> poison, <2 x i1> poison, i32 poison) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %6 = call <4 x i64> @llvm.vp.gather.v4i64.v4p0(<4 x ptr> poison, <4 x i1> poison, i32 poison) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %7 = call <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr> poison, <8 x i1> poison, i32 poison) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %8 = call <16 x i64> @llvm.vp.gather.v16i64.v16p0(<16 x ptr> poison, <16 x i1> poison, i32 poison) +; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %9 = call @llvm.vp.gather.nxv2i8.nxv2p0( poison, poison, i32 poison) +; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %10 = call @llvm.vp.gather.nxv4i8.nxv4p0( poison, poison, i32 poison) +; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %11 = call @llvm.vp.gather.nxv8i8.nxv8p0( poison, poison, i32 poison) +; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %12 = call @llvm.vp.gather.nxv16i8.nxv16p0( poison, 
poison, i32 poison) +; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %13 = call @llvm.vp.gather.nxv2i64.nxv2p0( poison, poison, i32 poison) +; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %14 = call @llvm.vp.gather.nxv4i64.nxv4p0( poison, poison, i32 poison) +; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %15 = call @llvm.vp.gather.nxv8i64.nxv8p0( poison, poison, i32 poison) +; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %16 = call @llvm.vp.gather.nxv16i64.nxv16p0( poison, poison, i32 poison) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + call <2 x i8> @llvm.vp.gather(<2 x ptr> poison, <2 x i1> poison, i32 poison) + call <4 x i8> @llvm.vp.gather(<4 x ptr> poison, <4 x i1> poison, i32 poison) + call <8 x i8> @llvm.vp.gather(<8 x ptr> poison, <8 x i1> poison, i32 poison) + call <16 x i8> @llvm.vp.gather(<16 x ptr> poison, <16 x i1> poison, i32 poison) + call <2 x i64> @llvm.vp.gather(<2 x ptr> poison, <2 x i1> poison, i32 poison) + call <4 x i64> @llvm.vp.gather(<4 x ptr> poison, <4 x i1> poison, i32 poison) + call <8 x i64> @llvm.vp.gather(<8 x ptr> poison, <8 x i1> poison, i32 poison) + call <16 x i64> @llvm.vp.gather(<16 x ptr> poison, <16 x i1> poison, i32 poison) + call @llvm.vp.gather( poison, poison, i32 poison) + call @llvm.vp.gather( poison, poison, i32 poison) + call @llvm.vp.gather( poison, poison, i32 poison) + call @llvm.vp.gather( poison, poison, i32 poison) + call @llvm.vp.gather( poison, poison, i32 poison) + call @llvm.vp.gather( poison, poison, i32 poison) + call @llvm.vp.gather( poison, poison, i32 poison) + call @llvm.vp.gather( poison, poison, i32 poison) + ret void +} + +define void @scatter() { +; ARGBASED-LABEL: 'scatter' +; ARGBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.vp.scatter.v2i8.v2p0(<2 x i8> poison, <2 x ptr> poison, <2 x i1> poison, i32 poison) +; ARGBASED-NEXT: Cost Model: Found an estimated cost 
of 4 for instruction: call void @llvm.vp.scatter.v4i8.v4p0(<4 x i8> poison, <4 x ptr> poison, <4 x i1> poison, i32 poison) +; ARGBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.vp.scatter.v8i8.v8p0(<8 x i8> poison, <8 x ptr> poison, <8 x i1> poison, i32 poison) +; ARGBASED-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.vp.scatter.v16i8.v16p0(<16 x i8> poison, <16 x ptr> poison, <16 x i1> poison, i32 poison) +; ARGBASED-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.vp.scatter.v2i64.v2p0(<2 x i64> poison, <2 x ptr> poison, <2 x i1> poison, i32 poison) +; ARGBASED-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.vp.scatter.v4i64.v4p0(<4 x i64> poison, <4 x ptr> poison, <4 x i1> poison, i32 poison) +; ARGBASED-NEXT: Cost Model: Found an estimated cost of 69 for instruction: call void @llvm.vp.scatter.v8i64.v8p0(<8 x i64> poison, <8 x ptr> poison, <8 x i1> poison, i32 poison) +; ARGBASED-NEXT: Cost Model: Found an estimated cost of 141 for instruction: call void @llvm.vp.scatter.v16i64.v16p0(<16 x i64> poison, <16 x ptr> poison, <16 x i1> poison, i32 poison) +; ARGBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.vp.scatter.nxv2i8.nxv2p0( poison, poison, poison, i32 poison) +; ARGBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.vp.scatter.nxv4i8.nxv4p0( poison, poison, poison, i32 poison) +; ARGBASED-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.vp.scatter.nxv8i8.nxv8p0( poison, poison, poison, i32 poison) +; ARGBASED-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.vp.scatter.nxv16i8.nxv16p0( poison, poison, poison, i32 poison) +; ARGBASED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.vp.scatter.nxv2i64.nxv2p0( poison, poison, poison, i32 poison) +; ARGBASED-NEXT: Cost Model: 
Invalid cost for instruction: call void @llvm.vp.scatter.nxv4i64.nxv4p0( poison, poison, poison, i32 poison) +; ARGBASED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.vp.scatter.nxv8i64.nxv8p0( poison, poison, poison, i32 poison) +; ARGBASED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.vp.scatter.nxv16i64.nxv16p0( poison, poison, poison, i32 poison) +; ARGBASED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; TYPEBASED-LABEL: 'scatter' +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.vp.scatter.v2i8.v2p0(<2 x i8> poison, <2 x ptr> poison, <2 x i1> poison, i32 poison) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.vp.scatter.v4i8.v4p0(<4 x i8> poison, <4 x ptr> poison, <4 x i1> poison, i32 poison) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 69 for instruction: call void @llvm.vp.scatter.v8i8.v8p0(<8 x i8> poison, <8 x ptr> poison, <8 x i1> poison, i32 poison) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 141 for instruction: call void @llvm.vp.scatter.v16i8.v16p0(<16 x i8> poison, <16 x ptr> poison, <16 x i1> poison, i32 poison) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.vp.scatter.v2i64.v2p0(<2 x i64> poison, <2 x ptr> poison, <2 x i1> poison, i32 poison) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.vp.scatter.v4i64.v4p0(<4 x i64> poison, <4 x ptr> poison, <4 x i1> poison, i32 poison) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 69 for instruction: call void @llvm.vp.scatter.v8i64.v8p0(<8 x i64> poison, <8 x ptr> poison, <8 x i1> poison, i32 poison) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 141 for instruction: call void @llvm.vp.scatter.v16i64.v16p0(<16 x i64> poison, <16 x ptr> poison, <16 x i1> poison, i32 poison) +; TYPEBASED-NEXT: Cost Model: Invalid 
cost for instruction: call void @llvm.vp.scatter.nxv2i8.nxv2p0( poison, poison, poison, i32 poison) +; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.vp.scatter.nxv4i8.nxv4p0( poison, poison, poison, i32 poison) +; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.vp.scatter.nxv8i8.nxv8p0( poison, poison, poison, i32 poison) +; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.vp.scatter.nxv16i8.nxv16p0( poison, poison, poison, i32 poison) +; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.vp.scatter.nxv2i64.nxv2p0( poison, poison, poison, i32 poison) +; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.vp.scatter.nxv4i64.nxv4p0( poison, poison, poison, i32 poison) +; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.vp.scatter.nxv8i64.nxv8p0( poison, poison, poison, i32 poison) +; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.vp.scatter.nxv16i64.nxv16p0( poison, poison, poison, i32 poison) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + call void @llvm.vp.scatter(<2 x i8> poison, <2 x ptr> poison, <2 x i1> poison, i32 poison) + call void @llvm.vp.scatter(<4 x i8> poison, <4 x ptr> poison, <4 x i1> poison, i32 poison) + call void @llvm.vp.scatter(<8 x i8> poison, <8 x ptr> poison, <8 x i1> poison, i32 poison) + call void @llvm.vp.scatter(<16 x i8> poison, <16 x ptr> poison, <16 x i1> poison, i32 poison) + call void @llvm.vp.scatter(<2 x i64> poison, <2 x ptr> poison, <2 x i1> poison, i32 poison) + call void @llvm.vp.scatter(<4 x i64> poison, <4 x ptr> poison, <4 x i1> poison, i32 poison) + call void @llvm.vp.scatter(<8 x i64> poison, <8 x ptr> poison, <8 x i1> poison, i32 poison) + call void @llvm.vp.scatter(<16 x i64> poison, <16 x ptr> poison, <16 x i1> poison, i32 poison) + call void @llvm.vp.scatter( poison, poison, poison, i32 poison) + call 
void @llvm.vp.scatter( poison, poison, poison, i32 poison) + call void @llvm.vp.scatter( poison, poison, poison, i32 poison) + call void @llvm.vp.scatter( poison, poison, poison, i32 poison) + call void @llvm.vp.scatter( poison, poison, poison, i32 poison) + call void @llvm.vp.scatter( poison, poison, poison, i32 poison) + call void @llvm.vp.scatter( poison, poison, poison, i32 poison) + call void @llvm.vp.scatter( poison, poison, poison, i32 poison) + ret void +} + define void @strided_load() { ; ARGBASED-LABEL: 'strided_load' ; ARGBASED-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %ti1_2 = call <2 x i1> @llvm.experimental.vp.strided.load.v2i1.p0.i64(ptr undef, i64 undef, <2 x i1> undef, i32 undef) diff --git a/llvm/test/Analysis/DependenceAnalysis/Banerjee.ll b/llvm/test/Analysis/DependenceAnalysis/Banerjee.ll index 6768e9067dca3..d3301520fd107 100644 --- a/llvm/test/Analysis/DependenceAnalysis/Banerjee.ll +++ b/llvm/test/Analysis/DependenceAnalysis/Banerjee.ll @@ -802,7 +802,7 @@ define void @banerjee9(ptr %A, ptr %B, i64 %m, i64 %n) nounwind uwtable ssp { ; CHECK-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 0, ptr %arrayidx, align 8 ; CHECK-NEXT: da analyze - output [* *]! ; CHECK-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: %1 = load i64, ptr %arrayidx7, align 8 -; CHECK-NEXT: da analyze - flow [<= =|<]! +; CHECK-NEXT: da analyze - flow [<= 0|<]! ; CHECK-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 %1, ptr %B.addr.11, align 8 ; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: %1 = load i64, ptr %arrayidx7, align 8 --> Dst: %1 = load i64, ptr %arrayidx7, align 8 @@ -816,7 +816,7 @@ define void @banerjee9(ptr %A, ptr %B, i64 %m, i64 %n) nounwind uwtable ssp { ; NORMALIZE-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 0, ptr %arrayidx, align 8 ; NORMALIZE-NEXT: da analyze - output [* *]! 
; NORMALIZE-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: %1 = load i64, ptr %arrayidx7, align 8 -; NORMALIZE-NEXT: da analyze - flow [<= =|<]! +; NORMALIZE-NEXT: da analyze - flow [<= 0|<]! ; NORMALIZE-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 %1, ptr %B.addr.11, align 8 ; NORMALIZE-NEXT: da analyze - confused! ; NORMALIZE-NEXT: Src: %1 = load i64, ptr %arrayidx7, align 8 --> Dst: %1 = load i64, ptr %arrayidx7, align 8 @@ -830,7 +830,7 @@ define void @banerjee9(ptr %A, ptr %B, i64 %m, i64 %n) nounwind uwtable ssp { ; DELIN-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 0, ptr %arrayidx, align 8 ; DELIN-NEXT: da analyze - output [* *]! ; DELIN-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: %1 = load i64, ptr %arrayidx7, align 8 -; DELIN-NEXT: da analyze - flow [<= =|<]! +; DELIN-NEXT: da analyze - flow [<= 0|<]! ; DELIN-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 %1, ptr %B.addr.11, align 8 ; DELIN-NEXT: da analyze - confused! ; DELIN-NEXT: Src: %1 = load i64, ptr %arrayidx7, align 8 --> Dst: %1 = load i64, ptr %arrayidx7, align 8 @@ -888,7 +888,7 @@ define void @banerjee10(ptr %A, ptr %B, i64 %m, i64 %n) nounwind uwtable ssp { ; CHECK-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 0, ptr %arrayidx, align 8 ; CHECK-NEXT: da analyze - none! ; CHECK-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: %1 = load i64, ptr %arrayidx6, align 8 -; CHECK-NEXT: da analyze - flow [<> =]! +; CHECK-NEXT: da analyze - flow [<> 0]! ; CHECK-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 %1, ptr %B.addr.11, align 8 ; CHECK-NEXT: da analyze - confused! 
; CHECK-NEXT: Src: %1 = load i64, ptr %arrayidx6, align 8 --> Dst: %1 = load i64, ptr %arrayidx6, align 8 @@ -902,7 +902,7 @@ define void @banerjee10(ptr %A, ptr %B, i64 %m, i64 %n) nounwind uwtable ssp { ; NORMALIZE-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 0, ptr %arrayidx, align 8 ; NORMALIZE-NEXT: da analyze - none! ; NORMALIZE-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: %1 = load i64, ptr %arrayidx6, align 8 -; NORMALIZE-NEXT: da analyze - flow [<> =]! +; NORMALIZE-NEXT: da analyze - flow [<> 0]! ; NORMALIZE-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 %1, ptr %B.addr.11, align 8 ; NORMALIZE-NEXT: da analyze - confused! ; NORMALIZE-NEXT: Src: %1 = load i64, ptr %arrayidx6, align 8 --> Dst: %1 = load i64, ptr %arrayidx6, align 8 @@ -916,7 +916,7 @@ define void @banerjee10(ptr %A, ptr %B, i64 %m, i64 %n) nounwind uwtable ssp { ; DELIN-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 0, ptr %arrayidx, align 8 ; DELIN-NEXT: da analyze - none! ; DELIN-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: %1 = load i64, ptr %arrayidx6, align 8 -; DELIN-NEXT: da analyze - flow [<> =]! +; DELIN-NEXT: da analyze - flow [<> 0]! ; DELIN-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 %1, ptr %B.addr.11, align 8 ; DELIN-NEXT: da analyze - confused! ; DELIN-NEXT: Src: %1 = load i64, ptr %arrayidx6, align 8 --> Dst: %1 = load i64, ptr %arrayidx6, align 8 @@ -1058,7 +1058,7 @@ define void @banerjee12(ptr %A, ptr %B, i64 %m, i64 %n) nounwind uwtable ssp { ; CHECK-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 0, ptr %arrayidx, align 8 ; CHECK-NEXT: da analyze - none! ; CHECK-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: %0 = load i64, ptr %arrayidx6, align 8 -; CHECK-NEXT: da analyze - flow [= <>]! +; CHECK-NEXT: da analyze - flow [0 <>]! 
; CHECK-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 %0, ptr %B.addr.11, align 8 ; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: %0 = load i64, ptr %arrayidx6, align 8 --> Dst: %0 = load i64, ptr %arrayidx6, align 8 @@ -1072,7 +1072,7 @@ define void @banerjee12(ptr %A, ptr %B, i64 %m, i64 %n) nounwind uwtable ssp { ; NORMALIZE-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 0, ptr %arrayidx, align 8 ; NORMALIZE-NEXT: da analyze - none! ; NORMALIZE-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: %0 = load i64, ptr %arrayidx6, align 8 -; NORMALIZE-NEXT: da analyze - flow [= <>]! +; NORMALIZE-NEXT: da analyze - flow [0 <>]! ; NORMALIZE-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 %0, ptr %B.addr.11, align 8 ; NORMALIZE-NEXT: da analyze - confused! ; NORMALIZE-NEXT: Src: %0 = load i64, ptr %arrayidx6, align 8 --> Dst: %0 = load i64, ptr %arrayidx6, align 8 @@ -1086,7 +1086,7 @@ define void @banerjee12(ptr %A, ptr %B, i64 %m, i64 %n) nounwind uwtable ssp { ; DELIN-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 0, ptr %arrayidx, align 8 ; DELIN-NEXT: da analyze - none! ; DELIN-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: %0 = load i64, ptr %arrayidx6, align 8 -; DELIN-NEXT: da analyze - flow [= <>]! +; DELIN-NEXT: da analyze - flow [0 <>]! ; DELIN-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 %0, ptr %B.addr.11, align 8 ; DELIN-NEXT: da analyze - confused! 
; DELIN-NEXT: Src: %0 = load i64, ptr %arrayidx6, align 8 --> Dst: %0 = load i64, ptr %arrayidx6, align 8 diff --git a/llvm/test/Analysis/DependenceAnalysis/Coupled.ll b/llvm/test/Analysis/DependenceAnalysis/Coupled.ll index ff9f393f88152..06bfc5d2e8573 100644 --- a/llvm/test/Analysis/DependenceAnalysis/Coupled.ll +++ b/llvm/test/Analysis/DependenceAnalysis/Coupled.ll @@ -285,7 +285,7 @@ define void @couple6(ptr %A, ptr %B, i32 %n) nounwind uwtable ssp { ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx1, align 4 --> Dst: store i32 %conv, ptr %arrayidx1, align 4 ; CHECK-NEXT: da analyze - none! ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx1, align 4 --> Dst: %0 = load i32, ptr %arrayidx3, align 4 -; CHECK-NEXT: da analyze - flow [=|<]! +; CHECK-NEXT: da analyze - flow [0|<]! ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx1, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 ; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx3, align 4 --> Dst: %0 = load i32, ptr %arrayidx3, align 4 @@ -503,7 +503,7 @@ define void @couple11(ptr %A, ptr %B, i32 %n) nounwind uwtable ssp { ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx2, align 4 --> Dst: store i32 %conv, ptr %arrayidx2, align 4 ; CHECK-NEXT: da analyze - none! ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx2, align 4 --> Dst: %0 = load i32, ptr %arrayidx4, align 4 -; CHECK-NEXT: da analyze - flow [=|<] splitable! +; CHECK-NEXT: da analyze - flow [0|<] splitable! ; CHECK-NEXT: da analyze - split level = 1, iteration = 9! ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx2, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 ; CHECK-NEXT: da analyze - confused! @@ -636,7 +636,7 @@ define void @couple14(ptr %A, ptr %B, i32 %n) nounwind uwtable ssp { ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx3, align 4 --> Dst: store i32 %conv, ptr %arrayidx3, align 4 ; CHECK-NEXT: da analyze - none! 
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx3, align 4 --> Dst: %0 = load i32, ptr %arrayidx6, align 4 -; CHECK-NEXT: da analyze - flow [=|<]! +; CHECK-NEXT: da analyze - flow [0|<]! ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx3, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 ; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx6, align 4 --> Dst: %0 = load i32, ptr %arrayidx6, align 4 diff --git a/llvm/test/Analysis/DependenceAnalysis/NonCanonicalizedSubscript.ll b/llvm/test/Analysis/DependenceAnalysis/NonCanonicalizedSubscript.ll index f0cd2fd4cd930..e5d5d21e365a1 100644 --- a/llvm/test/Analysis/DependenceAnalysis/NonCanonicalizedSubscript.ll +++ b/llvm/test/Analysis/DependenceAnalysis/NonCanonicalizedSubscript.ll @@ -18,7 +18,7 @@ define void @i32_subscript(ptr %a, ptr %b) { ; CHECK-NEXT: Src: %0 = load i32, ptr %a.addr, align 4 --> Dst: %0 = load i32, ptr %a.addr, align 4 ; CHECK-NEXT: da analyze - none! ; CHECK-NEXT: Src: %0 = load i32, ptr %a.addr, align 4 --> Dst: store i32 %1, ptr %a.addr.2, align 4 -; CHECK-NEXT: da analyze - anti [=|<]! +; CHECK-NEXT: da analyze - anti [0|<]! ; CHECK-NEXT: Src: store i32 %1, ptr %a.addr.2, align 4 --> Dst: store i32 %1, ptr %a.addr.2, align 4 ; CHECK-NEXT: da analyze - none! 
; diff --git a/llvm/test/Bindings/llvm-c/debug_info_new_format.ll b/llvm/test/Bindings/llvm-c/debug_info_new_format.ll index 86ec915bd03ba..83b37da759b5c 100644 --- a/llvm/test/Bindings/llvm-c/debug_info_new_format.ll +++ b/llvm/test/Bindings/llvm-c/debug_info_new_format.ll @@ -3,30 +3,36 @@ ; CHECK: ; ModuleID = 'debuginfo.c' ; CHECK-NEXT: source_filename = "debuginfo.c" - -; CHECK: define i64 @foo(i64 %0, i64 %1, <10 x i64> %2) !dbg !36 { + +; CHECK: define i64 @foo(i64 %0, i64 %1, <10 x i64> %2) !dbg !44 { ; CHECK-NEXT: entry: -; CHECK-NEXT: #dbg_declare(i64 0, !43, !DIExpression(), !50) -; CHECK-NEXT: #dbg_declare(i64 0, !44, !DIExpression(), !50) -; CHECK-NEXT: #dbg_declare(i64 0, !45, !DIExpression(), !50) -; CHECK-NEXT: #dbg_label(!51, !50) +; CHECK-NEXT: #dbg_declare(i64 0, !49, !DIExpression(), !58) +; CHECK-NEXT: #dbg_declare(i64 0, !50, !DIExpression(), !58) +; CHECK-NEXT: #dbg_declare(i64 0, !51, !DIExpression(), !58) +; CHECK-NEXT: #dbg_label(!59, !58) ; CHECK-NEXT: br label %vars -; CHECK-NEXT: #dbg_label(!52, !50) +; CHECK-NEXT: #dbg_label(!60, !58) ; CHECK-NEXT: br label %vars -; CHECK: vars: + +; CHECK: vars: ; preds = %entry, %entry ; CHECK-NEXT: %p1 = phi i64 [ 0, %entry ] ; CHECK-NEXT: %p2 = phi i64 [ 0, %entry ] -; CHECK-NEXT: #dbg_value(i64 0, !46, !DIExpression(DW_OP_constu, 0, DW_OP_stack_value), !53) -; CHECK-NEXT: #dbg_value(i64 1, !48, !DIExpression(DW_OP_constu, 1, DW_OP_stack_value), !53) +; CHECK-NEXT: #dbg_value(i64 0, !42, !DIExpression(DW_OP_constu, 0, DW_OP_stack_value), !61) +; CHECK-NEXT: #dbg_value(i64 1, !52, !DIExpression(DW_OP_constu, 1, DW_OP_stack_value), !61) ; CHECK-NEXT: %a = add i64 %p1, %p2 ; CHECK-NEXT: ret i64 0 ; CHECK-NEXT: } - + ; CHECK: !llvm.dbg.cu = !{!0} ; CHECK-NEXT: !FooType = !{!33} ; CHECK-NEXT: !EnumTest = !{!3} ; CHECK-NEXT: !LargeEnumTest = !{!11} - +; CHECK-NEXT: !SubrangeType = !{!36} +; CHECK-NEXT: !SetType1 = !{!37} +; CHECK-NEXT: !SetType2 = !{!38} +; CHECK-NEXT: !DynType = !{!39} +; CHECK-NEXT: 
!ClassType = !{!54} + ; CHECK: !0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "llvm-c-test", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, globals: !16, imports: !24, macros: !28, splitDebugInlining: false, sysroot: "/") ; CHECK-NEXT: !1 = !DIFile(filename: "debuginfo.c", directory: ".") ; CHECK-NEXT: !2 = !{!3, !11} @@ -63,21 +69,29 @@ ; CHECK-NEXT: !33 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !34, size: 192, dwarfAddressSpace: 0) ; CHECK-NEXT: !34 = !DICompositeType(tag: DW_TAG_structure_type, name: "MyStruct", scope: !4, file: !1, size: 192, elements: !35, runtimeLang: DW_LANG_C89, identifier: "MyStruct") ; CHECK-NEXT: !35 = !{!6, !6, !6} -; CHECK-NEXT: !36 = distinct !DISubprogram(name: "foo", linkageName: "foo", scope: !1, file: !1, line: 42, type: !37, scopeLine: 42, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition, unit: !0, retainedNodes: !42) -; CHECK-NEXT: !37 = !DISubroutineType(types: !38) -; CHECK-NEXT: !38 = !{!6, !6, !39} -; CHECK-NEXT: !39 = !DICompositeType(tag: DW_TAG_array_type, baseType: !6, size: 640, flags: DIFlagVector, elements: !40) +; CHECK-NEXT: !36 = !DISubrangeType(name: "foo", scope: !1, file: !1, line: 42, size: 64, baseType: !6, lowerBound: i64 0, upperBound: i64 1, stride: i64 8, bias: i64 4) +; CHECK-NEXT: !37 = !DIDerivedType(tag: DW_TAG_set_type, name: "enumset", scope: !1, file: !1, line: 42, baseType: !3, size: 64) +; CHECK-NEXT: !38 = !DIDerivedType(tag: DW_TAG_set_type, name: "subrangeset", scope: !1, file: !1, line: 42, baseType: !36, size: 64) +; CHECK-NEXT: !39 = !DICompositeType(tag: DW_TAG_array_type, name: "foo", scope: !1, file: !1, line: 42, baseType: !6, size: 640, elements: !40, dataLocation: !DIExpression(), associated: !42, rank: !DIExpression()) ; CHECK-NEXT: !40 = !{!41} ; CHECK-NEXT: !41 = !DISubrange(count: 10, lowerBound: 0) -; CHECK-NEXT: !42 = !{!43, !44, !45, !46, !48, !49} -; CHECK-NEXT: !43 = !DILocalVariable(name: "a", arg: 1, scope: !36, 
file: !1, line: 42, type: !6) -; CHECK-NEXT: !44 = !DILocalVariable(name: "b", arg: 2, scope: !36, file: !1, line: 42, type: !6) -; CHECK-NEXT: !45 = !DILocalVariable(name: "c", arg: 3, scope: !36, file: !1, line: 42, type: !39) -; CHECK-NEXT: !46 = !DILocalVariable(name: "d", scope: !47, file: !1, line: 43, type: !6) -; CHECK-NEXT: !47 = distinct !DILexicalBlock(scope: !36, file: !1, line: 42) -; CHECK-NEXT: !48 = !DILocalVariable(name: "e", scope: !47, file: !1, line: 44, type: !6) -; CHECK-NEXT: !49 = !DILabel(scope: !36, name: "label3", file: !1, line: 42) -; CHECK-NEXT: !50 = !DILocation(line: 42, scope: !36) -; CHECK-NEXT: !51 = !DILabel(scope: !36, name: "label1", file: !1, line: 42) -; CHECK-NEXT: !52 = !DILabel(scope: !36, name: "label2", file: !1, line: 42) -; CHECK-NEXT: !53 = !DILocation(line: 43, scope: !36) +; CHECK-NEXT: !42 = !DILocalVariable(name: "d", scope: !43, file: !1, line: 43, type: !6) +; CHECK-NEXT: !43 = distinct !DILexicalBlock(scope: !44, file: !1, line: 42) +; CHECK-NEXT: !44 = distinct !DISubprogram(name: "foo", linkageName: "foo", scope: !1, file: !1, line: 42, type: !45, scopeLine: 42, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition, unit: !0, retainedNodes: !48) +; CHECK-NEXT: !45 = !DISubroutineType(types: !46) +; CHECK-NEXT: !46 = !{!6, !6, !47} +; CHECK-NEXT: !47 = !DICompositeType(tag: DW_TAG_array_type, baseType: !6, size: 640, flags: DIFlagVector, elements: !40) +; CHECK-NEXT: !48 = !{!49, !50, !51, !42, !52, !53} +; CHECK-NEXT: !49 = !DILocalVariable(name: "a", arg: 1, scope: !44, file: !1, line: 42, type: !6) +; CHECK-NEXT: !50 = !DILocalVariable(name: "b", arg: 2, scope: !44, file: !1, line: 42, type: !6) +; CHECK-NEXT: !51 = !DILocalVariable(name: "c", arg: 3, scope: !44, file: !1, line: 42, type: !47) +; CHECK-NEXT: !52 = !DILocalVariable(name: "e", scope: !43, file: !1, line: 44, type: !6) +; CHECK-NEXT: !53 = !DILabel(scope: !44, name: "label3", file: !1, line: 42) +; CHECK-NEXT: !54 = !DICompositeType(tag: 
DW_TAG_class_type, name: "Class", scope: !4, file: !1, size: 192, flags: DIFlagFwdDecl, elements: !55, identifier: "FooClass") +; CHECK-NEXT: !55 = !{!56} +; CHECK-NEXT: !56 = !{!6, !6, !57} +; CHECK-NEXT: !57 = !DIBasicType(name: "Int32", size: 32) +; CHECK-NEXT: !58 = !DILocation(line: 42, scope: !44) +; CHECK-NEXT: !59 = !DILabel(scope: !44, name: "label1", file: !1, line: 42) +; CHECK-NEXT: !60 = !DILabel(scope: !44, name: "label2", file: !1, line: 42) +; CHECK-NEXT: !61 = !DILocation(line: 43, scope: !44) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir index d0424f2e400fc..bd2d8c095831b 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -636,6 +636,9 @@ # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: G_GET_ROUNDING (opcode {{[0-9]+}}): 1 type index, 0 imm indices +# DEBUG-NEXT:.. type index coverage check SKIPPED: no rules defined +# DEBUG-NEXT:.. imm index coverage check SKIPPED: no rules defined # DEBUG-NEXT: G_PTR_ADD (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. the first uncovered type index: 2, OK # DEBUG-NEXT: .. 
the first uncovered imm index: 0, OK diff --git a/llvm/test/CodeGen/AArch64/ldst_update_cfpath.mir b/llvm/test/CodeGen/AArch64/ldst_update_cfpath.mir new file mode 100644 index 0000000000000..8f541af2eb1a3 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/ldst_update_cfpath.mir @@ -0,0 +1,386 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -run-pass=aarch64-ldst-opt -verify-machineinstrs %s -o - | FileCheck %s + +--- | + target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + target triple = "aarch64-none-linux-gnu" + + ; Function Attrs: nofree norecurse nosync nounwind memory(argmem: readwrite) uwtable vscale_range(1,16) + define dso_local void @test(ptr noundef captures(none) %data) local_unnamed_addr #0 { + entry: + br i1 undef, label %while.cond.preheader, label %for.body.preheader + + for.body.preheader: ; preds = %entry + br label %for.body + + while.cond.preheader.loopexit: ; preds = %for.body + br label %while.cond.preheader + + while.cond.preheader: ; preds = %while.cond.preheader.loopexit, %entry + br i1 undef, label %while.body.lr.ph.lr.ph, label %for.cond28.preheader + + while.body.lr.ph.lr.ph: ; preds = %while.cond.preheader + br label %while.body.preheader + + for.body: ; preds = %for.body, %for.body.preheader + br i1 undef, label %for.body, label %while.cond.preheader.loopexit + + for.cond28.preheader: ; preds = %if.then, %if.end, %while.cond.preheader + br i1 undef, label %for.end45, label %for.body36.preheader + + for.body36.preheader: ; preds = %for.cond28.preheader + br label %for.body36 + + while.body: ; preds = %if.end, %while.body.preheader + br i1 undef, label %if.end, label %if.then + + if.then: ; preds = %while.body + br i1 undef, label %while.body.preheader, label %for.cond28.preheader + + while.body.preheader: ; preds = %if.then, %while.body.lr.ph.lr.ph + br label %while.body + + if.end: ; preds = %while.body + br i1 undef, label %for.cond28.preheader, 
label %while.body + + for.body36: ; preds = %for.body36.preheader, %for.body36 + br i1 undef, label %for.body36, label %for.end45 + + for.end45: ; preds = %for.body36, %for.cond28.preheader + ret void + } + +... +--- +name: test +alignment: 16 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +noPhis: true +isSSA: false +noVRegs: true +hasFakeUses: false +callsEHReturn: false +callsUnwindInit: false +hasEHContTarget: false +hasEHScopes: false +hasEHFunclets: false +isOutlined: false +debugInstrRef: false +failsVerification: false +tracksDebugUserValues: true +registers: [] +liveins: + - { reg: '$x0', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + functionContext: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + isCalleeSavedInfoValid: true + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +entry_values: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: + hasRedZone: false +body: | + ; CHECK-LABEL: name: test + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.5(0x40000000) + ; CHECK-NEXT: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $w8 = LDRBBui renamable $x0, 4 + ; CHECK-NEXT: TBNZW killed renamable $w8, 3, %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $w11 = MOVZWi 1, 0 + ; CHECK-NEXT: renamable $w9 = MOVZWi 1, 0 + ; CHECK-NEXT: renamable $w8 = MOVZWi 2, 0, implicit-def $x8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: 
bb.2.while.cond.preheader: + ; CHECK-NEXT: successors: %bb.3(0x60000000), %bb.8(0x20000000) + ; CHECK-NEXT: liveins: $w9, $w11, $x0, $x8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead $wzr = SUBSWri renamable $w11, 299, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 8, %bb.8, implicit $nzcv + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.while.cond.preheader: + ; CHECK-NEXT: successors: %bb.4(0x55555555), %bb.8(0x2aaaaaab) + ; CHECK-NEXT: liveins: $w9, $x0, $x8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $x10 = nuw ADDXrx renamable $x0, renamable $w9, 18 + ; CHECK-NEXT: renamable $w11 = LDRWui renamable $x10, 0 + ; CHECK-NEXT: TBZW killed renamable $w11, 3, %bb.8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.14(0x80000000) + ; CHECK-NEXT: liveins: $w9, $x0, $x8, $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $w9 = ORRWrs $wzr, killed renamable $w9, 0, implicit-def $x9 + ; CHECK-NEXT: B %bb.14 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5.for.body.preheader: + ; CHECK-NEXT: successors: %bb.6(0x80000000) + ; CHECK-NEXT: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $x10 = ADDXri renamable $x0, 8, 0 + ; CHECK-NEXT: renamable $w9 = MOVZWi 1, 0 + ; CHECK-NEXT: renamable $w12 = MOVZWi 2, 0, implicit-def $x12 + ; CHECK-NEXT: $x11 = ORRXrs $xzr, $x10, 0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6.for.body: + ; CHECK-NEXT: successors: %bb.7(0x7e000000), %bb.2(0x02000000) + ; CHECK-NEXT: liveins: $w9, $x0, $x10, $x11, $x12 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $w8 = LDURWi renamable $x11, -4 + ; CHECK-NEXT: STRWui killed renamable $w8, renamable $x11, 0 + ; CHECK-NEXT: renamable $x8 = nuw nsw ADDXri renamable $x12, 2, 0 + ; CHECK-NEXT: renamable $w9 = nuw nsw ADDWri killed renamable $w9, 1, 0 + ; CHECK-NEXT: early-clobber renamable $x10, renamable $w13 = LDRBBpost killed renamable $x10, 4 + ; CHECK-NEXT: TBZW killed renamable $w13, 3, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7.for.body: + ; CHECK-NEXT: 
successors: %bb.6(0x7df7df7e), %bb.2(0x02082082) + ; CHECK-NEXT: liveins: $w9, $x0, $x8, $x10, $x11, $x12 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $x11 = ADDXri killed renamable $x11, 8, 0 + ; CHECK-NEXT: dead $xzr = SUBSXri killed renamable $x12, 598, 0, implicit-def $nzcv + ; CHECK-NEXT: $x12 = ORRXrs $xzr, $x8, 0 + ; CHECK-NEXT: Bcc 0, %bb.2, implicit $nzcv + ; CHECK-NEXT: B %bb.6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.8.for.cond28.preheader: + ; CHECK-NEXT: successors: %bb.12(0x40000000), %bb.9(0x40000000) + ; CHECK-NEXT: liveins: $x0, $x8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $w9 = LDRBBui renamable $x0, 4 + ; CHECK-NEXT: TBZW killed renamable $w9, 3, %bb.12 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.9.for.body36.preheader: + ; CHECK-NEXT: successors: %bb.10(0x80000000) + ; CHECK-NEXT: liveins: $x0, $x8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $w9 = MOVZWi 8, 0, implicit-def $x9 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.10.for.body36: + ; CHECK-NEXT: successors: %bb.11(0x7e000000), %bb.12(0x02000000) + ; CHECK-NEXT: liveins: $x0, $x8, $x9 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $w10 = SUBWri renamable $w8, 1, 0 + ; CHECK-NEXT: renamable $w10 = LDRWroW renamable $x0, killed renamable $w10, 0, 1 + ; CHECK-NEXT: early-clobber renamable $x8 = STRWpost killed renamable $w10, renamable $x8, 2 + ; CHECK-NEXT: renamable $w10 = LDRBBroX renamable $x0, renamable $x9, 0, 0 + ; CHECK-NEXT: TBZW killed renamable $w10, 3, %bb.12 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.11.for.body36: + ; CHECK-NEXT: successors: %bb.10(0x7df7df7e), %bb.12(0x02082082) + ; CHECK-NEXT: liveins: $x0, $x8, $x9 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead $xzr = SUBSXri renamable $x9, 1200, 0, implicit-def $nzcv + ; CHECK-NEXT: renamable $x9 = nuw nsw ADDXri killed renamable $x9, 4, 0 + ; CHECK-NEXT: Bcc 1, %bb.10, implicit $nzcv + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.12.for.end45: + ; CHECK-NEXT: RET undef $lr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: 
bb.13.if.end: + ; CHECK-NEXT: successors: %bb.8(0x04000000), %bb.14(0x7c000000) + ; CHECK-NEXT: liveins: $x0, $x8, $x9, $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $w11 = LDRBBui renamable $x10, 0 + ; CHECK-NEXT: TBZW killed renamable $w11, 3, %bb.8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.14.while.body: + ; CHECK-NEXT: successors: %bb.13(0x7c000000), %bb.15(0x04000000) + ; CHECK-NEXT: liveins: $x0, $x8, $x9, $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $w11 = SUBWri renamable $w8, 1, 0 + ; CHECK-NEXT: renamable $w11 = LDRWroW renamable $x0, killed renamable $w11, 0, 1 + ; CHECK-NEXT: early-clobber renamable $x8 = STRWpost renamable $w11, renamable $x8, 2 + ; CHECK-NEXT: TBZW killed renamable $w11, 2, %bb.13 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.15.if.then: + ; CHECK-NEXT: successors: %bb.16(0x7e000000), %bb.8(0x02000000) + ; CHECK-NEXT: liveins: $x0, $x9 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $w8 = LDRWui renamable $x0, 4 + ; CHECK-NEXT: STRWroX killed renamable $w8, renamable $x0, renamable $x9, 0, 1 + ; CHECK-NEXT: renamable $w8 = MOVZWi 3, 0, implicit-def $x8 + ; CHECK-NEXT: dead $xzr = SUBSXri renamable $x9, 298, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 8, %bb.8, implicit $nzcv + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.16.if.then: + ; CHECK-NEXT: successors: %bb.14(0x7df7df7e), %bb.8(0x02082082) + ; CHECK-NEXT: liveins: $x0, $x8, $x9 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $x9 = nuw nsw ADDXri killed renamable $x9, 1, 0 + ; CHECK-NEXT: renamable $x10 = nuw ADDXrs renamable $x0, renamable $x9, 2 + ; CHECK-NEXT: renamable $w11 = LDRWui renamable $x10, 0 + ; CHECK-NEXT: TBZW killed renamable $w11, 3, %bb.8 + ; CHECK-NEXT: B %bb.14 + bb.0.entry: + successors: %bb.1(0x40000000), %bb.5(0x40000000) + liveins: $x0 + + renamable $w8 = LDRBBui renamable $x0, 4 + TBNZW killed renamable $w8, 3, %bb.5 + + bb.1: + successors: %bb.2(0x80000000) + liveins: $x0 + + renamable $w11 = MOVZWi 1, 0 + renamable $w9 = MOVZWi 1, 0 + 
renamable $w8 = MOVZWi 2, 0, implicit-def $x8 + + bb.2.while.cond.preheader: + successors: %bb.3(0x60000000), %bb.8(0x20000000) + liveins: $w9, $w11, $x0, $x8 + + dead $wzr = SUBSWri renamable $w11, 299, 0, implicit-def $nzcv + Bcc 8, %bb.8, implicit $nzcv + + bb.3.while.cond.preheader: + successors: %bb.4(0x55555555), %bb.8(0x2aaaaaab) + liveins: $w9, $x0, $x8 + + renamable $x10 = nuw ADDXrx renamable $x0, renamable $w9, 18 + renamable $w11 = LDRWui renamable $x10, 0 + TBZW killed renamable $w11, 3, %bb.8 + + bb.4: + successors: %bb.14(0x80000000) + liveins: $w9, $x0, $x8, $x10 + + renamable $w9 = ORRWrs $wzr, killed renamable $w9, 0, implicit-def $x9 + B %bb.14 + + bb.5.for.body.preheader: + successors: %bb.6(0x80000000) + liveins: $x0 + + renamable $x10 = ADDXri renamable $x0, 8, 0 + renamable $w9 = MOVZWi 1, 0 + renamable $w12 = MOVZWi 2, 0, implicit-def $x12 + $x11 = ORRXrs $xzr, $x10, 0 + + bb.6.for.body: + successors: %bb.7(0x7e000000), %bb.2(0x02000000) + liveins: $w9, $x0, $x10, $x11, $x12 + + renamable $w8 = LDURWi renamable $x11, -4 + STRWui killed renamable $w8, renamable $x11, 0 + renamable $x8 = nuw nsw ADDXri renamable $x12, 2, 0 + renamable $w9 = nuw nsw ADDWri killed renamable $w9, 1, 0 + early-clobber renamable $x10, renamable $w13 = LDRBBpost killed renamable $x10, 4 + TBZW killed renamable $w13, 3, %bb.2 + + bb.7.for.body: + successors: %bb.6(0x7df7df7e), %bb.2(0x02082082) + liveins: $w9, $x0, $x8, $x10, $x11, $x12 + + renamable $x11 = ADDXri killed renamable $x11, 8, 0 + dead $xzr = SUBSXri killed renamable $x12, 598, 0, implicit-def $nzcv + $x12 = ORRXrs $xzr, $x8, 0 + Bcc 0, %bb.2, implicit $nzcv + B %bb.6 + + bb.8.for.cond28.preheader: + successors: %bb.12(0x40000000), %bb.9(0x40000000) + liveins: $x0, $x8 + + renamable $w9 = LDRBBui renamable $x0, 4 + TBZW killed renamable $w9, 3, %bb.12 + + bb.9.for.body36.preheader: + successors: %bb.10(0x80000000) + liveins: $x0, $x8 + + renamable $w9 = MOVZWi 8, 0, implicit-def $x9 + + bb.10.for.body36: 
+ successors: %bb.11(0x7e000000), %bb.12(0x02000000) + liveins: $x0, $x8, $x9 + + renamable $w10 = SUBWri renamable $w8, 1, 0 + renamable $w10 = LDRWroW renamable $x0, killed renamable $w10, 0, 1 + STRWui killed renamable $w10, renamable $x8, 0 + renamable $w10 = LDRBBroX renamable $x0, renamable $x9, 0, 0 + TBZW killed renamable $w10, 3, %bb.12 + + bb.11.for.body36: + successors: %bb.10(0x7df7df7e), %bb.12(0x02082082) + liveins: $x0, $x8, $x9 + + renamable $x8 = ADDXri renamable $x8, 2, 0, implicit killed $x8, implicit-def $x8 + dead $xzr = SUBSXri renamable $x9, 1200, 0, implicit-def $nzcv + renamable $x9 = nuw nsw ADDXri killed renamable $x9, 4, 0 + Bcc 1, %bb.10, implicit $nzcv + + bb.12.for.end45: + RET undef $lr + + bb.13.if.end: + successors: %bb.8(0x04000000), %bb.14(0x7c000000) + liveins: $x0, $x8, $x9, $x10 + + renamable $x8 = ADDXri renamable $x8, 2, 0, implicit killed $x8, implicit-def $x8 + renamable $w11 = LDRBBui renamable $x10, 0 + TBZW killed renamable $w11, 3, %bb.8 + + bb.14.while.body: + successors: %bb.13(0x7c000000), %bb.15(0x04000000) + liveins: $x0, $x8, $x9, $x10 + + renamable $w11 = SUBWri renamable $w8, 1, 0 + renamable $w11 = LDRWroW renamable $x0, killed renamable $w11, 0, 1 + STRWui renamable $w11, renamable $x8, 0 + TBZW killed renamable $w11, 2, %bb.13 + + bb.15.if.then: + successors: %bb.16(0x7e000000), %bb.8(0x02000000) + liveins: $x0, $x9 + + renamable $w8 = LDRWui renamable $x0, 4 + STRWroX killed renamable $w8, renamable $x0, renamable $x9, 0, 1 + renamable $w8 = MOVZWi 3, 0, implicit-def $x8 + dead $xzr = SUBSXri renamable $x9, 298, 0, implicit-def $nzcv + Bcc 8, %bb.8, implicit $nzcv + + bb.16.if.then: + successors: %bb.14(0x7df7df7e), %bb.8(0x02082082) + liveins: $x0, $x8, $x9 + + renamable $x9 = nuw nsw ADDXri killed renamable $x9, 1, 0 + renamable $x10 = nuw ADDXrs renamable $x0, renamable $x9, 2 + renamable $w11 = LDRWui renamable $x10, 0 + TBZW killed renamable $w11, 3, %bb.8 + B %bb.14 +... 
diff --git a/llvm/test/CodeGen/AArch64/machine-combiner-maddimm.mir b/llvm/test/CodeGen/AArch64/machine-combiner-maddimm.mir index dc75c8c61c53c..c944889ede695 100644 --- a/llvm/test/CodeGen/AArch64/machine-combiner-maddimm.mir +++ b/llvm/test/CodeGen/AArch64/machine-combiner-maddimm.mir @@ -14,8 +14,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 - ; CHECK-NEXT: [[MOVZWi:%[0-9]+]]:gpr32common = nsw MOVZWi 79, 0 - ; CHECK-NEXT: [[MADDWrrr:%[0-9]+]]:gpr32common = nsw MADDWrrr [[COPY1]], [[COPY]], [[MOVZWi]] + ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = nsw MOVi32imm 79 + ; CHECK-NEXT: [[MADDWrrr:%[0-9]+]]:gpr32common = nsw MADDWrrr [[COPY1]], [[COPY]], [[MOVi32imm]] ; CHECK-NEXT: $w0 = COPY [[MADDWrrr]] ; CHECK-NEXT: RET_ReallyLR implicit $w0 %0:gpr32 = COPY $w0 @@ -38,8 +38,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 - ; CHECK-NEXT: [[MOVZXi:%[0-9]+]]:gpr64common = nsw MOVZXi 79, 0 - ; CHECK-NEXT: [[MADDXrrr:%[0-9]+]]:gpr64common = nsw MADDXrrr [[COPY1]], [[COPY]], [[MOVZXi]] + ; CHECK-NEXT: [[MOVi64imm:%[0-9]+]]:gpr64 = nsw MOVi64imm 79 + ; CHECK-NEXT: [[MADDXrrr:%[0-9]+]]:gpr64common = nsw MADDXrrr [[COPY1]], [[COPY]], [[MOVi64imm]] ; CHECK-NEXT: $x0 = COPY [[MADDXrrr]] ; CHECK-NEXT: RET_ReallyLR implicit $x0 %0:gpr64 = COPY $x0 @@ -62,8 +62,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 - ; CHECK-NEXT: [[MOVNWi:%[0-9]+]]:gpr32common = nsw MOVNWi 0, 0 - ; CHECK-NEXT: [[MADDWrrr:%[0-9]+]]:gpr32 = nsw MADDWrrr [[COPY1]], [[COPY]], [[MOVNWi]] + ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = nsw MOVi32imm -1 + ; CHECK-NEXT: [[MADDWrrr:%[0-9]+]]:gpr32 = nsw MADDWrrr [[COPY1]], [[COPY]], [[MOVi32imm]] ; CHECK-NEXT: $w0 = COPY [[MADDWrrr]] ; CHECK-NEXT: RET_ReallyLR implicit $w0 %0:gpr32 = COPY $w0 @@ -86,8 +86,8 @@ body: | 
; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 - ; CHECK-NEXT: [[MOVNXi:%[0-9]+]]:gpr64common = nsw MOVNXi 0, 0 - ; CHECK-NEXT: [[MADDXrrr:%[0-9]+]]:gpr64 = nsw MADDXrrr [[COPY1]], [[COPY]], [[MOVNXi]] + ; CHECK-NEXT: [[MOVi64imm:%[0-9]+]]:gpr64 = nsw MOVi64imm -1 + ; CHECK-NEXT: [[MADDXrrr:%[0-9]+]]:gpr64 = nsw MADDXrrr [[COPY1]], [[COPY]], [[MOVi64imm]] ; CHECK-NEXT: $x0 = COPY [[MADDXrrr]] ; CHECK-NEXT: RET_ReallyLR implicit $x0 %0:gpr64 = COPY $x0 @@ -110,8 +110,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 - ; CHECK-NEXT: [[ORRWri:%[0-9]+]]:gpr32common = nsw ORRWri $wzr, 1291 - ; CHECK-NEXT: [[MADDWrrr:%[0-9]+]]:gpr32common = nsw MADDWrrr [[COPY1]], [[COPY]], [[ORRWri]] + ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = nsw MOVi32imm 16773120 + ; CHECK-NEXT: [[MADDWrrr:%[0-9]+]]:gpr32common = nsw MADDWrrr [[COPY1]], [[COPY]], [[MOVi32imm]] ; CHECK-NEXT: $w0 = COPY [[MADDWrrr]] ; CHECK-NEXT: RET_ReallyLR implicit $w0 %0:gpr32 = COPY $w0 @@ -134,8 +134,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 - ; CHECK-NEXT: [[ORRXri:%[0-9]+]]:gpr64common = nsw ORRXri $xzr, 7435 - ; CHECK-NEXT: [[MADDXrrr:%[0-9]+]]:gpr64common = nsw MADDXrrr [[COPY1]], [[COPY]], [[ORRXri]] + ; CHECK-NEXT: [[MOVi64imm:%[0-9]+]]:gpr64 = nsw MOVi64imm 16773120 + ; CHECK-NEXT: [[MADDXrrr:%[0-9]+]]:gpr64common = nsw MADDXrrr [[COPY1]], [[COPY]], [[MOVi64imm]] ; CHECK-NEXT: $x0 = COPY [[MADDXrrr]] ; CHECK-NEXT: RET_ReallyLR implicit $x0 %0:gpr64 = COPY $x0 diff --git a/llvm/test/CodeGen/AArch64/madd-combiner.ll b/llvm/test/CodeGen/AArch64/madd-combiner.ll index 6e510712fbd21..cc7fc8fc98629 100644 --- a/llvm/test/CodeGen/AArch64/madd-combiner.ll +++ b/llvm/test/CodeGen/AArch64/madd-combiner.ll @@ -39,9 +39,8 @@ define void @mul_add_imm2() { ; CHECK-FAST-LABEL: 
mul_add_imm2: ; CHECK-FAST: ; %bb.0: ; %entry ; CHECK-FAST-NEXT: mov x8, #-3 ; =0xfffffffffffffffd -; CHECK-FAST-NEXT: mov x9, #-3 ; =0xfffffffffffffffd -; CHECK-FAST-NEXT: madd x8, x8, x8, x9 ; CHECK-FAST-NEXT: mov x9, #45968 ; =0xb390 +; CHECK-FAST-NEXT: madd x8, x8, x8, x8 ; CHECK-FAST-NEXT: movk x9, #48484, lsl #16 ; CHECK-FAST-NEXT: movk x9, #323, lsl #32 ; CHECK-FAST-NEXT: LBB2_1: ; %for.body8 diff --git a/llvm/test/CodeGen/AArch64/sve-bf16-combines.ll b/llvm/test/CodeGen/AArch64/sve-bf16-combines.ll index 8c1d41f71c1ec..5c58eab391972 100644 --- a/llvm/test/CodeGen/AArch64/sve-bf16-combines.ll +++ b/llvm/test/CodeGen/AArch64/sve-bf16-combines.ll @@ -6,8 +6,8 @@ target triple = "aarch64-unknown-linux-gnu" define @fmla_nxv8bf16( %acc, %m1, %m2) { ; CHECK-LABEL: fmla_nxv8bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: bfmul z1.h, z1.h, z2.h -; CHECK-NEXT: bfadd z0.h, z0.h, z1.h +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: bfmla z0.h, p0/m, z1.h, z2.h ; CHECK-NEXT: ret %mul = fmul contract %m1, %m2 %res = fadd contract %acc, %mul @@ -17,8 +17,8 @@ define @fmla_nxv8bf16( %acc, @fmla_nxv4bf16( %acc, %m1, %m2) { ; CHECK-LABEL: fmla_nxv4bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: bfmul z1.h, z1.h, z2.h -; CHECK-NEXT: bfadd z0.h, z0.h, z1.h +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: bfmla z0.h, p0/m, z1.h, z2.h ; CHECK-NEXT: ret %mul = fmul contract %m1, %m2 %res = fadd contract %acc, %mul @@ -28,8 +28,8 @@ define @fmla_nxv4bf16( %acc, @fmla_nxv2bf16( %acc, %m1, %m2) { ; CHECK-LABEL: fmla_nxv2bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: bfmul z1.h, z1.h, z2.h -; CHECK-NEXT: bfadd z0.h, z0.h, z1.h +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: bfmla z0.h, p0/m, z1.h, z2.h ; CHECK-NEXT: ret %mul = fmul contract %m1, %m2 %res = fadd contract %acc, %mul @@ -39,8 +39,8 @@ define @fmla_nxv2bf16( %acc, @fmls_nxv8bf16( %acc, %m1, %m2) { ; CHECK-LABEL: fmls_nxv8bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: bfmul z1.h, z1.h, z2.h -; CHECK-NEXT: bfsub z0.h, z0.h, z1.h +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: 
bfmls z0.h, p0/m, z1.h, z2.h ; CHECK-NEXT: ret %mul = fmul contract %m1, %m2 %res = fsub contract %acc, %mul @@ -50,8 +50,8 @@ define @fmls_nxv8bf16( %acc, @fmls_nxv4bf16( %acc, %m1, %m2) { ; CHECK-LABEL: fmls_nxv4bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: bfmul z1.h, z1.h, z2.h -; CHECK-NEXT: bfsub z0.h, z0.h, z1.h +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: bfmls z0.h, p0/m, z1.h, z2.h ; CHECK-NEXT: ret %mul = fmul contract %m1, %m2 %res = fsub contract %acc, %mul @@ -61,8 +61,8 @@ define @fmls_nxv4bf16( %acc, @fmls_nxv2bf16( %acc, %m1, %m2) { ; CHECK-LABEL: fmls_nxv2bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: bfmul z1.h, z1.h, z2.h -; CHECK-NEXT: bfsub z0.h, z0.h, z1.h +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: bfmls z0.h, p0/m, z1.h, z2.h ; CHECK-NEXT: ret %mul = fmul contract %m1, %m2 %res = fsub contract %acc, %mul @@ -72,9 +72,7 @@ define @fmls_nxv2bf16( %acc, @fmla_sel_nxv8bf16( %pred, %acc, %m1, %m2) { ; CHECK-LABEL: fmla_sel_nxv8bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: bfmul z1.h, z1.h, z2.h -; CHECK-NEXT: bfadd z1.h, z0.h, z1.h -; CHECK-NEXT: mov z0.h, p0/m, z1.h +; CHECK-NEXT: bfmla z0.h, p0/m, z1.h, z2.h ; CHECK-NEXT: ret %mul = fmul contract %m1, %m2 %add = fadd contract %acc, %mul @@ -85,9 +83,7 @@ define @fmla_sel_nxv8bf16( %pred, @fmla_sel_nxv4bf16( %pred, %acc, %m1, %m2) { ; CHECK-LABEL: fmla_sel_nxv4bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: bfmul z1.h, z1.h, z2.h -; CHECK-NEXT: bfadd z1.h, z0.h, z1.h -; CHECK-NEXT: mov z0.s, p0/m, z1.s +; CHECK-NEXT: bfmla z0.h, p0/m, z1.h, z2.h ; CHECK-NEXT: ret %mul = fmul contract %m1, %m2 %add = fadd contract %acc, %mul @@ -98,9 +94,7 @@ define @fmla_sel_nxv4bf16( %pred, @fmla_sel_nxv2bf16( %pred, %acc, %m1, %m2) { ; CHECK-LABEL: fmla_sel_nxv2bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: bfmul z1.h, z1.h, z2.h -; CHECK-NEXT: bfadd z1.h, z0.h, z1.h -; CHECK-NEXT: mov z0.d, p0/m, z1.d +; CHECK-NEXT: bfmla z0.h, p0/m, z1.h, z2.h ; CHECK-NEXT: ret %mul = fmul contract %m1, %m2 %add = fadd contract %acc, %mul @@ -111,9 +105,7 @@ 
define @fmla_sel_nxv2bf16( %pred, @fmls_sel_nxv8bf16( %pred, %acc, %m1, %m2) { ; CHECK-LABEL: fmls_sel_nxv8bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: bfmul z1.h, z1.h, z2.h -; CHECK-NEXT: bfsub z1.h, z0.h, z1.h -; CHECK-NEXT: mov z0.h, p0/m, z1.h +; CHECK-NEXT: bfmls z0.h, p0/m, z1.h, z2.h ; CHECK-NEXT: ret %mul = fmul contract %m1, %m2 %sub = fsub contract %acc, %mul @@ -124,9 +116,7 @@ define @fmls_sel_nxv8bf16( %pred, @fmls_sel_nxv4bf16( %pred, %acc, %m1, %m2) { ; CHECK-LABEL: fmls_sel_nxv4bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: bfmul z1.h, z1.h, z2.h -; CHECK-NEXT: bfsub z1.h, z0.h, z1.h -; CHECK-NEXT: mov z0.s, p0/m, z1.s +; CHECK-NEXT: bfmls z0.h, p0/m, z1.h, z2.h ; CHECK-NEXT: ret %mul = fmul contract %m1, %m2 %sub = fsub contract %acc, %mul @@ -137,9 +127,7 @@ define @fmls_sel_nxv4bf16( %pred, @fmls_sel_nxv2bf16( %pred, %acc, %m1, %m2) { ; CHECK-LABEL: fmls_sel_nxv2bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: bfmul z1.h, z1.h, z2.h -; CHECK-NEXT: bfsub z1.h, z0.h, z1.h -; CHECK-NEXT: mov z0.d, p0/m, z1.d +; CHECK-NEXT: bfmls z0.h, p0/m, z1.h, z2.h ; CHECK-NEXT: ret %mul = fmul contract %m1, %m2 %sub = fsub contract %acc, %mul diff --git a/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll b/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll index 584c29ebcfc04..1b6b92af8c64a 100644 --- a/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll +++ b/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll @@ -16,19 +16,16 @@ define @test_signed_v2f32_v2i32( %f) { ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #-822083584 // =0xcf000000 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z2.d, #0xffffffff80000000 ; CHECK-NEXT: mov z1.s, w8 ; CHECK-NEXT: mov w8, #1325400063 // =0x4effffff -; CHECK-NEXT: mov z3.s, w8 +; CHECK-NEXT: mov z2.s, w8 ; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, z1.s -; CHECK-NEXT: movprfx z1, z0 -; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.s -; CHECK-NEXT: fcmgt p2.s, p0/z, z0.s, z3.s -; CHECK-NEXT: mov z3.d, #0x7fffffff -; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z1.d, #0xffffffff80000000 +; 
CHECK-NEXT: fcmgt p2.s, p0/z, z0.s, z2.s +; CHECK-NEXT: mov z2.d, #0x7fffffff ; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s -; CHECK-NEXT: mov z1.d, p1/m, z2.d -; CHECK-NEXT: sel z0.d, p2, z3.d, z1.d +; CHECK-NEXT: fcvtzs z1.d, p1/m, z0.s +; CHECK-NEXT: sel z0.d, p2, z2.d, z1.d ; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv2f32.nxv2i32( %f) @@ -40,19 +37,16 @@ define @test_signed_v4f32_v4i32( %f) { ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #-822083584 // =0xcf000000 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z2.s, #0x80000000 ; CHECK-NEXT: mov z1.s, w8 ; CHECK-NEXT: mov w8, #1325400063 // =0x4effffff -; CHECK-NEXT: mov z3.s, w8 +; CHECK-NEXT: mov z2.s, w8 ; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, z1.s -; CHECK-NEXT: movprfx z1, z0 -; CHECK-NEXT: fcvtzs z1.s, p0/m, z0.s -; CHECK-NEXT: fcmgt p2.s, p0/z, z0.s, z3.s -; CHECK-NEXT: mov z3.s, #0x7fffffff -; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z1.s, #0x80000000 +; CHECK-NEXT: fcmgt p2.s, p0/z, z0.s, z2.s +; CHECK-NEXT: mov z2.s, #0x7fffffff ; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s -; CHECK-NEXT: mov z1.s, p1/m, z2.s -; CHECK-NEXT: sel z0.s, p2, z3.s, z1.s +; CHECK-NEXT: fcvtzs z1.s, p1/m, z0.s +; CHECK-NEXT: sel z0.s, p2, z2.s, z1.s ; CHECK-NEXT: mov z0.s, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv4f32.nxv4i32( %f) @@ -62,39 +56,26 @@ define @test_signed_v4f32_v4i32( %f) { define @test_signed_v8f32_v8i32( %f) { ; CHECK-LABEL: test_signed_v8f32_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG -; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: mov w8, #-822083584 // =0xcf000000 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z6.s, #0x7fffffff +; CHECK-NEXT: mov z3.s, #0x80000000 ; CHECK-NEXT: mov z2.s, w8 ; CHECK-NEXT: mov w8, #1325400063 // =0x4effffff -; CHECK-NEXT: mov z3.s, w8 -; CHECK-NEXT: movprfx z4, z0 -; CHECK-NEXT: fcvtzs z4.s, p0/m, z0.s -; CHECK-NEXT: movprfx z5, z1 -; CHECK-NEXT: fcvtzs z5.s, p0/m, z1.s +; CHECK-NEXT: mov z4.s, #0x80000000 +; CHECK-NEXT: mov z5.s, w8 ; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, z2.s ; CHECK-NEXT: fcmge p2.s, p0/z, z1.s, z2.s -; CHECK-NEXT: mov z2.s, #0x80000000 -; CHECK-NEXT: fcmgt p3.s, p0/z, z0.s, z3.s -; CHECK-NEXT: fcmgt p4.s, p0/z, z1.s, z3.s -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: sel z3.s, p1, z2.s, z4.s -; CHECK-NEXT: fcmuo p1.s, p0/z, z0.s, z0.s +; CHECK-NEXT: mov z2.s, #0x7fffffff +; CHECK-NEXT: fcmgt p3.s, p0/z, z1.s, z5.s +; CHECK-NEXT: fcvtzs z3.s, p1/m, z0.s +; CHECK-NEXT: fcmgt p1.s, p0/z, z0.s, z5.s +; CHECK-NEXT: fcvtzs z4.s, p2/m, z1.s +; CHECK-NEXT: fcmuo p2.s, p0/z, z0.s, z0.s ; CHECK-NEXT: fcmuo p0.s, p0/z, z1.s, z1.s -; CHECK-NEXT: sel z2.s, p2, z2.s, z5.s -; CHECK-NEXT: sel z0.s, p3, z6.s, z3.s -; CHECK-NEXT: sel z1.s, p4, z6.s, z2.s -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: mov z0.s, p1/m, #0 // =0x0 +; CHECK-NEXT: sel z0.s, p1, z2.s, z3.s +; CHECK-NEXT: sel z1.s, p3, z2.s, z4.s +; CHECK-NEXT: mov z0.s, p2/m, #0 // =0x0 ; CHECK-NEXT: mov z1.s, p0/m, #0 // =0x0 -; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv8f32.nxv8i32( %f) ret %x @@ -105,19 +86,17 @@ define 
@test_signed_v4f32_v4i16( %f) { ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #-956301312 // =0xc7000000 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov z2.s, #-32768 // =0xffffffffffff8000 ; CHECK-NEXT: mov z1.s, w8 ; CHECK-NEXT: mov w8, #65024 // =0xfe00 ; CHECK-NEXT: movk w8, #18175, lsl #16 -; CHECK-NEXT: mov z2.s, w8 ; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, z1.s -; CHECK-NEXT: movprfx z1, z0 -; CHECK-NEXT: fcvtzs z1.s, p0/m, z0.s -; CHECK-NEXT: fcmgt p2.s, p0/z, z0.s, z2.s -; CHECK-NEXT: mov z2.s, #32767 // =0x7fff -; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: fcmgt p2.s, p0/z, z0.s, z1.s +; CHECK-NEXT: mov z1.s, #32767 // =0x7fff ; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s -; CHECK-NEXT: mov z1.s, p1/m, #-32768 // =0xffffffffffff8000 -; CHECK-NEXT: sel z0.s, p2, z2.s, z1.s +; CHECK-NEXT: fcvtzs z2.s, p1/m, z0.s +; CHECK-NEXT: sel z0.s, p2, z1.s, z2.s ; CHECK-NEXT: mov z0.s, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv4f32.nxv4i16( %f) @@ -127,40 +106,28 @@ define @test_signed_v4f32_v4i16( %f) { define @test_signed_v8f32_v8i16( %f) { ; CHECK-LABEL: test_signed_v8f32_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG -; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: mov w8, #-956301312 // =0xc7000000 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z5.s, #32767 // =0x7fff +; CHECK-NEXT: mov z3.s, #-32768 // =0xffffffffffff8000 ; CHECK-NEXT: mov z2.s, w8 ; CHECK-NEXT: mov w8, #65024 // =0xfe00 +; CHECK-NEXT: mov z5.s, #32767 // =0x7fff ; CHECK-NEXT: movk w8, #18175, lsl #16 -; CHECK-NEXT: movprfx z3, z1 -; CHECK-NEXT: fcvtzs z3.s, p0/m, z1.s -; CHECK-NEXT: movprfx z4, z0 -; CHECK-NEXT: fcvtzs z4.s, p0/m, z0.s +; CHECK-NEXT: mov z4.s, w8 ; CHECK-NEXT: fcmge p1.s, p0/z, z1.s, z2.s ; CHECK-NEXT: fcmge p2.s, p0/z, z0.s, z2.s -; CHECK-NEXT: mov z2.s, w8 -; CHECK-NEXT: fcmgt p3.s, p0/z, z1.s, z2.s -; CHECK-NEXT: fcmgt p4.s, p0/z, z0.s, z2.s -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: mov z3.s, p1/m, #-32768 // =0xffffffffffff8000 -; CHECK-NEXT: fcmuo p1.s, p0/z, z1.s, z1.s +; CHECK-NEXT: mov z2.s, #-32768 // =0xffffffffffff8000 +; CHECK-NEXT: fcmgt p3.s, p0/z, z0.s, z4.s +; CHECK-NEXT: fcvtzs z3.s, p1/m, z1.s +; CHECK-NEXT: fcmgt p1.s, p0/z, z1.s, z4.s +; CHECK-NEXT: fcvtzs z2.s, p2/m, z0.s +; CHECK-NEXT: fcmuo p2.s, p0/z, z1.s, z1.s ; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s -; CHECK-NEXT: mov z4.s, p2/m, #-32768 // =0xffffffffffff8000 -; CHECK-NEXT: sel z0.s, p3, z5.s, z3.s -; CHECK-NEXT: sel z1.s, p4, z5.s, z4.s -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: mov z0.s, p1/m, #0 // =0x0 +; CHECK-NEXT: sel z0.s, p1, z5.s, z3.s +; CHECK-NEXT: sel z1.s, p3, z5.s, z2.s +; CHECK-NEXT: mov z0.s, p2/m, #0 // =0x0 ; CHECK-NEXT: mov z1.s, p0/m, #0 // =0x0 ; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h -; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 
; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv8f32.nxv8i16( %f) ret %x @@ -171,19 +138,16 @@ define @test_signed_v2f32_v2i64( %f) { ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #-553648128 // =0xdf000000 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z2.d, #0x8000000000000000 ; CHECK-NEXT: mov z1.s, w8 ; CHECK-NEXT: mov w8, #1593835519 // =0x5effffff -; CHECK-NEXT: mov z3.s, w8 +; CHECK-NEXT: mov z2.s, w8 ; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, z1.s -; CHECK-NEXT: movprfx z1, z0 -; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.s -; CHECK-NEXT: fcmgt p2.s, p0/z, z0.s, z3.s -; CHECK-NEXT: mov z3.d, #0x7fffffffffffffff -; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z1.d, #0x8000000000000000 +; CHECK-NEXT: fcmgt p2.s, p0/z, z0.s, z2.s +; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff ; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s -; CHECK-NEXT: mov z1.d, p1/m, z2.d -; CHECK-NEXT: sel z0.d, p2, z3.d, z1.d +; CHECK-NEXT: fcvtzs z1.d, p1/m, z0.s +; CHECK-NEXT: sel z0.d, p2, z2.d, z1.d ; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv2f32.nxv2i64( %f) @@ -193,41 +157,28 @@ define @test_signed_v2f32_v2i64( %f) { define @test_signed_v4f32_v4i64( %f) { ; CHECK-LABEL: test_signed_v4f32_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG -; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: mov w8, #-553648128 // =0xdf000000 ; CHECK-NEXT: uunpklo z1.d, z0.s ; CHECK-NEXT: uunpkhi z0.d, z0.s ; CHECK-NEXT: mov z2.s, w8 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov w8, #1593835519 // =0x5effffff -; CHECK-NEXT: mov z3.s, w8 -; CHECK-NEXT: mov z6.d, #0x7fffffffffffffff +; CHECK-NEXT: mov z3.d, #0x8000000000000000 +; CHECK-NEXT: mov z4.d, #0x8000000000000000 +; CHECK-NEXT: mov z5.s, w8 ; CHECK-NEXT: fcmge p1.s, p0/z, z1.s, z2.s ; CHECK-NEXT: fcmge p2.s, p0/z, z0.s, z2.s -; CHECK-NEXT: mov z2.d, #0x8000000000000000 -; CHECK-NEXT: movprfx z4, z1 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z1.s -; CHECK-NEXT: movprfx z5, z0 -; CHECK-NEXT: fcvtzs z5.d, p0/m, z0.s -; CHECK-NEXT: fcmgt p3.s, p0/z, z1.s, z3.s -; CHECK-NEXT: fcmgt p4.s, p0/z, z0.s, z3.s -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: sel z3.d, p1, z2.d, z4.d -; CHECK-NEXT: fcmuo p1.s, p0/z, z1.s, z1.s +; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff +; CHECK-NEXT: fcmgt p3.s, p0/z, z0.s, z5.s +; CHECK-NEXT: fcvtzs z3.d, p1/m, z1.s +; CHECK-NEXT: fcmgt p1.s, p0/z, z1.s, z5.s +; CHECK-NEXT: fcvtzs z4.d, p2/m, z0.s +; CHECK-NEXT: fcmuo p2.s, p0/z, z1.s, z1.s ; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s -; CHECK-NEXT: sel z2.d, p2, z2.d, z5.d -; CHECK-NEXT: sel z0.d, p3, z6.d, z3.d -; CHECK-NEXT: sel z1.d, p4, z6.d, z2.d -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 +; CHECK-NEXT: sel z0.d, p1, z2.d, z3.d +; CHECK-NEXT: sel z1.d, p3, z2.d, z4.d +; CHECK-NEXT: mov z0.d, p2/m, #0 // =0x0 ; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0 -; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret 
%x = call @llvm.fptosi.sat.nxv4f32.nxv4i64( %f) ret %x @@ -248,20 +199,17 @@ define @test_signed_v2f64_v2i32( %f) { ; CHECK: // %bb.0: ; CHECK-NEXT: mov x8, #-4476578029606273024 // =0xc1e0000000000000 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z2.d, #0xffffffff80000000 ; CHECK-NEXT: mov z1.d, x8 ; CHECK-NEXT: mov x8, #281474972516352 // =0xffffffc00000 ; CHECK-NEXT: movk x8, #16863, lsl #48 -; CHECK-NEXT: mov z3.d, x8 +; CHECK-NEXT: mov z2.d, x8 ; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, z1.d -; CHECK-NEXT: movprfx z1, z0 -; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.d -; CHECK-NEXT: fcmgt p2.d, p0/z, z0.d, z3.d -; CHECK-NEXT: mov z3.d, #0x7fffffff -; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z1.d, #0xffffffff80000000 +; CHECK-NEXT: fcmgt p2.d, p0/z, z0.d, z2.d +; CHECK-NEXT: mov z2.d, #0x7fffffff ; CHECK-NEXT: fcmuo p0.d, p0/z, z0.d, z0.d -; CHECK-NEXT: mov z1.d, p1/m, z2.d -; CHECK-NEXT: sel z0.d, p2, z3.d, z1.d +; CHECK-NEXT: fcvtzs z1.d, p1/m, z0.d +; CHECK-NEXT: sel z0.d, p2, z2.d, z1.d ; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv2f64.nxv2i32( %f) @@ -271,41 +219,28 @@ define @test_signed_v2f64_v2i32( %f) { define @test_signed_v4f64_v4i32( %f) { ; CHECK-LABEL: test_signed_v4f64_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG -; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: mov x8, #-4476578029606273024 // =0xc1e0000000000000 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z6.d, #0x7fffffff +; CHECK-NEXT: mov z3.d, #0xffffffff80000000 ; CHECK-NEXT: mov z2.d, x8 ; CHECK-NEXT: mov x8, #281474972516352 // =0xffffffc00000 +; CHECK-NEXT: mov z4.d, #0xffffffff80000000 ; CHECK-NEXT: movk x8, #16863, lsl #48 -; CHECK-NEXT: movprfx z4, z1 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z1.d -; CHECK-NEXT: movprfx z5, z0 -; CHECK-NEXT: fcvtzs z5.d, p0/m, z0.d -; CHECK-NEXT: mov z3.d, x8 +; CHECK-NEXT: mov z5.d, x8 ; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, z2.d ; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, z2.d -; CHECK-NEXT: mov z2.d, #0xffffffff80000000 -; CHECK-NEXT: fcmgt p3.d, p0/z, z1.d, z3.d -; CHECK-NEXT: fcmgt p4.d, p0/z, z0.d, z3.d -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: sel z3.d, p1, z2.d, z4.d -; CHECK-NEXT: fcmuo p1.d, p0/z, z1.d, z1.d +; CHECK-NEXT: mov z2.d, #0x7fffffff +; CHECK-NEXT: fcmgt p3.d, p0/z, z0.d, z5.d +; CHECK-NEXT: fcvtzs z3.d, p1/m, z1.d +; CHECK-NEXT: fcmgt p1.d, p0/z, z1.d, z5.d +; CHECK-NEXT: fcvtzs z4.d, p2/m, z0.d +; CHECK-NEXT: fcmuo p2.d, p0/z, z1.d, z1.d ; CHECK-NEXT: fcmuo p0.d, p0/z, z0.d, z0.d -; CHECK-NEXT: sel z2.d, p2, z2.d, z5.d -; CHECK-NEXT: sel z0.d, p3, z6.d, z3.d -; CHECK-NEXT: sel z1.d, p4, z6.d, z2.d -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 +; CHECK-NEXT: sel z0.d, p1, z2.d, z3.d +; CHECK-NEXT: sel z1.d, p3, z2.d, z4.d +; CHECK-NEXT: mov z0.d, p2/m, #0 // =0x0 ; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0 ; CHECK-NEXT: uzp1 z0.s, z1.s, z0.s -; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded 
Reload ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv4f64.nxv4i32( %f) ret %x @@ -316,7 +251,6 @@ define @test_signed_v8f64_v8i32( %f) { ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill @@ -327,48 +261,38 @@ define @test_signed_v8f64_v8i32( %f) { ; CHECK-NEXT: mov z5.d, #0xffffffff80000000 ; CHECK-NEXT: mov z4.d, x8 ; CHECK-NEXT: mov x8, #281474972516352 // =0xffffffc00000 -; CHECK-NEXT: mov z26.d, #0x7fffffff +; CHECK-NEXT: mov z6.d, #0xffffffff80000000 ; CHECK-NEXT: movk x8, #16863, lsl #48 -; CHECK-NEXT: movprfx z7, z0 -; CHECK-NEXT: fcvtzs z7.d, p0/m, z0.d -; CHECK-NEXT: movprfx z24, z3 -; CHECK-NEXT: fcvtzs z24.d, p0/m, z3.d -; CHECK-NEXT: mov z6.d, x8 -; CHECK-NEXT: movprfx z25, z2 -; CHECK-NEXT: fcvtzs z25.d, p0/m, z2.d +; CHECK-NEXT: mov z7.d, #0xffffffff80000000 +; CHECK-NEXT: mov z24.d, #0xffffffff80000000 +; CHECK-NEXT: mov z25.d, x8 +; CHECK-NEXT: fcmuo p6.d, p0/z, z0.d, z0.d ; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, z4.d ; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, z4.d ; CHECK-NEXT: fcmge p3.d, p0/z, z3.d, z4.d ; CHECK-NEXT: fcmge p4.d, p0/z, z2.d, z4.d -; CHECK-NEXT: movprfx z4, z1 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z1.d -; CHECK-NEXT: fcmgt p5.d, p0/z, z1.d, z6.d -; CHECK-NEXT: fcmgt p6.d, p0/z, z0.d, z6.d -; CHECK-NEXT: fcmgt p7.d, p0/z, z3.d, z6.d -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: not p3.b, p0/z, p3.b -; CHECK-NEXT: mov z4.d, p1/m, z5.d -; CHECK-NEXT: fcmgt p1.d, p0/z, z2.d, z6.d -; CHECK-NEXT: not p4.b, p0/z, p4.b -; CHECK-NEXT: sel z6.d, p2, z5.d, z7.d -; CHECK-NEXT: fcmuo p2.d, p0/z, z1.d, z1.d -; CHECK-NEXT: sel z7.d, p3, z5.d, z24.d -; CHECK-NEXT: fcmuo p3.d, p0/z, z0.d, z0.d -; CHECK-NEXT: sel 
z5.d, p4, z5.d, z25.d -; CHECK-NEXT: fcmuo p4.d, p0/z, z3.d, z3.d +; CHECK-NEXT: mov z4.d, #0x7fffffff +; CHECK-NEXT: fcmgt p5.d, p0/z, z2.d, z25.d +; CHECK-NEXT: fcvtzs z5.d, p1/m, z1.d +; CHECK-NEXT: fcmgt p1.d, p0/z, z1.d, z25.d +; CHECK-NEXT: fcvtzs z6.d, p2/m, z0.d +; CHECK-NEXT: fcvtzs z7.d, p3/m, z3.d +; CHECK-NEXT: fcmgt p2.d, p0/z, z0.d, z25.d +; CHECK-NEXT: fcmgt p3.d, p0/z, z3.d, z25.d +; CHECK-NEXT: fcvtzs z24.d, p4/m, z2.d +; CHECK-NEXT: fcmuo p4.d, p0/z, z1.d, z1.d +; CHECK-NEXT: sel z0.d, p1, z4.d, z5.d +; CHECK-NEXT: fcmuo p1.d, p0/z, z3.d, z3.d ; CHECK-NEXT: fcmuo p0.d, p0/z, z2.d, z2.d -; CHECK-NEXT: sel z0.d, p5, z26.d, z4.d -; CHECK-NEXT: sel z1.d, p6, z26.d, z6.d -; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: sel z2.d, p7, z26.d, z7.d -; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: sel z3.d, p1, z26.d, z5.d +; CHECK-NEXT: sel z1.d, p2, z4.d, z6.d +; CHECK-NEXT: sel z2.d, p3, z4.d, z7.d +; CHECK-NEXT: sel z3.d, p5, z4.d, z24.d ; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: mov z0.d, p2/m, #0 // =0x0 -; CHECK-NEXT: mov z1.d, p3/m, #0 // =0x0 -; CHECK-NEXT: mov z2.d, p4/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, p4/m, #0 // =0x0 ; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z1.d, p6/m, #0 // =0x0 +; CHECK-NEXT: mov z2.d, p1/m, #0 // =0x0 +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: mov z3.d, p0/m, #0 // =0x0 ; CHECK-NEXT: uzp1 z0.s, z1.s, z0.s ; CHECK-NEXT: uzp1 z1.s, z3.s, z2.s @@ -382,40 +306,28 @@ define @test_signed_v8f64_v8i32( %f) { define @test_signed_v4f64_v4i16( %f) { ; CHECK-LABEL: test_signed_v4f64_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG -; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: mov x8, #-4548635623644200960 // =0xc0e0000000000000 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z5.d, #32767 // =0x7fff +; CHECK-NEXT: mov z3.d, #-32768 // =0xffffffffffff8000 ; CHECK-NEXT: mov z2.d, x8 ; CHECK-NEXT: mov x8, #281200098803712 // =0xffc000000000 +; CHECK-NEXT: mov z5.d, #32767 // =0x7fff ; CHECK-NEXT: movk x8, #16607, lsl #48 -; CHECK-NEXT: movprfx z3, z1 -; CHECK-NEXT: fcvtzs z3.d, p0/m, z1.d -; CHECK-NEXT: movprfx z4, z0 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z0.d +; CHECK-NEXT: mov z4.d, x8 ; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, z2.d ; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, z2.d -; CHECK-NEXT: mov z2.d, x8 -; CHECK-NEXT: fcmgt p3.d, p0/z, z1.d, z2.d -; CHECK-NEXT: fcmgt p4.d, p0/z, z0.d, z2.d -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: mov z3.d, p1/m, #-32768 // =0xffffffffffff8000 -; CHECK-NEXT: fcmuo p1.d, p0/z, z1.d, z1.d +; CHECK-NEXT: mov z2.d, #-32768 // =0xffffffffffff8000 +; CHECK-NEXT: fcmgt p3.d, p0/z, z0.d, z4.d +; CHECK-NEXT: fcvtzs z3.d, p1/m, z1.d +; CHECK-NEXT: fcmgt p1.d, p0/z, z1.d, z4.d +; CHECK-NEXT: fcvtzs z2.d, p2/m, z0.d +; CHECK-NEXT: fcmuo p2.d, p0/z, z1.d, z1.d ; CHECK-NEXT: fcmuo p0.d, p0/z, z0.d, z0.d -; CHECK-NEXT: mov z4.d, p2/m, #-32768 // =0xffffffffffff8000 -; CHECK-NEXT: sel z0.d, p3, z5.d, z3.d -; CHECK-NEXT: sel z1.d, p4, z5.d, z4.d -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 +; CHECK-NEXT: sel z0.d, p1, z5.d, z3.d +; CHECK-NEXT: sel z1.d, p3, z5.d, z2.d +; CHECK-NEXT: mov z0.d, p2/m, #0 // =0x0 ; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0 ; CHECK-NEXT: uzp1 z0.s, z1.s, z0.s -; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: ldr 
x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv4f64.nxv4i16( %f) ret %x @@ -426,7 +338,6 @@ define @test_signed_v8f64_v8i16( %f) { ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill @@ -434,50 +345,41 @@ define @test_signed_v8f64_v8i16( %f) { ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: mov x8, #-4548635623644200960 // =0xc0e0000000000000 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z25.d, #32767 // =0x7fff +; CHECK-NEXT: mov z5.d, #-32768 // =0xffffffffffff8000 ; CHECK-NEXT: mov z4.d, x8 ; CHECK-NEXT: mov x8, #281200098803712 // =0xffc000000000 +; CHECK-NEXT: mov z6.d, #-32768 // =0xffffffffffff8000 ; CHECK-NEXT: movk x8, #16607, lsl #48 -; CHECK-NEXT: movprfx z6, z2 -; CHECK-NEXT: fcvtzs z6.d, p0/m, z2.d -; CHECK-NEXT: movprfx z7, z1 -; CHECK-NEXT: fcvtzs z7.d, p0/m, z1.d -; CHECK-NEXT: mov z5.d, x8 -; CHECK-NEXT: movprfx z24, z0 -; CHECK-NEXT: fcvtzs z24.d, p0/m, z0.d +; CHECK-NEXT: mov z7.d, #-32768 // =0xffffffffffff8000 +; CHECK-NEXT: mov z25.d, #32767 // =0x7fff +; CHECK-NEXT: mov z24.d, x8 +; CHECK-NEXT: fcmuo p6.d, p0/z, z2.d, z2.d ; CHECK-NEXT: fcmge p1.d, p0/z, z3.d, z4.d ; CHECK-NEXT: fcmge p2.d, p0/z, z2.d, z4.d ; CHECK-NEXT: fcmge p3.d, p0/z, z1.d, z4.d ; CHECK-NEXT: fcmge p4.d, p0/z, z0.d, z4.d -; CHECK-NEXT: movprfx z4, z3 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z3.d -; CHECK-NEXT: fcmgt p5.d, p0/z, z3.d, z5.d -; CHECK-NEXT: fcmgt p6.d, p0/z, z2.d, z5.d -; CHECK-NEXT: fcmgt p7.d, p0/z, z1.d, z5.d -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: not p3.b, p0/z, p3.b -; CHECK-NEXT: mov z4.d, p1/m, #-32768 // =0xffffffffffff8000 -; CHECK-NEXT: fcmgt p1.d, p0/z, z0.d, z5.d -; 
CHECK-NEXT: not p4.b, p0/z, p4.b -; CHECK-NEXT: mov z6.d, p2/m, #-32768 // =0xffffffffffff8000 -; CHECK-NEXT: fcmuo p2.d, p0/z, z3.d, z3.d -; CHECK-NEXT: mov z7.d, p3/m, #-32768 // =0xffffffffffff8000 -; CHECK-NEXT: fcmuo p3.d, p0/z, z2.d, z2.d -; CHECK-NEXT: mov z24.d, p4/m, #-32768 // =0xffffffffffff8000 -; CHECK-NEXT: fcmuo p4.d, p0/z, z1.d, z1.d +; CHECK-NEXT: mov z4.d, #-32768 // =0xffffffffffff8000 +; CHECK-NEXT: fcmgt p5.d, p0/z, z0.d, z24.d +; CHECK-NEXT: fcvtzs z5.d, p1/m, z3.d +; CHECK-NEXT: fcmgt p1.d, p0/z, z3.d, z24.d +; CHECK-NEXT: fcvtzs z6.d, p2/m, z2.d +; CHECK-NEXT: fcvtzs z7.d, p3/m, z1.d +; CHECK-NEXT: fcmgt p2.d, p0/z, z2.d, z24.d +; CHECK-NEXT: fcmgt p3.d, p0/z, z1.d, z24.d +; CHECK-NEXT: fcvtzs z4.d, p4/m, z0.d +; CHECK-NEXT: fcmuo p4.d, p0/z, z3.d, z3.d +; CHECK-NEXT: sel z2.d, p1, z25.d, z5.d +; CHECK-NEXT: fcmuo p1.d, p0/z, z1.d, z1.d ; CHECK-NEXT: fcmuo p0.d, p0/z, z0.d, z0.d -; CHECK-NEXT: sel z2.d, p5, z25.d, z4.d -; CHECK-NEXT: sel z0.d, p6, z25.d, z6.d -; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: sel z1.d, p7, z25.d, z7.d -; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: sel z3.d, p1, z25.d, z24.d +; CHECK-NEXT: sel z0.d, p2, z25.d, z6.d +; CHECK-NEXT: sel z1.d, p3, z25.d, z7.d +; CHECK-NEXT: sel z3.d, p5, z25.d, z4.d ; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: mov z2.d, p2/m, #0 // =0x0 -; CHECK-NEXT: mov z0.d, p3/m, #0 // =0x0 -; CHECK-NEXT: mov z1.d, p4/m, #0 // =0x0 +; CHECK-NEXT: mov z2.d, p4/m, #0 // =0x0 ; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z0.d, p6/m, #0 // =0x0 +; CHECK-NEXT: mov z1.d, p1/m, #0 // =0x0 +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: mov z3.d, p0/m, #0 // =0x0 ; CHECK-NEXT: uzp1 z0.s, z0.s, z2.s ; CHECK-NEXT: uzp1 z1.s, z3.s, z1.s @@ -494,19 +396,16 @@ define @test_signed_v2f64_v2i64( %f) { ; CHECK: // %bb.0: ; CHECK-NEXT: mov x8, 
#-4332462841530417152 // =0xc3e0000000000000 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z2.d, #0x8000000000000000 ; CHECK-NEXT: mov z1.d, x8 ; CHECK-NEXT: mov x8, #4890909195324358655 // =0x43dfffffffffffff -; CHECK-NEXT: mov z3.d, x8 +; CHECK-NEXT: mov z2.d, x8 ; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, z1.d -; CHECK-NEXT: movprfx z1, z0 -; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.d -; CHECK-NEXT: fcmgt p2.d, p0/z, z0.d, z3.d -; CHECK-NEXT: mov z3.d, #0x7fffffffffffffff -; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z1.d, #0x8000000000000000 +; CHECK-NEXT: fcmgt p2.d, p0/z, z0.d, z2.d +; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff ; CHECK-NEXT: fcmuo p0.d, p0/z, z0.d, z0.d -; CHECK-NEXT: mov z1.d, p1/m, z2.d -; CHECK-NEXT: sel z0.d, p2, z3.d, z1.d +; CHECK-NEXT: fcvtzs z1.d, p1/m, z0.d +; CHECK-NEXT: sel z0.d, p2, z2.d, z1.d ; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv2f64.nxv2i64( %f) @@ -516,39 +415,26 @@ define @test_signed_v2f64_v2i64( %f) { define @test_signed_v4f64_v4i64( %f) { ; CHECK-LABEL: test_signed_v4f64_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG -; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: mov x8, #-4332462841530417152 // =0xc3e0000000000000 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z6.d, #0x7fffffffffffffff +; CHECK-NEXT: mov z3.d, #0x8000000000000000 ; CHECK-NEXT: mov z2.d, x8 ; CHECK-NEXT: mov x8, #4890909195324358655 // =0x43dfffffffffffff -; CHECK-NEXT: mov z3.d, x8 -; CHECK-NEXT: movprfx z4, z0 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z0.d -; CHECK-NEXT: movprfx z5, z1 -; CHECK-NEXT: fcvtzs z5.d, p0/m, z1.d +; CHECK-NEXT: mov z4.d, #0x8000000000000000 +; CHECK-NEXT: mov z5.d, x8 ; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, z2.d ; CHECK-NEXT: fcmge p2.d, p0/z, z1.d, z2.d -; CHECK-NEXT: mov z2.d, #0x8000000000000000 -; CHECK-NEXT: fcmgt p3.d, p0/z, z0.d, z3.d -; CHECK-NEXT: fcmgt p4.d, p0/z, z1.d, z3.d -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: sel z3.d, p1, z2.d, z4.d -; CHECK-NEXT: fcmuo p1.d, p0/z, z0.d, z0.d +; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff +; CHECK-NEXT: fcmgt p3.d, p0/z, z1.d, z5.d +; CHECK-NEXT: fcvtzs z3.d, p1/m, z0.d +; CHECK-NEXT: fcmgt p1.d, p0/z, z0.d, z5.d +; CHECK-NEXT: fcvtzs z4.d, p2/m, z1.d +; CHECK-NEXT: fcmuo p2.d, p0/z, z0.d, z0.d ; CHECK-NEXT: fcmuo p0.d, p0/z, z1.d, z1.d -; CHECK-NEXT: sel z2.d, p2, z2.d, z5.d -; CHECK-NEXT: sel z0.d, p3, z6.d, z3.d -; CHECK-NEXT: sel z1.d, p4, z6.d, z2.d -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 +; CHECK-NEXT: sel z0.d, p1, z2.d, z3.d +; CHECK-NEXT: sel z1.d, p3, z2.d, z4.d +; CHECK-NEXT: mov z0.d, p2/m, #0 // =0x0 ; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0 -; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %x = call 
@llvm.fptosi.sat.nxv4f64.nxv4i64( %f) ret %x @@ -570,19 +456,16 @@ define @test_signed_v2f16_v2i32( %f) { ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #64511 // =0xfbff ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z2.d, #0xffffffff80000000 ; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: mov w8, #31743 // =0x7bff -; CHECK-NEXT: mov z3.h, w8 +; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h -; CHECK-NEXT: movprfx z1, z0 -; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.h -; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z3.h -; CHECK-NEXT: mov z3.d, #0x7fffffff -; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z1.d, #0xffffffff80000000 +; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z2.h +; CHECK-NEXT: mov z2.d, #0x7fffffff ; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h -; CHECK-NEXT: mov z1.d, p1/m, z2.d -; CHECK-NEXT: sel z0.d, p2, z3.d, z1.d +; CHECK-NEXT: fcvtzs z1.d, p1/m, z0.h +; CHECK-NEXT: sel z0.d, p2, z2.d, z1.d ; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv2f16.nxv2i32( %f) @@ -594,19 +477,16 @@ define @test_signed_v4f16_v4i32( %f) { ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #64511 // =0xfbff ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z2.s, #0x80000000 ; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: mov w8, #31743 // =0x7bff -; CHECK-NEXT: mov z3.h, w8 +; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h -; CHECK-NEXT: movprfx z1, z0 -; CHECK-NEXT: fcvtzs z1.s, p0/m, z0.h -; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z3.h -; CHECK-NEXT: mov z3.s, #0x7fffffff -; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z1.s, #0x80000000 +; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z2.h +; CHECK-NEXT: mov z2.s, #0x7fffffff ; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h -; CHECK-NEXT: mov z1.s, p1/m, z2.s -; CHECK-NEXT: sel z0.s, p2, z3.s, z1.s +; CHECK-NEXT: fcvtzs z1.s, p1/m, z0.h +; CHECK-NEXT: sel z0.s, p2, z2.s, z1.s ; CHECK-NEXT: mov z0.s, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv4f16.nxv4i32( %f) @@ -616,41 +496,28 @@ 
define @test_signed_v4f16_v4i32( %f) { define @test_signed_v8f16_v8i32( %f) { ; CHECK-LABEL: test_signed_v8f16_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG -; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: mov w8, #64511 // =0xfbff ; CHECK-NEXT: uunpklo z1.s, z0.h ; CHECK-NEXT: uunpkhi z0.s, z0.h ; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: mov w8, #31743 // =0x7bff -; CHECK-NEXT: mov z3.h, w8 -; CHECK-NEXT: mov z6.s, #0x7fffffff +; CHECK-NEXT: mov z3.s, #0x80000000 +; CHECK-NEXT: mov z4.s, #0x80000000 +; CHECK-NEXT: mov z5.h, w8 ; CHECK-NEXT: fcmge p1.h, p0/z, z1.h, z2.h ; CHECK-NEXT: fcmge p2.h, p0/z, z0.h, z2.h -; CHECK-NEXT: mov z2.s, #0x80000000 -; CHECK-NEXT: movprfx z4, z1 -; CHECK-NEXT: fcvtzs z4.s, p0/m, z1.h -; CHECK-NEXT: movprfx z5, z0 -; CHECK-NEXT: fcvtzs z5.s, p0/m, z0.h -; CHECK-NEXT: fcmgt p3.h, p0/z, z1.h, z3.h -; CHECK-NEXT: fcmgt p4.h, p0/z, z0.h, z3.h -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: sel z3.s, p1, z2.s, z4.s -; CHECK-NEXT: fcmuo p1.h, p0/z, z1.h, z1.h +; CHECK-NEXT: mov z2.s, #0x7fffffff +; CHECK-NEXT: fcmgt p3.h, p0/z, z0.h, z5.h +; CHECK-NEXT: fcvtzs z3.s, p1/m, z1.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z1.h, z5.h +; CHECK-NEXT: fcvtzs z4.s, p2/m, z0.h +; CHECK-NEXT: fcmuo p2.h, p0/z, z1.h, z1.h ; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h -; CHECK-NEXT: sel z2.s, p2, z2.s, z5.s -; CHECK-NEXT: sel z0.s, p3, z6.s, z3.s -; CHECK-NEXT: sel z1.s, p4, z6.s, z2.s -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: mov z0.s, p1/m, #0 // =0x0 +; CHECK-NEXT: sel z0.s, p1, z2.s, z3.s +; CHECK-NEXT: sel z1.s, p3, z2.s, z4.s +; CHECK-NEXT: mov z0.s, p2/m, #0 // =0x0 ; CHECK-NEXT: mov z1.s, p0/m, #0 
// =0x0 -; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv8f16.nxv8i32( %f) ret %x @@ -661,18 +528,16 @@ define @test_signed_v4f16_v4i16( %f) { ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #63488 // =0xf800 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov z2.s, #-32768 // =0xffffffffffff8000 ; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: mov w8, #30719 // =0x77ff -; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h -; CHECK-NEXT: movprfx z1, z0 -; CHECK-NEXT: fcvtzs z1.s, p0/m, z0.h -; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z2.h -; CHECK-NEXT: mov z2.s, #32767 // =0x7fff -; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z1.h +; CHECK-NEXT: mov z1.s, #32767 // =0x7fff ; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h -; CHECK-NEXT: mov z1.s, p1/m, #-32768 // =0xffffffffffff8000 -; CHECK-NEXT: sel z0.s, p2, z2.s, z1.s +; CHECK-NEXT: fcvtzs z2.s, p1/m, z0.h +; CHECK-NEXT: sel z0.s, p2, z1.s, z2.s ; CHECK-NEXT: mov z0.s, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv4f16.nxv4i16( %f) @@ -684,18 +549,16 @@ define @test_signed_v8f16_v8i16( %f) { ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #63488 // =0xf800 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov z2.h, #-32768 // =0xffffffffffff8000 ; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: mov w8, #30719 // =0x77ff -; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h -; CHECK-NEXT: movprfx z1, z0 -; CHECK-NEXT: fcvtzs z1.h, p0/m, z0.h -; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z2.h -; CHECK-NEXT: mov z2.h, #32767 // =0x7fff -; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z1.h +; CHECK-NEXT: mov z1.h, #32767 // =0x7fff ; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h -; CHECK-NEXT: mov z1.h, p1/m, #-32768 // =0xffffffffffff8000 -; CHECK-NEXT: sel z0.h, p2, z2.h, z1.h +; CHECK-NEXT: fcvtzs z2.h, p1/m, z0.h +; CHECK-NEXT: sel 
z0.h, p2, z1.h, z2.h ; CHECK-NEXT: mov z0.h, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv8f16.nxv8i16( %f) @@ -707,19 +570,16 @@ define @test_signed_v2f16_v2i64( %f) { ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #64511 // =0xfbff ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z2.d, #0x8000000000000000 ; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: mov w8, #31743 // =0x7bff -; CHECK-NEXT: mov z3.h, w8 +; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h -; CHECK-NEXT: movprfx z1, z0 -; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.h -; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z3.h -; CHECK-NEXT: mov z3.d, #0x7fffffffffffffff -; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z1.d, #0x8000000000000000 +; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z2.h +; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff ; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h -; CHECK-NEXT: mov z1.d, p1/m, z2.d -; CHECK-NEXT: sel z0.d, p2, z3.d, z1.d +; CHECK-NEXT: fcvtzs z1.d, p1/m, z0.h +; CHECK-NEXT: sel z0.d, p2, z2.d, z1.d ; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv2f16.nxv2i64( %f) @@ -729,41 +589,28 @@ define @test_signed_v2f16_v2i64( %f) { define @test_signed_v4f16_v4i64( %f) { ; CHECK-LABEL: test_signed_v4f16_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG -; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: mov w8, #64511 // =0xfbff ; CHECK-NEXT: uunpklo z1.d, z0.s ; CHECK-NEXT: uunpkhi z0.d, z0.s ; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov w8, #31743 // =0x7bff -; CHECK-NEXT: mov z3.h, w8 -; CHECK-NEXT: mov z6.d, #0x7fffffffffffffff +; CHECK-NEXT: mov z3.d, #0x8000000000000000 +; CHECK-NEXT: mov z4.d, #0x8000000000000000 +; CHECK-NEXT: mov z5.h, w8 ; CHECK-NEXT: fcmge p1.h, p0/z, z1.h, z2.h ; CHECK-NEXT: fcmge p2.h, p0/z, z0.h, z2.h -; CHECK-NEXT: mov z2.d, #0x8000000000000000 -; CHECK-NEXT: movprfx z4, z1 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z1.h -; CHECK-NEXT: movprfx z5, z0 -; CHECK-NEXT: fcvtzs z5.d, p0/m, z0.h -; CHECK-NEXT: fcmgt p3.h, p0/z, z1.h, z3.h -; CHECK-NEXT: fcmgt p4.h, p0/z, z0.h, z3.h -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: sel z3.d, p1, z2.d, z4.d -; CHECK-NEXT: fcmuo p1.h, p0/z, z1.h, z1.h +; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff +; CHECK-NEXT: fcmgt p3.h, p0/z, z0.h, z5.h +; CHECK-NEXT: fcvtzs z3.d, p1/m, z1.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z1.h, z5.h +; CHECK-NEXT: fcvtzs z4.d, p2/m, z0.h +; CHECK-NEXT: fcmuo p2.h, p0/z, z1.h, z1.h ; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h -; CHECK-NEXT: sel z2.d, p2, z2.d, z5.d -; CHECK-NEXT: sel z0.d, p3, z6.d, z3.d -; CHECK-NEXT: sel z1.d, p4, z6.d, z2.d -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 +; CHECK-NEXT: sel z0.d, p1, z2.d, z3.d +; CHECK-NEXT: sel z1.d, p3, z2.d, z4.d +; CHECK-NEXT: mov z0.d, p2/m, #0 // =0x0 ; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0 -; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %x = call 
@llvm.fptosi.sat.nxv4f16.nxv4i64( %f) ret %x diff --git a/llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll b/llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll index ed352ffec339f..b3aefb8460985 100644 --- a/llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll +++ b/llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll @@ -16,15 +16,13 @@ define @test_signed_v2f32_v2i32( %f) { ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov w8, #1333788671 // =0x4f7fffff -; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: movi v1.2d, #0000000000000000 +; CHECK-NEXT: mov z2.s, w8 ; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0 -; CHECK-NEXT: movprfx z2, z0 -; CHECK-NEXT: fcvtzu z2.d, p0/m, z0.s -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z2.s +; CHECK-NEXT: fcvtzu z1.d, p1/m, z0.s ; CHECK-NEXT: mov z0.d, #0xffffffff -; CHECK-NEXT: mov z2.d, p1/m, #0 // =0x0 -; CHECK-NEXT: sel z0.d, p0, z0.d, z2.d +; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d ; CHECK-NEXT: ret %x = call @llvm.fptoui.sat.nxv2f32.nxv2i32( %f) ret %x @@ -35,13 +33,11 @@ define @test_signed_v4f32_v4i32( %f) { ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: mov w8, #1333788671 // =0x4f7fffff +; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: mov z2.s, w8 ; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0 -; CHECK-NEXT: movprfx z1, z0 -; CHECK-NEXT: fcvtzu z1.s, p0/m, z0.s -; CHECK-NEXT: not p1.b, p0/z, p1.b ; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z2.s -; CHECK-NEXT: mov z1.s, p1/m, #0 // =0x0 +; CHECK-NEXT: fcvtzu z1.s, p1/m, z0.s ; CHECK-NEXT: mov z1.s, p0/m, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret @@ -53,21 +49,17 @@ define @test_signed_v8f32_v8i32( %f) { ; CHECK-LABEL: test_signed_v8f32_v8i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movi v2.2d, #0000000000000000 ; CHECK-NEXT: mov w8, #1333788671 // =0x4f7fffff +; CHECK-NEXT: movi v3.2d, #0000000000000000 ; CHECK-NEXT: mov z4.s, w8 ; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, 
#0.0 ; CHECK-NEXT: fcmge p2.s, p0/z, z1.s, #0.0 -; CHECK-NEXT: movprfx z2, z0 -; CHECK-NEXT: fcvtzu z2.s, p0/m, z0.s -; CHECK-NEXT: movprfx z3, z1 -; CHECK-NEXT: fcvtzu z3.s, p0/m, z1.s -; CHECK-NEXT: fcmgt p3.s, p0/z, z0.s, z4.s -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: fcvtzu z2.s, p1/m, z0.s +; CHECK-NEXT: fcmgt p1.s, p0/z, z0.s, z4.s ; CHECK-NEXT: fcmgt p0.s, p0/z, z1.s, z4.s -; CHECK-NEXT: mov z2.s, p1/m, #0 // =0x0 -; CHECK-NEXT: mov z3.s, p2/m, #0 // =0x0 -; CHECK-NEXT: mov z2.s, p3/m, #-1 // =0xffffffffffffffff +; CHECK-NEXT: fcvtzu z3.s, p2/m, z1.s +; CHECK-NEXT: mov z2.s, p1/m, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z3.s, p0/m, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z0.d, z2.d ; CHECK-NEXT: mov z1.d, z3.d @@ -81,16 +73,14 @@ define @test_signed_v4f32_v4i16( %f) { ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: mov w8, #65280 // =0xff00 +; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: movk w8, #18303, lsl #16 ; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0 -; CHECK-NEXT: mov z1.s, w8 -; CHECK-NEXT: movprfx z2, z0 -; CHECK-NEXT: fcvtzu z2.s, p0/m, z0.s -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: mov z2.s, w8 +; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z2.s +; CHECK-NEXT: fcvtzu z1.s, p1/m, z0.s ; CHECK-NEXT: mov z0.s, #65535 // =0xffff -; CHECK-NEXT: mov z2.s, p1/m, #0 // =0x0 -; CHECK-NEXT: sel z0.s, p0, z0.s, z2.s +; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s ; CHECK-NEXT: ret %x = call @llvm.fptoui.sat.nxv4f32.nxv4i16( %f) ret %x @@ -101,24 +91,20 @@ define @test_signed_v8f32_v8i16( %f) { ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: mov w8, #65280 // =0xff00 +; CHECK-NEXT: movi v2.2d, #0000000000000000 ; CHECK-NEXT: movk w8, #18303, lsl #16 +; CHECK-NEXT: movi v3.2d, #0000000000000000 ; CHECK-NEXT: fcmge p1.s, p0/z, z1.s, #0.0 ; CHECK-NEXT: fcmge p2.s, p0/z, z0.s, #0.0 -; CHECK-NEXT: mov z2.s, w8 -; CHECK-NEXT: movprfx z3, z1 -; 
CHECK-NEXT: fcvtzu z3.s, p0/m, z1.s -; CHECK-NEXT: movprfx z4, z0 -; CHECK-NEXT: fcvtzu z4.s, p0/m, z0.s -; CHECK-NEXT: fcmgt p3.s, p0/z, z1.s, z2.s -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z2.s -; CHECK-NEXT: mov z0.s, #65535 // =0xffff -; CHECK-NEXT: mov z3.s, p1/m, #0 // =0x0 -; CHECK-NEXT: mov z4.s, p2/m, #0 // =0x0 -; CHECK-NEXT: sel z1.s, p3, z0.s, z3.s -; CHECK-NEXT: sel z0.s, p0, z0.s, z4.s -; CHECK-NEXT: uzp1 z0.h, z0.h, z1.h +; CHECK-NEXT: mov z4.s, w8 +; CHECK-NEXT: fcvtzu z2.s, p1/m, z1.s +; CHECK-NEXT: fcmgt p1.s, p0/z, z1.s, z4.s +; CHECK-NEXT: mov z1.s, #65535 // =0xffff +; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z4.s +; CHECK-NEXT: fcvtzu z3.s, p2/m, z0.s +; CHECK-NEXT: sel z0.s, p1, z1.s, z2.s +; CHECK-NEXT: sel z1.s, p0, z1.s, z3.s +; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h ; CHECK-NEXT: ret %x = call @llvm.fptoui.sat.nxv8f32.nxv8i16( %f) ret %x @@ -129,13 +115,11 @@ define @test_signed_v2f32_v2i64( %f) { ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov w8, #1602224127 // =0x5f7fffff +; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: mov z2.s, w8 ; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0 -; CHECK-NEXT: movprfx z1, z0 -; CHECK-NEXT: fcvtzu z1.d, p0/m, z0.s -; CHECK-NEXT: not p1.b, p0/z, p1.b ; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z2.s -; CHECK-NEXT: mov z1.d, p1/m, #0 // =0x0 +; CHECK-NEXT: fcvtzu z1.d, p1/m, z0.s ; CHECK-NEXT: mov z1.d, p0/m, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret @@ -150,20 +134,16 @@ define @test_signed_v4f32_v4i64( %f) { ; CHECK-NEXT: uunpkhi z3.d, z0.s ; CHECK-NEXT: mov w8, #1602224127 // =0x5f7fffff ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: mov z4.s, w8 ; CHECK-NEXT: fcmge p1.s, p0/z, z2.s, #0.0 ; CHECK-NEXT: fcmge p2.s, p0/z, z3.s, #0.0 -; CHECK-NEXT: movprfx z0, z2 -; CHECK-NEXT: fcvtzu z0.d, p0/m, z2.s -; CHECK-NEXT: 
movprfx z1, z3 -; CHECK-NEXT: fcvtzu z1.d, p0/m, z3.s -; CHECK-NEXT: fcmgt p3.s, p0/z, z2.s, z4.s -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: fcvtzu z0.d, p1/m, z2.s +; CHECK-NEXT: fcmgt p1.s, p0/z, z2.s, z4.s ; CHECK-NEXT: fcmgt p0.s, p0/z, z3.s, z4.s -; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 -; CHECK-NEXT: mov z1.d, p2/m, #0 // =0x0 -; CHECK-NEXT: mov z0.d, p3/m, #-1 // =0xffffffffffffffff +; CHECK-NEXT: fcvtzu z1.d, p2/m, z3.s +; CHECK-NEXT: mov z0.d, p1/m, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z1.d, p0/m, #-1 // =0xffffffffffffffff ; CHECK-NEXT: ret %x = call @llvm.fptoui.sat.nxv4f32.nxv4i64( %f) @@ -185,16 +165,14 @@ define @test_signed_v2f64_v2i32( %f) { ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov x8, #281474974613504 // =0xffffffe00000 +; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: movk x8, #16879, lsl #48 ; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, #0.0 -; CHECK-NEXT: mov z1.d, x8 -; CHECK-NEXT: movprfx z2, z0 -; CHECK-NEXT: fcvtzu z2.d, p0/m, z0.d -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, z1.d +; CHECK-NEXT: mov z2.d, x8 +; CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, z2.d +; CHECK-NEXT: fcvtzu z1.d, p1/m, z0.d ; CHECK-NEXT: mov z0.d, #0xffffffff -; CHECK-NEXT: mov z2.d, p1/m, #0 // =0x0 -; CHECK-NEXT: sel z0.d, p0, z0.d, z2.d +; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d ; CHECK-NEXT: ret %x = call @llvm.fptoui.sat.nxv2f64.nxv2i32( %f) ret %x @@ -205,24 +183,20 @@ define @test_signed_v4f64_v4i32( %f) { ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov x8, #281474974613504 // =0xffffffe00000 +; CHECK-NEXT: movi v2.2d, #0000000000000000 ; CHECK-NEXT: movk x8, #16879, lsl #48 +; CHECK-NEXT: movi v3.2d, #0000000000000000 ; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, #0.0 ; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, #0.0 -; CHECK-NEXT: mov z2.d, x8 -; CHECK-NEXT: movprfx z3, z1 -; CHECK-NEXT: fcvtzu z3.d, p0/m, z1.d -; CHECK-NEXT: movprfx z4, z0 -; CHECK-NEXT: fcvtzu z4.d, 
p0/m, z0.d -; CHECK-NEXT: fcmgt p3.d, p0/z, z1.d, z2.d -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, z2.d -; CHECK-NEXT: mov z0.d, #0xffffffff -; CHECK-NEXT: mov z3.d, p1/m, #0 // =0x0 -; CHECK-NEXT: mov z4.d, p2/m, #0 // =0x0 -; CHECK-NEXT: sel z1.d, p3, z0.d, z3.d -; CHECK-NEXT: sel z0.d, p0, z0.d, z4.d -; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: mov z4.d, x8 +; CHECK-NEXT: fcvtzu z2.d, p1/m, z1.d +; CHECK-NEXT: fcmgt p1.d, p0/z, z1.d, z4.d +; CHECK-NEXT: mov z1.d, #0xffffffff +; CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, z4.d +; CHECK-NEXT: fcvtzu z3.d, p2/m, z0.d +; CHECK-NEXT: sel z0.d, p1, z1.d, z2.d +; CHECK-NEXT: sel z1.d, p0, z1.d, z3.d +; CHECK-NEXT: uzp1 z0.s, z1.s, z0.s ; CHECK-NEXT: ret %x = call @llvm.fptoui.sat.nxv4f64.nxv4i32( %f) ret %x @@ -233,47 +207,35 @@ define @test_signed_v8f64_v8i32( %f) { ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov x8, #281474974613504 // =0xffffffe00000 +; CHECK-NEXT: movi v4.2d, #0000000000000000 +; CHECK-NEXT: movi v5.2d, #0000000000000000 +; CHECK-NEXT: movi v6.2d, #0000000000000000 ; CHECK-NEXT: movk x8, #16879, lsl #48 ; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, #0.0 ; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, #0.0 ; CHECK-NEXT: fcmge p3.d, p0/z, z3.d, #0.0 +; CHECK-NEXT: movi v7.2d, #0000000000000000 ; CHECK-NEXT: fcmge p4.d, p0/z, z2.d, #0.0 -; CHECK-NEXT: movprfx z5, z1 -; CHECK-NEXT: fcvtzu z5.d, p0/m, z1.d -; CHECK-NEXT: mov z4.d, x8 -; CHECK-NEXT: movprfx z6, z0 -; CHECK-NEXT: fcvtzu z6.d, p0/m, z0.d -; 
CHECK-NEXT: movprfx z7, z3 -; CHECK-NEXT: fcvtzu z7.d, p0/m, z3.d -; CHECK-NEXT: movprfx z24, z2 -; CHECK-NEXT: fcvtzu z24.d, p0/m, z2.d -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: fcmgt p5.d, p0/z, z1.d, z4.d -; CHECK-NEXT: fcmgt p6.d, p0/z, z0.d, z4.d -; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: mov z24.d, x8 +; CHECK-NEXT: fcvtzu z4.d, p1/m, z1.d +; CHECK-NEXT: fcvtzu z5.d, p2/m, z0.d +; CHECK-NEXT: fcvtzu z6.d, p3/m, z3.d +; CHECK-NEXT: fcmgt p1.d, p0/z, z1.d, z24.d +; CHECK-NEXT: fcmgt p2.d, p0/z, z0.d, z24.d ; CHECK-NEXT: mov z0.d, #0xffffffff -; CHECK-NEXT: not p3.b, p0/z, p3.b -; CHECK-NEXT: mov z5.d, p1/m, #0 // =0x0 -; CHECK-NEXT: fcmgt p1.d, p0/z, z3.d, z4.d -; CHECK-NEXT: not p4.b, p0/z, p4.b -; CHECK-NEXT: fcmgt p0.d, p0/z, z2.d, z4.d -; CHECK-NEXT: mov z6.d, p2/m, #0 // =0x0 -; CHECK-NEXT: mov z7.d, p3/m, #0 // =0x0 -; CHECK-NEXT: mov z24.d, p4/m, #0 // =0x0 -; CHECK-NEXT: sel z1.d, p5, z0.d, z5.d -; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: sel z2.d, p6, z0.d, z6.d -; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: sel z3.d, p1, z0.d, z7.d +; CHECK-NEXT: fcvtzu z7.d, p4/m, z2.d +; CHECK-NEXT: fcmgt p3.d, p0/z, z3.d, z24.d ; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: sel z4.d, p0, z0.d, z24.d +; CHECK-NEXT: fcmgt p0.d, p0/z, z2.d, z24.d +; CHECK-NEXT: sel z1.d, p1, z0.d, z4.d +; CHECK-NEXT: sel z2.d, p2, z0.d, z5.d +; CHECK-NEXT: sel z3.d, p3, z0.d, z6.d +; CHECK-NEXT: sel z4.d, p0, z0.d, z7.d ; CHECK-NEXT: uzp1 z0.s, z2.s, z1.s ; CHECK-NEXT: uzp1 z1.s, z4.s, z3.s ; CHECK-NEXT: addvl sp, sp, #1 @@ -288,24 +250,20 @@ define @test_signed_v4f64_v4i16( %f) { ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov x8, #281337537757184 // =0xffe000000000 +; CHECK-NEXT: movi v2.2d, #0000000000000000 ; CHECK-NEXT: movk x8, #16623, lsl #48 +; CHECK-NEXT: movi v3.2d, #0000000000000000 ; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, #0.0 ; CHECK-NEXT: fcmge 
p2.d, p0/z, z0.d, #0.0 -; CHECK-NEXT: mov z2.d, x8 -; CHECK-NEXT: movprfx z3, z1 -; CHECK-NEXT: fcvtzu z3.d, p0/m, z1.d -; CHECK-NEXT: movprfx z4, z0 -; CHECK-NEXT: fcvtzu z4.d, p0/m, z0.d -; CHECK-NEXT: fcmgt p3.d, p0/z, z1.d, z2.d -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, z2.d -; CHECK-NEXT: mov z0.d, #65535 // =0xffff -; CHECK-NEXT: mov z3.d, p1/m, #0 // =0x0 -; CHECK-NEXT: mov z4.d, p2/m, #0 // =0x0 -; CHECK-NEXT: sel z1.d, p3, z0.d, z3.d -; CHECK-NEXT: sel z0.d, p0, z0.d, z4.d -; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: mov z4.d, x8 +; CHECK-NEXT: fcvtzu z2.d, p1/m, z1.d +; CHECK-NEXT: fcmgt p1.d, p0/z, z1.d, z4.d +; CHECK-NEXT: mov z1.d, #65535 // =0xffff +; CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, z4.d +; CHECK-NEXT: fcvtzu z3.d, p2/m, z0.d +; CHECK-NEXT: sel z0.d, p1, z1.d, z2.d +; CHECK-NEXT: sel z1.d, p0, z1.d, z3.d +; CHECK-NEXT: uzp1 z0.s, z1.s, z0.s ; CHECK-NEXT: ret %x = call @llvm.fptoui.sat.nxv4f64.nxv4i16( %f) ret %x @@ -316,47 +274,35 @@ define @test_signed_v8f64_v8i16( %f) { ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov x8, #281337537757184 // =0xffe000000000 +; CHECK-NEXT: movi v4.2d, #0000000000000000 +; CHECK-NEXT: movi v5.2d, #0000000000000000 +; CHECK-NEXT: movi v6.2d, #0000000000000000 ; CHECK-NEXT: movk x8, #16623, lsl #48 ; CHECK-NEXT: fcmge p1.d, p0/z, z3.d, #0.0 ; CHECK-NEXT: fcmge p2.d, p0/z, z2.d, #0.0 ; CHECK-NEXT: fcmge p3.d, p0/z, z1.d, #0.0 +; CHECK-NEXT: movi v7.2d, #0000000000000000 ; CHECK-NEXT: fcmge p4.d, p0/z, z0.d, #0.0 -; CHECK-NEXT: movprfx z5, z3 -; CHECK-NEXT: fcvtzu z5.d, p0/m, z3.d -; CHECK-NEXT: mov z4.d, x8 -; CHECK-NEXT: movprfx z6, z2 -; CHECK-NEXT: fcvtzu z6.d, p0/m, z2.d -; CHECK-NEXT: movprfx z7, z1 -; CHECK-NEXT: fcvtzu z7.d, p0/m, z1.d -; CHECK-NEXT: movprfx z24, z0 -; CHECK-NEXT: fcvtzu z24.d, p0/m, z0.d -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: fcmgt p5.d, p0/z, z3.d, z4.d -; CHECK-NEXT: fcmgt p6.d, p0/z, z2.d, z4.d -; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: mov z24.d, x8 +; CHECK-NEXT: fcvtzu z4.d, p1/m, z3.d +; CHECK-NEXT: fcvtzu z5.d, p2/m, z2.d +; CHECK-NEXT: fcvtzu z6.d, p3/m, z1.d +; CHECK-NEXT: fcmgt p1.d, p0/z, z3.d, z24.d +; CHECK-NEXT: fcmgt p2.d, p0/z, z2.d, z24.d ; CHECK-NEXT: mov z2.d, #65535 // =0xffff -; CHECK-NEXT: not p3.b, p0/z, p3.b -; CHECK-NEXT: mov z5.d, p1/m, #0 // =0x0 -; CHECK-NEXT: fcmgt p1.d, p0/z, z1.d, z4.d -; CHECK-NEXT: not p4.b, p0/z, p4.b -; CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, z4.d -; CHECK-NEXT: mov z6.d, p2/m, #0 // =0x0 -; CHECK-NEXT: mov z7.d, p3/m, #0 // =0x0 -; CHECK-NEXT: mov z24.d, p4/m, #0 // =0x0 -; CHECK-NEXT: sel z0.d, p5, z2.d, z5.d -; 
CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: sel z1.d, p6, z2.d, z6.d -; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: sel z3.d, p1, z2.d, z7.d +; CHECK-NEXT: fcvtzu z7.d, p4/m, z0.d +; CHECK-NEXT: fcmgt p3.d, p0/z, z1.d, z24.d ; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: sel z2.d, p0, z2.d, z24.d +; CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, z24.d +; CHECK-NEXT: sel z0.d, p1, z2.d, z4.d +; CHECK-NEXT: sel z1.d, p2, z2.d, z5.d +; CHECK-NEXT: sel z3.d, p3, z2.d, z6.d +; CHECK-NEXT: sel z2.d, p0, z2.d, z7.d ; CHECK-NEXT: uzp1 z0.s, z1.s, z0.s ; CHECK-NEXT: uzp1 z1.s, z2.s, z3.s ; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h @@ -372,13 +318,11 @@ define @test_signed_v2f64_v2i64( %f) { ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov x8, #4895412794951729151 // =0x43efffffffffffff +; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: mov z2.d, x8 ; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, #0.0 -; CHECK-NEXT: movprfx z1, z0 -; CHECK-NEXT: fcvtzu z1.d, p0/m, z0.d -; CHECK-NEXT: not p1.b, p0/z, p1.b ; CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, z2.d -; CHECK-NEXT: mov z1.d, p1/m, #0 // =0x0 +; CHECK-NEXT: fcvtzu z1.d, p1/m, z0.d ; CHECK-NEXT: mov z1.d, p0/m, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret @@ -390,21 +334,17 @@ define @test_signed_v4f64_v4i64( %f) { ; CHECK-LABEL: test_signed_v4f64_v4i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movi v2.2d, #0000000000000000 ; CHECK-NEXT: mov x8, #4895412794951729151 // =0x43efffffffffffff +; CHECK-NEXT: movi v3.2d, #0000000000000000 ; CHECK-NEXT: mov z4.d, x8 ; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, #0.0 ; CHECK-NEXT: fcmge p2.d, p0/z, z1.d, #0.0 -; CHECK-NEXT: movprfx z2, z0 -; CHECK-NEXT: fcvtzu z2.d, p0/m, z0.d -; CHECK-NEXT: movprfx z3, z1 -; CHECK-NEXT: fcvtzu z3.d, p0/m, z1.d -; CHECK-NEXT: fcmgt p3.d, p0/z, z0.d, z4.d -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: not p2.b, p0/z, p2.b +; 
CHECK-NEXT: fcvtzu z2.d, p1/m, z0.d +; CHECK-NEXT: fcmgt p1.d, p0/z, z0.d, z4.d ; CHECK-NEXT: fcmgt p0.d, p0/z, z1.d, z4.d -; CHECK-NEXT: mov z2.d, p1/m, #0 // =0x0 -; CHECK-NEXT: mov z3.d, p2/m, #0 // =0x0 -; CHECK-NEXT: mov z2.d, p3/m, #-1 // =0xffffffffffffffff +; CHECK-NEXT: fcvtzu z3.d, p2/m, z1.d +; CHECK-NEXT: mov z2.d, p1/m, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z3.d, p0/m, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z0.d, z2.d ; CHECK-NEXT: mov z1.d, z3.d @@ -429,15 +369,13 @@ define @test_signed_v2f16_v2i32( %f) { ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov w8, #31743 // =0x7bff -; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: movi v1.2d, #0000000000000000 +; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0 -; CHECK-NEXT: movprfx z2, z0 -; CHECK-NEXT: fcvtzu z2.d, p0/m, z0.h -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, z1.h +; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, z2.h +; CHECK-NEXT: fcvtzu z1.d, p1/m, z0.h ; CHECK-NEXT: mov z0.d, #0xffffffff -; CHECK-NEXT: mov z2.d, p1/m, #0 // =0x0 -; CHECK-NEXT: sel z0.d, p0, z0.d, z2.d +; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d ; CHECK-NEXT: ret %x = call @llvm.fptoui.sat.nxv2f16.nxv2i32( %f) ret %x @@ -448,13 +386,11 @@ define @test_signed_v4f16_v4i32( %f) { ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: mov w8, #31743 // =0x7bff +; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0 -; CHECK-NEXT: movprfx z1, z0 -; CHECK-NEXT: fcvtzu z1.s, p0/m, z0.h -; CHECK-NEXT: not p1.b, p0/z, p1.b ; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, z2.h -; CHECK-NEXT: mov z1.s, p1/m, #0 // =0x0 +; CHECK-NEXT: fcvtzu z1.s, p1/m, z0.h ; CHECK-NEXT: mov z1.s, p0/m, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret @@ -469,20 +405,16 @@ define @test_signed_v8f16_v8i32( %f) { ; CHECK-NEXT: uunpkhi z3.s, z0.h ; CHECK-NEXT: mov w8, #31743 // =0x7bff ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: 
movi v0.2d, #0000000000000000 +; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: mov z4.h, w8 ; CHECK-NEXT: fcmge p1.h, p0/z, z2.h, #0.0 ; CHECK-NEXT: fcmge p2.h, p0/z, z3.h, #0.0 -; CHECK-NEXT: movprfx z0, z2 -; CHECK-NEXT: fcvtzu z0.s, p0/m, z2.h -; CHECK-NEXT: movprfx z1, z3 -; CHECK-NEXT: fcvtzu z1.s, p0/m, z3.h -; CHECK-NEXT: fcmgt p3.h, p0/z, z2.h, z4.h -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: fcvtzu z0.s, p1/m, z2.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z2.h, z4.h ; CHECK-NEXT: fcmgt p0.h, p0/z, z3.h, z4.h -; CHECK-NEXT: mov z0.s, p1/m, #0 // =0x0 -; CHECK-NEXT: mov z1.s, p2/m, #0 // =0x0 -; CHECK-NEXT: mov z0.s, p3/m, #-1 // =0xffffffffffffffff +; CHECK-NEXT: fcvtzu z1.s, p2/m, z3.h +; CHECK-NEXT: mov z0.s, p1/m, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z1.s, p0/m, #-1 // =0xffffffffffffffff ; CHECK-NEXT: ret %x = call @llvm.fptoui.sat.nxv8f16.nxv8i32( %f) @@ -494,15 +426,13 @@ define @test_signed_v4f16_v4i16( %f) { ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: mov w8, #31743 // =0x7bff -; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: movi v1.2d, #0000000000000000 +; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0 -; CHECK-NEXT: movprfx z2, z0 -; CHECK-NEXT: fcvtzu z2.s, p0/m, z0.h -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, z1.h +; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, z2.h +; CHECK-NEXT: fcvtzu z1.s, p1/m, z0.h ; CHECK-NEXT: mov z0.s, #65535 // =0xffff -; CHECK-NEXT: mov z2.s, p1/m, #0 // =0x0 -; CHECK-NEXT: sel z0.s, p0, z0.s, z2.s +; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s ; CHECK-NEXT: ret %x = call @llvm.fptoui.sat.nxv4f16.nxv4i16( %f) ret %x @@ -513,13 +443,11 @@ define @test_signed_v8f16_v8i16( %f) { ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: mov w8, #31743 // =0x7bff +; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0 -; CHECK-NEXT: movprfx z1, z0 -; CHECK-NEXT: fcvtzu 
z1.h, p0/m, z0.h -; CHECK-NEXT: not p1.b, p0/z, p1.b ; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, z2.h -; CHECK-NEXT: mov z1.h, p1/m, #0 // =0x0 +; CHECK-NEXT: fcvtzu z1.h, p1/m, z0.h ; CHECK-NEXT: mov z1.h, p0/m, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret @@ -532,13 +460,11 @@ define @test_signed_v2f16_v2i64( %f) { ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov w8, #31743 // =0x7bff +; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0 -; CHECK-NEXT: movprfx z1, z0 -; CHECK-NEXT: fcvtzu z1.d, p0/m, z0.h -; CHECK-NEXT: not p1.b, p0/z, p1.b ; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, z2.h -; CHECK-NEXT: mov z1.d, p1/m, #0 // =0x0 +; CHECK-NEXT: fcvtzu z1.d, p1/m, z0.h ; CHECK-NEXT: mov z1.d, p0/m, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret @@ -553,20 +479,16 @@ define @test_signed_v4f16_v4i64( %f) { ; CHECK-NEXT: uunpkhi z3.d, z0.s ; CHECK-NEXT: mov w8, #31743 // =0x7bff ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: mov z4.h, w8 ; CHECK-NEXT: fcmge p1.h, p0/z, z2.h, #0.0 ; CHECK-NEXT: fcmge p2.h, p0/z, z3.h, #0.0 -; CHECK-NEXT: movprfx z0, z2 -; CHECK-NEXT: fcvtzu z0.d, p0/m, z2.h -; CHECK-NEXT: movprfx z1, z3 -; CHECK-NEXT: fcvtzu z1.d, p0/m, z3.h -; CHECK-NEXT: fcmgt p3.h, p0/z, z2.h, z4.h -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: fcvtzu z0.d, p1/m, z2.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z2.h, z4.h ; CHECK-NEXT: fcmgt p0.h, p0/z, z3.h, z4.h -; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 -; CHECK-NEXT: mov z1.d, p2/m, #0 // =0x0 -; CHECK-NEXT: mov z0.d, p3/m, #-1 // =0xffffffffffffffff +; CHECK-NEXT: fcvtzu z1.d, p2/m, z3.h +; CHECK-NEXT: mov z0.d, p1/m, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z1.d, p0/m, #-1 // =0xffffffffffffffff ; CHECK-NEXT: ret %x = call @llvm.fptoui.sat.nxv4f16.nxv4i64( %f) diff --git 
a/llvm/test/CodeGen/AArch64/sve-llrint.ll b/llvm/test/CodeGen/AArch64/sve-llrint.ll index 16e0e0c4661b6..b0198cf9d1247 100644 --- a/llvm/test/CodeGen/AArch64/sve-llrint.ll +++ b/llvm/test/CodeGen/AArch64/sve-llrint.ll @@ -6,20 +6,17 @@ define @llrint_v1i64_v1f16( %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov w8, #64511 // =0xfbff -; CHECK-NEXT: mov z2.d, #0x8000000000000000 ; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: mov w8, #31743 // =0x7bff ; CHECK-NEXT: frintx z0.h, p0/m, z0.h -; CHECK-NEXT: mov z3.h, w8 +; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h -; CHECK-NEXT: movprfx z1, z0 -; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.h -; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z3.h -; CHECK-NEXT: mov z3.d, #0x7fffffffffffffff -; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z1.d, #0x8000000000000000 +; CHECK-NEXT: fcvtzs z1.d, p1/m, z0.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z2.h +; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff ; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h -; CHECK-NEXT: mov z1.d, p1/m, z2.d -; CHECK-NEXT: sel z0.d, p2, z3.d, z1.d +; CHECK-NEXT: sel z0.d, p1, z2.d, z1.d ; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %a = call @llvm.llrint.nxv1i64.nxv1f16( %x) @@ -32,20 +29,17 @@ define @llrint_v1i64_v2f16( %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov w8, #64511 // =0xfbff -; CHECK-NEXT: mov z2.d, #0x8000000000000000 ; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: mov w8, #31743 // =0x7bff ; CHECK-NEXT: frintx z0.h, p0/m, z0.h -; CHECK-NEXT: mov z3.h, w8 +; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h -; CHECK-NEXT: movprfx z1, z0 -; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.h -; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z3.h -; CHECK-NEXT: mov z3.d, #0x7fffffffffffffff -; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z1.d, #0x8000000000000000 +; CHECK-NEXT: fcvtzs z1.d, p1/m, z0.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z2.h +; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff ; CHECK-NEXT: 
fcmuo p0.h, p0/z, z0.h, z0.h -; CHECK-NEXT: mov z1.d, p1/m, z2.d -; CHECK-NEXT: sel z0.d, p2, z3.d, z1.d +; CHECK-NEXT: sel z0.d, p1, z2.d, z1.d ; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %a = call @llvm.llrint.nxv2i64.nxv2f16( %x) @@ -56,43 +50,30 @@ declare @llvm.llrint.nxv2i64.nxv2f16() define @llrint_v4i64_v4f16( %x) { ; CHECK-LABEL: llrint_v4i64_v4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG -; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: uunpklo z1.d, z0.s ; CHECK-NEXT: uunpkhi z0.d, z0.s ; CHECK-NEXT: mov w8, #64511 // =0xfbff ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: mov w8, #31743 // =0x7bff -; CHECK-NEXT: mov z3.h, w8 -; CHECK-NEXT: mov z6.d, #0x7fffffffffffffff +; CHECK-NEXT: mov z3.d, #0x8000000000000000 +; CHECK-NEXT: mov z4.d, #0x8000000000000000 +; CHECK-NEXT: mov z5.h, w8 ; CHECK-NEXT: frintx z1.h, p0/m, z1.h ; CHECK-NEXT: frintx z0.h, p0/m, z0.h ; CHECK-NEXT: fcmge p1.h, p0/z, z1.h, z2.h ; CHECK-NEXT: fcmge p2.h, p0/z, z0.h, z2.h -; CHECK-NEXT: mov z2.d, #0x8000000000000000 -; CHECK-NEXT: movprfx z4, z1 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z1.h -; CHECK-NEXT: movprfx z5, z0 -; CHECK-NEXT: fcvtzs z5.d, p0/m, z0.h -; CHECK-NEXT: fcmgt p3.h, p0/z, z1.h, z3.h -; CHECK-NEXT: fcmgt p4.h, p0/z, z0.h, z3.h -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: sel z3.d, p1, z2.d, z4.d -; CHECK-NEXT: fcmuo p1.h, p0/z, z1.h, z1.h +; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff +; CHECK-NEXT: fcmuo p3.h, p0/z, z1.h, z1.h +; CHECK-NEXT: fcvtzs z3.d, p1/m, z1.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z1.h, z5.h +; CHECK-NEXT: fcvtzs z4.d, p2/m, z0.h +; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z5.h ; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h -; 
CHECK-NEXT: sel z2.d, p2, z2.d, z5.d -; CHECK-NEXT: sel z0.d, p3, z6.d, z3.d -; CHECK-NEXT: sel z1.d, p4, z6.d, z2.d -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 +; CHECK-NEXT: sel z0.d, p1, z2.d, z3.d +; CHECK-NEXT: sel z1.d, p2, z2.d, z4.d +; CHECK-NEXT: mov z0.d, p3/m, #0 // =0x0 ; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0 -; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %a = call @llvm.llrint.nxv4i64.nxv4f16( %x) ret %a @@ -104,7 +85,6 @@ define @llrint_v8i64_v8f16( %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill @@ -116,8 +96,10 @@ define @llrint_v8i64_v8f16( %x) { ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov z4.h, w8 ; CHECK-NEXT: mov w8, #31743 // =0x7bff -; CHECK-NEXT: mov z6.h, w8 -; CHECK-NEXT: mov z26.d, #0x7fffffffffffffff +; CHECK-NEXT: mov z6.d, #0x8000000000000000 +; CHECK-NEXT: mov z25.h, w8 +; CHECK-NEXT: mov z7.d, #0x8000000000000000 +; CHECK-NEXT: mov z24.d, #0x8000000000000000 ; CHECK-NEXT: uunpklo z2.d, z1.s ; CHECK-NEXT: uunpkhi z1.d, z1.s ; CHECK-NEXT: uunpklo z3.d, z0.s @@ -132,41 +114,29 @@ define @llrint_v8i64_v8f16( %x) { ; CHECK-NEXT: fcmge p2.h, p0/z, z1.h, z4.h ; CHECK-NEXT: fcmge p3.h, p0/z, z3.h, z4.h ; CHECK-NEXT: fcmge p4.h, p0/z, z5.h, z4.h -; CHECK-NEXT: movprfx z4, z2 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z2.h -; CHECK-NEXT: movprfx z7, z1 -; CHECK-NEXT: fcvtzs z7.d, p0/m, z1.h -; CHECK-NEXT: movprfx z24, z3 -; CHECK-NEXT: fcvtzs z24.d, p0/m, z3.h -; CHECK-NEXT: movprfx z25, z5 -; CHECK-NEXT: fcvtzs z25.d, p0/m, z5.h -; CHECK-NEXT: fcmgt p7.h, p0/z, z3.h, z6.h -; CHECK-NEXT: fcmgt p5.h, p0/z, z2.h, z6.h -; 
CHECK-NEXT: fcmgt p6.h, p0/z, z1.h, z6.h -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: not p3.b, p0/z, p3.b -; CHECK-NEXT: mov z4.d, p1/m, z0.d -; CHECK-NEXT: fcmgt p1.h, p0/z, z5.h, z6.h -; CHECK-NEXT: not p4.b, p0/z, p4.b -; CHECK-NEXT: sel z6.d, p2, z0.d, z7.d -; CHECK-NEXT: fcmuo p2.h, p0/z, z2.h, z2.h -; CHECK-NEXT: sel z7.d, p3, z0.d, z24.d -; CHECK-NEXT: fcmuo p3.h, p0/z, z1.h, z1.h -; CHECK-NEXT: sel z24.d, p4, z0.d, z25.d -; CHECK-NEXT: fcmuo p4.h, p0/z, z3.h, z3.h +; CHECK-NEXT: mov z4.d, #0x7fffffffffffffff +; CHECK-NEXT: fcmgt p5.h, p0/z, z5.h, z25.h +; CHECK-NEXT: fcmuo p6.h, p0/z, z1.h, z1.h +; CHECK-NEXT: fcvtzs z0.d, p1/m, z2.h +; CHECK-NEXT: fcvtzs z6.d, p2/m, z1.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z2.h, z25.h +; CHECK-NEXT: fcmgt p2.h, p0/z, z1.h, z25.h +; CHECK-NEXT: fcvtzs z7.d, p3/m, z3.h +; CHECK-NEXT: fcmgt p3.h, p0/z, z3.h, z25.h +; CHECK-NEXT: fcvtzs z24.d, p4/m, z5.h +; CHECK-NEXT: fcmuo p4.h, p0/z, z2.h, z2.h +; CHECK-NEXT: mov z0.d, p1/m, z4.d +; CHECK-NEXT: fcmuo p1.h, p0/z, z3.h, z3.h ; CHECK-NEXT: fcmuo p0.h, p0/z, z5.h, z5.h -; CHECK-NEXT: sel z0.d, p5, z26.d, z4.d -; CHECK-NEXT: sel z1.d, p6, z26.d, z6.d -; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: sel z2.d, p7, z26.d, z7.d -; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: sel z3.d, p1, z26.d, z24.d +; CHECK-NEXT: sel z1.d, p2, z4.d, z6.d +; CHECK-NEXT: sel z2.d, p3, z4.d, z7.d +; CHECK-NEXT: sel z3.d, p5, z4.d, z24.d ; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: mov z0.d, p2/m, #0 // =0x0 -; CHECK-NEXT: mov z1.d, p3/m, #0 // =0x0 -; CHECK-NEXT: mov z2.d, p4/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, p4/m, #0 // =0x0 ; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z1.d, p6/m, #0 // =0x0 +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z2.d, p1/m, #0 // =0x0 ; CHECK-NEXT: mov z3.d, p0/m, 
#0 // =0x0 ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -180,7 +150,7 @@ define @llrint_v16i64_v16f16( %x) { ; CHECK-LABEL: llrint_v16i64_v16f16: ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-3 +; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: str p10, [sp, #1, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p9, [sp, #2, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Folded Spill @@ -188,124 +158,110 @@ define @llrint_v16i64_v16f16( %x) { ; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str z9, [sp, #1, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: str z8, [sp, #2, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG +; CHECK-NEXT: str z10, [sp, #1, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z9, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z8, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG ; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 16 - 24 * VG ; CHECK-NEXT: uunpklo z2.s, z0.h ; CHECK-NEXT: uunpkhi z0.s, z0.h ; CHECK-NEXT: mov w8, #64511 // =0xfbff -; CHECK-NEXT: uunpklo z4.s, z1.h -; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uunpklo z3.s, z1.h ; CHECK-NEXT: 
uunpkhi z1.s, z1.h -; CHECK-NEXT: mov z5.h, w8 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z24.h, w8 ; CHECK-NEXT: mov w8, #31743 // =0x7bff -; CHECK-NEXT: mov z25.d, #0x8000000000000000 -; CHECK-NEXT: mov z27.h, w8 -; CHECK-NEXT: mov z7.d, #0x7fffffffffffffff -; CHECK-NEXT: uunpklo z3.d, z2.s +; CHECK-NEXT: mov z7.d, #0x8000000000000000 +; CHECK-NEXT: mov z27.d, #0x8000000000000000 +; CHECK-NEXT: mov z28.d, #0x8000000000000000 +; CHECK-NEXT: mov z30.d, #0x8000000000000000 +; CHECK-NEXT: uunpklo z4.d, z2.s ; CHECK-NEXT: uunpkhi z2.d, z2.s -; CHECK-NEXT: uunpklo z6.d, z0.s +; CHECK-NEXT: uunpklo z5.d, z0.s ; CHECK-NEXT: uunpkhi z0.d, z0.s -; CHECK-NEXT: uunpklo z24.d, z4.s -; CHECK-NEXT: uunpkhi z4.d, z4.s -; CHECK-NEXT: uunpklo z26.d, z1.s -; CHECK-NEXT: uunpkhi z1.d, z1.s +; CHECK-NEXT: uunpklo z6.d, z3.s +; CHECK-NEXT: uunpklo z25.d, z1.s +; CHECK-NEXT: uunpkhi z3.d, z3.s +; CHECK-NEXT: mov z8.d, #0x8000000000000000 +; CHECK-NEXT: mov z31.d, #0x8000000000000000 +; CHECK-NEXT: mov z10.d, #0x7fffffffffffffff +; CHECK-NEXT: frintx z4.h, p0/m, z4.h ; CHECK-NEXT: frintx z2.h, p0/m, z2.h -; CHECK-NEXT: frintx z3.h, p0/m, z3.h +; CHECK-NEXT: frintx z5.h, p0/m, z5.h +; CHECK-NEXT: movprfx z26, z0 +; CHECK-NEXT: frintx z26.h, p0/m, z0.h +; CHECK-NEXT: uunpkhi z0.d, z1.s ; CHECK-NEXT: frintx z6.h, p0/m, z6.h -; CHECK-NEXT: movprfx z28, z0 -; CHECK-NEXT: frintx z28.h, p0/m, z0.h -; CHECK-NEXT: movprfx z29, z4 -; CHECK-NEXT: frintx z29.h, p0/m, z4.h -; CHECK-NEXT: frintx z24.h, p0/m, z24.h -; CHECK-NEXT: movprfx z30, z1 -; CHECK-NEXT: frintx z30.h, p0/m, z1.h -; CHECK-NEXT: frintx z26.h, p0/m, z26.h -; CHECK-NEXT: fcmge p5.h, p0/z, z2.h, z5.h -; CHECK-NEXT: fcmge p2.h, p0/z, z3.h, z5.h -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: fcvtzs z1.d, p0/m, z2.h -; CHECK-NEXT: movprfx z0, z3 -; CHECK-NEXT: fcvtzs z0.d, p0/m, z3.h -; CHECK-NEXT: fcmge p6.h, p0/z, z6.h, z5.h -; CHECK-NEXT: fcmgt p3.h, p0/z, z3.h, z27.h -; CHECK-NEXT: fcmuo p1.h, p0/z, z3.h, z3.h -; CHECK-NEXT: fcmge 
p7.h, p0/z, z28.h, z5.h -; CHECK-NEXT: movprfx z3, z6 -; CHECK-NEXT: fcvtzs z3.d, p0/m, z6.h -; CHECK-NEXT: fcmge p8.h, p0/z, z24.h, z5.h -; CHECK-NEXT: fcmgt p4.h, p0/z, z2.h, z27.h -; CHECK-NEXT: fcmge p9.h, p0/z, z26.h, z5.h -; CHECK-NEXT: not p5.b, p0/z, p5.b -; CHECK-NEXT: movprfx z4, z24 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z24.h -; CHECK-NEXT: fcmge p10.h, p0/z, z30.h, z5.h -; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: movprfx z31, z26 -; CHECK-NEXT: fcvtzs z31.d, p0/m, z26.h -; CHECK-NEXT: movprfx z8, z30 -; CHECK-NEXT: fcvtzs z8.d, p0/m, z30.h -; CHECK-NEXT: mov z1.d, p5/m, z25.d -; CHECK-NEXT: fcmge p5.h, p0/z, z29.h, z5.h -; CHECK-NEXT: not p6.b, p0/z, p6.b -; CHECK-NEXT: mov z0.d, p2/m, z25.d -; CHECK-NEXT: fcmuo p2.h, p0/z, z2.h, z2.h -; CHECK-NEXT: movprfx z2, z28 -; CHECK-NEXT: fcvtzs z2.d, p0/m, z28.h -; CHECK-NEXT: movprfx z5, z29 -; CHECK-NEXT: fcvtzs z5.d, p0/m, z29.h -; CHECK-NEXT: not p7.b, p0/z, p7.b -; CHECK-NEXT: mov z3.d, p6/m, z25.d -; CHECK-NEXT: not p6.b, p0/z, p8.b -; CHECK-NEXT: fcmgt p8.h, p0/z, z6.h, z27.h -; CHECK-NEXT: mov z1.d, p4/m, z7.d -; CHECK-NEXT: not p5.b, p0/z, p5.b -; CHECK-NEXT: mov z0.d, p3/m, z7.d -; CHECK-NEXT: fcmgt p3.h, p0/z, z29.h, z27.h -; CHECK-NEXT: sel z9.d, p7, z25.d, z2.d -; CHECK-NEXT: not p7.b, p0/z, p9.b -; CHECK-NEXT: mov z4.d, p6/m, z25.d -; CHECK-NEXT: not p6.b, p0/z, p10.b -; CHECK-NEXT: fcmgt p10.h, p0/z, z28.h, z27.h -; CHECK-NEXT: mov z5.d, p5/m, z25.d -; CHECK-NEXT: fcmgt p5.h, p0/z, z24.h, z27.h -; CHECK-NEXT: fcmuo p9.h, p0/z, z6.h, z6.h -; CHECK-NEXT: sel z6.d, p7, z25.d, z31.d -; CHECK-NEXT: sel z25.d, p6, z25.d, z8.d -; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: fcmgt p6.h, p0/z, z26.h, z27.h -; CHECK-NEXT: fcmgt p7.h, p0/z, z30.h, z27.h -; CHECK-NEXT: fcmuo p4.h, p0/z, z28.h, z28.h -; CHECK-NEXT: sel z2.d, p8, z7.d, z3.d -; CHECK-NEXT: sel z3.d, p10, z7.d, z9.d -; CHECK-NEXT: ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: fcmuo p8.h, 
p0/z, z29.h, z29.h -; CHECK-NEXT: mov z4.d, p5/m, z7.d -; CHECK-NEXT: fcmuo p5.h, p0/z, z24.h, z24.h -; CHECK-NEXT: fcmuo p10.h, p0/z, z26.h, z26.h -; CHECK-NEXT: mov z5.d, p3/m, z7.d -; CHECK-NEXT: mov z6.d, p6/m, z7.d +; CHECK-NEXT: movprfx z29, z3 +; CHECK-NEXT: frintx z29.h, p0/m, z3.h +; CHECK-NEXT: frintx z25.h, p0/m, z25.h +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: mov z3.d, #0x8000000000000000 +; CHECK-NEXT: fcmge p1.h, p0/z, z4.h, z24.h +; CHECK-NEXT: fcmge p2.h, p0/z, z2.h, z24.h +; CHECK-NEXT: fcmge p3.h, p0/z, z5.h, z24.h +; CHECK-NEXT: movprfx z9, z0 +; CHECK-NEXT: frintx z9.h, p0/m, z0.h +; CHECK-NEXT: fcmge p4.h, p0/z, z26.h, z24.h +; CHECK-NEXT: fcmge p5.h, p0/z, z6.h, z24.h +; CHECK-NEXT: fcmge p7.h, p0/z, z25.h, z24.h +; CHECK-NEXT: fcmge p6.h, p0/z, z29.h, z24.h +; CHECK-NEXT: fcmgt p8.h, p0/z, z6.h, z1.h +; CHECK-NEXT: fcmgt p10.h, p0/z, z25.h, z1.h +; CHECK-NEXT: fcmuo p9.h, p0/z, z5.h, z5.h +; CHECK-NEXT: fcvtzs z7.d, p1/m, z4.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z4.h, z1.h +; CHECK-NEXT: fcvtzs z27.d, p2/m, z2.h +; CHECK-NEXT: fcmge p2.h, p0/z, z9.h, z24.h +; CHECK-NEXT: mov z24.d, #0x8000000000000000 +; CHECK-NEXT: fcvtzs z28.d, p3/m, z5.h +; CHECK-NEXT: fcvtzs z3.d, p4/m, z26.h +; CHECK-NEXT: fcvtzs z30.d, p5/m, z6.h +; CHECK-NEXT: fcvtzs z8.d, p7/m, z25.h +; CHECK-NEXT: fcmgt p4.h, p0/z, z2.h, z1.h +; CHECK-NEXT: fcmgt p5.h, p0/z, z5.h, z1.h +; CHECK-NEXT: fcmgt p7.h, p0/z, z26.h, z1.h +; CHECK-NEXT: fcvtzs z31.d, p6/m, z29.h +; CHECK-NEXT: sel z0.d, p1, z10.d, z7.d +; CHECK-NEXT: fcmgt p1.h, p0/z, z29.h, z1.h +; CHECK-NEXT: fcvtzs z24.d, p2/m, z9.h +; CHECK-NEXT: fcmgt p2.h, p0/z, z9.h, z1.h +; CHECK-NEXT: fcmuo p3.h, p0/z, z4.h, z4.h +; CHECK-NEXT: fcmuo p6.h, p0/z, z2.h, z2.h +; CHECK-NEXT: sel z4.d, p8, z10.d, z30.d +; CHECK-NEXT: fcmuo p8.h, p0/z, z25.h, z25.h +; CHECK-NEXT: sel z1.d, p4, z10.d, z27.d +; CHECK-NEXT: fcmuo p4.h, p0/z, z26.h, z26.h +; CHECK-NEXT: sel z2.d, p5, z10.d, z28.d +; CHECK-NEXT: mov z3.d, p7/m, z10.d +; 
CHECK-NEXT: fcmuo p5.h, p0/z, z6.h, z6.h +; CHECK-NEXT: fcmuo p7.h, p0/z, z29.h, z29.h +; CHECK-NEXT: sel z5.d, p1, z10.d, z31.d +; CHECK-NEXT: sel z6.d, p10, z10.d, z8.d +; CHECK-NEXT: ldr z8, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #1, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: fcmuo p0.h, p0/z, z9.h, z9.h +; CHECK-NEXT: ldr z9, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: sel z7.d, p2, z10.d, z24.d +; CHECK-NEXT: ldr z10, [sp, #1, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: mov z1.d, p6/m, #0 // =0x0 ; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: fcmuo p0.h, p0/z, z30.h, z30.h -; CHECK-NEXT: sel z7.d, p7, z7.d, z25.d -; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: mov z2.d, p9/m, #0 // =0x0 -; CHECK-NEXT: ldr p9, [sp, #2, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: mov z3.d, p4/m, #0 // =0x0 -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #2, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: mov z4.d, p5/m, #0 // =0x0 -; CHECK-NEXT: mov z5.d, p8/m, #0 // =0x0 +; CHECK-NEXT: mov z5.d, p7/m, #0 // =0x0 +; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z6.d, p8/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, p3/m, #0 // =0x0 ; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: mov z6.d, p10/m, #0 // =0x0 -; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 -; CHECK-NEXT: ldr p10, [sp, #1, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: mov z1.d, p2/m, #0 // =0x0 ; CHECK-NEXT: mov z7.d, p0/m, #0 // =0x0 ; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: addvl sp, sp, #3 +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #4 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %a = call @llvm.llrint.nxv16i64.nxv16f16( %x) @@ -318,6 +274,8 @@ define @llrint_v32i64_v32f16( %x) { ; CHECK: // 
%bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-17 +; CHECK-NEXT: str p11, [sp] // 2-byte Folded Spill +; CHECK-NEXT: str p10, [sp, #1, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p9, [sp, #2, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill @@ -340,8 +298,8 @@ define @llrint_v32i64_v32f16( %x) { ; CHECK-NEXT: str z10, [sp, #14, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z9, [sp, #15, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z8, [sp, #16, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-3 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xa0, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 160 * VG +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 144 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG ; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG @@ -352,230 +310,191 @@ define @llrint_v32i64_v32f16( %x) { ; CHECK-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 16 - 56 * VG ; CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 16 - 64 * VG ; CHECK-NEXT: uunpklo z4.s, z0.h -; CHECK-NEXT: uunpkhi z5.s, z0.h +; CHECK-NEXT: uunpkhi z0.s, z0.h ; CHECK-NEXT: mov w9, #64511 // =0xfbff +; CHECK-NEXT: uunpklo z25.s, z1.h +; CHECK-NEXT: uunpkhi z10.s, z1.h +; CHECK-NEXT: uunpklo z9.s, z2.h ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: uunpklo z6.s, z1.h -; CHECK-NEXT: mov z26.h, w9 -; CHECK-NEXT: uunpkhi z25.s, z1.h +; CHECK-NEXT: 
uunpkhi z12.s, z3.h +; CHECK-NEXT: mov z27.h, w9 ; CHECK-NEXT: mov w9, #31743 // =0x7bff -; CHECK-NEXT: mov z27.d, #0x8000000000000000 -; CHECK-NEXT: uunpklo z31.s, z2.h -; CHECK-NEXT: uunpkhi z12.s, z2.h -; CHECK-NEXT: mov z17.d, z3.d -; CHECK-NEXT: uunpklo z0.d, z4.s -; CHECK-NEXT: uunpkhi z4.d, z4.s -; CHECK-NEXT: uunpklo z7.d, z5.s -; CHECK-NEXT: uunpkhi z24.d, z5.s -; CHECK-NEXT: uunpklo z28.d, z6.s -; CHECK-NEXT: uunpkhi z29.d, z6.s -; CHECK-NEXT: uunpklo z8.d, z25.s -; CHECK-NEXT: uunpkhi z9.d, z25.s -; CHECK-NEXT: uunpklo z16.s, z17.h -; CHECK-NEXT: uunpklo z11.d, z31.s -; CHECK-NEXT: uunpkhi z14.d, z31.s -; CHECK-NEXT: uunpkhi z17.s, z17.h -; CHECK-NEXT: movprfx z30, z4 -; CHECK-NEXT: frintx z30.h, p0/m, z4.h -; CHECK-NEXT: movprfx z4, z7 -; CHECK-NEXT: frintx z4.h, p0/m, z7.h -; CHECK-NEXT: frintx z0.h, p0/m, z0.h -; CHECK-NEXT: movprfx z6, z24 -; CHECK-NEXT: frintx z6.h, p0/m, z24.h -; CHECK-NEXT: movprfx z7, z28 -; CHECK-NEXT: frintx z7.h, p0/m, z28.h -; CHECK-NEXT: movprfx z25, z29 -; CHECK-NEXT: frintx z25.h, p0/m, z29.h -; CHECK-NEXT: movprfx z3, z9 -; CHECK-NEXT: frintx z3.h, p0/m, z9.h -; CHECK-NEXT: mov z5.h, w9 -; CHECK-NEXT: movprfx z31, z11 -; CHECK-NEXT: frintx z31.h, p0/m, z11.h -; CHECK-NEXT: movprfx z9, z14 -; CHECK-NEXT: frintx z9.h, p0/m, z14.h -; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z26.h -; CHECK-NEXT: fcmge p4.h, p0/z, z4.h, z26.h -; CHECK-NEXT: movprfx z24, z0 -; CHECK-NEXT: fcvtzs z24.d, p0/m, z0.h -; CHECK-NEXT: fcmge p2.h, p0/z, z30.h, z26.h -; CHECK-NEXT: movprfx z29, z4 -; CHECK-NEXT: fcvtzs z29.d, p0/m, z4.h -; CHECK-NEXT: fcmge p6.h, p0/z, z6.h, z26.h -; CHECK-NEXT: movprfx z28, z30 -; CHECK-NEXT: fcvtzs z28.d, p0/m, z30.h -; CHECK-NEXT: movprfx z10, z6 -; CHECK-NEXT: fcvtzs z10.d, p0/m, z6.h -; CHECK-NEXT: str z0, [sp, #1, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: fcmge p3.h, p0/z, z7.h, z26.h -; CHECK-NEXT: movprfx z13, z7 -; CHECK-NEXT: fcvtzs z13.d, p0/m, z7.h -; CHECK-NEXT: movprfx z15, z25 -; CHECK-NEXT: fcvtzs z15.d, 
p0/m, z25.h -; CHECK-NEXT: not p5.b, p0/z, p1.b -; CHECK-NEXT: movprfx z18, z3 -; CHECK-NEXT: fcvtzs z18.d, p0/m, z3.h -; CHECK-NEXT: movprfx z20, z31 -; CHECK-NEXT: fcvtzs z20.d, p0/m, z31.h -; CHECK-NEXT: not p4.b, p0/z, p4.b -; CHECK-NEXT: movprfx z21, z9 -; CHECK-NEXT: fcvtzs z21.d, p0/m, z9.h -; CHECK-NEXT: fcmgt p1.h, p0/z, z30.h, z5.h -; CHECK-NEXT: sel z0.d, p5, z27.d, z24.d -; CHECK-NEXT: not p7.b, p0/z, p2.b -; CHECK-NEXT: fcmgt p2.h, p0/z, z4.h, z5.h -; CHECK-NEXT: mov z29.d, p4/m, z27.d -; CHECK-NEXT: fcmge p4.h, p0/z, z25.h, z26.h -; CHECK-NEXT: not p5.b, p0/z, p6.b -; CHECK-NEXT: not p3.b, p0/z, p3.b -; CHECK-NEXT: fcmge p6.h, p0/z, z9.h, z26.h -; CHECK-NEXT: fcmgt p9.h, p0/z, z6.h, z5.h -; CHECK-NEXT: str z0, [sp, #2, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: sel z0.d, p7, z27.d, z28.d -; CHECK-NEXT: movprfx z28, z8 -; CHECK-NEXT: frintx z28.h, p0/m, z8.h -; CHECK-NEXT: sel z8.d, p5, z27.d, z10.d -; CHECK-NEXT: uunpklo z10.d, z12.s -; CHECK-NEXT: uunpkhi z12.d, z12.s -; CHECK-NEXT: not p5.b, p0/z, p4.b -; CHECK-NEXT: sel z11.d, p3, z27.d, z13.d -; CHECK-NEXT: uunpklo z13.d, z16.s -; CHECK-NEXT: fcmge p3.h, p0/z, z3.h, z26.h -; CHECK-NEXT: not p6.b, p0/z, p6.b -; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: sel z24.d, p5, z27.d, z15.d -; CHECK-NEXT: uunpkhi z15.d, z16.s -; CHECK-NEXT: movprfx z14, z28 -; CHECK-NEXT: fcvtzs z14.d, p0/m, z28.h -; CHECK-NEXT: frintx z10.h, p0/m, z10.h -; CHECK-NEXT: uunpklo z16.d, z17.s -; CHECK-NEXT: frintx z12.h, p0/m, z12.h -; CHECK-NEXT: uunpkhi z17.d, z17.s -; CHECK-NEXT: movprfx z19, z13 -; CHECK-NEXT: frintx z19.h, p0/m, z13.h -; CHECK-NEXT: fcmge p4.h, p0/z, z28.h, z26.h -; CHECK-NEXT: fcmge p5.h, p0/z, z31.h, z26.h -; CHECK-NEXT: not p3.b, p0/z, p3.b -; CHECK-NEXT: frintx z15.h, p0/m, z15.h -; CHECK-NEXT: fcmge p7.h, p0/z, z10.h, z26.h +; CHECK-NEXT: uunpkhi z14.s, z2.h +; CHECK-NEXT: uunpklo z15.s, z3.h +; CHECK-NEXT: uunpklo z7.d, z0.s +; CHECK-NEXT: uunpklo z5.d, z4.s +; CHECK-NEXT: 
uunpkhi z6.d, z4.s +; CHECK-NEXT: uunpklo z29.d, z25.s +; CHECK-NEXT: uunpkhi z26.d, z0.s +; CHECK-NEXT: uunpklo z8.d, z10.s +; CHECK-NEXT: uunpkhi z11.d, z10.s +; CHECK-NEXT: uunpklo z10.d, z9.s +; CHECK-NEXT: uunpkhi z13.d, z9.s +; CHECK-NEXT: mov z31.d, #0x8000000000000000 +; CHECK-NEXT: uunpklo z16.d, z12.s +; CHECK-NEXT: uunpklo z18.d, z14.s +; CHECK-NEXT: movprfx z1, z7 +; CHECK-NEXT: frintx z1.h, p0/m, z7.h +; CHECK-NEXT: movprfx z4, z5 +; CHECK-NEXT: frintx z4.h, p0/m, z5.h +; CHECK-NEXT: movprfx z5, z6 +; CHECK-NEXT: frintx z5.h, p0/m, z6.h +; CHECK-NEXT: movprfx z7, z29 +; CHECK-NEXT: frintx z7.h, p0/m, z29.h +; CHECK-NEXT: movprfx z6, z26 +; CHECK-NEXT: frintx z6.h, p0/m, z26.h +; CHECK-NEXT: mov z29.d, #0x8000000000000000 +; CHECK-NEXT: movprfx z9, z11 +; CHECK-NEXT: frintx z9.h, p0/m, z11.h +; CHECK-NEXT: movprfx z3, z10 +; CHECK-NEXT: frintx z3.h, p0/m, z10.h +; CHECK-NEXT: movprfx z10, z13 +; CHECK-NEXT: frintx z10.h, p0/m, z13.h +; CHECK-NEXT: uunpkhi z26.d, z25.s +; CHECK-NEXT: uunpkhi z13.d, z12.s +; CHECK-NEXT: frintx z8.h, p0/m, z8.h +; CHECK-NEXT: fcmge p3.h, p0/z, z1.h, z27.h +; CHECK-NEXT: uunpkhi z14.d, z14.s +; CHECK-NEXT: mov z0.d, #0x8000000000000000 +; CHECK-NEXT: fcmge p2.h, p0/z, z7.h, z27.h +; CHECK-NEXT: mov z28.d, #0x8000000000000000 +; CHECK-NEXT: fcmge p4.h, p0/z, z4.h, z27.h +; CHECK-NEXT: fcmge p5.h, p0/z, z5.h, z27.h +; CHECK-NEXT: uunpklo z19.d, z15.s +; CHECK-NEXT: uunpkhi z15.d, z15.s +; CHECK-NEXT: movprfx z20, z13 +; CHECK-NEXT: frintx z20.h, p0/m, z13.h +; CHECK-NEXT: mov z30.d, #0x8000000000000000 +; CHECK-NEXT: fcmge p1.h, p0/z, z6.h, z27.h +; CHECK-NEXT: frintx z26.h, p0/m, z26.h +; CHECK-NEXT: fcvtzs z29.d, p3/m, z1.h +; CHECK-NEXT: fcmge p3.h, p0/z, z9.h, z27.h +; CHECK-NEXT: mov z11.d, #0x8000000000000000 +; CHECK-NEXT: fcvtzs z31.d, p2/m, z7.h +; CHECK-NEXT: fcmge p2.h, p0/z, z8.h, z27.h +; CHECK-NEXT: mov z17.d, #0x8000000000000000 ; CHECK-NEXT: frintx z16.h, p0/m, z16.h -; CHECK-NEXT: fcmge p8.h, p0/z, z12.h, 
z26.h -; CHECK-NEXT: frintx z17.h, p0/m, z17.h -; CHECK-NEXT: movprfx z23, z19 -; CHECK-NEXT: fcvtzs z23.d, p0/m, z19.h -; CHECK-NEXT: not p4.b, p0/z, p4.b -; CHECK-NEXT: not p5.b, p0/z, p5.b -; CHECK-NEXT: sel z13.d, p3, z27.d, z18.d -; CHECK-NEXT: fcmge p3.h, p0/z, z19.h, z26.h -; CHECK-NEXT: movprfx z0, z15 -; CHECK-NEXT: fcvtzs z0.d, p0/m, z15.h -; CHECK-NEXT: sel z22.d, p4, z27.d, z14.d -; CHECK-NEXT: sel z18.d, p6, z27.d, z21.d -; CHECK-NEXT: movprfx z21, z12 -; CHECK-NEXT: fcvtzs z21.d, p0/m, z12.h -; CHECK-NEXT: movprfx z1, z16 -; CHECK-NEXT: fcvtzs z1.d, p0/m, z16.h -; CHECK-NEXT: sel z14.d, p5, z27.d, z20.d -; CHECK-NEXT: fcmge p4.h, p0/z, z15.h, z26.h -; CHECK-NEXT: movprfx z20, z10 -; CHECK-NEXT: fcvtzs z20.d, p0/m, z10.h -; CHECK-NEXT: movprfx z2, z17 -; CHECK-NEXT: fcvtzs z2.d, p0/m, z17.h -; CHECK-NEXT: not p5.b, p0/z, p7.b -; CHECK-NEXT: fcmge p6.h, p0/z, z16.h, z26.h -; CHECK-NEXT: not p7.b, p0/z, p8.b -; CHECK-NEXT: fcmge p8.h, p0/z, z17.h, z26.h -; CHECK-NEXT: mov z26.d, #0x7fffffffffffffff -; CHECK-NEXT: not p3.b, p0/z, p3.b -; CHECK-NEXT: not p4.b, p0/z, p4.b -; CHECK-NEXT: mov z20.d, p5/m, z27.d -; CHECK-NEXT: mov z21.d, p7/m, z27.d -; CHECK-NEXT: not p5.b, p0/z, p6.b -; CHECK-NEXT: mov z23.d, p3/m, z27.d -; CHECK-NEXT: fcmgt p3.h, p0/z, z17.h, z5.h -; CHECK-NEXT: not p6.b, p0/z, p8.b -; CHECK-NEXT: mov z0.d, p4/m, z27.d -; CHECK-NEXT: fcmgt p4.h, p0/z, z16.h, z5.h -; CHECK-NEXT: mov z1.d, p5/m, z27.d -; CHECK-NEXT: fcmuo p5.h, p0/z, z16.h, z16.h -; CHECK-NEXT: mov z29.d, p2/m, z26.d -; CHECK-NEXT: mov z2.d, p6/m, z27.d -; CHECK-NEXT: ldr z27, [sp] // 16-byte Folded Reload -; CHECK-NEXT: fcmgt p6.h, p0/z, z7.h, z5.h -; CHECK-NEXT: fcmgt p2.h, p0/z, z12.h, z5.h -; CHECK-NEXT: fcmuo p8.h, p0/z, z17.h, z17.h -; CHECK-NEXT: fcmgt p7.h, p0/z, z28.h, z5.h -; CHECK-NEXT: mov z1.d, p4/m, z26.d -; CHECK-NEXT: fcmuo p4.h, p0/z, z15.h, z15.h -; CHECK-NEXT: mov z8.d, p9/m, z26.d -; CHECK-NEXT: mov z27.d, p1/m, z26.d -; CHECK-NEXT: fcmgt p1.h, p0/z, z15.h, 
z5.h -; CHECK-NEXT: mov z2.d, p3/m, z26.d -; CHECK-NEXT: fcmgt p3.h, p0/z, z19.h, z5.h -; CHECK-NEXT: mov z11.d, p6/m, z26.d -; CHECK-NEXT: fcmuo p6.h, p0/z, z19.h, z19.h -; CHECK-NEXT: mov z1.d, p5/m, #0 // =0x0 -; CHECK-NEXT: fcmgt p5.h, p0/z, z9.h, z5.h -; CHECK-NEXT: sel z15.d, p2, z26.d, z21.d -; CHECK-NEXT: fcmuo p2.h, p0/z, z12.h, z12.h -; CHECK-NEXT: mov z2.d, p8/m, #0 // =0x0 -; CHECK-NEXT: sel z16.d, p7, z26.d, z22.d -; CHECK-NEXT: mov z0.d, p1/m, z26.d -; CHECK-NEXT: fcmgt p1.h, p0/z, z10.h, z5.h -; CHECK-NEXT: str z1, [x8, #14, mul vl] -; CHECK-NEXT: sel z17.d, p3, z26.d, z23.d -; CHECK-NEXT: fcmuo p3.h, p0/z, z10.h, z10.h -; CHECK-NEXT: str z2, [x8, #15, mul vl] -; CHECK-NEXT: sel z2.d, p5, z26.d, z18.d -; CHECK-NEXT: fcmuo p5.h, p0/z, z9.h, z9.h -; CHECK-NEXT: mov z0.d, p4/m, #0 // =0x0 -; CHECK-NEXT: fcmgt p4.h, p0/z, z3.h, z5.h -; CHECK-NEXT: mov z15.d, p2/m, #0 // =0x0 -; CHECK-NEXT: sel z1.d, p1, z26.d, z20.d -; CHECK-NEXT: fcmgt p1.h, p0/z, z31.h, z5.h -; CHECK-NEXT: mov z17.d, p6/m, #0 // =0x0 -; CHECK-NEXT: fcmuo p2.h, p0/z, z31.h, z31.h +; CHECK-NEXT: frintx z14.h, p0/m, z14.h +; CHECK-NEXT: fcvtzs z0.d, p4/m, z4.h +; CHECK-NEXT: fcvtzs z28.d, p5/m, z5.h +; CHECK-NEXT: fcmge p4.h, p0/z, z3.h, z27.h +; CHECK-NEXT: mov z12.d, #0x8000000000000000 +; CHECK-NEXT: fcmge p5.h, p0/z, z10.h, z27.h +; CHECK-NEXT: mov z13.d, #0x8000000000000000 +; CHECK-NEXT: frintx z19.h, p0/m, z19.h +; CHECK-NEXT: frintx z15.h, p0/m, z15.h +; CHECK-NEXT: mov z24.d, #0x8000000000000000 +; CHECK-NEXT: fcvtzs z30.d, p1/m, z6.h +; CHECK-NEXT: fcmge p1.h, p0/z, z26.h, z27.h +; CHECK-NEXT: mov z2.d, #0x8000000000000000 +; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: frintx z18.h, p0/m, z18.h +; CHECK-NEXT: fcvtzs z11.d, p3/m, z9.h +; CHECK-NEXT: fcmge p3.h, p0/z, z20.h, z27.h +; CHECK-NEXT: mov z25.h, w9 +; CHECK-NEXT: fcvtzs z17.d, p2/m, z8.h +; CHECK-NEXT: fcmge p6.h, p0/z, z16.h, z27.h +; CHECK-NEXT: mov z21.d, #0x8000000000000000 +; CHECK-NEXT: fcmge 
p2.h, p0/z, z14.h, z27.h +; CHECK-NEXT: mov z22.d, #0x8000000000000000 +; CHECK-NEXT: fcvtzs z12.d, p4/m, z3.h +; CHECK-NEXT: fcvtzs z13.d, p5/m, z10.h +; CHECK-NEXT: fcmge p4.h, p0/z, z19.h, z27.h +; CHECK-NEXT: mov z23.d, #0x8000000000000000 +; CHECK-NEXT: fcmge p5.h, p0/z, z15.h, z27.h +; CHECK-NEXT: mov z0.d, #0x8000000000000000 +; CHECK-NEXT: fcvtzs z2.d, p1/m, z26.h +; CHECK-NEXT: fcmge p1.h, p0/z, z18.h, z27.h +; CHECK-NEXT: fcvtzs z24.d, p3/m, z20.h +; CHECK-NEXT: mov z27.d, #0x7fffffffffffffff +; CHECK-NEXT: fcmgt p11.h, p0/z, z20.h, z25.h +; CHECK-NEXT: fcvtzs z21.d, p6/m, z16.h +; CHECK-NEXT: fcmgt p3.h, p0/z, z16.h, z25.h +; CHECK-NEXT: fcmuo p6.h, p0/z, z16.h, z16.h +; CHECK-NEXT: fcvtzs z22.d, p2/m, z14.h +; CHECK-NEXT: fcmgt p2.h, p0/z, z8.h, z25.h +; CHECK-NEXT: mov z16.d, #0x8000000000000000 +; CHECK-NEXT: fcmgt p7.h, p0/z, z5.h, z25.h +; CHECK-NEXT: fcvtzs z23.d, p4/m, z19.h +; CHECK-NEXT: fcvtzs z0.d, p5/m, z15.h +; CHECK-NEXT: fcmuo p4.h, p0/z, z20.h, z20.h +; CHECK-NEXT: fcmgt p5.h, p0/z, z15.h, z25.h +; CHECK-NEXT: mov z24.d, p11/m, z27.d +; CHECK-NEXT: sel z20.d, p3, z27.d, z21.d +; CHECK-NEXT: fcmgt p3.h, p0/z, z19.h, z25.h +; CHECK-NEXT: fcmgt p8.h, p0/z, z1.h, z25.h +; CHECK-NEXT: mov z17.d, p2/m, z27.d +; CHECK-NEXT: fcvtzs z16.d, p1/m, z18.h +; CHECK-NEXT: fcmgt p2.h, p0/z, z18.h, z25.h +; CHECK-NEXT: mov z28.d, p7/m, z27.d +; CHECK-NEXT: fcmgt p7.h, p0/z, z14.h, z25.h +; CHECK-NEXT: fcmuo p1.h, p0/z, z15.h, z15.h +; CHECK-NEXT: mov z0.d, p5/m, z27.d +; CHECK-NEXT: mov z24.d, p4/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p5.h, p0/z, z10.h, z25.h +; CHECK-NEXT: fcmuo p4.h, p0/z, z19.h, z19.h +; CHECK-NEXT: sel z19.d, p3, z27.d, z23.d +; CHECK-NEXT: fcmuo p3.h, p0/z, z14.h, z14.h +; CHECK-NEXT: mov z20.d, p6/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p9.h, p0/z, z6.h, z25.h +; CHECK-NEXT: fcmgt p10.h, p0/z, z7.h, z25.h +; CHECK-NEXT: str z24, [x8, #15, mul vl] +; CHECK-NEXT: sel z24.d, p2, z27.d, z16.d +; CHECK-NEXT: fcmgt p2.h, p0/z, z3.h, z25.h +; 
CHECK-NEXT: sel z15.d, p7, z27.d, z22.d +; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 +; CHECK-NEXT: mov z13.d, p5/m, z27.d +; CHECK-NEXT: str z20, [x8, #14, mul vl] +; CHECK-NEXT: fcmgt p5.h, p0/z, z9.h, z25.h +; CHECK-NEXT: fcmuo p1.h, p0/z, z18.h, z18.h +; CHECK-NEXT: mov z19.d, p4/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p4.h, p0/z, z10.h, z10.h +; CHECK-NEXT: mov z29.d, p8/m, z27.d ; CHECK-NEXT: str z0, [x8, #13, mul vl] -; CHECK-NEXT: mov z2.d, p5/m, #0 // =0x0 -; CHECK-NEXT: fcmuo p5.h, p0/z, z25.h, z25.h -; CHECK-NEXT: str z17, [x8, #12, mul vl] -; CHECK-NEXT: mov z1.d, p3/m, #0 // =0x0 -; CHECK-NEXT: fcmgt p3.h, p0/z, z25.h, z5.h +; CHECK-NEXT: mov z15.d, p3/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p3.h, p0/z, z3.h, z3.h +; CHECK-NEXT: sel z0.d, p2, z27.d, z12.d +; CHECK-NEXT: fcmuo p2.h, p0/z, z9.h, z9.h +; CHECK-NEXT: mov z30.d, p9/m, z27.d +; CHECK-NEXT: str z19, [x8, #12, mul vl] +; CHECK-NEXT: sel z3.d, p5, z27.d, z11.d +; CHECK-NEXT: mov z24.d, p1/m, #0 // =0x0 ; CHECK-NEXT: str z15, [x8, #11, mul vl] -; CHECK-NEXT: sel z0.d, p1, z26.d, z14.d -; CHECK-NEXT: fcmuo p1.h, p0/z, z3.h, z3.h -; CHECK-NEXT: sel z3.d, p4, z26.d, z13.d -; CHECK-NEXT: fcmuo p4.h, p0/z, z28.h, z28.h -; CHECK-NEXT: str z1, [x8, #10, mul vl] -; CHECK-NEXT: sel z1.d, p3, z26.d, z24.d -; CHECK-NEXT: fcmuo p3.h, p0/z, z7.h, z7.h -; CHECK-NEXT: ldr z7, [sp, #1, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: str z2, [x8, #9, mul vl] -; CHECK-NEXT: mov z0.d, p2/m, #0 // =0x0 -; CHECK-NEXT: mov z3.d, p1/m, #0 // =0x0 -; CHECK-NEXT: fcmuo p1.h, p0/z, z6.h, z6.h -; CHECK-NEXT: mov z16.d, p4/m, #0 // =0x0 -; CHECK-NEXT: fcmuo p4.h, p0/z, z4.h, z4.h -; CHECK-NEXT: fcmgt p2.h, p0/z, z7.h, z5.h -; CHECK-NEXT: mov z1.d, p5/m, #0 // =0x0 -; CHECK-NEXT: fcmuo p5.h, p0/z, z30.h, z30.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z26.h, z25.h +; CHECK-NEXT: mov z13.d, p4/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, p3/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p5.h, p0/z, z8.h, z8.h +; CHECK-NEXT: fcmuo p4.h, p0/z, z26.h, z26.h +; 
CHECK-NEXT: str z24, [x8, #10, mul vl] +; CHECK-NEXT: mov z3.d, p2/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p3.h, p0/z, z4.h, z25.h +; CHECK-NEXT: str z13, [x8, #9, mul vl] +; CHECK-NEXT: fcmuo p2.h, p0/z, z6.h, z6.h +; CHECK-NEXT: mov z31.d, p10/m, z27.d ; CHECK-NEXT: str z0, [x8, #8, mul vl] -; CHECK-NEXT: fcmuo p0.h, p0/z, z7.h, z7.h -; CHECK-NEXT: mov z11.d, p3/m, #0 // =0x0 +; CHECK-NEXT: mov z2.d, p1/m, z27.d +; CHECK-NEXT: fcmuo p1.h, p0/z, z7.h, z7.h ; CHECK-NEXT: str z3, [x8, #7, mul vl] -; CHECK-NEXT: ldr z0, [sp, #2, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: str z16, [x8, #6, mul vl] -; CHECK-NEXT: mov z8.d, p1/m, #0 // =0x0 -; CHECK-NEXT: str z1, [x8, #5, mul vl] -; CHECK-NEXT: mov z29.d, p4/m, #0 // =0x0 -; CHECK-NEXT: mov z27.d, p5/m, #0 // =0x0 -; CHECK-NEXT: str z11, [x8, #4, mul vl] -; CHECK-NEXT: str z8, [x8, #3, mul vl] -; CHECK-NEXT: mov z0.d, p2/m, z26.d -; CHECK-NEXT: str z29, [x8, #2, mul vl] -; CHECK-NEXT: str z27, [x8, #1, mul vl] +; CHECK-NEXT: mov z17.d, p5/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p5.h, p0/z, z1.h, z1.h +; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: mov z2.d, p4/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p4.h, p0/z, z5.h, z5.h +; CHECK-NEXT: fcmuo p0.h, p0/z, z4.h, z4.h +; CHECK-NEXT: str z17, [x8, #6, mul vl] +; CHECK-NEXT: mov z31.d, p1/m, #0 // =0x0 +; CHECK-NEXT: mov z30.d, p2/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, p3/m, z27.d +; CHECK-NEXT: mov z29.d, p5/m, #0 // =0x0 +; CHECK-NEXT: str z2, [x8, #5, mul vl] +; CHECK-NEXT: str z31, [x8, #4, mul vl] +; CHECK-NEXT: mov z28.d, p4/m, #0 // =0x0 +; CHECK-NEXT: str z30, [x8, #3, mul vl] ; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 +; CHECK-NEXT: str z29, [x8, #2, mul vl] +; CHECK-NEXT: str z28, [x8, #1, mul vl] ; CHECK-NEXT: str z0, [x8] -; CHECK-NEXT: addvl sp, sp, #3 +; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z21, [sp, #3, mul vl] // 
16-byte Folded Reload @@ -592,6 +511,8 @@ define @llrint_v32i64_v32f16( %x) { ; CHECK-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p11, [sp] // 2-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #1, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p9, [sp, #2, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload @@ -611,20 +532,17 @@ define @llrint_v1i64_v1f32( %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov w8, #-553648128 // =0xdf000000 -; CHECK-NEXT: mov z2.d, #0x8000000000000000 ; CHECK-NEXT: mov z1.s, w8 ; CHECK-NEXT: mov w8, #1593835519 // =0x5effffff ; CHECK-NEXT: frintx z0.s, p0/m, z0.s -; CHECK-NEXT: mov z3.s, w8 +; CHECK-NEXT: mov z2.s, w8 ; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, z1.s -; CHECK-NEXT: movprfx z1, z0 -; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.s -; CHECK-NEXT: fcmgt p2.s, p0/z, z0.s, z3.s -; CHECK-NEXT: mov z3.d, #0x7fffffffffffffff -; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z1.d, #0x8000000000000000 +; CHECK-NEXT: fcvtzs z1.d, p1/m, z0.s +; CHECK-NEXT: fcmgt p1.s, p0/z, z0.s, z2.s +; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff ; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s -; CHECK-NEXT: mov z1.d, p1/m, z2.d -; CHECK-NEXT: sel z0.d, p2, z3.d, z1.d +; CHECK-NEXT: sel z0.d, p1, z2.d, z1.d ; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %a = call @llvm.llrint.nxv1i64.nxv1f32( %x) @@ -637,20 +555,17 @@ define @llrint_v2i64_v2f32( %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov w8, #-553648128 // =0xdf000000 -; CHECK-NEXT: mov z2.d, #0x8000000000000000 ; CHECK-NEXT: mov z1.s, w8 ; CHECK-NEXT: mov w8, #1593835519 // =0x5effffff ; CHECK-NEXT: frintx z0.s, p0/m, z0.s -; CHECK-NEXT: mov z3.s, w8 +; CHECK-NEXT: mov z2.s, w8 ; CHECK-NEXT: 
fcmge p1.s, p0/z, z0.s, z1.s -; CHECK-NEXT: movprfx z1, z0 -; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.s -; CHECK-NEXT: fcmgt p2.s, p0/z, z0.s, z3.s -; CHECK-NEXT: mov z3.d, #0x7fffffffffffffff -; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z1.d, #0x8000000000000000 +; CHECK-NEXT: fcvtzs z1.d, p1/m, z0.s +; CHECK-NEXT: fcmgt p1.s, p0/z, z0.s, z2.s +; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff ; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s -; CHECK-NEXT: mov z1.d, p1/m, z2.d -; CHECK-NEXT: sel z0.d, p2, z3.d, z1.d +; CHECK-NEXT: sel z0.d, p1, z2.d, z1.d ; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %a = call @llvm.llrint.nxv2i64.nxv2f32( %x) @@ -661,43 +576,30 @@ declare @llvm.llrint.nxv2i64.nxv2f32() define @llrint_v4i64_v4f32( %x) { ; CHECK-LABEL: llrint_v4i64_v4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG -; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: uunpklo z1.d, z0.s ; CHECK-NEXT: uunpkhi z0.d, z0.s ; CHECK-NEXT: mov w8, #-553648128 // =0xdf000000 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov z2.s, w8 ; CHECK-NEXT: mov w8, #1593835519 // =0x5effffff -; CHECK-NEXT: mov z3.s, w8 -; CHECK-NEXT: mov z6.d, #0x7fffffffffffffff +; CHECK-NEXT: mov z3.d, #0x8000000000000000 +; CHECK-NEXT: mov z4.d, #0x8000000000000000 +; CHECK-NEXT: mov z5.s, w8 ; CHECK-NEXT: frintx z1.s, p0/m, z1.s ; CHECK-NEXT: frintx z0.s, p0/m, z0.s ; CHECK-NEXT: fcmge p1.s, p0/z, z1.s, z2.s ; CHECK-NEXT: fcmge p2.s, p0/z, z0.s, z2.s -; CHECK-NEXT: mov z2.d, #0x8000000000000000 -; CHECK-NEXT: movprfx z4, z1 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z1.s -; CHECK-NEXT: movprfx z5, z0 -; CHECK-NEXT: fcvtzs z5.d, p0/m, z0.s -; CHECK-NEXT: fcmgt p3.s, p0/z, z1.s, z3.s -; CHECK-NEXT: fcmgt p4.s, p0/z, z0.s, z3.s -; CHECK-NEXT: not p1.b, p0/z, 
p1.b -; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: sel z3.d, p1, z2.d, z4.d -; CHECK-NEXT: fcmuo p1.s, p0/z, z1.s, z1.s +; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff +; CHECK-NEXT: fcmuo p3.s, p0/z, z1.s, z1.s +; CHECK-NEXT: fcvtzs z3.d, p1/m, z1.s +; CHECK-NEXT: fcmgt p1.s, p0/z, z1.s, z5.s +; CHECK-NEXT: fcvtzs z4.d, p2/m, z0.s +; CHECK-NEXT: fcmgt p2.s, p0/z, z0.s, z5.s ; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s -; CHECK-NEXT: sel z2.d, p2, z2.d, z5.d -; CHECK-NEXT: sel z0.d, p3, z6.d, z3.d -; CHECK-NEXT: sel z1.d, p4, z6.d, z2.d -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 +; CHECK-NEXT: sel z0.d, p1, z2.d, z3.d +; CHECK-NEXT: sel z1.d, p2, z2.d, z4.d +; CHECK-NEXT: mov z0.d, p3/m, #0 // =0x0 ; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0 -; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %a = call @llvm.llrint.nxv4i64.nxv4f32( %x) ret %a @@ -709,7 +611,6 @@ define @llrint_v8i64_v8f32( %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill @@ -718,57 +619,47 @@ define @llrint_v8i64_v8f32( %x) { ; CHECK-NEXT: uunpklo z2.d, z0.s ; CHECK-NEXT: uunpkhi z0.d, z0.s ; CHECK-NEXT: mov w8, #-553648128 // =0xdf000000 -; CHECK-NEXT: uunpklo z3.d, z1.s ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uunpklo z3.d, z1.s ; CHECK-NEXT: uunpkhi z1.d, z1.s ; CHECK-NEXT: mov z4.s, w8 ; CHECK-NEXT: mov w8, #1593835519 // =0x5effffff ; CHECK-NEXT: mov z5.d, #0x8000000000000000 -; CHECK-NEXT: mov z6.s, w8 -; CHECK-NEXT: mov z26.d, #0x7fffffffffffffff +; CHECK-NEXT: mov z6.d, #0x8000000000000000 +; CHECK-NEXT: mov z25.s, w8 +; CHECK-NEXT: mov z7.d, #0x8000000000000000 ; CHECK-NEXT: frintx z2.s, p0/m, z2.s ; CHECK-NEXT: frintx z0.s, p0/m, z0.s +; CHECK-NEXT: mov z24.d, #0x8000000000000000 ; CHECK-NEXT: frintx z3.s, p0/m, z3.s ; CHECK-NEXT: frintx z1.s, p0/m, z1.s ; CHECK-NEXT: fcmge p1.s, p0/z, z2.s, z4.s ; CHECK-NEXT: fcmge p2.s, p0/z, z0.s, z4.s -; CHECK-NEXT: movprfx z7, z0 -; CHECK-NEXT: fcvtzs z7.d, p0/m, z0.s +; CHECK-NEXT: fcmuo p6.s, p0/z, z0.s, z0.s ; CHECK-NEXT: fcmge p3.s, p0/z, z3.s, z4.s ; CHECK-NEXT: fcmge p4.s, p0/z, z1.s, z4.s -; CHECK-NEXT: movprfx z4, z2 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z2.s -; CHECK-NEXT: movprfx z24, z3 -; CHECK-NEXT: fcvtzs z24.d, p0/m, z3.s -; CHECK-NEXT: movprfx z25, z1 -; CHECK-NEXT: fcvtzs z25.d, p0/m, z1.s -; CHECK-NEXT: fcmgt p7.s, p0/z, z3.s, z6.s -; CHECK-NEXT: fcmgt p5.s, p0/z, z2.s, z6.s -; CHECK-NEXT: fcmgt p6.s, p0/z, z0.s, z6.s -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: not p3.b, p0/z, p3.b -; CHECK-NEXT: mov z4.d, p1/m, z5.d -; CHECK-NEXT: fcmgt p1.s, p0/z, z1.s, z6.s -; CHECK-NEXT: not p4.b, p0/z, p4.b -; CHECK-NEXT: sel z6.d, p2, z5.d, 
z7.d -; CHECK-NEXT: fcmuo p2.s, p0/z, z2.s, z2.s -; CHECK-NEXT: sel z7.d, p3, z5.d, z24.d -; CHECK-NEXT: fcmuo p3.s, p0/z, z0.s, z0.s -; CHECK-NEXT: sel z5.d, p4, z5.d, z25.d -; CHECK-NEXT: fcmuo p4.s, p0/z, z3.s, z3.s +; CHECK-NEXT: mov z4.d, #0x7fffffffffffffff +; CHECK-NEXT: fcmgt p5.s, p0/z, z1.s, z25.s +; CHECK-NEXT: fcvtzs z5.d, p1/m, z2.s +; CHECK-NEXT: fcvtzs z6.d, p2/m, z0.s +; CHECK-NEXT: fcmgt p1.s, p0/z, z2.s, z25.s +; CHECK-NEXT: fcmgt p2.s, p0/z, z0.s, z25.s +; CHECK-NEXT: fcvtzs z7.d, p3/m, z3.s +; CHECK-NEXT: fcmgt p3.s, p0/z, z3.s, z25.s +; CHECK-NEXT: fcvtzs z24.d, p4/m, z1.s +; CHECK-NEXT: fcmuo p4.s, p0/z, z2.s, z2.s +; CHECK-NEXT: sel z0.d, p1, z4.d, z5.d +; CHECK-NEXT: fcmuo p1.s, p0/z, z3.s, z3.s ; CHECK-NEXT: fcmuo p0.s, p0/z, z1.s, z1.s -; CHECK-NEXT: sel z0.d, p5, z26.d, z4.d -; CHECK-NEXT: sel z1.d, p6, z26.d, z6.d -; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: sel z2.d, p7, z26.d, z7.d -; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: sel z3.d, p1, z26.d, z5.d +; CHECK-NEXT: sel z1.d, p2, z4.d, z6.d +; CHECK-NEXT: sel z2.d, p3, z4.d, z7.d +; CHECK-NEXT: sel z3.d, p5, z4.d, z24.d ; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: mov z0.d, p2/m, #0 // =0x0 -; CHECK-NEXT: mov z1.d, p3/m, #0 // =0x0 -; CHECK-NEXT: mov z2.d, p4/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, p4/m, #0 // =0x0 ; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z1.d, p6/m, #0 // =0x0 +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z2.d, p1/m, #0 // =0x0 ; CHECK-NEXT: mov z3.d, p0/m, #0 // =0x0 ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -782,7 +673,7 @@ define @llrint_v16i64_v16f32( %x) { ; CHECK-LABEL: llrint_v16i64_v16f32: ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-2 +; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: str p10, [sp, #1, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p9, [sp, #2, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Folded Spill @@ -790,119 +681,106 @@ define @llrint_v16i64_v16f32( %x) { ; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG +; CHECK-NEXT: str z10, [sp, #1, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z9, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z8, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 16 - 24 * VG ; CHECK-NEXT: uunpklo z4.d, z0.s ; CHECK-NEXT: uunpkhi z0.d, z0.s ; CHECK-NEXT: mov w8, #-553648128 // =0xdf000000 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: uunpklo z7.d, z1.s -; CHECK-NEXT: uunpkhi z1.d, z1.s -; CHECK-NEXT: uunpklo z24.d, z2.s +; CHECK-NEXT: uunpklo z6.d, z2.s ; CHECK-NEXT: uunpkhi z2.d, z2.s -; CHECK-NEXT: uunpklo z25.d, z3.s -; CHECK-NEXT: uunpkhi z3.d, z3.s -; CHECK-NEXT: mov z26.d, #0x7fffffffffffffff -; CHECK-NEXT: movprfx z5, z4 -; CHECK-NEXT: frintx z5.s, p0/m, z4.s -; CHECK-NEXT: movprfx z6, z0 -; 
CHECK-NEXT: frintx z6.s, p0/m, z0.s -; CHECK-NEXT: mov z4.s, w8 -; CHECK-NEXT: frintx z7.s, p0/m, z7.s -; CHECK-NEXT: movprfx z28, z1 -; CHECK-NEXT: frintx z28.s, p0/m, z1.s +; CHECK-NEXT: uunpklo z5.d, z1.s +; CHECK-NEXT: uunpkhi z1.d, z1.s +; CHECK-NEXT: uunpklo z7.d, z3.s +; CHECK-NEXT: mov z24.s, w8 ; CHECK-NEXT: mov w8, #1593835519 // =0x5effffff -; CHECK-NEXT: mov z0.d, #0x8000000000000000 -; CHECK-NEXT: frintx z24.s, p0/m, z24.s -; CHECK-NEXT: movprfx z29, z2 -; CHECK-NEXT: frintx z29.s, p0/m, z2.s -; CHECK-NEXT: frintx z25.s, p0/m, z25.s -; CHECK-NEXT: movprfx z30, z3 -; CHECK-NEXT: frintx z30.s, p0/m, z3.s -; CHECK-NEXT: mov z27.s, w8 -; CHECK-NEXT: fcmge p1.s, p0/z, z5.s, z4.s -; CHECK-NEXT: fcmge p2.s, p0/z, z6.s, z4.s -; CHECK-NEXT: movprfx z1, z5 -; CHECK-NEXT: fcvtzs z1.d, p0/m, z5.s -; CHECK-NEXT: movprfx z2, z6 -; CHECK-NEXT: fcvtzs z2.d, p0/m, z6.s -; CHECK-NEXT: fcmge p5.s, p0/z, z7.s, z4.s -; CHECK-NEXT: fcmge p6.s, p0/z, z28.s, z4.s -; CHECK-NEXT: movprfx z3, z7 -; CHECK-NEXT: fcvtzs z3.d, p0/m, z7.s -; CHECK-NEXT: fcmge p8.s, p0/z, z29.s, z4.s -; CHECK-NEXT: fcmgt p3.s, p0/z, z5.s, z27.s -; CHECK-NEXT: fcmgt p7.s, p0/z, z6.s, z27.s -; CHECK-NEXT: fcmge p9.s, p0/z, z25.s, z4.s -; CHECK-NEXT: movprfx z31, z25 -; CHECK-NEXT: fcvtzs z31.d, p0/m, z25.s -; CHECK-NEXT: not p4.b, p0/z, p1.b -; CHECK-NEXT: fcmuo p1.s, p0/z, z5.s, z5.s -; CHECK-NEXT: movprfx z5, z28 -; CHECK-NEXT: fcvtzs z5.d, p0/m, z28.s -; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: fcmge p10.s, p0/z, z30.s, z4.s -; CHECK-NEXT: movprfx z8, z30 -; CHECK-NEXT: fcvtzs z8.d, p0/m, z30.s -; CHECK-NEXT: mov z1.d, p4/m, z0.d -; CHECK-NEXT: fcmge p4.s, p0/z, z24.s, z4.s -; CHECK-NEXT: movprfx z4, z29 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z29.s -; CHECK-NEXT: mov z2.d, p2/m, z0.d -; CHECK-NEXT: fcmuo p2.s, p0/z, z6.s, z6.s -; CHECK-NEXT: movprfx z6, z24 -; CHECK-NEXT: fcvtzs z6.d, p0/m, z24.s -; CHECK-NEXT: not p5.b, p0/z, p5.b -; CHECK-NEXT: not p6.b, p0/z, p6.b -; CHECK-NEXT: not p4.b, p0/z, 
p4.b -; CHECK-NEXT: mov z3.d, p5/m, z0.d -; CHECK-NEXT: not p5.b, p0/z, p8.b -; CHECK-NEXT: mov z5.d, p6/m, z0.d -; CHECK-NEXT: fcmgt p8.s, p0/z, z7.s, z27.s -; CHECK-NEXT: not p6.b, p0/z, p9.b -; CHECK-NEXT: mov z6.d, p4/m, z0.d -; CHECK-NEXT: fcmuo p9.s, p0/z, z7.s, z7.s -; CHECK-NEXT: not p4.b, p0/z, p10.b -; CHECK-NEXT: fcmgt p10.s, p0/z, z28.s, z27.s -; CHECK-NEXT: sel z7.d, p5, z0.d, z4.d -; CHECK-NEXT: fcmgt p5.s, p0/z, z24.s, z27.s -; CHECK-NEXT: mov z31.d, p6/m, z0.d -; CHECK-NEXT: fcmgt p6.s, p0/z, z30.s, z27.s -; CHECK-NEXT: mov z8.d, p4/m, z0.d -; CHECK-NEXT: sel z0.d, p3, z26.d, z1.d -; CHECK-NEXT: fcmgt p3.s, p0/z, z29.s, z27.s -; CHECK-NEXT: fcmgt p4.s, p0/z, z25.s, z27.s -; CHECK-NEXT: sel z1.d, p7, z26.d, z2.d -; CHECK-NEXT: fcmuo p7.s, p0/z, z28.s, z28.s -; CHECK-NEXT: sel z2.d, p8, z26.d, z3.d -; CHECK-NEXT: sel z3.d, p10, z26.d, z5.d -; CHECK-NEXT: fcmuo p8.s, p0/z, z29.s, z29.s -; CHECK-NEXT: sel z4.d, p5, z26.d, z6.d -; CHECK-NEXT: fcmuo p5.s, p0/z, z24.s, z24.s -; CHECK-NEXT: fcmuo p10.s, p0/z, z25.s, z25.s -; CHECK-NEXT: sel z5.d, p3, z26.d, z7.d -; CHECK-NEXT: fcmuo p0.s, p0/z, z30.s, z30.s -; CHECK-NEXT: sel z7.d, p6, z26.d, z8.d -; CHECK-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: mov z26.d, #0x8000000000000000 +; CHECK-NEXT: frintx z4.s, p0/m, z4.s +; CHECK-NEXT: frintx z0.s, p0/m, z0.s +; CHECK-NEXT: mov z30.s, w8 +; CHECK-NEXT: movprfx z27, z2 +; CHECK-NEXT: frintx z27.s, p0/m, z2.s +; CHECK-NEXT: uunpkhi z2.d, z3.s +; CHECK-NEXT: frintx z6.s, p0/m, z6.s +; CHECK-NEXT: movprfx z25, z1 +; CHECK-NEXT: frintx z25.s, p0/m, z1.s +; CHECK-NEXT: frintx z5.s, p0/m, z5.s +; CHECK-NEXT: frintx z7.s, p0/m, z7.s +; CHECK-NEXT: mov z1.d, #0x8000000000000000 +; CHECK-NEXT: mov z3.d, #0x8000000000000000 +; CHECK-NEXT: mov z28.d, #0x8000000000000000 +; CHECK-NEXT: fcmge p1.s, p0/z, z4.s, z24.s +; CHECK-NEXT: fcmge p2.s, p0/z, z0.s, z24.s +; CHECK-NEXT: mov z29.d, #0x8000000000000000 +; CHECK-NEXT: movprfx z9, z2 +; 
CHECK-NEXT: frintx z9.s, p0/m, z2.s +; CHECK-NEXT: fcmge p5.s, p0/z, z6.s, z24.s +; CHECK-NEXT: mov z8.d, #0x8000000000000000 +; CHECK-NEXT: fcmge p3.s, p0/z, z5.s, z24.s +; CHECK-NEXT: fcmge p4.s, p0/z, z25.s, z24.s +; CHECK-NEXT: fcmge p7.s, p0/z, z7.s, z24.s +; CHECK-NEXT: fcmge p6.s, p0/z, z27.s, z24.s +; CHECK-NEXT: mov z31.d, #0x8000000000000000 +; CHECK-NEXT: mov z10.d, #0x7fffffffffffffff +; CHECK-NEXT: fcvtzs z1.d, p1/m, z4.s +; CHECK-NEXT: fcmgt p1.s, p0/z, z4.s, z30.s +; CHECK-NEXT: fcvtzs z26.d, p2/m, z0.s +; CHECK-NEXT: fcmge p2.s, p0/z, z9.s, z24.s +; CHECK-NEXT: mov z24.d, #0x8000000000000000 +; CHECK-NEXT: fcvtzs z29.d, p5/m, z6.s +; CHECK-NEXT: fcvtzs z3.d, p3/m, z5.s +; CHECK-NEXT: fcvtzs z28.d, p4/m, z25.s +; CHECK-NEXT: fcvtzs z8.d, p7/m, z7.s +; CHECK-NEXT: fcmgt p4.s, p0/z, z0.s, z30.s +; CHECK-NEXT: fcmgt p5.s, p0/z, z5.s, z30.s +; CHECK-NEXT: fcmgt p7.s, p0/z, z25.s, z30.s +; CHECK-NEXT: fcmgt p8.s, p0/z, z6.s, z30.s +; CHECK-NEXT: fcvtzs z31.d, p6/m, z27.s +; CHECK-NEXT: fcmuo p6.s, p0/z, z0.s, z0.s +; CHECK-NEXT: sel z0.d, p1, z10.d, z1.d +; CHECK-NEXT: fcmgt p1.s, p0/z, z27.s, z30.s +; CHECK-NEXT: fcmgt p10.s, p0/z, z7.s, z30.s +; CHECK-NEXT: fcvtzs z24.d, p2/m, z9.s +; CHECK-NEXT: fcmgt p2.s, p0/z, z9.s, z30.s +; CHECK-NEXT: fcmuo p3.s, p0/z, z4.s, z4.s +; CHECK-NEXT: fcmuo p9.s, p0/z, z5.s, z5.s +; CHECK-NEXT: sel z1.d, p4, z10.d, z26.d +; CHECK-NEXT: fcmuo p4.s, p0/z, z25.s, z25.s +; CHECK-NEXT: sel z2.d, p5, z10.d, z3.d +; CHECK-NEXT: sel z3.d, p7, z10.d, z28.d +; CHECK-NEXT: sel z4.d, p8, z10.d, z29.d +; CHECK-NEXT: fcmuo p5.s, p0/z, z6.s, z6.s +; CHECK-NEXT: fcmuo p7.s, p0/z, z27.s, z27.s +; CHECK-NEXT: fcmuo p8.s, p0/z, z7.s, z7.s +; CHECK-NEXT: sel z5.d, p1, z10.d, z31.d +; CHECK-NEXT: sel z6.d, p10, z10.d, z8.d +; CHECK-NEXT: ldr z8, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #1, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: fcmuo p0.s, p0/z, z9.s, z9.s +; CHECK-NEXT: ldr z9, [sp, #2, mul vl] // 
16-byte Folded Reload +; CHECK-NEXT: sel z7.d, p2, z10.d, z24.d +; CHECK-NEXT: ldr z10, [sp, #1, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: mov z1.d, p6/m, #0 // =0x0 ; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: sel z6.d, p4, z26.d, z31.d -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: mov z2.d, p9/m, #0 // =0x0 -; CHECK-NEXT: mov z3.d, p7/m, #0 // =0x0 +; CHECK-NEXT: mov z3.d, p4/m, #0 // =0x0 ; CHECK-NEXT: ldr p9, [sp, #2, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: mov z4.d, p5/m, #0 // =0x0 -; CHECK-NEXT: mov z5.d, p8/m, #0 // =0x0 +; CHECK-NEXT: mov z5.d, p7/m, #0 // =0x0 +; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z6.d, p8/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, p3/m, #0 // =0x0 ; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: mov z6.d, p10/m, #0 // =0x0 -; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 -; CHECK-NEXT: ldr p10, [sp, #1, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: mov z1.d, p2/m, #0 // =0x0 ; CHECK-NEXT: mov z7.d, p0/m, #0 // =0x0 -; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: addvl sp, sp, #2 +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #4 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %a = call @llvm.llrint.nxv16i64.nxv16f32( %x) @@ -915,6 +793,8 @@ define @llrint_v32i64_v32f32( %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-17 +; CHECK-NEXT: str p11, [sp] // 2-byte Folded Spill +; CHECK-NEXT: str p10, [sp, #1, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p9, [sp, #2, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill @@ -937,8 +817,8 @@ define @llrint_v32i64_v32f32( %x) { ; CHECK-NEXT: str z10, [sp, #14, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z9, [sp, #15, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z8, [sp, #16, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-3 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xa0, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 160 * VG +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 144 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG ; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG @@ -949,224 +829,185 @@ define @llrint_v32i64_v32f32( %x) { ; CHECK-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 16 - 56 * VG ; CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 16 - 64 * VG ; CHECK-NEXT: uunpklo z24.d, z0.s -; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uunpkhi z25.d, z0.s ; CHECK-NEXT: mov w9, #-553648128 // =0xdf000000 ; CHECK-NEXT: uunpklo z26.d, z1.s -; CHECK-NEXT: uunpkhi z25.d, z0.s -; CHECK-NEXT: uunpkhi z28.d, z1.s -; CHECK-NEXT: mov z29.s, w9 +; CHECK-NEXT: uunpkhi z1.d, z1.s +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uunpklo z27.d, z2.s +; CHECK-NEXT: uunpkhi z9.d, z2.s +; CHECK-NEXT: uunpklo 
z11.d, z3.s +; CHECK-NEXT: uunpkhi z12.d, z3.s +; CHECK-NEXT: mov z10.s, w9 ; CHECK-NEXT: mov w9, #1593835519 // =0x5effffff -; CHECK-NEXT: mov z17.d, z5.d -; CHECK-NEXT: mov z27.d, #0x8000000000000000 -; CHECK-NEXT: uunpkhi z30.d, z2.s -; CHECK-NEXT: uunpklo z8.d, z3.s ; CHECK-NEXT: movprfx z0, z24 ; CHECK-NEXT: frintx z0.s, p0/m, z24.s -; CHECK-NEXT: uunpkhi z9.d, z3.s +; CHECK-NEXT: movprfx z24, z25 +; CHECK-NEXT: frintx z24.s, p0/m, z25.s +; CHECK-NEXT: uunpklo z13.d, z4.s +; CHECK-NEXT: movprfx z25, z26 +; CHECK-NEXT: frintx z25.s, p0/m, z26.s +; CHECK-NEXT: movprfx z26, z1 +; CHECK-NEXT: frintx z26.s, p0/m, z1.s ; CHECK-NEXT: uunpkhi z14.d, z4.s -; CHECK-NEXT: movprfx z24, z26 -; CHECK-NEXT: frintx z24.s, p0/m, z26.s -; CHECK-NEXT: movprfx z1, z25 -; CHECK-NEXT: frintx z1.s, p0/m, z25.s -; CHECK-NEXT: movprfx z5, z28 -; CHECK-NEXT: frintx z5.s, p0/m, z28.s -; CHECK-NEXT: uunpklo z26.d, z2.s -; CHECK-NEXT: uunpklo z16.d, z17.s -; CHECK-NEXT: mov z25.s, w9 -; CHECK-NEXT: movprfx z28, z30 -; CHECK-NEXT: frintx z28.s, p0/m, z30.s -; CHECK-NEXT: movprfx z30, z8 -; CHECK-NEXT: frintx z30.s, p0/m, z8.s -; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, z29.s -; CHECK-NEXT: movprfx z31, z0 -; CHECK-NEXT: fcvtzs z31.d, p0/m, z0.s -; CHECK-NEXT: str z0, [sp, #2, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: fcmge p2.s, p0/z, z1.s, z29.s -; CHECK-NEXT: fcmge p3.s, p0/z, z24.s, z29.s -; CHECK-NEXT: fcmge p5.s, p0/z, z5.s, z29.s -; CHECK-NEXT: frintx z26.s, p0/m, z26.s -; CHECK-NEXT: movprfx z10, z1 -; CHECK-NEXT: fcvtzs z10.d, p0/m, z1.s -; CHECK-NEXT: movprfx z11, z24 -; CHECK-NEXT: fcvtzs z11.d, p0/m, z24.s -; CHECK-NEXT: movprfx z12, z5 -; CHECK-NEXT: fcvtzs z12.d, p0/m, z5.s -; CHECK-NEXT: movprfx z15, z28 -; CHECK-NEXT: fcvtzs z15.d, p0/m, z28.s -; CHECK-NEXT: str z1, [sp, #1, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: not p4.b, p0/z, p1.b -; CHECK-NEXT: fcmgt p1.s, p0/z, z1.s, z25.s -; CHECK-NEXT: fcmgt p9.s, p0/z, z5.s, z25.s -; CHECK-NEXT: not p2.b, p0/z, p2.b -; 
CHECK-NEXT: sel z0.d, p4, z27.d, z31.d -; CHECK-NEXT: fcmge p4.s, p0/z, z26.s, z29.s -; CHECK-NEXT: not p3.b, p0/z, p3.b -; CHECK-NEXT: not p5.b, p0/z, p5.b -; CHECK-NEXT: movprfx z13, z26 -; CHECK-NEXT: fcvtzs z13.d, p0/m, z26.s -; CHECK-NEXT: sel z31.d, p2, z27.d, z10.d -; CHECK-NEXT: uunpklo z10.d, z4.s -; CHECK-NEXT: sel z8.d, p3, z27.d, z11.d -; CHECK-NEXT: fcmge p3.s, p0/z, z28.s, z29.s -; CHECK-NEXT: sel z11.d, p5, z27.d, z12.d -; CHECK-NEXT: movprfx z4, z9 -; CHECK-NEXT: frintx z4.s, p0/m, z9.s -; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: not p5.b, p0/z, p4.b -; CHECK-NEXT: fcmge p4.s, p0/z, z30.s, z29.s -; CHECK-NEXT: fcmgt p2.s, p0/z, z24.s, z25.s -; CHECK-NEXT: sel z12.d, p5, z27.d, z13.d -; CHECK-NEXT: uunpkhi z13.d, z17.s -; CHECK-NEXT: movprfx z9, z10 -; CHECK-NEXT: frintx z9.s, p0/m, z10.s -; CHECK-NEXT: movprfx z10, z14 -; CHECK-NEXT: frintx z10.s, p0/m, z14.s -; CHECK-NEXT: uunpkhi z17.d, z6.s -; CHECK-NEXT: not p3.b, p0/z, p3.b -; CHECK-NEXT: uunpklo z14.d, z6.s -; CHECK-NEXT: movprfx z6, z16 -; CHECK-NEXT: frintx z6.s, p0/m, z16.s -; CHECK-NEXT: uunpklo z16.d, z7.s +; CHECK-NEXT: movprfx z2, z27 +; CHECK-NEXT: frintx z2.s, p0/m, z27.s +; CHECK-NEXT: mov z31.d, #0x8000000000000000 +; CHECK-NEXT: movprfx z27, z9 +; CHECK-NEXT: frintx z27.s, p0/m, z9.s +; CHECK-NEXT: movprfx z9, z11 +; CHECK-NEXT: frintx z9.s, p0/m, z11.s +; CHECK-NEXT: movprfx z11, z12 +; CHECK-NEXT: frintx z11.s, p0/m, z12.s +; CHECK-NEXT: uunpklo z15.d, z7.s ; CHECK-NEXT: uunpkhi z7.d, z7.s -; CHECK-NEXT: sel z3.d, p3, z27.d, z15.d -; CHECK-NEXT: fcmge p3.s, p0/z, z4.s, z29.s -; CHECK-NEXT: frintx z13.s, p0/m, z13.s -; CHECK-NEXT: movprfx z15, z30 -; CHECK-NEXT: fcvtzs z15.d, p0/m, z30.s -; CHECK-NEXT: fcmge p5.s, p0/z, z9.s, z29.s -; CHECK-NEXT: fcmge p6.s, p0/z, z10.s, z29.s -; CHECK-NEXT: frintx z17.s, p0/m, z17.s -; CHECK-NEXT: movprfx z18, z4 -; CHECK-NEXT: fcvtzs z18.d, p0/m, z4.s -; CHECK-NEXT: movprfx z20, z10 -; CHECK-NEXT: fcvtzs z20.d, p0/m, z10.s 
-; CHECK-NEXT: frintx z16.s, p0/m, z16.s -; CHECK-NEXT: not p4.b, p0/z, p4.b -; CHECK-NEXT: movprfx z19, z14 -; CHECK-NEXT: frintx z19.s, p0/m, z14.s -; CHECK-NEXT: movprfx z14, z9 -; CHECK-NEXT: fcvtzs z14.d, p0/m, z9.s -; CHECK-NEXT: fcmge p7.s, p0/z, z6.s, z29.s -; CHECK-NEXT: fcmge p8.s, p0/z, z13.s, z29.s +; CHECK-NEXT: mov z1.d, #0x8000000000000000 +; CHECK-NEXT: mov z29.d, #0x8000000000000000 +; CHECK-NEXT: fcmge p3.s, p0/z, z26.s, z10.s +; CHECK-NEXT: mov z30.d, #0x8000000000000000 +; CHECK-NEXT: fcmge p5.s, p0/z, z0.s, z10.s +; CHECK-NEXT: mov z8.d, #0x8000000000000000 +; CHECK-NEXT: fcmge p1.s, p0/z, z24.s, z10.s +; CHECK-NEXT: movprfx z12, z13 +; CHECK-NEXT: frintx z12.s, p0/m, z13.s +; CHECK-NEXT: fcmge p2.s, p0/z, z25.s, z10.s +; CHECK-NEXT: fcmge p4.s, p0/z, z2.s, z10.s +; CHECK-NEXT: movprfx z13, z14 +; CHECK-NEXT: frintx z13.s, p0/m, z14.s +; CHECK-NEXT: uunpklo z17.d, z5.s +; CHECK-NEXT: uunpkhi z18.d, z5.s ; CHECK-NEXT: movprfx z21, z7 ; CHECK-NEXT: frintx z21.s, p0/m, z7.s -; CHECK-NEXT: not p3.b, p0/z, p3.b -; CHECK-NEXT: not p6.b, p0/z, p6.b -; CHECK-NEXT: mov z15.d, p4/m, z27.d -; CHECK-NEXT: fcmge p4.s, p0/z, z17.s, z29.s -; CHECK-NEXT: not p5.b, p0/z, p5.b -; CHECK-NEXT: sel z7.d, p3, z27.d, z18.d -; CHECK-NEXT: movprfx z0, z17 -; CHECK-NEXT: fcvtzs z0.d, p0/m, z17.s -; CHECK-NEXT: sel z18.d, p6, z27.d, z20.d -; CHECK-NEXT: movprfx z20, z6 -; CHECK-NEXT: fcvtzs z20.d, p0/m, z6.s -; CHECK-NEXT: fcmge p6.s, p0/z, z16.s, z29.s -; CHECK-NEXT: fcmge p3.s, p0/z, z19.s, z29.s -; CHECK-NEXT: mov z14.d, p5/m, z27.d -; CHECK-NEXT: not p5.b, p0/z, p7.b -; CHECK-NEXT: not p7.b, p0/z, p8.b -; CHECK-NEXT: fcmge p8.s, p0/z, z21.s, z29.s -; CHECK-NEXT: movprfx z1, z16 -; CHECK-NEXT: fcvtzs z1.d, p0/m, z16.s -; CHECK-NEXT: movprfx z22, z13 -; CHECK-NEXT: fcvtzs z22.d, p0/m, z13.s -; CHECK-NEXT: movprfx z23, z19 -; CHECK-NEXT: fcvtzs z23.d, p0/m, z19.s -; CHECK-NEXT: not p4.b, p0/z, p4.b -; CHECK-NEXT: movprfx z2, z21 -; CHECK-NEXT: fcvtzs z2.d, p0/m, z21.s 
-; CHECK-NEXT: mov z29.d, #0x7fffffffffffffff -; CHECK-NEXT: mov z20.d, p5/m, z27.d -; CHECK-NEXT: not p5.b, p0/z, p6.b -; CHECK-NEXT: mov z0.d, p4/m, z27.d -; CHECK-NEXT: fcmgt p4.s, p0/z, z16.s, z25.s -; CHECK-NEXT: not p3.b, p0/z, p3.b -; CHECK-NEXT: not p6.b, p0/z, p8.b -; CHECK-NEXT: mov z1.d, p5/m, z27.d -; CHECK-NEXT: mov z22.d, p7/m, z27.d -; CHECK-NEXT: mov z23.d, p3/m, z27.d -; CHECK-NEXT: fcmgt p3.s, p0/z, z21.s, z25.s -; CHECK-NEXT: fcmuo p5.s, p0/z, z16.s, z16.s -; CHECK-NEXT: mov z2.d, p6/m, z27.d -; CHECK-NEXT: sel z27.d, p1, z29.d, z31.d -; CHECK-NEXT: fcmgt p1.s, p0/z, z17.s, z25.s -; CHECK-NEXT: mov z1.d, p4/m, z29.d -; CHECK-NEXT: fcmgt p6.s, p0/z, z26.s, z25.s -; CHECK-NEXT: fcmgt p7.s, p0/z, z30.s, z25.s -; CHECK-NEXT: sel z31.d, p2, z29.d, z8.d -; CHECK-NEXT: fcmgt p2.s, p0/z, z13.s, z25.s -; CHECK-NEXT: fcmuo p8.s, p0/z, z21.s, z21.s -; CHECK-NEXT: mov z2.d, p3/m, z29.d -; CHECK-NEXT: fcmuo p4.s, p0/z, z17.s, z17.s -; CHECK-NEXT: fcmgt p3.s, p0/z, z19.s, z25.s -; CHECK-NEXT: mov z0.d, p1/m, z29.d -; CHECK-NEXT: fcmgt p1.s, p0/z, z6.s, z25.s -; CHECK-NEXT: mov z1.d, p5/m, #0 // =0x0 -; CHECK-NEXT: sel z8.d, p9, z29.d, z11.d -; CHECK-NEXT: sel z11.d, p6, z29.d, z12.d -; CHECK-NEXT: sel z12.d, p7, z29.d, z15.d -; CHECK-NEXT: fcmgt p5.s, p0/z, z10.s, z25.s -; CHECK-NEXT: sel z15.d, p2, z29.d, z22.d -; CHECK-NEXT: fcmuo p2.s, p0/z, z13.s, z13.s -; CHECK-NEXT: str z1, [x8, #14, mul vl] -; CHECK-NEXT: mov z2.d, p8/m, #0 // =0x0 +; CHECK-NEXT: uunpklo z19.d, z6.s +; CHECK-NEXT: uunpkhi z20.d, z6.s +; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: fcvtzs z31.d, p3/m, z26.s +; CHECK-NEXT: fcmge p3.s, p0/z, z11.s, z10.s +; CHECK-NEXT: mov z5.d, #0x8000000000000000 +; CHECK-NEXT: frintx z15.s, p0/m, z15.s +; CHECK-NEXT: fcvtzs z1.d, p5/m, z0.s +; CHECK-NEXT: fcvtzs z29.d, p1/m, z24.s +; CHECK-NEXT: fcvtzs z30.d, p2/m, z25.s +; CHECK-NEXT: fcvtzs z8.d, p4/m, z2.s +; CHECK-NEXT: fcmge p1.s, p0/z, z27.s, z10.s +; CHECK-NEXT: mov z4.d, 
#0x8000000000000000 +; CHECK-NEXT: fcmge p2.s, p0/z, z9.s, z10.s +; CHECK-NEXT: mov z16.d, #0x8000000000000000 +; CHECK-NEXT: fcmge p4.s, p0/z, z12.s, z10.s +; CHECK-NEXT: mov z6.d, #0x8000000000000000 +; CHECK-NEXT: fcmge p5.s, p0/z, z13.s, z10.s +; CHECK-NEXT: mov z14.d, #0x8000000000000000 +; CHECK-NEXT: frintx z17.s, p0/m, z17.s +; CHECK-NEXT: frintx z18.s, p0/m, z18.s +; CHECK-NEXT: frintx z19.s, p0/m, z19.s +; CHECK-NEXT: frintx z20.s, p0/m, z20.s +; CHECK-NEXT: mov z28.d, #0x8000000000000000 +; CHECK-NEXT: fcvtzs z5.d, p3/m, z11.s +; CHECK-NEXT: fcmge p3.s, p0/z, z21.s, z10.s +; CHECK-NEXT: mov z3.s, w9 +; CHECK-NEXT: fcmge p6.s, p0/z, z15.s, z10.s +; CHECK-NEXT: mov z22.d, #0x8000000000000000 +; CHECK-NEXT: fcvtzs z4.d, p1/m, z27.s +; CHECK-NEXT: fcvtzs z16.d, p2/m, z9.s +; CHECK-NEXT: fcvtzs z6.d, p4/m, z12.s +; CHECK-NEXT: fcvtzs z14.d, p5/m, z13.s +; CHECK-NEXT: fcmge p1.s, p0/z, z17.s, z10.s +; CHECK-NEXT: fcmge p2.s, p0/z, z18.s, z10.s +; CHECK-NEXT: mov z23.d, #0x8000000000000000 +; CHECK-NEXT: fcmge p4.s, p0/z, z19.s, z10.s +; CHECK-NEXT: fcmge p5.s, p0/z, z20.s, z10.s +; CHECK-NEXT: mov z10.d, #0x8000000000000000 +; CHECK-NEXT: mov z0.d, #0x8000000000000000 +; CHECK-NEXT: fcvtzs z28.d, p3/m, z21.s +; CHECK-NEXT: mov z7.d, #0x7fffffffffffffff +; CHECK-NEXT: fcmgt p11.s, p0/z, z21.s, z3.s +; CHECK-NEXT: fcvtzs z22.d, p6/m, z15.s +; CHECK-NEXT: fcmgt p3.s, p0/z, z15.s, z3.s +; CHECK-NEXT: fcmuo p6.s, p0/z, z15.s, z15.s +; CHECK-NEXT: mov z15.d, #0x8000000000000000 +; CHECK-NEXT: fcmgt p7.s, p0/z, z24.s, z3.s +; CHECK-NEXT: fcvtzs z23.d, p2/m, z18.s +; CHECK-NEXT: fcvtzs z10.d, p5/m, z20.s +; CHECK-NEXT: fcmgt p2.s, p0/z, z9.s, z3.s +; CHECK-NEXT: fcmgt p5.s, p0/z, z20.s, z3.s +; CHECK-NEXT: fcvtzs z0.d, p4/m, z19.s +; CHECK-NEXT: fcmuo p4.s, p0/z, z21.s, z21.s +; CHECK-NEXT: mov z28.d, p11/m, z7.d +; CHECK-NEXT: sel z21.d, p3, z7.d, z22.d +; CHECK-NEXT: fcmgt p3.s, p0/z, z19.s, z3.s +; CHECK-NEXT: fcvtzs z15.d, p1/m, z17.s +; CHECK-NEXT: fcmuo p1.s, 
p0/z, z20.s, z20.s +; CHECK-NEXT: mov z29.d, p7/m, z7.d +; CHECK-NEXT: fcmgt p7.s, p0/z, z18.s, z3.s +; CHECK-NEXT: mov z16.d, p2/m, z7.d +; CHECK-NEXT: fcmgt p2.s, p0/z, z17.s, z3.s +; CHECK-NEXT: mov z10.d, p5/m, z7.d +; CHECK-NEXT: mov z28.d, p4/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p4.s, p0/z, z19.s, z19.s +; CHECK-NEXT: mov z0.d, p3/m, z7.d +; CHECK-NEXT: fcmuo p3.s, p0/z, z18.s, z18.s +; CHECK-NEXT: fcmgt p5.s, p0/z, z13.s, z3.s +; CHECK-NEXT: mov z21.d, p6/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p10.s, p0/z, z2.s, z3.s +; CHECK-NEXT: fcmgt p8.s, p0/z, z25.s, z3.s +; CHECK-NEXT: str z28, [x8, #15, mul vl] +; CHECK-NEXT: mov z10.d, p1/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p1.s, p0/z, z17.s, z17.s +; CHECK-NEXT: sel z19.d, p7, z7.d, z23.d +; CHECK-NEXT: sel z28.d, p2, z7.d, z15.d +; CHECK-NEXT: fcmgt p2.s, p0/z, z12.s, z3.s +; CHECK-NEXT: str z21, [x8, #14, mul vl] ; CHECK-NEXT: mov z0.d, p4/m, #0 // =0x0 -; CHECK-NEXT: sel z1.d, p1, z29.d, z20.d -; CHECK-NEXT: fcmgt p1.s, p0/z, z9.s, z25.s -; CHECK-NEXT: fcmuo p6.s, p0/z, z19.s, z19.s -; CHECK-NEXT: sel z16.d, p3, z29.d, z23.d -; CHECK-NEXT: fcmuo p3.s, p0/z, z6.s, z6.s -; CHECK-NEXT: fcmgt p4.s, p0/z, z4.s, z25.s -; CHECK-NEXT: str z2, [x8, #15, mul vl] -; CHECK-NEXT: sel z2.d, p5, z29.d, z18.d -; CHECK-NEXT: fcmuo p5.s, p0/z, z10.s, z10.s -; CHECK-NEXT: str z0, [x8, #13, mul vl] -; CHECK-NEXT: mov z15.d, p2/m, #0 // =0x0 -; CHECK-NEXT: fcmuo p2.s, p0/z, z9.s, z9.s -; CHECK-NEXT: sel z0.d, p1, z29.d, z14.d -; CHECK-NEXT: mov z16.d, p6/m, #0 // =0x0 -; CHECK-NEXT: fcmuo p1.s, p0/z, z4.s, z4.s -; CHECK-NEXT: mov z1.d, p3/m, #0 // =0x0 -; CHECK-NEXT: fcmgt p3.s, p0/z, z28.s, z25.s -; CHECK-NEXT: sel z4.d, p4, z29.d, z7.d -; CHECK-NEXT: str z15, [x8, #11, mul vl] -; CHECK-NEXT: mov z2.d, p5/m, #0 // =0x0 -; CHECK-NEXT: fcmuo p5.s, p0/z, z28.s, z28.s -; CHECK-NEXT: str z16, [x8, #12, mul vl] -; CHECK-NEXT: mov z0.d, p2/m, #0 // =0x0 -; CHECK-NEXT: fcmuo p4.s, p0/z, z30.s, z30.s -; CHECK-NEXT: str z1, [x8, #10, mul vl] -; 
CHECK-NEXT: mov z4.d, p1/m, #0 // =0x0 -; CHECK-NEXT: fcmuo p1.s, p0/z, z5.s, z5.s -; CHECK-NEXT: sel z1.d, p3, z29.d, z3.d -; CHECK-NEXT: ldr z3, [sp, #2, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: str z2, [x8, #9, mul vl] +; CHECK-NEXT: mov z14.d, p5/m, z7.d +; CHECK-NEXT: str z10, [x8, #13, mul vl] +; CHECK-NEXT: fcmgt p5.s, p0/z, z11.s, z3.s +; CHECK-NEXT: fcmuo p4.s, p0/z, z13.s, z13.s +; CHECK-NEXT: mov z19.d, p3/m, #0 // =0x0 +; CHECK-NEXT: mov z28.d, p1/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p1.s, p0/z, z27.s, z3.s +; CHECK-NEXT: str z0, [x8, #12, mul vl] +; CHECK-NEXT: fcmuo p3.s, p0/z, z12.s, z12.s +; CHECK-NEXT: sel z0.d, p2, z7.d, z6.d +; CHECK-NEXT: fcmuo p2.s, p0/z, z11.s, z11.s +; CHECK-NEXT: fcmgt p9.s, p0/z, z26.s, z3.s +; CHECK-NEXT: mov z30.d, p8/m, z7.d +; CHECK-NEXT: str z19, [x8, #11, mul vl] +; CHECK-NEXT: mov z5.d, p5/m, z7.d +; CHECK-NEXT: fcmuo p5.s, p0/z, z9.s, z9.s +; CHECK-NEXT: str z28, [x8, #10, mul vl] +; CHECK-NEXT: mov z4.d, p1/m, z7.d +; CHECK-NEXT: fcmuo p1.s, p0/z, z2.s, z2.s +; CHECK-NEXT: ldr z2, [sp] // 16-byte Folded Reload +; CHECK-NEXT: mov z14.d, p4/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p4.s, p0/z, z27.s, z27.s +; CHECK-NEXT: mov z0.d, p3/m, #0 // =0x0 +; CHECK-NEXT: mov z5.d, p2/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p2.s, p0/z, z26.s, z26.s +; CHECK-NEXT: mov z16.d, p5/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p5.s, p0/z, z25.s, z25.s +; CHECK-NEXT: mov z31.d, p9/m, z7.d +; CHECK-NEXT: str z14, [x8, #9, mul vl] +; CHECK-NEXT: fcmgt p3.s, p0/z, z2.s, z3.s +; CHECK-NEXT: mov z8.d, p10/m, z7.d ; CHECK-NEXT: str z0, [x8, #8, mul vl] -; CHECK-NEXT: fcmuo p3.s, p0/z, z26.s, z26.s -; CHECK-NEXT: ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: str z4, [x8, #7, mul vl] -; CHECK-NEXT: mov z12.d, p4/m, #0 // =0x0 -; CHECK-NEXT: fcmgt p2.s, p0/z, z3.s, z25.s -; CHECK-NEXT: mov z1.d, p5/m, #0 // =0x0 +; CHECK-NEXT: mov z4.d, p4/m, #0 // =0x0 ; CHECK-NEXT: fcmuo p4.s, p0/z, z24.s, z24.s +; CHECK-NEXT: str z5, [x8, #7, mul vl] +; 
CHECK-NEXT: fcmuo p0.s, p0/z, z2.s, z2.s +; CHECK-NEXT: mov z31.d, p2/m, #0 // =0x0 +; CHECK-NEXT: str z16, [x8, #6, mul vl] ; CHECK-NEXT: mov z8.d, p1/m, #0 // =0x0 -; CHECK-NEXT: fcmuo p5.s, p0/z, z0.s, z0.s -; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: str z12, [x8, #6, mul vl] -; CHECK-NEXT: str z1, [x8, #5, mul vl] -; CHECK-NEXT: fcmuo p0.s, p0/z, z3.s, z3.s -; CHECK-NEXT: mov z11.d, p3/m, #0 // =0x0 -; CHECK-NEXT: str z8, [x8, #3, mul vl] -; CHECK-NEXT: mov z31.d, p4/m, #0 // =0x0 -; CHECK-NEXT: mov z0.d, p2/m, z29.d -; CHECK-NEXT: str z11, [x8, #4, mul vl] -; CHECK-NEXT: mov z27.d, p5/m, #0 // =0x0 -; CHECK-NEXT: str z31, [x8, #2, mul vl] +; CHECK-NEXT: mov z30.d, p5/m, #0 // =0x0 +; CHECK-NEXT: str z4, [x8, #5, mul vl] +; CHECK-NEXT: sel z0.d, p3, z7.d, z1.d +; CHECK-NEXT: str z31, [x8, #3, mul vl] +; CHECK-NEXT: mov z29.d, p4/m, #0 // =0x0 +; CHECK-NEXT: str z8, [x8, #4, mul vl] +; CHECK-NEXT: str z30, [x8, #2, mul vl] ; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 -; CHECK-NEXT: str z27, [x8, #1, mul vl] +; CHECK-NEXT: str z29, [x8, #1, mul vl] ; CHECK-NEXT: str z0, [x8] -; CHECK-NEXT: addvl sp, sp, #3 +; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -1183,6 +1024,8 @@ define @llrint_v32i64_v32f32( %x) { ; CHECK-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p11, [sp] // 2-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #1, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p9, [sp, #2, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload @@ -1202,20 +1045,17 @@ define @llrint_v1i64_v1f64( %x) { ; 
CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov x8, #-4332462841530417152 // =0xc3e0000000000000 -; CHECK-NEXT: mov z2.d, #0x8000000000000000 ; CHECK-NEXT: mov z1.d, x8 ; CHECK-NEXT: mov x8, #4890909195324358655 // =0x43dfffffffffffff ; CHECK-NEXT: frintx z0.d, p0/m, z0.d -; CHECK-NEXT: mov z3.d, x8 +; CHECK-NEXT: mov z2.d, x8 ; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, z1.d -; CHECK-NEXT: movprfx z1, z0 -; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.d -; CHECK-NEXT: fcmgt p2.d, p0/z, z0.d, z3.d -; CHECK-NEXT: mov z3.d, #0x7fffffffffffffff -; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z1.d, #0x8000000000000000 +; CHECK-NEXT: fcvtzs z1.d, p1/m, z0.d +; CHECK-NEXT: fcmgt p1.d, p0/z, z0.d, z2.d +; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff ; CHECK-NEXT: fcmuo p0.d, p0/z, z0.d, z0.d -; CHECK-NEXT: mov z1.d, p1/m, z2.d -; CHECK-NEXT: sel z0.d, p2, z3.d, z1.d +; CHECK-NEXT: sel z0.d, p1, z2.d, z1.d ; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %a = call @llvm.llrint.nxv1i64.nxv1f64( %x) @@ -1228,20 +1068,17 @@ define @llrint_v2i64_v2f64( %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov x8, #-4332462841530417152 // =0xc3e0000000000000 -; CHECK-NEXT: mov z2.d, #0x8000000000000000 ; CHECK-NEXT: mov z1.d, x8 ; CHECK-NEXT: mov x8, #4890909195324358655 // =0x43dfffffffffffff ; CHECK-NEXT: frintx z0.d, p0/m, z0.d -; CHECK-NEXT: mov z3.d, x8 +; CHECK-NEXT: mov z2.d, x8 ; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, z1.d -; CHECK-NEXT: movprfx z1, z0 -; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.d -; CHECK-NEXT: fcmgt p2.d, p0/z, z0.d, z3.d -; CHECK-NEXT: mov z3.d, #0x7fffffffffffffff -; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z1.d, #0x8000000000000000 +; CHECK-NEXT: fcvtzs z1.d, p1/m, z0.d +; CHECK-NEXT: fcmgt p1.d, p0/z, z0.d, z2.d +; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff ; CHECK-NEXT: fcmuo p0.d, p0/z, z0.d, z0.d -; CHECK-NEXT: mov z1.d, p1/m, z2.d -; CHECK-NEXT: sel z0.d, p2, z3.d, z1.d +; CHECK-NEXT: sel z0.d, p1, z2.d, z1.d ; CHECK-NEXT: 
mov z0.d, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %a = call @llvm.llrint.nxv2i64.nxv2f64( %x) @@ -1252,41 +1089,28 @@ declare @llvm.llrint.nxv2i64.nxv2f64() define @llrint_v4i64_v4f64( %x) { ; CHECK-LABEL: llrint_v4i64_v4f64: ; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG -; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov x8, #-4332462841530417152 // =0xc3e0000000000000 -; CHECK-NEXT: mov z6.d, #0x7fffffffffffffff +; CHECK-NEXT: mov z3.d, #0x8000000000000000 ; CHECK-NEXT: mov z2.d, x8 ; CHECK-NEXT: mov x8, #4890909195324358655 // =0x43dfffffffffffff +; CHECK-NEXT: mov z4.d, #0x8000000000000000 ; CHECK-NEXT: frintx z0.d, p0/m, z0.d ; CHECK-NEXT: frintx z1.d, p0/m, z1.d -; CHECK-NEXT: mov z3.d, x8 +; CHECK-NEXT: mov z5.d, x8 ; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, z2.d ; CHECK-NEXT: fcmge p2.d, p0/z, z1.d, z2.d -; CHECK-NEXT: mov z2.d, #0x8000000000000000 -; CHECK-NEXT: movprfx z4, z0 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z0.d -; CHECK-NEXT: movprfx z5, z1 -; CHECK-NEXT: fcvtzs z5.d, p0/m, z1.d -; CHECK-NEXT: fcmgt p3.d, p0/z, z0.d, z3.d -; CHECK-NEXT: fcmgt p4.d, p0/z, z1.d, z3.d -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: sel z3.d, p1, z2.d, z4.d -; CHECK-NEXT: fcmuo p1.d, p0/z, z0.d, z0.d +; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff +; CHECK-NEXT: fcmuo p3.d, p0/z, z0.d, z0.d +; CHECK-NEXT: fcvtzs z3.d, p1/m, z0.d +; CHECK-NEXT: fcmgt p1.d, p0/z, z0.d, z5.d +; CHECK-NEXT: fcvtzs z4.d, p2/m, z1.d +; CHECK-NEXT: fcmgt p2.d, p0/z, z1.d, z5.d ; CHECK-NEXT: fcmuo p0.d, p0/z, z1.d, z1.d -; CHECK-NEXT: sel z2.d, p2, z2.d, z5.d -; CHECK-NEXT: sel z0.d, p3, z6.d, z3.d -; CHECK-NEXT: sel z1.d, p4, z6.d, z2.d -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte 
Folded Reload -; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 +; CHECK-NEXT: sel z0.d, p1, z2.d, z3.d +; CHECK-NEXT: sel z1.d, p2, z2.d, z4.d +; CHECK-NEXT: mov z0.d, p3/m, #0 // =0x0 ; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0 -; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %a = call @llvm.llrint.nxv4i64.nxv4f64( %x) ret %a @@ -1298,7 +1122,6 @@ define @llrint_v8i64_v8f64( %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill @@ -1308,52 +1131,42 @@ define @llrint_v8i64_v8f64( %x) { ; CHECK-NEXT: mov x8, #-4332462841530417152 // =0xc3e0000000000000 ; CHECK-NEXT: mov z5.d, #0x8000000000000000 ; CHECK-NEXT: mov z4.d, x8 +; CHECK-NEXT: mov z6.d, #0x8000000000000000 ; CHECK-NEXT: mov x8, #4890909195324358655 // =0x43dfffffffffffff -; CHECK-NEXT: mov z26.d, #0x7fffffffffffffff ; CHECK-NEXT: frintx z0.d, p0/m, z0.d ; CHECK-NEXT: frintx z1.d, p0/m, z1.d ; CHECK-NEXT: frintx z2.d, p0/m, z2.d ; CHECK-NEXT: frintx z3.d, p0/m, z3.d -; CHECK-NEXT: mov z6.d, x8 +; CHECK-NEXT: mov z25.d, x8 +; CHECK-NEXT: mov z7.d, #0x8000000000000000 +; CHECK-NEXT: mov z24.d, #0x8000000000000000 ; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, z4.d ; CHECK-NEXT: fcmge p2.d, p0/z, z1.d, z4.d ; CHECK-NEXT: fcmge p3.d, p0/z, z2.d, z4.d ; CHECK-NEXT: fcmge p4.d, p0/z, z3.d, z4.d -; CHECK-NEXT: movprfx z4, z0 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z0.d -; CHECK-NEXT: movprfx z7, z1 -; CHECK-NEXT: fcvtzs z7.d, p0/m, z1.d -; CHECK-NEXT: movprfx z24, z2 -; CHECK-NEXT: fcvtzs z24.d, p0/m, z2.d -; CHECK-NEXT: movprfx z25, z3 -; CHECK-NEXT: fcvtzs z25.d, p0/m, z3.d -; CHECK-NEXT: fcmgt p7.d, p0/z, z2.d, z6.d -; CHECK-NEXT: fcmgt p5.d, p0/z, z0.d, z6.d -; CHECK-NEXT: 
fcmgt p6.d, p0/z, z1.d, z6.d -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: not p3.b, p0/z, p3.b -; CHECK-NEXT: mov z4.d, p1/m, z5.d -; CHECK-NEXT: fcmgt p1.d, p0/z, z3.d, z6.d -; CHECK-NEXT: not p4.b, p0/z, p4.b -; CHECK-NEXT: sel z6.d, p2, z5.d, z7.d -; CHECK-NEXT: fcmuo p2.d, p0/z, z0.d, z0.d -; CHECK-NEXT: sel z7.d, p3, z5.d, z24.d -; CHECK-NEXT: fcmuo p3.d, p0/z, z1.d, z1.d -; CHECK-NEXT: sel z5.d, p4, z5.d, z25.d -; CHECK-NEXT: fcmuo p4.d, p0/z, z2.d, z2.d +; CHECK-NEXT: mov z4.d, #0x7fffffffffffffff +; CHECK-NEXT: fcmgt p5.d, p0/z, z3.d, z25.d +; CHECK-NEXT: fcmuo p6.d, p0/z, z1.d, z1.d +; CHECK-NEXT: fcvtzs z5.d, p1/m, z0.d +; CHECK-NEXT: fcvtzs z6.d, p2/m, z1.d +; CHECK-NEXT: fcmgt p1.d, p0/z, z0.d, z25.d +; CHECK-NEXT: fcmgt p2.d, p0/z, z1.d, z25.d +; CHECK-NEXT: fcvtzs z7.d, p3/m, z2.d +; CHECK-NEXT: fcmgt p3.d, p0/z, z2.d, z25.d +; CHECK-NEXT: fcvtzs z24.d, p4/m, z3.d +; CHECK-NEXT: fcmuo p4.d, p0/z, z0.d, z0.d +; CHECK-NEXT: sel z0.d, p1, z4.d, z5.d +; CHECK-NEXT: fcmuo p1.d, p0/z, z2.d, z2.d ; CHECK-NEXT: fcmuo p0.d, p0/z, z3.d, z3.d -; CHECK-NEXT: sel z0.d, p5, z26.d, z4.d -; CHECK-NEXT: sel z1.d, p6, z26.d, z6.d -; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: sel z2.d, p7, z26.d, z7.d -; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: sel z3.d, p1, z26.d, z5.d +; CHECK-NEXT: sel z1.d, p2, z4.d, z6.d +; CHECK-NEXT: sel z2.d, p3, z4.d, z7.d +; CHECK-NEXT: sel z3.d, p5, z4.d, z24.d ; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: mov z0.d, p2/m, #0 // =0x0 -; CHECK-NEXT: mov z1.d, p3/m, #0 // =0x0 -; CHECK-NEXT: mov z2.d, p4/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, p4/m, #0 // =0x0 ; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z1.d, p6/m, #0 // =0x0 +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z2.d, p1/m, #0 // =0x0 ; CHECK-NEXT: mov z3.d, p0/m, #0 // =0x0 
; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -1367,7 +1180,7 @@ define @llrint_v16f64( %x) { ; CHECK-LABEL: llrint_v16f64: ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-2 +; CHECK-NEXT: addvl sp, sp, #-3 ; CHECK-NEXT: str p10, [sp, #1, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p9, [sp, #2, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Folded Spill @@ -1375,109 +1188,93 @@ define @llrint_v16f64( %x) { ; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG +; CHECK-NEXT: str z9, [sp, #1, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z8, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov x8, #-4332462841530417152 // =0xc3e0000000000000 -; CHECK-NEXT: mov z24.d, #0x7fffffffffffffff -; CHECK-NEXT: mov z25.d, x8 +; CHECK-NEXT: mov z26.d, #0x8000000000000000 +; CHECK-NEXT: mov z24.d, x8 ; CHECK-NEXT: mov x8, #4890909195324358655 // =0x43dfffffffffffff -; CHECK-NEXT: movprfx z26, z0 -; CHECK-NEXT: frintx z26.d, p0/m, z0.d -; CHECK-NEXT: movprfx z27, z1 -; CHECK-NEXT: frintx z27.d, p0/m, z1.d +; CHECK-NEXT: mov z27.d, #0x8000000000000000 +; 
CHECK-NEXT: frintx z0.d, p0/m, z0.d +; CHECK-NEXT: frintx z1.d, p0/m, z1.d +; CHECK-NEXT: movprfx z25, z4 +; CHECK-NEXT: frintx z25.d, p0/m, z4.d ; CHECK-NEXT: frintx z2.d, p0/m, z2.d -; CHECK-NEXT: mov z0.d, #0x8000000000000000 -; CHECK-NEXT: mov z1.d, x8 ; CHECK-NEXT: frintx z3.d, p0/m, z3.d -; CHECK-NEXT: movprfx z28, z4 -; CHECK-NEXT: frintx z28.d, p0/m, z4.d ; CHECK-NEXT: frintx z5.d, p0/m, z5.d ; CHECK-NEXT: frintx z6.d, p0/m, z6.d +; CHECK-NEXT: mov z30.d, x8 +; CHECK-NEXT: mov z4.d, #0x8000000000000000 ; CHECK-NEXT: frintx z7.d, p0/m, z7.d -; CHECK-NEXT: fcmge p1.d, p0/z, z26.d, z25.d -; CHECK-NEXT: fcmge p2.d, p0/z, z27.d, z25.d -; CHECK-NEXT: movprfx z4, z26 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z26.d -; CHECK-NEXT: fcmge p5.d, p0/z, z2.d, z25.d -; CHECK-NEXT: movprfx z29, z27 -; CHECK-NEXT: fcvtzs z29.d, p0/m, z27.d -; CHECK-NEXT: fcmgt p3.d, p0/z, z26.d, z1.d -; CHECK-NEXT: fcmge p6.d, p0/z, z3.d, z25.d -; CHECK-NEXT: fcmge p8.d, p0/z, z5.d, z25.d -; CHECK-NEXT: fcmgt p7.d, p0/z, z27.d, z1.d -; CHECK-NEXT: fcmge p9.d, p0/z, z6.d, z25.d -; CHECK-NEXT: movprfx z30, z28 -; CHECK-NEXT: fcvtzs z30.d, p0/m, z28.d -; CHECK-NEXT: fcmge p10.d, p0/z, z7.d, z25.d -; CHECK-NEXT: not p4.b, p0/z, p1.b -; CHECK-NEXT: fcmuo p1.d, p0/z, z26.d, z26.d -; CHECK-NEXT: movprfx z26, z2 -; CHECK-NEXT: fcvtzs z26.d, p0/m, z2.d -; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: movprfx z31, z6 -; CHECK-NEXT: fcvtzs z31.d, p0/m, z6.d -; CHECK-NEXT: movprfx z8, z7 -; CHECK-NEXT: fcvtzs z8.d, p0/m, z7.d -; CHECK-NEXT: mov z4.d, p4/m, z0.d -; CHECK-NEXT: fcmge p4.d, p0/z, z28.d, z25.d -; CHECK-NEXT: not p5.b, p0/z, p5.b -; CHECK-NEXT: mov z29.d, p2/m, z0.d -; CHECK-NEXT: fcmuo p2.d, p0/z, z27.d, z27.d -; CHECK-NEXT: movprfx z27, z3 -; CHECK-NEXT: fcvtzs z27.d, p0/m, z3.d -; CHECK-NEXT: sel z25.d, p5, z0.d, z26.d -; CHECK-NEXT: movprfx z26, z5 -; CHECK-NEXT: fcvtzs z26.d, p0/m, z5.d -; CHECK-NEXT: not p6.b, p0/z, p6.b -; CHECK-NEXT: not p5.b, p0/z, p8.b -; CHECK-NEXT: fcmgt p8.d, p0/z, 
z2.d, z1.d -; CHECK-NEXT: not p4.b, p0/z, p4.b -; CHECK-NEXT: mov z27.d, p6/m, z0.d -; CHECK-NEXT: not p6.b, p0/z, p9.b +; CHECK-NEXT: mov z28.d, #0x8000000000000000 +; CHECK-NEXT: mov z29.d, #0x8000000000000000 +; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, z24.d +; CHECK-NEXT: fcmge p2.d, p0/z, z1.d, z24.d +; CHECK-NEXT: fcmge p5.d, p0/z, z25.d, z24.d +; CHECK-NEXT: fcmge p3.d, p0/z, z2.d, z24.d +; CHECK-NEXT: fcmge p4.d, p0/z, z3.d, z24.d +; CHECK-NEXT: fcmge p7.d, p0/z, z5.d, z24.d +; CHECK-NEXT: mov z31.d, #0x8000000000000000 +; CHECK-NEXT: fcmge p6.d, p0/z, z6.d, z24.d +; CHECK-NEXT: mov z8.d, #0x8000000000000000 +; CHECK-NEXT: mov z9.d, #0x7fffffffffffffff +; CHECK-NEXT: fcmgt p8.d, p0/z, z25.d, z30.d +; CHECK-NEXT: fcmgt p10.d, p0/z, z6.d, z30.d +; CHECK-NEXT: fcvtzs z26.d, p1/m, z0.d +; CHECK-NEXT: fcmgt p1.d, p0/z, z0.d, z30.d +; CHECK-NEXT: fcvtzs z4.d, p2/m, z1.d +; CHECK-NEXT: fcmge p2.d, p0/z, z7.d, z24.d +; CHECK-NEXT: mov z24.d, #0x8000000000000000 +; CHECK-NEXT: fcvtzs z27.d, p3/m, z2.d +; CHECK-NEXT: fcvtzs z28.d, p4/m, z3.d +; CHECK-NEXT: fcvtzs z29.d, p5/m, z25.d +; CHECK-NEXT: fcvtzs z31.d, p7/m, z5.d +; CHECK-NEXT: fcmgt p4.d, p0/z, z1.d, z30.d +; CHECK-NEXT: fcmgt p5.d, p0/z, z2.d, z30.d +; CHECK-NEXT: fcmgt p7.d, p0/z, z3.d, z30.d +; CHECK-NEXT: fcvtzs z8.d, p6/m, z6.d +; CHECK-NEXT: fcmuo p3.d, p0/z, z0.d, z0.d +; CHECK-NEXT: sel z0.d, p1, z9.d, z26.d +; CHECK-NEXT: fcmgt p1.d, p0/z, z5.d, z30.d +; CHECK-NEXT: fcvtzs z24.d, p2/m, z7.d +; CHECK-NEXT: fcmgt p2.d, p0/z, z7.d, z30.d +; CHECK-NEXT: fcmuo p6.d, p0/z, z1.d, z1.d ; CHECK-NEXT: fcmuo p9.d, p0/z, z2.d, z2.d -; CHECK-NEXT: mov z30.d, p4/m, z0.d -; CHECK-NEXT: not p4.b, p0/z, p10.b -; CHECK-NEXT: fcmgt p10.d, p0/z, z3.d, z1.d -; CHECK-NEXT: mov z26.d, p5/m, z0.d -; CHECK-NEXT: fcmgt p5.d, p0/z, z28.d, z1.d -; CHECK-NEXT: mov z31.d, p6/m, z0.d -; CHECK-NEXT: mov z8.d, p4/m, z0.d -; CHECK-NEXT: sel z0.d, p3, z24.d, z4.d -; CHECK-NEXT: fcmgt p3.d, p0/z, z5.d, z1.d -; CHECK-NEXT: fcmgt p4.d, p0/z, 
z6.d, z1.d -; CHECK-NEXT: fcmgt p6.d, p0/z, z7.d, z1.d -; CHECK-NEXT: sel z1.d, p7, z24.d, z29.d -; CHECK-NEXT: fcmuo p7.d, p0/z, z3.d, z3.d -; CHECK-NEXT: sel z2.d, p8, z24.d, z25.d -; CHECK-NEXT: sel z3.d, p10, z24.d, z27.d -; CHECK-NEXT: sel z4.d, p5, z24.d, z30.d -; CHECK-NEXT: fcmuo p5.d, p0/z, z28.d, z28.d -; CHECK-NEXT: fcmuo p8.d, p0/z, z5.d, z5.d -; CHECK-NEXT: fcmuo p10.d, p0/z, z6.d, z6.d -; CHECK-NEXT: sel z5.d, p3, z24.d, z26.d +; CHECK-NEXT: sel z1.d, p4, z9.d, z4.d +; CHECK-NEXT: fcmuo p4.d, p0/z, z3.d, z3.d +; CHECK-NEXT: sel z2.d, p5, z9.d, z27.d +; CHECK-NEXT: sel z3.d, p7, z9.d, z28.d +; CHECK-NEXT: sel z4.d, p8, z9.d, z29.d +; CHECK-NEXT: fcmuo p5.d, p0/z, z25.d, z25.d +; CHECK-NEXT: fcmuo p7.d, p0/z, z5.d, z5.d +; CHECK-NEXT: fcmuo p8.d, p0/z, z6.d, z6.d +; CHECK-NEXT: sel z5.d, p1, z9.d, z31.d +; CHECK-NEXT: sel z6.d, p10, z9.d, z8.d +; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #1, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: fcmuo p0.d, p0/z, z7.d, z7.d -; CHECK-NEXT: sel z6.d, p4, z24.d, z31.d -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: sel z7.d, p6, z24.d, z8.d -; CHECK-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: sel z7.d, p2, z9.d, z24.d +; CHECK-NEXT: ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: mov z1.d, p6/m, #0 // =0x0 ; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: mov z2.d, p9/m, #0 // =0x0 ; CHECK-NEXT: ldr p9, [sp, #2, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: mov z3.d, p7/m, #0 // =0x0 +; CHECK-NEXT: mov z3.d, p4/m, #0 // =0x0 ; CHECK-NEXT: mov z4.d, p5/m, #0 // =0x0 -; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: mov z5.d, p8/m, #0 // =0x0 -; CHECK-NEXT: mov z6.d, p10/m, #0 // =0x0 -; CHECK-NEXT: ldr p10, [sp, #1, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 -; CHECK-NEXT: mov z1.d, p2/m, #0 // =0x0 +; 
CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z5.d, p7/m, #0 // =0x0 +; CHECK-NEXT: mov z6.d, p8/m, #0 // =0x0 ; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z0.d, p3/m, #0 // =0x0 ; CHECK-NEXT: mov z7.d, p0/m, #0 // =0x0 -; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: addvl sp, sp, #2 +; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #3 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %a = call @llvm.llrint.nxv16i64.nxv16f64( %x) @@ -1490,6 +1287,8 @@ define @llrint_v32f64( %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-17 +; CHECK-NEXT: str p11, [sp] // 2-byte Folded Spill +; CHECK-NEXT: str p10, [sp, #1, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p9, [sp, #2, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill @@ -1512,8 +1311,8 @@ define @llrint_v32f64( %x) { ; CHECK-NEXT: str z10, [sp, #14, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z9, [sp, #15, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z8, [sp, #16, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-3 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xa0, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 160 * VG +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 144 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG ; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 
16 - 16 * VG @@ -1526,219 +1325,176 @@ define @llrint_v32f64( %x) { ; CHECK-NEXT: ldr z0, [x0] ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: ldr z2, [x0, #2, mul vl] -; CHECK-NEXT: mov x9, #-4332462841530417152 // =0xc3e0000000000000 -; CHECK-NEXT: ldr z24, [x0, #6, mul vl] ; CHECK-NEXT: ldr z1, [x0, #1, mul vl] -; CHECK-NEXT: mov z7.d, x9 -; CHECK-NEXT: mov z26.d, #0x8000000000000000 -; CHECK-NEXT: ldr z3, [x0, #3, mul vl] +; CHECK-NEXT: ldr z6, [x0, #4, mul vl] +; CHECK-NEXT: mov x9, #-4332462841530417152 // =0xc3e0000000000000 +; CHECK-NEXT: ldr z5, [x0, #3, mul vl] +; CHECK-NEXT: mov z25.d, x9 +; CHECK-NEXT: mov z28.d, #0x8000000000000000 ; CHECK-NEXT: frintx z0.d, p0/m, z0.d -; CHECK-NEXT: movprfx z30, z2 -; CHECK-NEXT: frintx z30.d, p0/m, z2.d -; CHECK-NEXT: ldr z6, [x0, #5, mul vl] -; CHECK-NEXT: movprfx z25, z24 -; CHECK-NEXT: frintx z25.d, p0/m, z24.d -; CHECK-NEXT: movprfx z12, z1 -; CHECK-NEXT: frintx z12.d, p0/m, z1.d -; CHECK-NEXT: ldr z5, [x0, #4, mul vl] -; CHECK-NEXT: frintx z3.d, p0/m, z3.d -; CHECK-NEXT: mov x9, #4890909195324358655 // =0x43dfffffffffffff +; CHECK-NEXT: movprfx z4, z2 +; CHECK-NEXT: frintx z4.d, p0/m, z2.d +; CHECK-NEXT: mov z27.d, #0x8000000000000000 +; CHECK-NEXT: frintx z1.d, p0/m, z1.d ; CHECK-NEXT: frintx z6.d, p0/m, z6.d -; CHECK-NEXT: mov z4.d, x9 -; CHECK-NEXT: fcmge p3.d, p0/z, z0.d, z7.d -; CHECK-NEXT: movprfx z24, z0 -; CHECK-NEXT: fcvtzs z24.d, p0/m, z0.d -; CHECK-NEXT: fcmge p5.d, p0/z, z30.d, z7.d -; CHECK-NEXT: movprfx z28, z30 -; CHECK-NEXT: fcvtzs z28.d, p0/m, z30.d -; CHECK-NEXT: str z0, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: mov z30.d, #0x8000000000000000 ; CHECK-NEXT: frintx z5.d, p0/m, z5.d -; CHECK-NEXT: fcmge p4.d, p0/z, z12.d, z7.d -; CHECK-NEXT: ldr z8, [x0, #7, mul vl] -; CHECK-NEXT: ldr z9, [x0, #15, mul vl] -; CHECK-NEXT: movprfx z27, z12 -; CHECK-NEXT: fcvtzs z27.d, p0/m, z12.d -; CHECK-NEXT: fcmge p6.d, p0/z, z3.d, z7.d -; CHECK-NEXT: fcmge p9.d, p0/z, z6.d, z7.d -; CHECK-NEXT: not p7.b, 
p0/z, p3.b -; CHECK-NEXT: movprfx z31, z3 -; CHECK-NEXT: fcvtzs z31.d, p0/m, z3.d -; CHECK-NEXT: movprfx z15, z6 -; CHECK-NEXT: fcvtzs z15.d, p0/m, z6.d -; CHECK-NEXT: not p5.b, p0/z, p5.b -; CHECK-NEXT: fcmge p8.d, p0/z, z5.d, z7.d -; CHECK-NEXT: movprfx z13, z5 -; CHECK-NEXT: fcvtzs z13.d, p0/m, z5.d -; CHECK-NEXT: sel z0.d, p7, z26.d, z24.d -; CHECK-NEXT: not p4.b, p0/z, p4.b -; CHECK-NEXT: movprfx z17, z25 -; CHECK-NEXT: fcvtzs z17.d, p0/m, z25.d -; CHECK-NEXT: not p3.b, p0/z, p6.b -; CHECK-NEXT: fcmge p6.d, p0/z, z25.d, z7.d -; CHECK-NEXT: movprfx z22, z9 -; CHECK-NEXT: frintx z22.d, p0/m, z9.d -; CHECK-NEXT: sel z29.d, p4, z26.d, z27.d -; CHECK-NEXT: movprfx z27, z8 -; CHECK-NEXT: frintx z27.d, p0/m, z8.d -; CHECK-NEXT: fcmgt p1.d, p0/z, z12.d, z4.d -; CHECK-NEXT: str z0, [sp, #1, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: sel z0.d, p5, z26.d, z28.d -; CHECK-NEXT: not p4.b, p0/z, p8.b -; CHECK-NEXT: ldr z10, [x0, #8, mul vl] -; CHECK-NEXT: not p5.b, p0/z, p9.b -; CHECK-NEXT: sel z24.d, p3, z26.d, z31.d -; CHECK-NEXT: not p3.b, p0/z, p6.b -; CHECK-NEXT: movprfx z2, z22 -; CHECK-NEXT: fcvtzs z2.d, p0/m, z22.d -; CHECK-NEXT: fcmgt p2.d, p0/z, z30.d, z4.d +; CHECK-NEXT: mov z26.d, #0x8000000000000000 +; CHECK-NEXT: mov z2.d, #0x8000000000000000 +; CHECK-NEXT: mov z13.d, #0x8000000000000000 +; CHECK-NEXT: mov z12.d, #0x8000000000000000 +; CHECK-NEXT: mov x10, #4890909195324358655 // =0x43dfffffffffffff ; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: fcmge p7.d, p0/z, z27.d, z7.d -; CHECK-NEXT: sel z31.d, p5, z26.d, z15.d -; CHECK-NEXT: ldr z11, [x0, #9, mul vl] -; CHECK-NEXT: movprfx z28, z10 -; CHECK-NEXT: frintx z28.d, p0/m, z10.d -; CHECK-NEXT: ldr z10, [x0, #10, mul vl] -; CHECK-NEXT: ldr z18, [x0, #11, mul vl] -; CHECK-NEXT: ldr z16, [x0, #13, mul vl] -; CHECK-NEXT: ldr z14, [x0, #14, mul vl] -; CHECK-NEXT: ldr z19, [x0, #12, mul vl] -; CHECK-NEXT: mov z17.d, p3/m, z26.d -; CHECK-NEXT: fcmgt p9.d, p0/z, z3.d, z4.d -; CHECK-NEXT: movprfx 
z8, z11 -; CHECK-NEXT: frintx z8.d, p0/m, z11.d -; CHECK-NEXT: sel z11.d, p4, z26.d, z13.d -; CHECK-NEXT: frintx z10.d, p0/m, z10.d -; CHECK-NEXT: movprfx z13, z18 -; CHECK-NEXT: frintx z13.d, p0/m, z18.d -; CHECK-NEXT: fcmge p5.d, p0/z, z28.d, z7.d -; CHECK-NEXT: movprfx z18, z27 -; CHECK-NEXT: fcvtzs z18.d, p0/m, z27.d +; CHECK-NEXT: fcmge p3.d, p0/z, z4.d, z25.d +; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, z25.d +; CHECK-NEXT: ldr z29, [x0, #7, mul vl] +; CHECK-NEXT: ldr z24, [x0, #6, mul vl] +; CHECK-NEXT: ldr z10, [x0, #9, mul vl] +; CHECK-NEXT: ldr z8, [x0, #8, mul vl] +; CHECK-NEXT: ldr z7, [x0, #5, mul vl] +; CHECK-NEXT: ldr z14, [x0, #15, mul vl] +; CHECK-NEXT: fcmge p2.d, p0/z, z1.d, z25.d +; CHECK-NEXT: fcmge p5.d, p0/z, z6.d, z25.d +; CHECK-NEXT: ldr z15, [x0, #14, mul vl] +; CHECK-NEXT: frintx z29.d, p0/m, z29.d +; CHECK-NEXT: frintx z24.d, p0/m, z24.d +; CHECK-NEXT: movprfx z11, z10 +; CHECK-NEXT: frintx z11.d, p0/m, z10.d +; CHECK-NEXT: fcmge p4.d, p0/z, z5.d, z25.d +; CHECK-NEXT: movprfx z9, z8 +; CHECK-NEXT: frintx z9.d, p0/m, z8.d +; CHECK-NEXT: ldr z16, [x0, #11, mul vl] +; CHECK-NEXT: ldr z20, [x0, #13, mul vl] +; CHECK-NEXT: frintx z7.d, p0/m, z7.d +; CHECK-NEXT: fcvtzs z28.d, p3/m, z4.d +; CHECK-NEXT: mov z10.d, #0x8000000000000000 +; CHECK-NEXT: ldr z18, [x0, #12, mul vl] +; CHECK-NEXT: movprfx z19, z14 +; CHECK-NEXT: frintx z19.d, p0/m, z14.d +; CHECK-NEXT: fcmge p3.d, p0/z, z29.d, z25.d +; CHECK-NEXT: ldr z17, [x0, #10, mul vl] +; CHECK-NEXT: frintx z15.d, p0/m, z15.d +; CHECK-NEXT: fcvtzs z27.d, p2/m, z1.d +; CHECK-NEXT: fcvtzs z30.d, p5/m, z6.d +; CHECK-NEXT: fcmge p2.d, p0/z, z24.d, z25.d +; CHECK-NEXT: fcmge p5.d, p0/z, z11.d, z25.d +; CHECK-NEXT: mov z14.d, #0x8000000000000000 ; CHECK-NEXT: frintx z16.d, p0/m, z16.d -; CHECK-NEXT: movprfx z15, z19 -; CHECK-NEXT: frintx z15.d, p0/m, z19.d -; CHECK-NEXT: movprfx z19, z28 -; CHECK-NEXT: fcvtzs z19.d, p0/m, z28.d -; CHECK-NEXT: movprfx z21, z14 -; CHECK-NEXT: frintx z21.d, p0/m, z14.d -; 
CHECK-NEXT: not p4.b, p0/z, p7.b -; CHECK-NEXT: fcmge p6.d, p0/z, z8.d, z7.d -; CHECK-NEXT: movprfx z20, z8 -; CHECK-NEXT: fcvtzs z20.d, p0/m, z8.d -; CHECK-NEXT: fcmge p7.d, p0/z, z10.d, z7.d -; CHECK-NEXT: fcmge p8.d, p0/z, z13.d, z7.d -; CHECK-NEXT: not p5.b, p0/z, p5.b -; CHECK-NEXT: sel z9.d, p4, z26.d, z18.d -; CHECK-NEXT: fcmge p4.d, p0/z, z16.d, z7.d -; CHECK-NEXT: fcmge p3.d, p0/z, z15.d, z7.d -; CHECK-NEXT: movprfx z0, z16 -; CHECK-NEXT: fcvtzs z0.d, p0/m, z16.d -; CHECK-NEXT: sel z14.d, p5, z26.d, z19.d -; CHECK-NEXT: movprfx z19, z10 -; CHECK-NEXT: fcvtzs z19.d, p0/m, z10.d -; CHECK-NEXT: movprfx z1, z21 -; CHECK-NEXT: fcvtzs z1.d, p0/m, z21.d -; CHECK-NEXT: not p6.b, p0/z, p6.b -; CHECK-NEXT: movprfx z23, z15 -; CHECK-NEXT: fcvtzs z23.d, p0/m, z15.d -; CHECK-NEXT: not p5.b, p0/z, p7.b -; CHECK-NEXT: sel z18.d, p6, z26.d, z20.d -; CHECK-NEXT: fcmge p6.d, p0/z, z21.d, z7.d -; CHECK-NEXT: not p7.b, p0/z, p8.b -; CHECK-NEXT: fcmge p8.d, p0/z, z22.d, z7.d -; CHECK-NEXT: movprfx z20, z13 -; CHECK-NEXT: fcvtzs z20.d, p0/m, z13.d -; CHECK-NEXT: not p4.b, p0/z, p4.b -; CHECK-NEXT: mov z7.d, #0x7fffffffffffffff -; CHECK-NEXT: mov z19.d, p5/m, z26.d -; CHECK-NEXT: not p3.b, p0/z, p3.b -; CHECK-NEXT: mov z0.d, p4/m, z26.d -; CHECK-NEXT: fcmgt p4.d, p0/z, z21.d, z4.d -; CHECK-NEXT: not p5.b, p0/z, p6.b -; CHECK-NEXT: mov z23.d, p3/m, z26.d -; CHECK-NEXT: fcmgt p3.d, p0/z, z22.d, z4.d -; CHECK-NEXT: not p6.b, p0/z, p8.b -; CHECK-NEXT: mov z20.d, p7/m, z26.d -; CHECK-NEXT: fcmuo p8.d, p0/z, z22.d, z22.d -; CHECK-NEXT: mov z1.d, p5/m, z26.d -; CHECK-NEXT: fcmuo p5.d, p0/z, z21.d, z21.d -; CHECK-NEXT: fcmgt p7.d, p0/z, z25.d, z4.d -; CHECK-NEXT: mov z2.d, p6/m, z26.d -; CHECK-NEXT: sel z26.d, p1, z7.d, z29.d -; CHECK-NEXT: fcmgt p1.d, p0/z, z16.d, z4.d -; CHECK-NEXT: ldr z29, [sp] // 16-byte Folded Reload -; CHECK-NEXT: fcmgt p6.d, p0/z, z5.d, z4.d -; CHECK-NEXT: mov z24.d, p9/m, z7.d -; CHECK-NEXT: mov z1.d, p4/m, z7.d -; CHECK-NEXT: fcmuo p4.d, p0/z, z16.d, z16.d -; 
CHECK-NEXT: mov z2.d, p3/m, z7.d -; CHECK-NEXT: fcmgt p3.d, p0/z, z15.d, z4.d -; CHECK-NEXT: mov z17.d, p7/m, z7.d -; CHECK-NEXT: mov z29.d, p2/m, z7.d -; CHECK-NEXT: fcmgt p2.d, p0/z, z13.d, z4.d -; CHECK-NEXT: mov z0.d, p1/m, z7.d -; CHECK-NEXT: fcmgt p1.d, p0/z, z10.d, z4.d -; CHECK-NEXT: mov z1.d, p5/m, #0 // =0x0 -; CHECK-NEXT: mov z11.d, p6/m, z7.d +; CHECK-NEXT: frintx z20.d, p0/m, z20.d +; CHECK-NEXT: fcvtzs z26.d, p4/m, z5.d +; CHECK-NEXT: fcmge p4.d, p0/z, z9.d, z25.d +; CHECK-NEXT: frintx z18.d, p0/m, z18.d +; CHECK-NEXT: mov z31.d, #0x8000000000000000 +; CHECK-NEXT: fcvtzs z2.d, p1/m, z0.d +; CHECK-NEXT: fcmge p1.d, p0/z, z7.d, z25.d +; CHECK-NEXT: mov z8.d, #0x8000000000000000 +; CHECK-NEXT: frintx z17.d, p0/m, z17.d +; CHECK-NEXT: fcvtzs z10.d, p3/m, z29.d +; CHECK-NEXT: fcmge p3.d, p0/z, z19.d, z25.d +; CHECK-NEXT: mov z3.d, x10 +; CHECK-NEXT: fcmge p6.d, p0/z, z15.d, z25.d +; CHECK-NEXT: mov z21.d, #0x8000000000000000 +; CHECK-NEXT: fcvtzs z13.d, p2/m, z24.d +; CHECK-NEXT: fcvtzs z14.d, p5/m, z11.d +; CHECK-NEXT: fcmge p2.d, p0/z, z16.d, z25.d +; CHECK-NEXT: mov z22.d, #0x8000000000000000 +; CHECK-NEXT: fcmge p5.d, p0/z, z20.d, z25.d +; CHECK-NEXT: mov z0.d, #0x8000000000000000 +; CHECK-NEXT: fcvtzs z12.d, p4/m, z9.d +; CHECK-NEXT: fcmge p4.d, p0/z, z18.d, z25.d +; CHECK-NEXT: mov z23.d, #0x8000000000000000 +; CHECK-NEXT: fcvtzs z8.d, p1/m, z7.d +; CHECK-NEXT: fcmge p1.d, p0/z, z17.d, z25.d +; CHECK-NEXT: fcvtzs z31.d, p3/m, z19.d +; CHECK-NEXT: mov z25.d, #0x7fffffffffffffff +; CHECK-NEXT: fcmgt p11.d, p0/z, z19.d, z3.d +; CHECK-NEXT: fcvtzs z21.d, p6/m, z15.d +; CHECK-NEXT: fcmgt p3.d, p0/z, z15.d, z3.d ; CHECK-NEXT: fcmuo p6.d, p0/z, z15.d, z15.d -; CHECK-NEXT: fcmgt p5.d, p0/z, z8.d, z4.d -; CHECK-NEXT: mov z2.d, p8/m, #0 // =0x0 -; CHECK-NEXT: sel z16.d, p3, z7.d, z23.d -; CHECK-NEXT: fcmuo p3.d, p0/z, z10.d, z10.d -; CHECK-NEXT: mov z0.d, p4/m, #0 // =0x0 -; CHECK-NEXT: sel z15.d, p2, z7.d, z20.d -; CHECK-NEXT: fcmuo p2.d, p0/z, z13.d, z13.d 
-; CHECK-NEXT: str z1, [x8, #14, mul vl] -; CHECK-NEXT: sel z1.d, p1, z7.d, z19.d -; CHECK-NEXT: fcmgt p1.d, p0/z, z28.d, z4.d -; CHECK-NEXT: fcmgt p4.d, p0/z, z27.d, z4.d -; CHECK-NEXT: str z2, [x8, #15, mul vl] -; CHECK-NEXT: sel z2.d, p5, z7.d, z18.d -; CHECK-NEXT: mov z16.d, p6/m, #0 // =0x0 -; CHECK-NEXT: fcmuo p5.d, p0/z, z8.d, z8.d +; CHECK-NEXT: mov z15.d, #0x8000000000000000 +; CHECK-NEXT: fcmgt p7.d, p0/z, z1.d, z3.d +; CHECK-NEXT: fcvtzs z22.d, p2/m, z16.d +; CHECK-NEXT: fcvtzs z0.d, p5/m, z20.d +; CHECK-NEXT: fcmgt p2.d, p0/z, z24.d, z3.d +; CHECK-NEXT: fcmgt p5.d, p0/z, z20.d, z3.d +; CHECK-NEXT: fcvtzs z23.d, p4/m, z18.d +; CHECK-NEXT: fcmuo p4.d, p0/z, z19.d, z19.d +; CHECK-NEXT: mov z31.d, p11/m, z25.d +; CHECK-NEXT: sel z19.d, p3, z25.d, z21.d +; CHECK-NEXT: fcmgt p3.d, p0/z, z18.d, z3.d +; CHECK-NEXT: fcvtzs z15.d, p1/m, z17.d +; CHECK-NEXT: fcmuo p1.d, p0/z, z20.d, z20.d +; CHECK-NEXT: mov z27.d, p7/m, z25.d +; CHECK-NEXT: fcmgt p7.d, p0/z, z16.d, z3.d +; CHECK-NEXT: mov z13.d, p2/m, z25.d +; CHECK-NEXT: fcmgt p2.d, p0/z, z17.d, z3.d +; CHECK-NEXT: mov z0.d, p5/m, z25.d +; CHECK-NEXT: mov z31.d, p4/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p4.d, p0/z, z18.d, z18.d +; CHECK-NEXT: sel z20.d, p3, z25.d, z23.d +; CHECK-NEXT: fcmuo p3.d, p0/z, z16.d, z16.d +; CHECK-NEXT: fcmgt p5.d, p0/z, z11.d, z3.d +; CHECK-NEXT: mov z19.d, p6/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p10.d, p0/z, z6.d, z3.d +; CHECK-NEXT: fcmgt p8.d, p0/z, z4.d, z3.d +; CHECK-NEXT: str z31, [x8, #15, mul vl] +; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p1.d, p0/z, z17.d, z17.d +; CHECK-NEXT: sel z18.d, p7, z25.d, z22.d +; CHECK-NEXT: sel z31.d, p2, z25.d, z15.d +; CHECK-NEXT: fcmgt p2.d, p0/z, z9.d, z3.d +; CHECK-NEXT: str z19, [x8, #14, mul vl] +; CHECK-NEXT: mov z20.d, p4/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p4.d, p0/z, z11.d, z11.d ; CHECK-NEXT: str z0, [x8, #13, mul vl] -; CHECK-NEXT: mov z15.d, p2/m, #0 // =0x0 -; CHECK-NEXT: fcmuo p2.d, p0/z, z28.d, z28.d -; CHECK-NEXT: mov 
z1.d, p3/m, #0 // =0x0 -; CHECK-NEXT: fcmgt p3.d, p0/z, z6.d, z4.d -; CHECK-NEXT: sel z0.d, p1, z7.d, z14.d -; CHECK-NEXT: fcmuo p1.d, p0/z, z27.d, z27.d -; CHECK-NEXT: sel z27.d, p4, z7.d, z9.d -; CHECK-NEXT: str z16, [x8, #12, mul vl] -; CHECK-NEXT: fcmuo p4.d, p0/z, z25.d, z25.d -; CHECK-NEXT: str z15, [x8, #11, mul vl] -; CHECK-NEXT: mov z2.d, p5/m, #0 // =0x0 -; CHECK-NEXT: fcmuo p5.d, p0/z, z6.d, z6.d -; CHECK-NEXT: str z1, [x8, #10, mul vl] -; CHECK-NEXT: mov z0.d, p2/m, #0 // =0x0 -; CHECK-NEXT: sel z1.d, p3, z7.d, z31.d -; CHECK-NEXT: fcmuo p3.d, p0/z, z5.d, z5.d -; CHECK-NEXT: ldr z5, [sp, #2, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: mov z27.d, p1/m, #0 // =0x0 -; CHECK-NEXT: str z2, [x8, #9, mul vl] -; CHECK-NEXT: fcmuo p1.d, p0/z, z3.d, z3.d +; CHECK-NEXT: mov z14.d, p5/m, z25.d +; CHECK-NEXT: fcmgt p5.d, p0/z, z29.d, z3.d +; CHECK-NEXT: mov z18.d, p3/m, #0 // =0x0 +; CHECK-NEXT: mov z31.d, p1/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p1.d, p0/z, z7.d, z3.d +; CHECK-NEXT: str z20, [x8, #12, mul vl] +; CHECK-NEXT: fcmuo p3.d, p0/z, z9.d, z9.d +; CHECK-NEXT: sel z0.d, p2, z25.d, z12.d +; CHECK-NEXT: mov z14.d, p4/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p4.d, p0/z, z7.d, z7.d +; CHECK-NEXT: fcmuo p2.d, p0/z, z29.d, z29.d +; CHECK-NEXT: str z18, [x8, #11, mul vl] +; CHECK-NEXT: sel z29.d, p5, z25.d, z10.d +; CHECK-NEXT: fcmuo p5.d, p0/z, z24.d, z24.d +; CHECK-NEXT: str z31, [x8, #10, mul vl] +; CHECK-NEXT: sel z7.d, p1, z25.d, z8.d +; CHECK-NEXT: fcmuo p1.d, p0/z, z6.d, z6.d +; CHECK-NEXT: ldr z6, [sp] // 16-byte Folded Reload +; CHECK-NEXT: str z14, [x8, #9, mul vl] +; CHECK-NEXT: fcmgt p9.d, p0/z, z5.d, z3.d +; CHECK-NEXT: mov z0.d, p3/m, #0 // =0x0 +; CHECK-NEXT: mov z29.d, p2/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p2.d, p0/z, z5.d, z5.d +; CHECK-NEXT: mov z13.d, p5/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p5.d, p0/z, z4.d, z4.d +; CHECK-NEXT: mov z7.d, p4/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p3.d, p0/z, z6.d, z3.d +; CHECK-NEXT: fcmuo p4.d, p0/z, z1.d, z1.d +; 
CHECK-NEXT: fcmuo p0.d, p0/z, z6.d, z6.d ; CHECK-NEXT: str z0, [x8, #8, mul vl] -; CHECK-NEXT: mov z17.d, p4/m, #0 // =0x0 -; CHECK-NEXT: fcmuo p4.d, p0/z, z30.d, z30.d -; CHECK-NEXT: fcmgt p2.d, p0/z, z5.d, z4.d -; CHECK-NEXT: mov z1.d, p5/m, #0 // =0x0 -; CHECK-NEXT: fcmuo p5.d, p0/z, z12.d, z12.d -; CHECK-NEXT: str z27, [x8, #7, mul vl] -; CHECK-NEXT: fcmuo p0.d, p0/z, z5.d, z5.d -; CHECK-NEXT: mov z11.d, p3/m, #0 // =0x0 -; CHECK-NEXT: ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: mov z24.d, p1/m, #0 // =0x0 -; CHECK-NEXT: str z17, [x8, #6, mul vl] -; CHECK-NEXT: mov z29.d, p4/m, #0 // =0x0 -; CHECK-NEXT: str z1, [x8, #5, mul vl] -; CHECK-NEXT: mov z26.d, p5/m, #0 // =0x0 -; CHECK-NEXT: str z11, [x8, #4, mul vl] -; CHECK-NEXT: mov z0.d, p2/m, z7.d -; CHECK-NEXT: str z24, [x8, #3, mul vl] -; CHECK-NEXT: str z29, [x8, #2, mul vl] -; CHECK-NEXT: str z26, [x8, #1, mul vl] +; CHECK-NEXT: mov z28.d, p8/m, z25.d +; CHECK-NEXT: mov z26.d, p9/m, z25.d +; CHECK-NEXT: str z29, [x8, #7, mul vl] +; CHECK-NEXT: mov z30.d, p10/m, z25.d +; CHECK-NEXT: str z13, [x8, #6, mul vl] +; CHECK-NEXT: str z7, [x8, #5, mul vl] +; CHECK-NEXT: sel z0.d, p3, z25.d, z2.d +; CHECK-NEXT: mov z26.d, p2/m, #0 // =0x0 +; CHECK-NEXT: mov z30.d, p1/m, #0 // =0x0 +; CHECK-NEXT: mov z28.d, p5/m, #0 // =0x0 +; CHECK-NEXT: mov z27.d, p4/m, #0 // =0x0 +; CHECK-NEXT: str z26, [x8, #3, mul vl] ; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 +; CHECK-NEXT: str z30, [x8, #4, mul vl] +; CHECK-NEXT: str z28, [x8, #2, mul vl] +; CHECK-NEXT: str z27, [x8, #1, mul vl] ; CHECK-NEXT: str z0, [x8] -; CHECK-NEXT: addvl sp, sp, #3 +; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -1755,6 +1511,8 @@ define @llrint_v32f64( %x) { ; CHECK-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #15, mul 
vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p11, [sp] // 2-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #1, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p9, [sp, #2, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/sve-lrint.ll b/llvm/test/CodeGen/AArch64/sve-lrint.ll index 908ba2392a437..aa5863901b9d3 100644 --- a/llvm/test/CodeGen/AArch64/sve-lrint.ll +++ b/llvm/test/CodeGen/AArch64/sve-lrint.ll @@ -7,20 +7,17 @@ define @lrint_v1f16( %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov w8, #64511 // =0xfbff -; CHECK-NEXT: mov z2.d, #0x8000000000000000 ; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: mov w8, #31743 // =0x7bff ; CHECK-NEXT: frintx z0.h, p0/m, z0.h -; CHECK-NEXT: mov z3.h, w8 +; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h -; CHECK-NEXT: movprfx z1, z0 -; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.h -; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z3.h -; CHECK-NEXT: mov z3.d, #0x7fffffffffffffff -; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z1.d, #0x8000000000000000 +; CHECK-NEXT: fcvtzs z1.d, p1/m, z0.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z2.h +; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff ; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h -; CHECK-NEXT: mov z1.d, p1/m, z2.d -; CHECK-NEXT: sel z0.d, p2, z3.d, z1.d +; CHECK-NEXT: sel z0.d, p1, z2.d, z1.d ; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %a = call @llvm.lrint.nxv1iXLen.nxv1f16( %x) @@ -33,20 +30,17 @@ define @lrint_v2f16( %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov w8, #64511 // =0xfbff -; CHECK-NEXT: mov z2.d, #0x8000000000000000 ; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: mov w8, #31743 // =0x7bff ; CHECK-NEXT: frintx z0.h, p0/m, z0.h -; CHECK-NEXT: mov z3.h, w8 +; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: fcmge p1.h, 
p0/z, z0.h, z1.h -; CHECK-NEXT: movprfx z1, z0 -; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.h -; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z3.h -; CHECK-NEXT: mov z3.d, #0x7fffffffffffffff -; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z1.d, #0x8000000000000000 +; CHECK-NEXT: fcvtzs z1.d, p1/m, z0.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z2.h +; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff ; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h -; CHECK-NEXT: mov z1.d, p1/m, z2.d -; CHECK-NEXT: sel z0.d, p2, z3.d, z1.d +; CHECK-NEXT: sel z0.d, p1, z2.d, z1.d ; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %a = call @llvm.lrint.nxv2iXLen.nxv2f16( %x) @@ -57,43 +51,30 @@ declare @llvm.lrint.nxv2iXLen.nxv2f16() define @lrint_v4f16( %x) { ; CHECK-LABEL: lrint_v4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG -; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: uunpklo z1.d, z0.s ; CHECK-NEXT: uunpkhi z0.d, z0.s ; CHECK-NEXT: mov w8, #64511 // =0xfbff ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: mov w8, #31743 // =0x7bff -; CHECK-NEXT: mov z3.h, w8 -; CHECK-NEXT: mov z6.d, #0x7fffffffffffffff +; CHECK-NEXT: mov z3.d, #0x8000000000000000 +; CHECK-NEXT: mov z4.d, #0x8000000000000000 +; CHECK-NEXT: mov z5.h, w8 ; CHECK-NEXT: frintx z1.h, p0/m, z1.h ; CHECK-NEXT: frintx z0.h, p0/m, z0.h ; CHECK-NEXT: fcmge p1.h, p0/z, z1.h, z2.h ; CHECK-NEXT: fcmge p2.h, p0/z, z0.h, z2.h -; CHECK-NEXT: mov z2.d, #0x8000000000000000 -; CHECK-NEXT: movprfx z4, z1 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z1.h -; CHECK-NEXT: movprfx z5, z0 -; CHECK-NEXT: fcvtzs z5.d, p0/m, z0.h -; CHECK-NEXT: fcmgt p3.h, p0/z, z1.h, z3.h -; CHECK-NEXT: fcmgt p4.h, p0/z, z0.h, z3.h -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: not p2.b, p0/z, p2.b -; 
CHECK-NEXT: sel z3.d, p1, z2.d, z4.d -; CHECK-NEXT: fcmuo p1.h, p0/z, z1.h, z1.h +; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff +; CHECK-NEXT: fcmuo p3.h, p0/z, z1.h, z1.h +; CHECK-NEXT: fcvtzs z3.d, p1/m, z1.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z1.h, z5.h +; CHECK-NEXT: fcvtzs z4.d, p2/m, z0.h +; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z5.h ; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h -; CHECK-NEXT: sel z2.d, p2, z2.d, z5.d -; CHECK-NEXT: sel z0.d, p3, z6.d, z3.d -; CHECK-NEXT: sel z1.d, p4, z6.d, z2.d -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 +; CHECK-NEXT: sel z0.d, p1, z2.d, z3.d +; CHECK-NEXT: sel z1.d, p2, z2.d, z4.d +; CHECK-NEXT: mov z0.d, p3/m, #0 // =0x0 ; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0 -; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %a = call @llvm.lrint.nxv4iXLen.nxv4f16( %x) ret %a @@ -105,7 +86,6 @@ define @lrint_v8f16( %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill @@ -117,8 +97,10 @@ define @lrint_v8f16( %x) { ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov z4.h, w8 ; CHECK-NEXT: mov w8, #31743 // =0x7bff -; CHECK-NEXT: mov z6.h, w8 -; CHECK-NEXT: mov z26.d, #0x7fffffffffffffff +; CHECK-NEXT: mov z6.d, #0x8000000000000000 +; CHECK-NEXT: mov z25.h, w8 +; CHECK-NEXT: mov z7.d, #0x8000000000000000 +; CHECK-NEXT: mov z24.d, #0x8000000000000000 ; CHECK-NEXT: uunpklo z2.d, z1.s ; CHECK-NEXT: uunpkhi z1.d, z1.s ; CHECK-NEXT: uunpklo z3.d, z0.s @@ -133,41 +115,29 @@ define @lrint_v8f16( %x) { ; CHECK-NEXT: fcmge p2.h, p0/z, z1.h, z4.h ; CHECK-NEXT: fcmge p3.h, p0/z, z3.h, z4.h ; CHECK-NEXT: fcmge p4.h, p0/z, z5.h, z4.h -; CHECK-NEXT: movprfx z4, z2 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z2.h -; CHECK-NEXT: movprfx z7, z1 -; CHECK-NEXT: fcvtzs z7.d, p0/m, z1.h -; CHECK-NEXT: movprfx z24, z3 -; CHECK-NEXT: fcvtzs z24.d, p0/m, z3.h -; CHECK-NEXT: movprfx z25, z5 -; CHECK-NEXT: fcvtzs z25.d, p0/m, z5.h -; CHECK-NEXT: fcmgt p7.h, p0/z, z3.h, z6.h -; CHECK-NEXT: fcmgt p5.h, p0/z, z2.h, z6.h -; CHECK-NEXT: fcmgt p6.h, p0/z, z1.h, z6.h -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: not p3.b, p0/z, p3.b -; CHECK-NEXT: mov z4.d, p1/m, z0.d -; CHECK-NEXT: fcmgt p1.h, p0/z, z5.h, z6.h -; CHECK-NEXT: not p4.b, p0/z, p4.b -; CHECK-NEXT: sel z6.d, p2, z0.d, z7.d -; CHECK-NEXT: fcmuo p2.h, p0/z, z2.h, z2.h -; CHECK-NEXT: sel z7.d, p3, z0.d, z24.d -; CHECK-NEXT: fcmuo p3.h, p0/z, z1.h, z1.h -; CHECK-NEXT: sel z24.d, p4, z0.d, z25.d -; CHECK-NEXT: fcmuo p4.h, p0/z, z3.h, z3.h +; CHECK-NEXT: mov z4.d, #0x7fffffffffffffff +; CHECK-NEXT: fcmgt p5.h, p0/z, z5.h, z25.h +; CHECK-NEXT: fcmuo p6.h, p0/z, z1.h, z1.h +; CHECK-NEXT: 
fcvtzs z0.d, p1/m, z2.h +; CHECK-NEXT: fcvtzs z6.d, p2/m, z1.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z2.h, z25.h +; CHECK-NEXT: fcmgt p2.h, p0/z, z1.h, z25.h +; CHECK-NEXT: fcvtzs z7.d, p3/m, z3.h +; CHECK-NEXT: fcmgt p3.h, p0/z, z3.h, z25.h +; CHECK-NEXT: fcvtzs z24.d, p4/m, z5.h +; CHECK-NEXT: fcmuo p4.h, p0/z, z2.h, z2.h +; CHECK-NEXT: mov z0.d, p1/m, z4.d +; CHECK-NEXT: fcmuo p1.h, p0/z, z3.h, z3.h ; CHECK-NEXT: fcmuo p0.h, p0/z, z5.h, z5.h -; CHECK-NEXT: sel z0.d, p5, z26.d, z4.d -; CHECK-NEXT: sel z1.d, p6, z26.d, z6.d -; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: sel z2.d, p7, z26.d, z7.d -; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: sel z3.d, p1, z26.d, z24.d +; CHECK-NEXT: sel z1.d, p2, z4.d, z6.d +; CHECK-NEXT: sel z2.d, p3, z4.d, z7.d +; CHECK-NEXT: sel z3.d, p5, z4.d, z24.d ; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: mov z0.d, p2/m, #0 // =0x0 -; CHECK-NEXT: mov z1.d, p3/m, #0 // =0x0 -; CHECK-NEXT: mov z2.d, p4/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, p4/m, #0 // =0x0 ; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z1.d, p6/m, #0 // =0x0 +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z2.d, p1/m, #0 // =0x0 ; CHECK-NEXT: mov z3.d, p0/m, #0 // =0x0 ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -181,7 +151,7 @@ define @lrint_v16f16( %x) { ; CHECK-LABEL: lrint_v16f16: ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-3 +; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: str p10, [sp, #1, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p9, [sp, #2, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Folded Spill @@ -189,124 +159,110 @@ define @lrint_v16f16( %x) { ; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str z9, [sp, #1, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: str z8, [sp, #2, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG +; CHECK-NEXT: str z10, [sp, #1, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z9, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z8, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG ; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 16 - 24 * VG ; CHECK-NEXT: uunpklo z2.s, z0.h ; CHECK-NEXT: uunpkhi z0.s, z0.h ; CHECK-NEXT: mov w8, #64511 // =0xfbff -; CHECK-NEXT: uunpklo z4.s, z1.h -; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uunpklo z3.s, z1.h ; CHECK-NEXT: uunpkhi z1.s, z1.h -; CHECK-NEXT: mov z5.h, w8 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z24.h, w8 ; CHECK-NEXT: mov w8, #31743 // =0x7bff -; CHECK-NEXT: mov z25.d, #0x8000000000000000 -; CHECK-NEXT: mov z27.h, w8 -; CHECK-NEXT: mov z7.d, 
#0x7fffffffffffffff -; CHECK-NEXT: uunpklo z3.d, z2.s +; CHECK-NEXT: mov z7.d, #0x8000000000000000 +; CHECK-NEXT: mov z27.d, #0x8000000000000000 +; CHECK-NEXT: mov z28.d, #0x8000000000000000 +; CHECK-NEXT: mov z30.d, #0x8000000000000000 +; CHECK-NEXT: uunpklo z4.d, z2.s ; CHECK-NEXT: uunpkhi z2.d, z2.s -; CHECK-NEXT: uunpklo z6.d, z0.s +; CHECK-NEXT: uunpklo z5.d, z0.s ; CHECK-NEXT: uunpkhi z0.d, z0.s -; CHECK-NEXT: uunpklo z24.d, z4.s -; CHECK-NEXT: uunpkhi z4.d, z4.s -; CHECK-NEXT: uunpklo z26.d, z1.s -; CHECK-NEXT: uunpkhi z1.d, z1.s +; CHECK-NEXT: uunpklo z6.d, z3.s +; CHECK-NEXT: uunpklo z25.d, z1.s +; CHECK-NEXT: uunpkhi z3.d, z3.s +; CHECK-NEXT: mov z8.d, #0x8000000000000000 +; CHECK-NEXT: mov z31.d, #0x8000000000000000 +; CHECK-NEXT: mov z10.d, #0x7fffffffffffffff +; CHECK-NEXT: frintx z4.h, p0/m, z4.h ; CHECK-NEXT: frintx z2.h, p0/m, z2.h -; CHECK-NEXT: frintx z3.h, p0/m, z3.h +; CHECK-NEXT: frintx z5.h, p0/m, z5.h +; CHECK-NEXT: movprfx z26, z0 +; CHECK-NEXT: frintx z26.h, p0/m, z0.h +; CHECK-NEXT: uunpkhi z0.d, z1.s ; CHECK-NEXT: frintx z6.h, p0/m, z6.h -; CHECK-NEXT: movprfx z28, z0 -; CHECK-NEXT: frintx z28.h, p0/m, z0.h -; CHECK-NEXT: movprfx z29, z4 -; CHECK-NEXT: frintx z29.h, p0/m, z4.h -; CHECK-NEXT: frintx z24.h, p0/m, z24.h -; CHECK-NEXT: movprfx z30, z1 -; CHECK-NEXT: frintx z30.h, p0/m, z1.h -; CHECK-NEXT: frintx z26.h, p0/m, z26.h -; CHECK-NEXT: fcmge p5.h, p0/z, z2.h, z5.h -; CHECK-NEXT: fcmge p2.h, p0/z, z3.h, z5.h -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: fcvtzs z1.d, p0/m, z2.h -; CHECK-NEXT: movprfx z0, z3 -; CHECK-NEXT: fcvtzs z0.d, p0/m, z3.h -; CHECK-NEXT: fcmge p6.h, p0/z, z6.h, z5.h -; CHECK-NEXT: fcmgt p3.h, p0/z, z3.h, z27.h -; CHECK-NEXT: fcmuo p1.h, p0/z, z3.h, z3.h -; CHECK-NEXT: fcmge p7.h, p0/z, z28.h, z5.h -; CHECK-NEXT: movprfx z3, z6 -; CHECK-NEXT: fcvtzs z3.d, p0/m, z6.h -; CHECK-NEXT: fcmge p8.h, p0/z, z24.h, z5.h -; CHECK-NEXT: fcmgt p4.h, p0/z, z2.h, z27.h -; CHECK-NEXT: fcmge p9.h, p0/z, z26.h, z5.h -; CHECK-NEXT: 
not p5.b, p0/z, p5.b -; CHECK-NEXT: movprfx z4, z24 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z24.h -; CHECK-NEXT: fcmge p10.h, p0/z, z30.h, z5.h -; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: movprfx z31, z26 -; CHECK-NEXT: fcvtzs z31.d, p0/m, z26.h -; CHECK-NEXT: movprfx z8, z30 -; CHECK-NEXT: fcvtzs z8.d, p0/m, z30.h -; CHECK-NEXT: mov z1.d, p5/m, z25.d -; CHECK-NEXT: fcmge p5.h, p0/z, z29.h, z5.h -; CHECK-NEXT: not p6.b, p0/z, p6.b -; CHECK-NEXT: mov z0.d, p2/m, z25.d -; CHECK-NEXT: fcmuo p2.h, p0/z, z2.h, z2.h -; CHECK-NEXT: movprfx z2, z28 -; CHECK-NEXT: fcvtzs z2.d, p0/m, z28.h -; CHECK-NEXT: movprfx z5, z29 -; CHECK-NEXT: fcvtzs z5.d, p0/m, z29.h -; CHECK-NEXT: not p7.b, p0/z, p7.b -; CHECK-NEXT: mov z3.d, p6/m, z25.d -; CHECK-NEXT: not p6.b, p0/z, p8.b -; CHECK-NEXT: fcmgt p8.h, p0/z, z6.h, z27.h -; CHECK-NEXT: mov z1.d, p4/m, z7.d -; CHECK-NEXT: not p5.b, p0/z, p5.b -; CHECK-NEXT: mov z0.d, p3/m, z7.d -; CHECK-NEXT: fcmgt p3.h, p0/z, z29.h, z27.h -; CHECK-NEXT: sel z9.d, p7, z25.d, z2.d -; CHECK-NEXT: not p7.b, p0/z, p9.b -; CHECK-NEXT: mov z4.d, p6/m, z25.d -; CHECK-NEXT: not p6.b, p0/z, p10.b -; CHECK-NEXT: fcmgt p10.h, p0/z, z28.h, z27.h -; CHECK-NEXT: mov z5.d, p5/m, z25.d -; CHECK-NEXT: fcmgt p5.h, p0/z, z24.h, z27.h -; CHECK-NEXT: fcmuo p9.h, p0/z, z6.h, z6.h -; CHECK-NEXT: sel z6.d, p7, z25.d, z31.d -; CHECK-NEXT: sel z25.d, p6, z25.d, z8.d -; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: fcmgt p6.h, p0/z, z26.h, z27.h -; CHECK-NEXT: fcmgt p7.h, p0/z, z30.h, z27.h -; CHECK-NEXT: fcmuo p4.h, p0/z, z28.h, z28.h -; CHECK-NEXT: sel z2.d, p8, z7.d, z3.d -; CHECK-NEXT: sel z3.d, p10, z7.d, z9.d -; CHECK-NEXT: ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: fcmuo p8.h, p0/z, z29.h, z29.h -; CHECK-NEXT: mov z4.d, p5/m, z7.d -; CHECK-NEXT: fcmuo p5.h, p0/z, z24.h, z24.h -; CHECK-NEXT: fcmuo p10.h, p0/z, z26.h, z26.h -; CHECK-NEXT: mov z5.d, p3/m, z7.d -; CHECK-NEXT: mov z6.d, p6/m, z7.d +; CHECK-NEXT: movprfx z29, 
z3 +; CHECK-NEXT: frintx z29.h, p0/m, z3.h +; CHECK-NEXT: frintx z25.h, p0/m, z25.h +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: mov z3.d, #0x8000000000000000 +; CHECK-NEXT: fcmge p1.h, p0/z, z4.h, z24.h +; CHECK-NEXT: fcmge p2.h, p0/z, z2.h, z24.h +; CHECK-NEXT: fcmge p3.h, p0/z, z5.h, z24.h +; CHECK-NEXT: movprfx z9, z0 +; CHECK-NEXT: frintx z9.h, p0/m, z0.h +; CHECK-NEXT: fcmge p4.h, p0/z, z26.h, z24.h +; CHECK-NEXT: fcmge p5.h, p0/z, z6.h, z24.h +; CHECK-NEXT: fcmge p7.h, p0/z, z25.h, z24.h +; CHECK-NEXT: fcmge p6.h, p0/z, z29.h, z24.h +; CHECK-NEXT: fcmgt p8.h, p0/z, z6.h, z1.h +; CHECK-NEXT: fcmgt p10.h, p0/z, z25.h, z1.h +; CHECK-NEXT: fcmuo p9.h, p0/z, z5.h, z5.h +; CHECK-NEXT: fcvtzs z7.d, p1/m, z4.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z4.h, z1.h +; CHECK-NEXT: fcvtzs z27.d, p2/m, z2.h +; CHECK-NEXT: fcmge p2.h, p0/z, z9.h, z24.h +; CHECK-NEXT: mov z24.d, #0x8000000000000000 +; CHECK-NEXT: fcvtzs z28.d, p3/m, z5.h +; CHECK-NEXT: fcvtzs z3.d, p4/m, z26.h +; CHECK-NEXT: fcvtzs z30.d, p5/m, z6.h +; CHECK-NEXT: fcvtzs z8.d, p7/m, z25.h +; CHECK-NEXT: fcmgt p4.h, p0/z, z2.h, z1.h +; CHECK-NEXT: fcmgt p5.h, p0/z, z5.h, z1.h +; CHECK-NEXT: fcmgt p7.h, p0/z, z26.h, z1.h +; CHECK-NEXT: fcvtzs z31.d, p6/m, z29.h +; CHECK-NEXT: sel z0.d, p1, z10.d, z7.d +; CHECK-NEXT: fcmgt p1.h, p0/z, z29.h, z1.h +; CHECK-NEXT: fcvtzs z24.d, p2/m, z9.h +; CHECK-NEXT: fcmgt p2.h, p0/z, z9.h, z1.h +; CHECK-NEXT: fcmuo p3.h, p0/z, z4.h, z4.h +; CHECK-NEXT: fcmuo p6.h, p0/z, z2.h, z2.h +; CHECK-NEXT: sel z4.d, p8, z10.d, z30.d +; CHECK-NEXT: fcmuo p8.h, p0/z, z25.h, z25.h +; CHECK-NEXT: sel z1.d, p4, z10.d, z27.d +; CHECK-NEXT: fcmuo p4.h, p0/z, z26.h, z26.h +; CHECK-NEXT: sel z2.d, p5, z10.d, z28.d +; CHECK-NEXT: mov z3.d, p7/m, z10.d +; CHECK-NEXT: fcmuo p5.h, p0/z, z6.h, z6.h +; CHECK-NEXT: fcmuo p7.h, p0/z, z29.h, z29.h +; CHECK-NEXT: sel z5.d, p1, z10.d, z31.d +; CHECK-NEXT: sel z6.d, p10, z10.d, z8.d +; CHECK-NEXT: ldr z8, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr 
p10, [sp, #1, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: fcmuo p0.h, p0/z, z9.h, z9.h +; CHECK-NEXT: ldr z9, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: sel z7.d, p2, z10.d, z24.d +; CHECK-NEXT: ldr z10, [sp, #1, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: mov z1.d, p6/m, #0 // =0x0 ; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: fcmuo p0.h, p0/z, z30.h, z30.h -; CHECK-NEXT: sel z7.d, p7, z7.d, z25.d -; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: mov z2.d, p9/m, #0 // =0x0 -; CHECK-NEXT: ldr p9, [sp, #2, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: mov z3.d, p4/m, #0 // =0x0 -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #2, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: mov z4.d, p5/m, #0 // =0x0 -; CHECK-NEXT: mov z5.d, p8/m, #0 // =0x0 +; CHECK-NEXT: mov z5.d, p7/m, #0 // =0x0 +; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z6.d, p8/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, p3/m, #0 // =0x0 ; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: mov z6.d, p10/m, #0 // =0x0 -; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 -; CHECK-NEXT: ldr p10, [sp, #1, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: mov z1.d, p2/m, #0 // =0x0 ; CHECK-NEXT: mov z7.d, p0/m, #0 // =0x0 ; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: addvl sp, sp, #3 +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #4 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %a = call @llvm.lrint.nxv16iXLen.nxv16f16( %x) @@ -319,6 +275,8 @@ define @lrint_v32f16( %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-17 +; CHECK-NEXT: str p11, [sp] // 2-byte Folded Spill +; CHECK-NEXT: str p10, [sp, #1, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p9, [sp, #2, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill @@ -341,8 +299,8 @@ define @lrint_v32f16( %x) { ; CHECK-NEXT: str z10, [sp, #14, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z9, [sp, #15, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z8, [sp, #16, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-3 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xa0, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 160 * VG +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 144 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG ; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG @@ -353,230 +311,191 @@ define @lrint_v32f16( %x) { ; CHECK-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 16 - 56 * VG ; CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 16 - 64 * VG ; CHECK-NEXT: uunpklo z4.s, z0.h -; CHECK-NEXT: uunpkhi z5.s, z0.h +; CHECK-NEXT: uunpkhi z0.s, z0.h ; CHECK-NEXT: mov w9, #64511 // =0xfbff +; CHECK-NEXT: uunpklo z25.s, z1.h +; CHECK-NEXT: uunpkhi z10.s, z1.h +; CHECK-NEXT: uunpklo z9.s, z2.h ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: uunpklo z6.s, z1.h -; CHECK-NEXT: mov z26.h, w9 -; CHECK-NEXT: uunpkhi z25.s, z1.h +; CHECK-NEXT: uunpkhi z12.s, z3.h +; CHECK-NEXT: mov z27.h, w9 ; CHECK-NEXT: 
mov w9, #31743 // =0x7bff -; CHECK-NEXT: mov z27.d, #0x8000000000000000 -; CHECK-NEXT: uunpklo z31.s, z2.h -; CHECK-NEXT: uunpkhi z12.s, z2.h -; CHECK-NEXT: mov z17.d, z3.d -; CHECK-NEXT: uunpklo z0.d, z4.s -; CHECK-NEXT: uunpkhi z4.d, z4.s -; CHECK-NEXT: uunpklo z7.d, z5.s -; CHECK-NEXT: uunpkhi z24.d, z5.s -; CHECK-NEXT: uunpklo z28.d, z6.s -; CHECK-NEXT: uunpkhi z29.d, z6.s -; CHECK-NEXT: uunpklo z8.d, z25.s -; CHECK-NEXT: uunpkhi z9.d, z25.s -; CHECK-NEXT: uunpklo z16.s, z17.h -; CHECK-NEXT: uunpklo z11.d, z31.s -; CHECK-NEXT: uunpkhi z14.d, z31.s -; CHECK-NEXT: uunpkhi z17.s, z17.h -; CHECK-NEXT: movprfx z30, z4 -; CHECK-NEXT: frintx z30.h, p0/m, z4.h -; CHECK-NEXT: movprfx z4, z7 -; CHECK-NEXT: frintx z4.h, p0/m, z7.h -; CHECK-NEXT: frintx z0.h, p0/m, z0.h -; CHECK-NEXT: movprfx z6, z24 -; CHECK-NEXT: frintx z6.h, p0/m, z24.h -; CHECK-NEXT: movprfx z7, z28 -; CHECK-NEXT: frintx z7.h, p0/m, z28.h -; CHECK-NEXT: movprfx z25, z29 -; CHECK-NEXT: frintx z25.h, p0/m, z29.h -; CHECK-NEXT: movprfx z3, z9 -; CHECK-NEXT: frintx z3.h, p0/m, z9.h -; CHECK-NEXT: mov z5.h, w9 -; CHECK-NEXT: movprfx z31, z11 -; CHECK-NEXT: frintx z31.h, p0/m, z11.h -; CHECK-NEXT: movprfx z9, z14 -; CHECK-NEXT: frintx z9.h, p0/m, z14.h -; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z26.h -; CHECK-NEXT: fcmge p4.h, p0/z, z4.h, z26.h -; CHECK-NEXT: movprfx z24, z0 -; CHECK-NEXT: fcvtzs z24.d, p0/m, z0.h -; CHECK-NEXT: fcmge p2.h, p0/z, z30.h, z26.h -; CHECK-NEXT: movprfx z29, z4 -; CHECK-NEXT: fcvtzs z29.d, p0/m, z4.h -; CHECK-NEXT: fcmge p6.h, p0/z, z6.h, z26.h -; CHECK-NEXT: movprfx z28, z30 -; CHECK-NEXT: fcvtzs z28.d, p0/m, z30.h -; CHECK-NEXT: movprfx z10, z6 -; CHECK-NEXT: fcvtzs z10.d, p0/m, z6.h -; CHECK-NEXT: str z0, [sp, #1, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: fcmge p3.h, p0/z, z7.h, z26.h -; CHECK-NEXT: movprfx z13, z7 -; CHECK-NEXT: fcvtzs z13.d, p0/m, z7.h -; CHECK-NEXT: movprfx z15, z25 -; CHECK-NEXT: fcvtzs z15.d, p0/m, z25.h -; CHECK-NEXT: not p5.b, p0/z, p1.b -; CHECK-NEXT: 
movprfx z18, z3 -; CHECK-NEXT: fcvtzs z18.d, p0/m, z3.h -; CHECK-NEXT: movprfx z20, z31 -; CHECK-NEXT: fcvtzs z20.d, p0/m, z31.h -; CHECK-NEXT: not p4.b, p0/z, p4.b -; CHECK-NEXT: movprfx z21, z9 -; CHECK-NEXT: fcvtzs z21.d, p0/m, z9.h -; CHECK-NEXT: fcmgt p1.h, p0/z, z30.h, z5.h -; CHECK-NEXT: sel z0.d, p5, z27.d, z24.d -; CHECK-NEXT: not p7.b, p0/z, p2.b -; CHECK-NEXT: fcmgt p2.h, p0/z, z4.h, z5.h -; CHECK-NEXT: mov z29.d, p4/m, z27.d -; CHECK-NEXT: fcmge p4.h, p0/z, z25.h, z26.h -; CHECK-NEXT: not p5.b, p0/z, p6.b -; CHECK-NEXT: not p3.b, p0/z, p3.b -; CHECK-NEXT: fcmge p6.h, p0/z, z9.h, z26.h -; CHECK-NEXT: fcmgt p9.h, p0/z, z6.h, z5.h -; CHECK-NEXT: str z0, [sp, #2, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: sel z0.d, p7, z27.d, z28.d -; CHECK-NEXT: movprfx z28, z8 -; CHECK-NEXT: frintx z28.h, p0/m, z8.h -; CHECK-NEXT: sel z8.d, p5, z27.d, z10.d -; CHECK-NEXT: uunpklo z10.d, z12.s -; CHECK-NEXT: uunpkhi z12.d, z12.s -; CHECK-NEXT: not p5.b, p0/z, p4.b -; CHECK-NEXT: sel z11.d, p3, z27.d, z13.d -; CHECK-NEXT: uunpklo z13.d, z16.s -; CHECK-NEXT: fcmge p3.h, p0/z, z3.h, z26.h -; CHECK-NEXT: not p6.b, p0/z, p6.b -; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: sel z24.d, p5, z27.d, z15.d -; CHECK-NEXT: uunpkhi z15.d, z16.s -; CHECK-NEXT: movprfx z14, z28 -; CHECK-NEXT: fcvtzs z14.d, p0/m, z28.h -; CHECK-NEXT: frintx z10.h, p0/m, z10.h -; CHECK-NEXT: uunpklo z16.d, z17.s -; CHECK-NEXT: frintx z12.h, p0/m, z12.h -; CHECK-NEXT: uunpkhi z17.d, z17.s -; CHECK-NEXT: movprfx z19, z13 -; CHECK-NEXT: frintx z19.h, p0/m, z13.h -; CHECK-NEXT: fcmge p4.h, p0/z, z28.h, z26.h -; CHECK-NEXT: fcmge p5.h, p0/z, z31.h, z26.h -; CHECK-NEXT: not p3.b, p0/z, p3.b -; CHECK-NEXT: frintx z15.h, p0/m, z15.h -; CHECK-NEXT: fcmge p7.h, p0/z, z10.h, z26.h +; CHECK-NEXT: uunpkhi z14.s, z2.h +; CHECK-NEXT: uunpklo z15.s, z3.h +; CHECK-NEXT: uunpklo z7.d, z0.s +; CHECK-NEXT: uunpklo z5.d, z4.s +; CHECK-NEXT: uunpkhi z6.d, z4.s +; CHECK-NEXT: uunpklo z29.d, z25.s +; 
CHECK-NEXT: uunpkhi z26.d, z0.s +; CHECK-NEXT: uunpklo z8.d, z10.s +; CHECK-NEXT: uunpkhi z11.d, z10.s +; CHECK-NEXT: uunpklo z10.d, z9.s +; CHECK-NEXT: uunpkhi z13.d, z9.s +; CHECK-NEXT: mov z31.d, #0x8000000000000000 +; CHECK-NEXT: uunpklo z16.d, z12.s +; CHECK-NEXT: uunpklo z18.d, z14.s +; CHECK-NEXT: movprfx z1, z7 +; CHECK-NEXT: frintx z1.h, p0/m, z7.h +; CHECK-NEXT: movprfx z4, z5 +; CHECK-NEXT: frintx z4.h, p0/m, z5.h +; CHECK-NEXT: movprfx z5, z6 +; CHECK-NEXT: frintx z5.h, p0/m, z6.h +; CHECK-NEXT: movprfx z7, z29 +; CHECK-NEXT: frintx z7.h, p0/m, z29.h +; CHECK-NEXT: movprfx z6, z26 +; CHECK-NEXT: frintx z6.h, p0/m, z26.h +; CHECK-NEXT: mov z29.d, #0x8000000000000000 +; CHECK-NEXT: movprfx z9, z11 +; CHECK-NEXT: frintx z9.h, p0/m, z11.h +; CHECK-NEXT: movprfx z3, z10 +; CHECK-NEXT: frintx z3.h, p0/m, z10.h +; CHECK-NEXT: movprfx z10, z13 +; CHECK-NEXT: frintx z10.h, p0/m, z13.h +; CHECK-NEXT: uunpkhi z26.d, z25.s +; CHECK-NEXT: uunpkhi z13.d, z12.s +; CHECK-NEXT: frintx z8.h, p0/m, z8.h +; CHECK-NEXT: fcmge p3.h, p0/z, z1.h, z27.h +; CHECK-NEXT: uunpkhi z14.d, z14.s +; CHECK-NEXT: mov z0.d, #0x8000000000000000 +; CHECK-NEXT: fcmge p2.h, p0/z, z7.h, z27.h +; CHECK-NEXT: mov z28.d, #0x8000000000000000 +; CHECK-NEXT: fcmge p4.h, p0/z, z4.h, z27.h +; CHECK-NEXT: fcmge p5.h, p0/z, z5.h, z27.h +; CHECK-NEXT: uunpklo z19.d, z15.s +; CHECK-NEXT: uunpkhi z15.d, z15.s +; CHECK-NEXT: movprfx z20, z13 +; CHECK-NEXT: frintx z20.h, p0/m, z13.h +; CHECK-NEXT: mov z30.d, #0x8000000000000000 +; CHECK-NEXT: fcmge p1.h, p0/z, z6.h, z27.h +; CHECK-NEXT: frintx z26.h, p0/m, z26.h +; CHECK-NEXT: fcvtzs z29.d, p3/m, z1.h +; CHECK-NEXT: fcmge p3.h, p0/z, z9.h, z27.h +; CHECK-NEXT: mov z11.d, #0x8000000000000000 +; CHECK-NEXT: fcvtzs z31.d, p2/m, z7.h +; CHECK-NEXT: fcmge p2.h, p0/z, z8.h, z27.h +; CHECK-NEXT: mov z17.d, #0x8000000000000000 ; CHECK-NEXT: frintx z16.h, p0/m, z16.h -; CHECK-NEXT: fcmge p8.h, p0/z, z12.h, z26.h -; CHECK-NEXT: frintx z17.h, p0/m, z17.h -; CHECK-NEXT: 
movprfx z23, z19 -; CHECK-NEXT: fcvtzs z23.d, p0/m, z19.h -; CHECK-NEXT: not p4.b, p0/z, p4.b -; CHECK-NEXT: not p5.b, p0/z, p5.b -; CHECK-NEXT: sel z13.d, p3, z27.d, z18.d -; CHECK-NEXT: fcmge p3.h, p0/z, z19.h, z26.h -; CHECK-NEXT: movprfx z0, z15 -; CHECK-NEXT: fcvtzs z0.d, p0/m, z15.h -; CHECK-NEXT: sel z22.d, p4, z27.d, z14.d -; CHECK-NEXT: sel z18.d, p6, z27.d, z21.d -; CHECK-NEXT: movprfx z21, z12 -; CHECK-NEXT: fcvtzs z21.d, p0/m, z12.h -; CHECK-NEXT: movprfx z1, z16 -; CHECK-NEXT: fcvtzs z1.d, p0/m, z16.h -; CHECK-NEXT: sel z14.d, p5, z27.d, z20.d -; CHECK-NEXT: fcmge p4.h, p0/z, z15.h, z26.h -; CHECK-NEXT: movprfx z20, z10 -; CHECK-NEXT: fcvtzs z20.d, p0/m, z10.h -; CHECK-NEXT: movprfx z2, z17 -; CHECK-NEXT: fcvtzs z2.d, p0/m, z17.h -; CHECK-NEXT: not p5.b, p0/z, p7.b -; CHECK-NEXT: fcmge p6.h, p0/z, z16.h, z26.h -; CHECK-NEXT: not p7.b, p0/z, p8.b -; CHECK-NEXT: fcmge p8.h, p0/z, z17.h, z26.h -; CHECK-NEXT: mov z26.d, #0x7fffffffffffffff -; CHECK-NEXT: not p3.b, p0/z, p3.b -; CHECK-NEXT: not p4.b, p0/z, p4.b -; CHECK-NEXT: mov z20.d, p5/m, z27.d -; CHECK-NEXT: mov z21.d, p7/m, z27.d -; CHECK-NEXT: not p5.b, p0/z, p6.b -; CHECK-NEXT: mov z23.d, p3/m, z27.d -; CHECK-NEXT: fcmgt p3.h, p0/z, z17.h, z5.h -; CHECK-NEXT: not p6.b, p0/z, p8.b -; CHECK-NEXT: mov z0.d, p4/m, z27.d -; CHECK-NEXT: fcmgt p4.h, p0/z, z16.h, z5.h -; CHECK-NEXT: mov z1.d, p5/m, z27.d -; CHECK-NEXT: fcmuo p5.h, p0/z, z16.h, z16.h -; CHECK-NEXT: mov z29.d, p2/m, z26.d -; CHECK-NEXT: mov z2.d, p6/m, z27.d -; CHECK-NEXT: ldr z27, [sp] // 16-byte Folded Reload -; CHECK-NEXT: fcmgt p6.h, p0/z, z7.h, z5.h -; CHECK-NEXT: fcmgt p2.h, p0/z, z12.h, z5.h -; CHECK-NEXT: fcmuo p8.h, p0/z, z17.h, z17.h -; CHECK-NEXT: fcmgt p7.h, p0/z, z28.h, z5.h -; CHECK-NEXT: mov z1.d, p4/m, z26.d -; CHECK-NEXT: fcmuo p4.h, p0/z, z15.h, z15.h -; CHECK-NEXT: mov z8.d, p9/m, z26.d -; CHECK-NEXT: mov z27.d, p1/m, z26.d -; CHECK-NEXT: fcmgt p1.h, p0/z, z15.h, z5.h -; CHECK-NEXT: mov z2.d, p3/m, z26.d -; CHECK-NEXT: 
fcmgt p3.h, p0/z, z19.h, z5.h -; CHECK-NEXT: mov z11.d, p6/m, z26.d -; CHECK-NEXT: fcmuo p6.h, p0/z, z19.h, z19.h -; CHECK-NEXT: mov z1.d, p5/m, #0 // =0x0 -; CHECK-NEXT: fcmgt p5.h, p0/z, z9.h, z5.h -; CHECK-NEXT: sel z15.d, p2, z26.d, z21.d -; CHECK-NEXT: fcmuo p2.h, p0/z, z12.h, z12.h -; CHECK-NEXT: mov z2.d, p8/m, #0 // =0x0 -; CHECK-NEXT: sel z16.d, p7, z26.d, z22.d -; CHECK-NEXT: mov z0.d, p1/m, z26.d -; CHECK-NEXT: fcmgt p1.h, p0/z, z10.h, z5.h -; CHECK-NEXT: str z1, [x8, #14, mul vl] -; CHECK-NEXT: sel z17.d, p3, z26.d, z23.d -; CHECK-NEXT: fcmuo p3.h, p0/z, z10.h, z10.h -; CHECK-NEXT: str z2, [x8, #15, mul vl] -; CHECK-NEXT: sel z2.d, p5, z26.d, z18.d -; CHECK-NEXT: fcmuo p5.h, p0/z, z9.h, z9.h -; CHECK-NEXT: mov z0.d, p4/m, #0 // =0x0 -; CHECK-NEXT: fcmgt p4.h, p0/z, z3.h, z5.h -; CHECK-NEXT: mov z15.d, p2/m, #0 // =0x0 -; CHECK-NEXT: sel z1.d, p1, z26.d, z20.d -; CHECK-NEXT: fcmgt p1.h, p0/z, z31.h, z5.h -; CHECK-NEXT: mov z17.d, p6/m, #0 // =0x0 -; CHECK-NEXT: fcmuo p2.h, p0/z, z31.h, z31.h +; CHECK-NEXT: frintx z14.h, p0/m, z14.h +; CHECK-NEXT: fcvtzs z0.d, p4/m, z4.h +; CHECK-NEXT: fcvtzs z28.d, p5/m, z5.h +; CHECK-NEXT: fcmge p4.h, p0/z, z3.h, z27.h +; CHECK-NEXT: mov z12.d, #0x8000000000000000 +; CHECK-NEXT: fcmge p5.h, p0/z, z10.h, z27.h +; CHECK-NEXT: mov z13.d, #0x8000000000000000 +; CHECK-NEXT: frintx z19.h, p0/m, z19.h +; CHECK-NEXT: frintx z15.h, p0/m, z15.h +; CHECK-NEXT: mov z24.d, #0x8000000000000000 +; CHECK-NEXT: fcvtzs z30.d, p1/m, z6.h +; CHECK-NEXT: fcmge p1.h, p0/z, z26.h, z27.h +; CHECK-NEXT: mov z2.d, #0x8000000000000000 +; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: frintx z18.h, p0/m, z18.h +; CHECK-NEXT: fcvtzs z11.d, p3/m, z9.h +; CHECK-NEXT: fcmge p3.h, p0/z, z20.h, z27.h +; CHECK-NEXT: mov z25.h, w9 +; CHECK-NEXT: fcvtzs z17.d, p2/m, z8.h +; CHECK-NEXT: fcmge p6.h, p0/z, z16.h, z27.h +; CHECK-NEXT: mov z21.d, #0x8000000000000000 +; CHECK-NEXT: fcmge p2.h, p0/z, z14.h, z27.h +; CHECK-NEXT: mov z22.d, 
#0x8000000000000000 +; CHECK-NEXT: fcvtzs z12.d, p4/m, z3.h +; CHECK-NEXT: fcvtzs z13.d, p5/m, z10.h +; CHECK-NEXT: fcmge p4.h, p0/z, z19.h, z27.h +; CHECK-NEXT: mov z23.d, #0x8000000000000000 +; CHECK-NEXT: fcmge p5.h, p0/z, z15.h, z27.h +; CHECK-NEXT: mov z0.d, #0x8000000000000000 +; CHECK-NEXT: fcvtzs z2.d, p1/m, z26.h +; CHECK-NEXT: fcmge p1.h, p0/z, z18.h, z27.h +; CHECK-NEXT: fcvtzs z24.d, p3/m, z20.h +; CHECK-NEXT: mov z27.d, #0x7fffffffffffffff +; CHECK-NEXT: fcmgt p11.h, p0/z, z20.h, z25.h +; CHECK-NEXT: fcvtzs z21.d, p6/m, z16.h +; CHECK-NEXT: fcmgt p3.h, p0/z, z16.h, z25.h +; CHECK-NEXT: fcmuo p6.h, p0/z, z16.h, z16.h +; CHECK-NEXT: fcvtzs z22.d, p2/m, z14.h +; CHECK-NEXT: fcmgt p2.h, p0/z, z8.h, z25.h +; CHECK-NEXT: mov z16.d, #0x8000000000000000 +; CHECK-NEXT: fcmgt p7.h, p0/z, z5.h, z25.h +; CHECK-NEXT: fcvtzs z23.d, p4/m, z19.h +; CHECK-NEXT: fcvtzs z0.d, p5/m, z15.h +; CHECK-NEXT: fcmuo p4.h, p0/z, z20.h, z20.h +; CHECK-NEXT: fcmgt p5.h, p0/z, z15.h, z25.h +; CHECK-NEXT: mov z24.d, p11/m, z27.d +; CHECK-NEXT: sel z20.d, p3, z27.d, z21.d +; CHECK-NEXT: fcmgt p3.h, p0/z, z19.h, z25.h +; CHECK-NEXT: fcmgt p8.h, p0/z, z1.h, z25.h +; CHECK-NEXT: mov z17.d, p2/m, z27.d +; CHECK-NEXT: fcvtzs z16.d, p1/m, z18.h +; CHECK-NEXT: fcmgt p2.h, p0/z, z18.h, z25.h +; CHECK-NEXT: mov z28.d, p7/m, z27.d +; CHECK-NEXT: fcmgt p7.h, p0/z, z14.h, z25.h +; CHECK-NEXT: fcmuo p1.h, p0/z, z15.h, z15.h +; CHECK-NEXT: mov z0.d, p5/m, z27.d +; CHECK-NEXT: mov z24.d, p4/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p5.h, p0/z, z10.h, z25.h +; CHECK-NEXT: fcmuo p4.h, p0/z, z19.h, z19.h +; CHECK-NEXT: sel z19.d, p3, z27.d, z23.d +; CHECK-NEXT: fcmuo p3.h, p0/z, z14.h, z14.h +; CHECK-NEXT: mov z20.d, p6/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p9.h, p0/z, z6.h, z25.h +; CHECK-NEXT: fcmgt p10.h, p0/z, z7.h, z25.h +; CHECK-NEXT: str z24, [x8, #15, mul vl] +; CHECK-NEXT: sel z24.d, p2, z27.d, z16.d +; CHECK-NEXT: fcmgt p2.h, p0/z, z3.h, z25.h +; CHECK-NEXT: sel z15.d, p7, z27.d, z22.d +; CHECK-NEXT: 
mov z0.d, p1/m, #0 // =0x0 +; CHECK-NEXT: mov z13.d, p5/m, z27.d +; CHECK-NEXT: str z20, [x8, #14, mul vl] +; CHECK-NEXT: fcmgt p5.h, p0/z, z9.h, z25.h +; CHECK-NEXT: fcmuo p1.h, p0/z, z18.h, z18.h +; CHECK-NEXT: mov z19.d, p4/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p4.h, p0/z, z10.h, z10.h +; CHECK-NEXT: mov z29.d, p8/m, z27.d ; CHECK-NEXT: str z0, [x8, #13, mul vl] -; CHECK-NEXT: mov z2.d, p5/m, #0 // =0x0 -; CHECK-NEXT: fcmuo p5.h, p0/z, z25.h, z25.h -; CHECK-NEXT: str z17, [x8, #12, mul vl] -; CHECK-NEXT: mov z1.d, p3/m, #0 // =0x0 -; CHECK-NEXT: fcmgt p3.h, p0/z, z25.h, z5.h +; CHECK-NEXT: mov z15.d, p3/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p3.h, p0/z, z3.h, z3.h +; CHECK-NEXT: sel z0.d, p2, z27.d, z12.d +; CHECK-NEXT: fcmuo p2.h, p0/z, z9.h, z9.h +; CHECK-NEXT: mov z30.d, p9/m, z27.d +; CHECK-NEXT: str z19, [x8, #12, mul vl] +; CHECK-NEXT: sel z3.d, p5, z27.d, z11.d +; CHECK-NEXT: mov z24.d, p1/m, #0 // =0x0 ; CHECK-NEXT: str z15, [x8, #11, mul vl] -; CHECK-NEXT: sel z0.d, p1, z26.d, z14.d -; CHECK-NEXT: fcmuo p1.h, p0/z, z3.h, z3.h -; CHECK-NEXT: sel z3.d, p4, z26.d, z13.d -; CHECK-NEXT: fcmuo p4.h, p0/z, z28.h, z28.h -; CHECK-NEXT: str z1, [x8, #10, mul vl] -; CHECK-NEXT: sel z1.d, p3, z26.d, z24.d -; CHECK-NEXT: fcmuo p3.h, p0/z, z7.h, z7.h -; CHECK-NEXT: ldr z7, [sp, #1, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: str z2, [x8, #9, mul vl] -; CHECK-NEXT: mov z0.d, p2/m, #0 // =0x0 -; CHECK-NEXT: mov z3.d, p1/m, #0 // =0x0 -; CHECK-NEXT: fcmuo p1.h, p0/z, z6.h, z6.h -; CHECK-NEXT: mov z16.d, p4/m, #0 // =0x0 -; CHECK-NEXT: fcmuo p4.h, p0/z, z4.h, z4.h -; CHECK-NEXT: fcmgt p2.h, p0/z, z7.h, z5.h -; CHECK-NEXT: mov z1.d, p5/m, #0 // =0x0 -; CHECK-NEXT: fcmuo p5.h, p0/z, z30.h, z30.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z26.h, z25.h +; CHECK-NEXT: mov z13.d, p4/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, p3/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p5.h, p0/z, z8.h, z8.h +; CHECK-NEXT: fcmuo p4.h, p0/z, z26.h, z26.h +; CHECK-NEXT: str z24, [x8, #10, mul vl] +; CHECK-NEXT: mov 
z3.d, p2/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p3.h, p0/z, z4.h, z25.h +; CHECK-NEXT: str z13, [x8, #9, mul vl] +; CHECK-NEXT: fcmuo p2.h, p0/z, z6.h, z6.h +; CHECK-NEXT: mov z31.d, p10/m, z27.d ; CHECK-NEXT: str z0, [x8, #8, mul vl] -; CHECK-NEXT: fcmuo p0.h, p0/z, z7.h, z7.h -; CHECK-NEXT: mov z11.d, p3/m, #0 // =0x0 +; CHECK-NEXT: mov z2.d, p1/m, z27.d +; CHECK-NEXT: fcmuo p1.h, p0/z, z7.h, z7.h ; CHECK-NEXT: str z3, [x8, #7, mul vl] -; CHECK-NEXT: ldr z0, [sp, #2, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: str z16, [x8, #6, mul vl] -; CHECK-NEXT: mov z8.d, p1/m, #0 // =0x0 -; CHECK-NEXT: str z1, [x8, #5, mul vl] -; CHECK-NEXT: mov z29.d, p4/m, #0 // =0x0 -; CHECK-NEXT: mov z27.d, p5/m, #0 // =0x0 -; CHECK-NEXT: str z11, [x8, #4, mul vl] -; CHECK-NEXT: str z8, [x8, #3, mul vl] -; CHECK-NEXT: mov z0.d, p2/m, z26.d -; CHECK-NEXT: str z29, [x8, #2, mul vl] -; CHECK-NEXT: str z27, [x8, #1, mul vl] +; CHECK-NEXT: mov z17.d, p5/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p5.h, p0/z, z1.h, z1.h +; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: mov z2.d, p4/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p4.h, p0/z, z5.h, z5.h +; CHECK-NEXT: fcmuo p0.h, p0/z, z4.h, z4.h +; CHECK-NEXT: str z17, [x8, #6, mul vl] +; CHECK-NEXT: mov z31.d, p1/m, #0 // =0x0 +; CHECK-NEXT: mov z30.d, p2/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, p3/m, z27.d +; CHECK-NEXT: mov z29.d, p5/m, #0 // =0x0 +; CHECK-NEXT: str z2, [x8, #5, mul vl] +; CHECK-NEXT: str z31, [x8, #4, mul vl] +; CHECK-NEXT: mov z28.d, p4/m, #0 // =0x0 +; CHECK-NEXT: str z30, [x8, #3, mul vl] ; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 +; CHECK-NEXT: str z29, [x8, #2, mul vl] +; CHECK-NEXT: str z28, [x8, #1, mul vl] ; CHECK-NEXT: str z0, [x8] -; CHECK-NEXT: addvl sp, sp, #3 +; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -593,6 +512,8 @@ define 
@lrint_v32f16( %x) { ; CHECK-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p11, [sp] // 2-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #1, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p9, [sp, #2, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload @@ -612,20 +533,17 @@ define @lrint_v1f32( %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov w8, #-553648128 // =0xdf000000 -; CHECK-NEXT: mov z2.d, #0x8000000000000000 ; CHECK-NEXT: mov z1.s, w8 ; CHECK-NEXT: mov w8, #1593835519 // =0x5effffff ; CHECK-NEXT: frintx z0.s, p0/m, z0.s -; CHECK-NEXT: mov z3.s, w8 +; CHECK-NEXT: mov z2.s, w8 ; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, z1.s -; CHECK-NEXT: movprfx z1, z0 -; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.s -; CHECK-NEXT: fcmgt p2.s, p0/z, z0.s, z3.s -; CHECK-NEXT: mov z3.d, #0x7fffffffffffffff -; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z1.d, #0x8000000000000000 +; CHECK-NEXT: fcvtzs z1.d, p1/m, z0.s +; CHECK-NEXT: fcmgt p1.s, p0/z, z0.s, z2.s +; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff ; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s -; CHECK-NEXT: mov z1.d, p1/m, z2.d -; CHECK-NEXT: sel z0.d, p2, z3.d, z1.d +; CHECK-NEXT: sel z0.d, p1, z2.d, z1.d ; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %a = call @llvm.lrint.nxv1iXLen.nxv1f32( %x) @@ -638,20 +556,17 @@ define @lrint_v2f32( %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov w8, #-553648128 // =0xdf000000 -; CHECK-NEXT: mov z2.d, #0x8000000000000000 ; CHECK-NEXT: mov z1.s, w8 ; CHECK-NEXT: mov w8, #1593835519 // =0x5effffff ; CHECK-NEXT: frintx z0.s, p0/m, z0.s -; CHECK-NEXT: mov z3.s, w8 +; CHECK-NEXT: mov z2.s, w8 ; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, z1.s -; CHECK-NEXT: movprfx z1, z0 -; CHECK-NEXT: 
fcvtzs z1.d, p0/m, z0.s -; CHECK-NEXT: fcmgt p2.s, p0/z, z0.s, z3.s -; CHECK-NEXT: mov z3.d, #0x7fffffffffffffff -; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z1.d, #0x8000000000000000 +; CHECK-NEXT: fcvtzs z1.d, p1/m, z0.s +; CHECK-NEXT: fcmgt p1.s, p0/z, z0.s, z2.s +; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff ; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s -; CHECK-NEXT: mov z1.d, p1/m, z2.d -; CHECK-NEXT: sel z0.d, p2, z3.d, z1.d +; CHECK-NEXT: sel z0.d, p1, z2.d, z1.d ; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %a = call @llvm.lrint.nxv2iXLen.nxv2f32( %x) @@ -662,43 +577,30 @@ declare @llvm.lrint.nxv2iXLen.nxv2f32() define @lrint_v4f32( %x) { ; CHECK-LABEL: lrint_v4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG -; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: uunpklo z1.d, z0.s ; CHECK-NEXT: uunpkhi z0.d, z0.s ; CHECK-NEXT: mov w8, #-553648128 // =0xdf000000 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov z2.s, w8 ; CHECK-NEXT: mov w8, #1593835519 // =0x5effffff -; CHECK-NEXT: mov z3.s, w8 -; CHECK-NEXT: mov z6.d, #0x7fffffffffffffff +; CHECK-NEXT: mov z3.d, #0x8000000000000000 +; CHECK-NEXT: mov z4.d, #0x8000000000000000 +; CHECK-NEXT: mov z5.s, w8 ; CHECK-NEXT: frintx z1.s, p0/m, z1.s ; CHECK-NEXT: frintx z0.s, p0/m, z0.s ; CHECK-NEXT: fcmge p1.s, p0/z, z1.s, z2.s ; CHECK-NEXT: fcmge p2.s, p0/z, z0.s, z2.s -; CHECK-NEXT: mov z2.d, #0x8000000000000000 -; CHECK-NEXT: movprfx z4, z1 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z1.s -; CHECK-NEXT: movprfx z5, z0 -; CHECK-NEXT: fcvtzs z5.d, p0/m, z0.s -; CHECK-NEXT: fcmgt p3.s, p0/z, z1.s, z3.s -; CHECK-NEXT: fcmgt p4.s, p0/z, z0.s, z3.s -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: sel z3.d, p1, z2.d, z4.d -; 
CHECK-NEXT: fcmuo p1.s, p0/z, z1.s, z1.s +; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff +; CHECK-NEXT: fcmuo p3.s, p0/z, z1.s, z1.s +; CHECK-NEXT: fcvtzs z3.d, p1/m, z1.s +; CHECK-NEXT: fcmgt p1.s, p0/z, z1.s, z5.s +; CHECK-NEXT: fcvtzs z4.d, p2/m, z0.s +; CHECK-NEXT: fcmgt p2.s, p0/z, z0.s, z5.s ; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s -; CHECK-NEXT: sel z2.d, p2, z2.d, z5.d -; CHECK-NEXT: sel z0.d, p3, z6.d, z3.d -; CHECK-NEXT: sel z1.d, p4, z6.d, z2.d -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 +; CHECK-NEXT: sel z0.d, p1, z2.d, z3.d +; CHECK-NEXT: sel z1.d, p2, z2.d, z4.d +; CHECK-NEXT: mov z0.d, p3/m, #0 // =0x0 ; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0 -; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %a = call @llvm.lrint.nxv4iXLen.nxv4f32( %x) ret %a @@ -710,7 +612,6 @@ define @lrint_v8f32( %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill @@ -719,57 +620,47 @@ define @lrint_v8f32( %x) { ; CHECK-NEXT: uunpklo z2.d, z0.s ; CHECK-NEXT: uunpkhi z0.d, z0.s ; CHECK-NEXT: mov w8, #-553648128 // =0xdf000000 -; CHECK-NEXT: uunpklo z3.d, z1.s ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uunpklo z3.d, z1.s ; CHECK-NEXT: uunpkhi z1.d, z1.s ; CHECK-NEXT: mov z4.s, w8 ; CHECK-NEXT: mov w8, #1593835519 // =0x5effffff ; CHECK-NEXT: mov z5.d, #0x8000000000000000 -; CHECK-NEXT: mov z6.s, w8 -; CHECK-NEXT: mov z26.d, #0x7fffffffffffffff +; CHECK-NEXT: mov z6.d, #0x8000000000000000 +; CHECK-NEXT: mov z25.s, w8 +; CHECK-NEXT: mov z7.d, #0x8000000000000000 ; CHECK-NEXT: frintx z2.s, p0/m, z2.s ; CHECK-NEXT: frintx z0.s, p0/m, z0.s +; CHECK-NEXT: mov z24.d, 
#0x8000000000000000 ; CHECK-NEXT: frintx z3.s, p0/m, z3.s ; CHECK-NEXT: frintx z1.s, p0/m, z1.s ; CHECK-NEXT: fcmge p1.s, p0/z, z2.s, z4.s ; CHECK-NEXT: fcmge p2.s, p0/z, z0.s, z4.s -; CHECK-NEXT: movprfx z7, z0 -; CHECK-NEXT: fcvtzs z7.d, p0/m, z0.s +; CHECK-NEXT: fcmuo p6.s, p0/z, z0.s, z0.s ; CHECK-NEXT: fcmge p3.s, p0/z, z3.s, z4.s ; CHECK-NEXT: fcmge p4.s, p0/z, z1.s, z4.s -; CHECK-NEXT: movprfx z4, z2 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z2.s -; CHECK-NEXT: movprfx z24, z3 -; CHECK-NEXT: fcvtzs z24.d, p0/m, z3.s -; CHECK-NEXT: movprfx z25, z1 -; CHECK-NEXT: fcvtzs z25.d, p0/m, z1.s -; CHECK-NEXT: fcmgt p7.s, p0/z, z3.s, z6.s -; CHECK-NEXT: fcmgt p5.s, p0/z, z2.s, z6.s -; CHECK-NEXT: fcmgt p6.s, p0/z, z0.s, z6.s -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: not p3.b, p0/z, p3.b -; CHECK-NEXT: mov z4.d, p1/m, z5.d -; CHECK-NEXT: fcmgt p1.s, p0/z, z1.s, z6.s -; CHECK-NEXT: not p4.b, p0/z, p4.b -; CHECK-NEXT: sel z6.d, p2, z5.d, z7.d -; CHECK-NEXT: fcmuo p2.s, p0/z, z2.s, z2.s -; CHECK-NEXT: sel z7.d, p3, z5.d, z24.d -; CHECK-NEXT: fcmuo p3.s, p0/z, z0.s, z0.s -; CHECK-NEXT: sel z5.d, p4, z5.d, z25.d -; CHECK-NEXT: fcmuo p4.s, p0/z, z3.s, z3.s +; CHECK-NEXT: mov z4.d, #0x7fffffffffffffff +; CHECK-NEXT: fcmgt p5.s, p0/z, z1.s, z25.s +; CHECK-NEXT: fcvtzs z5.d, p1/m, z2.s +; CHECK-NEXT: fcvtzs z6.d, p2/m, z0.s +; CHECK-NEXT: fcmgt p1.s, p0/z, z2.s, z25.s +; CHECK-NEXT: fcmgt p2.s, p0/z, z0.s, z25.s +; CHECK-NEXT: fcvtzs z7.d, p3/m, z3.s +; CHECK-NEXT: fcmgt p3.s, p0/z, z3.s, z25.s +; CHECK-NEXT: fcvtzs z24.d, p4/m, z1.s +; CHECK-NEXT: fcmuo p4.s, p0/z, z2.s, z2.s +; CHECK-NEXT: sel z0.d, p1, z4.d, z5.d +; CHECK-NEXT: fcmuo p1.s, p0/z, z3.s, z3.s ; CHECK-NEXT: fcmuo p0.s, p0/z, z1.s, z1.s -; CHECK-NEXT: sel z0.d, p5, z26.d, z4.d -; CHECK-NEXT: sel z1.d, p6, z26.d, z6.d -; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: sel z2.d, p7, z26.d, z7.d -; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded 
Reload -; CHECK-NEXT: sel z3.d, p1, z26.d, z5.d +; CHECK-NEXT: sel z1.d, p2, z4.d, z6.d +; CHECK-NEXT: sel z2.d, p3, z4.d, z7.d +; CHECK-NEXT: sel z3.d, p5, z4.d, z24.d ; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: mov z0.d, p2/m, #0 // =0x0 -; CHECK-NEXT: mov z1.d, p3/m, #0 // =0x0 -; CHECK-NEXT: mov z2.d, p4/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, p4/m, #0 // =0x0 ; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z1.d, p6/m, #0 // =0x0 +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z2.d, p1/m, #0 // =0x0 ; CHECK-NEXT: mov z3.d, p0/m, #0 // =0x0 ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -783,7 +674,7 @@ define @lrint_v16f32( %x) { ; CHECK-LABEL: lrint_v16f32: ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-2 +; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: str p10, [sp, #1, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p9, [sp, #2, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Folded Spill @@ -791,119 +682,106 @@ define @lrint_v16f32( %x) { ; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG +; CHECK-NEXT: str z10, [sp, #1, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z9, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z8, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 
0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 16 - 24 * VG ; CHECK-NEXT: uunpklo z4.d, z0.s ; CHECK-NEXT: uunpkhi z0.d, z0.s ; CHECK-NEXT: mov w8, #-553648128 // =0xdf000000 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: uunpklo z7.d, z1.s -; CHECK-NEXT: uunpkhi z1.d, z1.s -; CHECK-NEXT: uunpklo z24.d, z2.s +; CHECK-NEXT: uunpklo z6.d, z2.s ; CHECK-NEXT: uunpkhi z2.d, z2.s -; CHECK-NEXT: uunpklo z25.d, z3.s -; CHECK-NEXT: uunpkhi z3.d, z3.s -; CHECK-NEXT: mov z26.d, #0x7fffffffffffffff -; CHECK-NEXT: movprfx z5, z4 -; CHECK-NEXT: frintx z5.s, p0/m, z4.s -; CHECK-NEXT: movprfx z6, z0 -; CHECK-NEXT: frintx z6.s, p0/m, z0.s -; CHECK-NEXT: mov z4.s, w8 -; CHECK-NEXT: frintx z7.s, p0/m, z7.s -; CHECK-NEXT: movprfx z28, z1 -; CHECK-NEXT: frintx z28.s, p0/m, z1.s +; CHECK-NEXT: uunpklo z5.d, z1.s +; CHECK-NEXT: uunpkhi z1.d, z1.s +; CHECK-NEXT: uunpklo z7.d, z3.s +; CHECK-NEXT: mov z24.s, w8 ; CHECK-NEXT: mov w8, #1593835519 // =0x5effffff -; CHECK-NEXT: mov z0.d, #0x8000000000000000 -; CHECK-NEXT: frintx z24.s, p0/m, z24.s -; CHECK-NEXT: movprfx z29, z2 -; CHECK-NEXT: frintx z29.s, p0/m, z2.s -; CHECK-NEXT: frintx z25.s, p0/m, z25.s -; CHECK-NEXT: movprfx z30, z3 -; CHECK-NEXT: frintx z30.s, p0/m, z3.s -; CHECK-NEXT: mov z27.s, w8 -; CHECK-NEXT: fcmge p1.s, p0/z, z5.s, z4.s -; CHECK-NEXT: fcmge p2.s, p0/z, z6.s, z4.s -; CHECK-NEXT: movprfx z1, z5 -; CHECK-NEXT: fcvtzs z1.d, p0/m, z5.s -; CHECK-NEXT: movprfx z2, z6 -; CHECK-NEXT: fcvtzs z2.d, p0/m, z6.s -; CHECK-NEXT: fcmge p5.s, p0/z, z7.s, z4.s -; CHECK-NEXT: fcmge p6.s, p0/z, z28.s, z4.s -; CHECK-NEXT: movprfx z3, z7 -; CHECK-NEXT: fcvtzs z3.d, p0/m, z7.s -; CHECK-NEXT: fcmge p8.s, p0/z, z29.s, z4.s -; CHECK-NEXT: fcmgt p3.s, p0/z, z5.s, z27.s -; 
CHECK-NEXT: fcmgt p7.s, p0/z, z6.s, z27.s -; CHECK-NEXT: fcmge p9.s, p0/z, z25.s, z4.s -; CHECK-NEXT: movprfx z31, z25 -; CHECK-NEXT: fcvtzs z31.d, p0/m, z25.s -; CHECK-NEXT: not p4.b, p0/z, p1.b -; CHECK-NEXT: fcmuo p1.s, p0/z, z5.s, z5.s -; CHECK-NEXT: movprfx z5, z28 -; CHECK-NEXT: fcvtzs z5.d, p0/m, z28.s -; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: fcmge p10.s, p0/z, z30.s, z4.s -; CHECK-NEXT: movprfx z8, z30 -; CHECK-NEXT: fcvtzs z8.d, p0/m, z30.s -; CHECK-NEXT: mov z1.d, p4/m, z0.d -; CHECK-NEXT: fcmge p4.s, p0/z, z24.s, z4.s -; CHECK-NEXT: movprfx z4, z29 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z29.s -; CHECK-NEXT: mov z2.d, p2/m, z0.d -; CHECK-NEXT: fcmuo p2.s, p0/z, z6.s, z6.s -; CHECK-NEXT: movprfx z6, z24 -; CHECK-NEXT: fcvtzs z6.d, p0/m, z24.s -; CHECK-NEXT: not p5.b, p0/z, p5.b -; CHECK-NEXT: not p6.b, p0/z, p6.b -; CHECK-NEXT: not p4.b, p0/z, p4.b -; CHECK-NEXT: mov z3.d, p5/m, z0.d -; CHECK-NEXT: not p5.b, p0/z, p8.b -; CHECK-NEXT: mov z5.d, p6/m, z0.d -; CHECK-NEXT: fcmgt p8.s, p0/z, z7.s, z27.s -; CHECK-NEXT: not p6.b, p0/z, p9.b -; CHECK-NEXT: mov z6.d, p4/m, z0.d -; CHECK-NEXT: fcmuo p9.s, p0/z, z7.s, z7.s -; CHECK-NEXT: not p4.b, p0/z, p10.b -; CHECK-NEXT: fcmgt p10.s, p0/z, z28.s, z27.s -; CHECK-NEXT: sel z7.d, p5, z0.d, z4.d -; CHECK-NEXT: fcmgt p5.s, p0/z, z24.s, z27.s -; CHECK-NEXT: mov z31.d, p6/m, z0.d -; CHECK-NEXT: fcmgt p6.s, p0/z, z30.s, z27.s -; CHECK-NEXT: mov z8.d, p4/m, z0.d -; CHECK-NEXT: sel z0.d, p3, z26.d, z1.d -; CHECK-NEXT: fcmgt p3.s, p0/z, z29.s, z27.s -; CHECK-NEXT: fcmgt p4.s, p0/z, z25.s, z27.s -; CHECK-NEXT: sel z1.d, p7, z26.d, z2.d -; CHECK-NEXT: fcmuo p7.s, p0/z, z28.s, z28.s -; CHECK-NEXT: sel z2.d, p8, z26.d, z3.d -; CHECK-NEXT: sel z3.d, p10, z26.d, z5.d -; CHECK-NEXT: fcmuo p8.s, p0/z, z29.s, z29.s -; CHECK-NEXT: sel z4.d, p5, z26.d, z6.d -; CHECK-NEXT: fcmuo p5.s, p0/z, z24.s, z24.s -; CHECK-NEXT: fcmuo p10.s, p0/z, z25.s, z25.s -; CHECK-NEXT: sel z5.d, p3, z26.d, z7.d -; CHECK-NEXT: fcmuo p0.s, p0/z, z30.s, 
z30.s -; CHECK-NEXT: sel z7.d, p6, z26.d, z8.d -; CHECK-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: mov z26.d, #0x8000000000000000 +; CHECK-NEXT: frintx z4.s, p0/m, z4.s +; CHECK-NEXT: frintx z0.s, p0/m, z0.s +; CHECK-NEXT: mov z30.s, w8 +; CHECK-NEXT: movprfx z27, z2 +; CHECK-NEXT: frintx z27.s, p0/m, z2.s +; CHECK-NEXT: uunpkhi z2.d, z3.s +; CHECK-NEXT: frintx z6.s, p0/m, z6.s +; CHECK-NEXT: movprfx z25, z1 +; CHECK-NEXT: frintx z25.s, p0/m, z1.s +; CHECK-NEXT: frintx z5.s, p0/m, z5.s +; CHECK-NEXT: frintx z7.s, p0/m, z7.s +; CHECK-NEXT: mov z1.d, #0x8000000000000000 +; CHECK-NEXT: mov z3.d, #0x8000000000000000 +; CHECK-NEXT: mov z28.d, #0x8000000000000000 +; CHECK-NEXT: fcmge p1.s, p0/z, z4.s, z24.s +; CHECK-NEXT: fcmge p2.s, p0/z, z0.s, z24.s +; CHECK-NEXT: mov z29.d, #0x8000000000000000 +; CHECK-NEXT: movprfx z9, z2 +; CHECK-NEXT: frintx z9.s, p0/m, z2.s +; CHECK-NEXT: fcmge p5.s, p0/z, z6.s, z24.s +; CHECK-NEXT: mov z8.d, #0x8000000000000000 +; CHECK-NEXT: fcmge p3.s, p0/z, z5.s, z24.s +; CHECK-NEXT: fcmge p4.s, p0/z, z25.s, z24.s +; CHECK-NEXT: fcmge p7.s, p0/z, z7.s, z24.s +; CHECK-NEXT: fcmge p6.s, p0/z, z27.s, z24.s +; CHECK-NEXT: mov z31.d, #0x8000000000000000 +; CHECK-NEXT: mov z10.d, #0x7fffffffffffffff +; CHECK-NEXT: fcvtzs z1.d, p1/m, z4.s +; CHECK-NEXT: fcmgt p1.s, p0/z, z4.s, z30.s +; CHECK-NEXT: fcvtzs z26.d, p2/m, z0.s +; CHECK-NEXT: fcmge p2.s, p0/z, z9.s, z24.s +; CHECK-NEXT: mov z24.d, #0x8000000000000000 +; CHECK-NEXT: fcvtzs z29.d, p5/m, z6.s +; CHECK-NEXT: fcvtzs z3.d, p3/m, z5.s +; CHECK-NEXT: fcvtzs z28.d, p4/m, z25.s +; CHECK-NEXT: fcvtzs z8.d, p7/m, z7.s +; CHECK-NEXT: fcmgt p4.s, p0/z, z0.s, z30.s +; CHECK-NEXT: fcmgt p5.s, p0/z, z5.s, z30.s +; CHECK-NEXT: fcmgt p7.s, p0/z, z25.s, z30.s +; CHECK-NEXT: fcmgt p8.s, p0/z, z6.s, z30.s +; CHECK-NEXT: fcvtzs z31.d, p6/m, z27.s +; CHECK-NEXT: fcmuo p6.s, p0/z, z0.s, z0.s +; CHECK-NEXT: sel z0.d, p1, z10.d, z1.d +; CHECK-NEXT: fcmgt p1.s, p0/z, z27.s, z30.s +; 
CHECK-NEXT: fcmgt p10.s, p0/z, z7.s, z30.s +; CHECK-NEXT: fcvtzs z24.d, p2/m, z9.s +; CHECK-NEXT: fcmgt p2.s, p0/z, z9.s, z30.s +; CHECK-NEXT: fcmuo p3.s, p0/z, z4.s, z4.s +; CHECK-NEXT: fcmuo p9.s, p0/z, z5.s, z5.s +; CHECK-NEXT: sel z1.d, p4, z10.d, z26.d +; CHECK-NEXT: fcmuo p4.s, p0/z, z25.s, z25.s +; CHECK-NEXT: sel z2.d, p5, z10.d, z3.d +; CHECK-NEXT: sel z3.d, p7, z10.d, z28.d +; CHECK-NEXT: sel z4.d, p8, z10.d, z29.d +; CHECK-NEXT: fcmuo p5.s, p0/z, z6.s, z6.s +; CHECK-NEXT: fcmuo p7.s, p0/z, z27.s, z27.s +; CHECK-NEXT: fcmuo p8.s, p0/z, z7.s, z7.s +; CHECK-NEXT: sel z5.d, p1, z10.d, z31.d +; CHECK-NEXT: sel z6.d, p10, z10.d, z8.d +; CHECK-NEXT: ldr z8, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #1, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: fcmuo p0.s, p0/z, z9.s, z9.s +; CHECK-NEXT: ldr z9, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: sel z7.d, p2, z10.d, z24.d +; CHECK-NEXT: ldr z10, [sp, #1, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: mov z1.d, p6/m, #0 // =0x0 ; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: sel z6.d, p4, z26.d, z31.d -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: mov z2.d, p9/m, #0 // =0x0 -; CHECK-NEXT: mov z3.d, p7/m, #0 // =0x0 +; CHECK-NEXT: mov z3.d, p4/m, #0 // =0x0 ; CHECK-NEXT: ldr p9, [sp, #2, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: mov z4.d, p5/m, #0 // =0x0 -; CHECK-NEXT: mov z5.d, p8/m, #0 // =0x0 +; CHECK-NEXT: mov z5.d, p7/m, #0 // =0x0 +; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z6.d, p8/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, p3/m, #0 // =0x0 ; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: mov z6.d, p10/m, #0 // =0x0 -; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 -; CHECK-NEXT: ldr p10, [sp, #1, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: mov z1.d, p2/m, #0 // =0x0 ; CHECK-NEXT: mov z7.d, p0/m, #0 // =0x0 -; CHECK-NEXT: ldr p7, [sp, #4, mul 
vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: addvl sp, sp, #2 +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #4 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %a = call @llvm.lrint.nxv16iXLen.nxv16f32( %x) @@ -916,6 +794,8 @@ define @lrint_v32f32( %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-17 +; CHECK-NEXT: str p11, [sp] // 2-byte Folded Spill +; CHECK-NEXT: str p10, [sp, #1, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p9, [sp, #2, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill @@ -938,8 +818,8 @@ define @lrint_v32f32( %x) { ; CHECK-NEXT: str z10, [sp, #14, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z9, [sp, #15, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z8, [sp, #16, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-3 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xa0, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 160 * VG +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 144 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG ; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG @@ -950,224 +830,185 @@ define @lrint_v32f32( %x) { ; CHECK-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 16 - 56 * VG ; CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 16 - 64 * VG ; 
CHECK-NEXT: uunpklo z24.d, z0.s -; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uunpkhi z25.d, z0.s ; CHECK-NEXT: mov w9, #-553648128 // =0xdf000000 ; CHECK-NEXT: uunpklo z26.d, z1.s -; CHECK-NEXT: uunpkhi z25.d, z0.s -; CHECK-NEXT: uunpkhi z28.d, z1.s -; CHECK-NEXT: mov z29.s, w9 +; CHECK-NEXT: uunpkhi z1.d, z1.s +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uunpklo z27.d, z2.s +; CHECK-NEXT: uunpkhi z9.d, z2.s +; CHECK-NEXT: uunpklo z11.d, z3.s +; CHECK-NEXT: uunpkhi z12.d, z3.s +; CHECK-NEXT: mov z10.s, w9 ; CHECK-NEXT: mov w9, #1593835519 // =0x5effffff -; CHECK-NEXT: mov z17.d, z5.d -; CHECK-NEXT: mov z27.d, #0x8000000000000000 -; CHECK-NEXT: uunpkhi z30.d, z2.s -; CHECK-NEXT: uunpklo z8.d, z3.s ; CHECK-NEXT: movprfx z0, z24 ; CHECK-NEXT: frintx z0.s, p0/m, z24.s -; CHECK-NEXT: uunpkhi z9.d, z3.s +; CHECK-NEXT: movprfx z24, z25 +; CHECK-NEXT: frintx z24.s, p0/m, z25.s +; CHECK-NEXT: uunpklo z13.d, z4.s +; CHECK-NEXT: movprfx z25, z26 +; CHECK-NEXT: frintx z25.s, p0/m, z26.s +; CHECK-NEXT: movprfx z26, z1 +; CHECK-NEXT: frintx z26.s, p0/m, z1.s ; CHECK-NEXT: uunpkhi z14.d, z4.s -; CHECK-NEXT: movprfx z24, z26 -; CHECK-NEXT: frintx z24.s, p0/m, z26.s -; CHECK-NEXT: movprfx z1, z25 -; CHECK-NEXT: frintx z1.s, p0/m, z25.s -; CHECK-NEXT: movprfx z5, z28 -; CHECK-NEXT: frintx z5.s, p0/m, z28.s -; CHECK-NEXT: uunpklo z26.d, z2.s -; CHECK-NEXT: uunpklo z16.d, z17.s -; CHECK-NEXT: mov z25.s, w9 -; CHECK-NEXT: movprfx z28, z30 -; CHECK-NEXT: frintx z28.s, p0/m, z30.s -; CHECK-NEXT: movprfx z30, z8 -; CHECK-NEXT: frintx z30.s, p0/m, z8.s -; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, z29.s -; CHECK-NEXT: movprfx z31, z0 -; CHECK-NEXT: fcvtzs z31.d, p0/m, z0.s -; CHECK-NEXT: str z0, [sp, #2, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: fcmge p2.s, p0/z, z1.s, z29.s -; CHECK-NEXT: fcmge p3.s, p0/z, z24.s, z29.s -; CHECK-NEXT: fcmge p5.s, p0/z, z5.s, z29.s -; CHECK-NEXT: frintx z26.s, p0/m, z26.s -; CHECK-NEXT: movprfx z10, z1 -; CHECK-NEXT: fcvtzs z10.d, p0/m, z1.s -; CHECK-NEXT: movprfx 
z11, z24 -; CHECK-NEXT: fcvtzs z11.d, p0/m, z24.s -; CHECK-NEXT: movprfx z12, z5 -; CHECK-NEXT: fcvtzs z12.d, p0/m, z5.s -; CHECK-NEXT: movprfx z15, z28 -; CHECK-NEXT: fcvtzs z15.d, p0/m, z28.s -; CHECK-NEXT: str z1, [sp, #1, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: not p4.b, p0/z, p1.b -; CHECK-NEXT: fcmgt p1.s, p0/z, z1.s, z25.s -; CHECK-NEXT: fcmgt p9.s, p0/z, z5.s, z25.s -; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: sel z0.d, p4, z27.d, z31.d -; CHECK-NEXT: fcmge p4.s, p0/z, z26.s, z29.s -; CHECK-NEXT: not p3.b, p0/z, p3.b -; CHECK-NEXT: not p5.b, p0/z, p5.b -; CHECK-NEXT: movprfx z13, z26 -; CHECK-NEXT: fcvtzs z13.d, p0/m, z26.s -; CHECK-NEXT: sel z31.d, p2, z27.d, z10.d -; CHECK-NEXT: uunpklo z10.d, z4.s -; CHECK-NEXT: sel z8.d, p3, z27.d, z11.d -; CHECK-NEXT: fcmge p3.s, p0/z, z28.s, z29.s -; CHECK-NEXT: sel z11.d, p5, z27.d, z12.d -; CHECK-NEXT: movprfx z4, z9 -; CHECK-NEXT: frintx z4.s, p0/m, z9.s -; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: not p5.b, p0/z, p4.b -; CHECK-NEXT: fcmge p4.s, p0/z, z30.s, z29.s -; CHECK-NEXT: fcmgt p2.s, p0/z, z24.s, z25.s -; CHECK-NEXT: sel z12.d, p5, z27.d, z13.d -; CHECK-NEXT: uunpkhi z13.d, z17.s -; CHECK-NEXT: movprfx z9, z10 -; CHECK-NEXT: frintx z9.s, p0/m, z10.s -; CHECK-NEXT: movprfx z10, z14 -; CHECK-NEXT: frintx z10.s, p0/m, z14.s -; CHECK-NEXT: uunpkhi z17.d, z6.s -; CHECK-NEXT: not p3.b, p0/z, p3.b -; CHECK-NEXT: uunpklo z14.d, z6.s -; CHECK-NEXT: movprfx z6, z16 -; CHECK-NEXT: frintx z6.s, p0/m, z16.s -; CHECK-NEXT: uunpklo z16.d, z7.s +; CHECK-NEXT: movprfx z2, z27 +; CHECK-NEXT: frintx z2.s, p0/m, z27.s +; CHECK-NEXT: mov z31.d, #0x8000000000000000 +; CHECK-NEXT: movprfx z27, z9 +; CHECK-NEXT: frintx z27.s, p0/m, z9.s +; CHECK-NEXT: movprfx z9, z11 +; CHECK-NEXT: frintx z9.s, p0/m, z11.s +; CHECK-NEXT: movprfx z11, z12 +; CHECK-NEXT: frintx z11.s, p0/m, z12.s +; CHECK-NEXT: uunpklo z15.d, z7.s ; CHECK-NEXT: uunpkhi z7.d, z7.s -; CHECK-NEXT: sel z3.d, p3, z27.d, z15.d -; 
CHECK-NEXT: fcmge p3.s, p0/z, z4.s, z29.s -; CHECK-NEXT: frintx z13.s, p0/m, z13.s -; CHECK-NEXT: movprfx z15, z30 -; CHECK-NEXT: fcvtzs z15.d, p0/m, z30.s -; CHECK-NEXT: fcmge p5.s, p0/z, z9.s, z29.s -; CHECK-NEXT: fcmge p6.s, p0/z, z10.s, z29.s -; CHECK-NEXT: frintx z17.s, p0/m, z17.s -; CHECK-NEXT: movprfx z18, z4 -; CHECK-NEXT: fcvtzs z18.d, p0/m, z4.s -; CHECK-NEXT: movprfx z20, z10 -; CHECK-NEXT: fcvtzs z20.d, p0/m, z10.s -; CHECK-NEXT: frintx z16.s, p0/m, z16.s -; CHECK-NEXT: not p4.b, p0/z, p4.b -; CHECK-NEXT: movprfx z19, z14 -; CHECK-NEXT: frintx z19.s, p0/m, z14.s -; CHECK-NEXT: movprfx z14, z9 -; CHECK-NEXT: fcvtzs z14.d, p0/m, z9.s -; CHECK-NEXT: fcmge p7.s, p0/z, z6.s, z29.s -; CHECK-NEXT: fcmge p8.s, p0/z, z13.s, z29.s +; CHECK-NEXT: mov z1.d, #0x8000000000000000 +; CHECK-NEXT: mov z29.d, #0x8000000000000000 +; CHECK-NEXT: fcmge p3.s, p0/z, z26.s, z10.s +; CHECK-NEXT: mov z30.d, #0x8000000000000000 +; CHECK-NEXT: fcmge p5.s, p0/z, z0.s, z10.s +; CHECK-NEXT: mov z8.d, #0x8000000000000000 +; CHECK-NEXT: fcmge p1.s, p0/z, z24.s, z10.s +; CHECK-NEXT: movprfx z12, z13 +; CHECK-NEXT: frintx z12.s, p0/m, z13.s +; CHECK-NEXT: fcmge p2.s, p0/z, z25.s, z10.s +; CHECK-NEXT: fcmge p4.s, p0/z, z2.s, z10.s +; CHECK-NEXT: movprfx z13, z14 +; CHECK-NEXT: frintx z13.s, p0/m, z14.s +; CHECK-NEXT: uunpklo z17.d, z5.s +; CHECK-NEXT: uunpkhi z18.d, z5.s ; CHECK-NEXT: movprfx z21, z7 ; CHECK-NEXT: frintx z21.s, p0/m, z7.s -; CHECK-NEXT: not p3.b, p0/z, p3.b -; CHECK-NEXT: not p6.b, p0/z, p6.b -; CHECK-NEXT: mov z15.d, p4/m, z27.d -; CHECK-NEXT: fcmge p4.s, p0/z, z17.s, z29.s -; CHECK-NEXT: not p5.b, p0/z, p5.b -; CHECK-NEXT: sel z7.d, p3, z27.d, z18.d -; CHECK-NEXT: movprfx z0, z17 -; CHECK-NEXT: fcvtzs z0.d, p0/m, z17.s -; CHECK-NEXT: sel z18.d, p6, z27.d, z20.d -; CHECK-NEXT: movprfx z20, z6 -; CHECK-NEXT: fcvtzs z20.d, p0/m, z6.s -; CHECK-NEXT: fcmge p6.s, p0/z, z16.s, z29.s -; CHECK-NEXT: fcmge p3.s, p0/z, z19.s, z29.s -; CHECK-NEXT: mov z14.d, p5/m, z27.d -; 
CHECK-NEXT: not p5.b, p0/z, p7.b -; CHECK-NEXT: not p7.b, p0/z, p8.b -; CHECK-NEXT: fcmge p8.s, p0/z, z21.s, z29.s -; CHECK-NEXT: movprfx z1, z16 -; CHECK-NEXT: fcvtzs z1.d, p0/m, z16.s -; CHECK-NEXT: movprfx z22, z13 -; CHECK-NEXT: fcvtzs z22.d, p0/m, z13.s -; CHECK-NEXT: movprfx z23, z19 -; CHECK-NEXT: fcvtzs z23.d, p0/m, z19.s -; CHECK-NEXT: not p4.b, p0/z, p4.b -; CHECK-NEXT: movprfx z2, z21 -; CHECK-NEXT: fcvtzs z2.d, p0/m, z21.s -; CHECK-NEXT: mov z29.d, #0x7fffffffffffffff -; CHECK-NEXT: mov z20.d, p5/m, z27.d -; CHECK-NEXT: not p5.b, p0/z, p6.b -; CHECK-NEXT: mov z0.d, p4/m, z27.d -; CHECK-NEXT: fcmgt p4.s, p0/z, z16.s, z25.s -; CHECK-NEXT: not p3.b, p0/z, p3.b -; CHECK-NEXT: not p6.b, p0/z, p8.b -; CHECK-NEXT: mov z1.d, p5/m, z27.d -; CHECK-NEXT: mov z22.d, p7/m, z27.d -; CHECK-NEXT: mov z23.d, p3/m, z27.d -; CHECK-NEXT: fcmgt p3.s, p0/z, z21.s, z25.s -; CHECK-NEXT: fcmuo p5.s, p0/z, z16.s, z16.s -; CHECK-NEXT: mov z2.d, p6/m, z27.d -; CHECK-NEXT: sel z27.d, p1, z29.d, z31.d -; CHECK-NEXT: fcmgt p1.s, p0/z, z17.s, z25.s -; CHECK-NEXT: mov z1.d, p4/m, z29.d -; CHECK-NEXT: fcmgt p6.s, p0/z, z26.s, z25.s -; CHECK-NEXT: fcmgt p7.s, p0/z, z30.s, z25.s -; CHECK-NEXT: sel z31.d, p2, z29.d, z8.d -; CHECK-NEXT: fcmgt p2.s, p0/z, z13.s, z25.s -; CHECK-NEXT: fcmuo p8.s, p0/z, z21.s, z21.s -; CHECK-NEXT: mov z2.d, p3/m, z29.d -; CHECK-NEXT: fcmuo p4.s, p0/z, z17.s, z17.s -; CHECK-NEXT: fcmgt p3.s, p0/z, z19.s, z25.s -; CHECK-NEXT: mov z0.d, p1/m, z29.d -; CHECK-NEXT: fcmgt p1.s, p0/z, z6.s, z25.s -; CHECK-NEXT: mov z1.d, p5/m, #0 // =0x0 -; CHECK-NEXT: sel z8.d, p9, z29.d, z11.d -; CHECK-NEXT: sel z11.d, p6, z29.d, z12.d -; CHECK-NEXT: sel z12.d, p7, z29.d, z15.d -; CHECK-NEXT: fcmgt p5.s, p0/z, z10.s, z25.s -; CHECK-NEXT: sel z15.d, p2, z29.d, z22.d -; CHECK-NEXT: fcmuo p2.s, p0/z, z13.s, z13.s -; CHECK-NEXT: str z1, [x8, #14, mul vl] -; CHECK-NEXT: mov z2.d, p8/m, #0 // =0x0 +; CHECK-NEXT: uunpklo z19.d, z6.s +; CHECK-NEXT: uunpkhi z20.d, z6.s +; CHECK-NEXT: str z0, 
[sp] // 16-byte Folded Spill +; CHECK-NEXT: fcvtzs z31.d, p3/m, z26.s +; CHECK-NEXT: fcmge p3.s, p0/z, z11.s, z10.s +; CHECK-NEXT: mov z5.d, #0x8000000000000000 +; CHECK-NEXT: frintx z15.s, p0/m, z15.s +; CHECK-NEXT: fcvtzs z1.d, p5/m, z0.s +; CHECK-NEXT: fcvtzs z29.d, p1/m, z24.s +; CHECK-NEXT: fcvtzs z30.d, p2/m, z25.s +; CHECK-NEXT: fcvtzs z8.d, p4/m, z2.s +; CHECK-NEXT: fcmge p1.s, p0/z, z27.s, z10.s +; CHECK-NEXT: mov z4.d, #0x8000000000000000 +; CHECK-NEXT: fcmge p2.s, p0/z, z9.s, z10.s +; CHECK-NEXT: mov z16.d, #0x8000000000000000 +; CHECK-NEXT: fcmge p4.s, p0/z, z12.s, z10.s +; CHECK-NEXT: mov z6.d, #0x8000000000000000 +; CHECK-NEXT: fcmge p5.s, p0/z, z13.s, z10.s +; CHECK-NEXT: mov z14.d, #0x8000000000000000 +; CHECK-NEXT: frintx z17.s, p0/m, z17.s +; CHECK-NEXT: frintx z18.s, p0/m, z18.s +; CHECK-NEXT: frintx z19.s, p0/m, z19.s +; CHECK-NEXT: frintx z20.s, p0/m, z20.s +; CHECK-NEXT: mov z28.d, #0x8000000000000000 +; CHECK-NEXT: fcvtzs z5.d, p3/m, z11.s +; CHECK-NEXT: fcmge p3.s, p0/z, z21.s, z10.s +; CHECK-NEXT: mov z3.s, w9 +; CHECK-NEXT: fcmge p6.s, p0/z, z15.s, z10.s +; CHECK-NEXT: mov z22.d, #0x8000000000000000 +; CHECK-NEXT: fcvtzs z4.d, p1/m, z27.s +; CHECK-NEXT: fcvtzs z16.d, p2/m, z9.s +; CHECK-NEXT: fcvtzs z6.d, p4/m, z12.s +; CHECK-NEXT: fcvtzs z14.d, p5/m, z13.s +; CHECK-NEXT: fcmge p1.s, p0/z, z17.s, z10.s +; CHECK-NEXT: fcmge p2.s, p0/z, z18.s, z10.s +; CHECK-NEXT: mov z23.d, #0x8000000000000000 +; CHECK-NEXT: fcmge p4.s, p0/z, z19.s, z10.s +; CHECK-NEXT: fcmge p5.s, p0/z, z20.s, z10.s +; CHECK-NEXT: mov z10.d, #0x8000000000000000 +; CHECK-NEXT: mov z0.d, #0x8000000000000000 +; CHECK-NEXT: fcvtzs z28.d, p3/m, z21.s +; CHECK-NEXT: mov z7.d, #0x7fffffffffffffff +; CHECK-NEXT: fcmgt p11.s, p0/z, z21.s, z3.s +; CHECK-NEXT: fcvtzs z22.d, p6/m, z15.s +; CHECK-NEXT: fcmgt p3.s, p0/z, z15.s, z3.s +; CHECK-NEXT: fcmuo p6.s, p0/z, z15.s, z15.s +; CHECK-NEXT: mov z15.d, #0x8000000000000000 +; CHECK-NEXT: fcmgt p7.s, p0/z, z24.s, z3.s +; CHECK-NEXT: 
fcvtzs z23.d, p2/m, z18.s +; CHECK-NEXT: fcvtzs z10.d, p5/m, z20.s +; CHECK-NEXT: fcmgt p2.s, p0/z, z9.s, z3.s +; CHECK-NEXT: fcmgt p5.s, p0/z, z20.s, z3.s +; CHECK-NEXT: fcvtzs z0.d, p4/m, z19.s +; CHECK-NEXT: fcmuo p4.s, p0/z, z21.s, z21.s +; CHECK-NEXT: mov z28.d, p11/m, z7.d +; CHECK-NEXT: sel z21.d, p3, z7.d, z22.d +; CHECK-NEXT: fcmgt p3.s, p0/z, z19.s, z3.s +; CHECK-NEXT: fcvtzs z15.d, p1/m, z17.s +; CHECK-NEXT: fcmuo p1.s, p0/z, z20.s, z20.s +; CHECK-NEXT: mov z29.d, p7/m, z7.d +; CHECK-NEXT: fcmgt p7.s, p0/z, z18.s, z3.s +; CHECK-NEXT: mov z16.d, p2/m, z7.d +; CHECK-NEXT: fcmgt p2.s, p0/z, z17.s, z3.s +; CHECK-NEXT: mov z10.d, p5/m, z7.d +; CHECK-NEXT: mov z28.d, p4/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p4.s, p0/z, z19.s, z19.s +; CHECK-NEXT: mov z0.d, p3/m, z7.d +; CHECK-NEXT: fcmuo p3.s, p0/z, z18.s, z18.s +; CHECK-NEXT: fcmgt p5.s, p0/z, z13.s, z3.s +; CHECK-NEXT: mov z21.d, p6/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p10.s, p0/z, z2.s, z3.s +; CHECK-NEXT: fcmgt p8.s, p0/z, z25.s, z3.s +; CHECK-NEXT: str z28, [x8, #15, mul vl] +; CHECK-NEXT: mov z10.d, p1/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p1.s, p0/z, z17.s, z17.s +; CHECK-NEXT: sel z19.d, p7, z7.d, z23.d +; CHECK-NEXT: sel z28.d, p2, z7.d, z15.d +; CHECK-NEXT: fcmgt p2.s, p0/z, z12.s, z3.s +; CHECK-NEXT: str z21, [x8, #14, mul vl] ; CHECK-NEXT: mov z0.d, p4/m, #0 // =0x0 -; CHECK-NEXT: sel z1.d, p1, z29.d, z20.d -; CHECK-NEXT: fcmgt p1.s, p0/z, z9.s, z25.s -; CHECK-NEXT: fcmuo p6.s, p0/z, z19.s, z19.s -; CHECK-NEXT: sel z16.d, p3, z29.d, z23.d -; CHECK-NEXT: fcmuo p3.s, p0/z, z6.s, z6.s -; CHECK-NEXT: fcmgt p4.s, p0/z, z4.s, z25.s -; CHECK-NEXT: str z2, [x8, #15, mul vl] -; CHECK-NEXT: sel z2.d, p5, z29.d, z18.d -; CHECK-NEXT: fcmuo p5.s, p0/z, z10.s, z10.s -; CHECK-NEXT: str z0, [x8, #13, mul vl] -; CHECK-NEXT: mov z15.d, p2/m, #0 // =0x0 -; CHECK-NEXT: fcmuo p2.s, p0/z, z9.s, z9.s -; CHECK-NEXT: sel z0.d, p1, z29.d, z14.d -; CHECK-NEXT: mov z16.d, p6/m, #0 // =0x0 -; CHECK-NEXT: fcmuo p1.s, p0/z, z4.s, z4.s 
-; CHECK-NEXT: mov z1.d, p3/m, #0 // =0x0 -; CHECK-NEXT: fcmgt p3.s, p0/z, z28.s, z25.s -; CHECK-NEXT: sel z4.d, p4, z29.d, z7.d -; CHECK-NEXT: str z15, [x8, #11, mul vl] -; CHECK-NEXT: mov z2.d, p5/m, #0 // =0x0 -; CHECK-NEXT: fcmuo p5.s, p0/z, z28.s, z28.s -; CHECK-NEXT: str z16, [x8, #12, mul vl] -; CHECK-NEXT: mov z0.d, p2/m, #0 // =0x0 -; CHECK-NEXT: fcmuo p4.s, p0/z, z30.s, z30.s -; CHECK-NEXT: str z1, [x8, #10, mul vl] -; CHECK-NEXT: mov z4.d, p1/m, #0 // =0x0 -; CHECK-NEXT: fcmuo p1.s, p0/z, z5.s, z5.s -; CHECK-NEXT: sel z1.d, p3, z29.d, z3.d -; CHECK-NEXT: ldr z3, [sp, #2, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: str z2, [x8, #9, mul vl] +; CHECK-NEXT: mov z14.d, p5/m, z7.d +; CHECK-NEXT: str z10, [x8, #13, mul vl] +; CHECK-NEXT: fcmgt p5.s, p0/z, z11.s, z3.s +; CHECK-NEXT: fcmuo p4.s, p0/z, z13.s, z13.s +; CHECK-NEXT: mov z19.d, p3/m, #0 // =0x0 +; CHECK-NEXT: mov z28.d, p1/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p1.s, p0/z, z27.s, z3.s +; CHECK-NEXT: str z0, [x8, #12, mul vl] +; CHECK-NEXT: fcmuo p3.s, p0/z, z12.s, z12.s +; CHECK-NEXT: sel z0.d, p2, z7.d, z6.d +; CHECK-NEXT: fcmuo p2.s, p0/z, z11.s, z11.s +; CHECK-NEXT: fcmgt p9.s, p0/z, z26.s, z3.s +; CHECK-NEXT: mov z30.d, p8/m, z7.d +; CHECK-NEXT: str z19, [x8, #11, mul vl] +; CHECK-NEXT: mov z5.d, p5/m, z7.d +; CHECK-NEXT: fcmuo p5.s, p0/z, z9.s, z9.s +; CHECK-NEXT: str z28, [x8, #10, mul vl] +; CHECK-NEXT: mov z4.d, p1/m, z7.d +; CHECK-NEXT: fcmuo p1.s, p0/z, z2.s, z2.s +; CHECK-NEXT: ldr z2, [sp] // 16-byte Folded Reload +; CHECK-NEXT: mov z14.d, p4/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p4.s, p0/z, z27.s, z27.s +; CHECK-NEXT: mov z0.d, p3/m, #0 // =0x0 +; CHECK-NEXT: mov z5.d, p2/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p2.s, p0/z, z26.s, z26.s +; CHECK-NEXT: mov z16.d, p5/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p5.s, p0/z, z25.s, z25.s +; CHECK-NEXT: mov z31.d, p9/m, z7.d +; CHECK-NEXT: str z14, [x8, #9, mul vl] +; CHECK-NEXT: fcmgt p3.s, p0/z, z2.s, z3.s +; CHECK-NEXT: mov z8.d, p10/m, z7.d ; CHECK-NEXT: str 
z0, [x8, #8, mul vl] -; CHECK-NEXT: fcmuo p3.s, p0/z, z26.s, z26.s -; CHECK-NEXT: ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: str z4, [x8, #7, mul vl] -; CHECK-NEXT: mov z12.d, p4/m, #0 // =0x0 -; CHECK-NEXT: fcmgt p2.s, p0/z, z3.s, z25.s -; CHECK-NEXT: mov z1.d, p5/m, #0 // =0x0 +; CHECK-NEXT: mov z4.d, p4/m, #0 // =0x0 ; CHECK-NEXT: fcmuo p4.s, p0/z, z24.s, z24.s +; CHECK-NEXT: str z5, [x8, #7, mul vl] +; CHECK-NEXT: fcmuo p0.s, p0/z, z2.s, z2.s +; CHECK-NEXT: mov z31.d, p2/m, #0 // =0x0 +; CHECK-NEXT: str z16, [x8, #6, mul vl] ; CHECK-NEXT: mov z8.d, p1/m, #0 // =0x0 -; CHECK-NEXT: fcmuo p5.s, p0/z, z0.s, z0.s -; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: str z12, [x8, #6, mul vl] -; CHECK-NEXT: str z1, [x8, #5, mul vl] -; CHECK-NEXT: fcmuo p0.s, p0/z, z3.s, z3.s -; CHECK-NEXT: mov z11.d, p3/m, #0 // =0x0 -; CHECK-NEXT: str z8, [x8, #3, mul vl] -; CHECK-NEXT: mov z31.d, p4/m, #0 // =0x0 -; CHECK-NEXT: mov z0.d, p2/m, z29.d -; CHECK-NEXT: str z11, [x8, #4, mul vl] -; CHECK-NEXT: mov z27.d, p5/m, #0 // =0x0 -; CHECK-NEXT: str z31, [x8, #2, mul vl] +; CHECK-NEXT: mov z30.d, p5/m, #0 // =0x0 +; CHECK-NEXT: str z4, [x8, #5, mul vl] +; CHECK-NEXT: sel z0.d, p3, z7.d, z1.d +; CHECK-NEXT: str z31, [x8, #3, mul vl] +; CHECK-NEXT: mov z29.d, p4/m, #0 // =0x0 +; CHECK-NEXT: str z8, [x8, #4, mul vl] +; CHECK-NEXT: str z30, [x8, #2, mul vl] ; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 -; CHECK-NEXT: str z27, [x8, #1, mul vl] +; CHECK-NEXT: str z29, [x8, #1, mul vl] ; CHECK-NEXT: str z0, [x8] -; CHECK-NEXT: addvl sp, sp, #3 +; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -1184,6 +1025,8 @@ define @lrint_v32f32( %x) { ; CHECK-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: 
ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p11, [sp] // 2-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #1, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p9, [sp, #2, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload @@ -1203,20 +1046,17 @@ define @lrint_v1f64( %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov x8, #-4332462841530417152 // =0xc3e0000000000000 -; CHECK-NEXT: mov z2.d, #0x8000000000000000 ; CHECK-NEXT: mov z1.d, x8 ; CHECK-NEXT: mov x8, #4890909195324358655 // =0x43dfffffffffffff ; CHECK-NEXT: frintx z0.d, p0/m, z0.d -; CHECK-NEXT: mov z3.d, x8 +; CHECK-NEXT: mov z2.d, x8 ; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, z1.d -; CHECK-NEXT: movprfx z1, z0 -; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.d -; CHECK-NEXT: fcmgt p2.d, p0/z, z0.d, z3.d -; CHECK-NEXT: mov z3.d, #0x7fffffffffffffff -; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z1.d, #0x8000000000000000 +; CHECK-NEXT: fcvtzs z1.d, p1/m, z0.d +; CHECK-NEXT: fcmgt p1.d, p0/z, z0.d, z2.d +; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff ; CHECK-NEXT: fcmuo p0.d, p0/z, z0.d, z0.d -; CHECK-NEXT: mov z1.d, p1/m, z2.d -; CHECK-NEXT: sel z0.d, p2, z3.d, z1.d +; CHECK-NEXT: sel z0.d, p1, z2.d, z1.d ; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %a = call @llvm.lrint.nxv1iXLen.nxv1f64( %x) @@ -1229,20 +1069,17 @@ define @lrint_v2f64( %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov x8, #-4332462841530417152 // =0xc3e0000000000000 -; CHECK-NEXT: mov z2.d, #0x8000000000000000 ; CHECK-NEXT: mov z1.d, x8 ; CHECK-NEXT: mov x8, #4890909195324358655 // =0x43dfffffffffffff ; CHECK-NEXT: frintx z0.d, p0/m, z0.d -; CHECK-NEXT: mov z3.d, x8 +; CHECK-NEXT: mov z2.d, x8 ; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, z1.d -; CHECK-NEXT: movprfx z1, z0 -; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.d -; CHECK-NEXT: fcmgt p2.d, p0/z, z0.d, z3.d -; CHECK-NEXT: mov z3.d, 
#0x7fffffffffffffff -; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov z1.d, #0x8000000000000000 +; CHECK-NEXT: fcvtzs z1.d, p1/m, z0.d +; CHECK-NEXT: fcmgt p1.d, p0/z, z0.d, z2.d +; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff ; CHECK-NEXT: fcmuo p0.d, p0/z, z0.d, z0.d -; CHECK-NEXT: mov z1.d, p1/m, z2.d -; CHECK-NEXT: sel z0.d, p2, z3.d, z1.d +; CHECK-NEXT: sel z0.d, p1, z2.d, z1.d ; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %a = call @llvm.lrint.nxv2iXLen.nxv2f64( %x) @@ -1253,41 +1090,28 @@ declare @llvm.lrint.nxv2iXLen.nxv2f64( define @lrint_v4f64( %x) { ; CHECK-LABEL: lrint_v4f64: ; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG -; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov x8, #-4332462841530417152 // =0xc3e0000000000000 -; CHECK-NEXT: mov z6.d, #0x7fffffffffffffff +; CHECK-NEXT: mov z3.d, #0x8000000000000000 ; CHECK-NEXT: mov z2.d, x8 ; CHECK-NEXT: mov x8, #4890909195324358655 // =0x43dfffffffffffff +; CHECK-NEXT: mov z4.d, #0x8000000000000000 ; CHECK-NEXT: frintx z0.d, p0/m, z0.d ; CHECK-NEXT: frintx z1.d, p0/m, z1.d -; CHECK-NEXT: mov z3.d, x8 +; CHECK-NEXT: mov z5.d, x8 ; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, z2.d ; CHECK-NEXT: fcmge p2.d, p0/z, z1.d, z2.d -; CHECK-NEXT: mov z2.d, #0x8000000000000000 -; CHECK-NEXT: movprfx z4, z0 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z0.d -; CHECK-NEXT: movprfx z5, z1 -; CHECK-NEXT: fcvtzs z5.d, p0/m, z1.d -; CHECK-NEXT: fcmgt p3.d, p0/z, z0.d, z3.d -; CHECK-NEXT: fcmgt p4.d, p0/z, z1.d, z3.d -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: sel z3.d, p1, z2.d, z4.d -; CHECK-NEXT: fcmuo p1.d, p0/z, z0.d, z0.d +; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff +; CHECK-NEXT: fcmuo p3.d, p0/z, z0.d, 
z0.d +; CHECK-NEXT: fcvtzs z3.d, p1/m, z0.d +; CHECK-NEXT: fcmgt p1.d, p0/z, z0.d, z5.d +; CHECK-NEXT: fcvtzs z4.d, p2/m, z1.d +; CHECK-NEXT: fcmgt p2.d, p0/z, z1.d, z5.d ; CHECK-NEXT: fcmuo p0.d, p0/z, z1.d, z1.d -; CHECK-NEXT: sel z2.d, p2, z2.d, z5.d -; CHECK-NEXT: sel z0.d, p3, z6.d, z3.d -; CHECK-NEXT: sel z1.d, p4, z6.d, z2.d -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 +; CHECK-NEXT: sel z0.d, p1, z2.d, z3.d +; CHECK-NEXT: sel z1.d, p2, z2.d, z4.d +; CHECK-NEXT: mov z0.d, p3/m, #0 // =0x0 ; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0 -; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %a = call @llvm.lrint.nxv4iXLen.nxv4f64( %x) ret %a @@ -1299,7 +1123,6 @@ define @lrint_v8f64( %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill @@ -1309,52 +1132,42 @@ define @lrint_v8f64( %x) { ; CHECK-NEXT: mov x8, #-4332462841530417152 // =0xc3e0000000000000 ; CHECK-NEXT: mov z5.d, #0x8000000000000000 ; CHECK-NEXT: mov z4.d, x8 +; CHECK-NEXT: mov z6.d, #0x8000000000000000 ; CHECK-NEXT: mov x8, #4890909195324358655 // =0x43dfffffffffffff -; CHECK-NEXT: mov z26.d, #0x7fffffffffffffff ; CHECK-NEXT: frintx z0.d, p0/m, z0.d ; CHECK-NEXT: frintx z1.d, p0/m, z1.d ; CHECK-NEXT: frintx z2.d, p0/m, z2.d ; CHECK-NEXT: frintx z3.d, p0/m, z3.d -; CHECK-NEXT: mov z6.d, x8 +; CHECK-NEXT: mov z25.d, x8 +; CHECK-NEXT: mov z7.d, #0x8000000000000000 +; CHECK-NEXT: mov z24.d, #0x8000000000000000 ; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, z4.d ; CHECK-NEXT: fcmge p2.d, p0/z, z1.d, z4.d ; CHECK-NEXT: fcmge p3.d, p0/z, z2.d, z4.d ; CHECK-NEXT: fcmge p4.d, p0/z, z3.d, z4.d -; 
CHECK-NEXT: movprfx z4, z0 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z0.d -; CHECK-NEXT: movprfx z7, z1 -; CHECK-NEXT: fcvtzs z7.d, p0/m, z1.d -; CHECK-NEXT: movprfx z24, z2 -; CHECK-NEXT: fcvtzs z24.d, p0/m, z2.d -; CHECK-NEXT: movprfx z25, z3 -; CHECK-NEXT: fcvtzs z25.d, p0/m, z3.d -; CHECK-NEXT: fcmgt p7.d, p0/z, z2.d, z6.d -; CHECK-NEXT: fcmgt p5.d, p0/z, z0.d, z6.d -; CHECK-NEXT: fcmgt p6.d, p0/z, z1.d, z6.d -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: not p3.b, p0/z, p3.b -; CHECK-NEXT: mov z4.d, p1/m, z5.d -; CHECK-NEXT: fcmgt p1.d, p0/z, z3.d, z6.d -; CHECK-NEXT: not p4.b, p0/z, p4.b -; CHECK-NEXT: sel z6.d, p2, z5.d, z7.d -; CHECK-NEXT: fcmuo p2.d, p0/z, z0.d, z0.d -; CHECK-NEXT: sel z7.d, p3, z5.d, z24.d -; CHECK-NEXT: fcmuo p3.d, p0/z, z1.d, z1.d -; CHECK-NEXT: sel z5.d, p4, z5.d, z25.d -; CHECK-NEXT: fcmuo p4.d, p0/z, z2.d, z2.d +; CHECK-NEXT: mov z4.d, #0x7fffffffffffffff +; CHECK-NEXT: fcmgt p5.d, p0/z, z3.d, z25.d +; CHECK-NEXT: fcmuo p6.d, p0/z, z1.d, z1.d +; CHECK-NEXT: fcvtzs z5.d, p1/m, z0.d +; CHECK-NEXT: fcvtzs z6.d, p2/m, z1.d +; CHECK-NEXT: fcmgt p1.d, p0/z, z0.d, z25.d +; CHECK-NEXT: fcmgt p2.d, p0/z, z1.d, z25.d +; CHECK-NEXT: fcvtzs z7.d, p3/m, z2.d +; CHECK-NEXT: fcmgt p3.d, p0/z, z2.d, z25.d +; CHECK-NEXT: fcvtzs z24.d, p4/m, z3.d +; CHECK-NEXT: fcmuo p4.d, p0/z, z0.d, z0.d +; CHECK-NEXT: sel z0.d, p1, z4.d, z5.d +; CHECK-NEXT: fcmuo p1.d, p0/z, z2.d, z2.d ; CHECK-NEXT: fcmuo p0.d, p0/z, z3.d, z3.d -; CHECK-NEXT: sel z0.d, p5, z26.d, z4.d -; CHECK-NEXT: sel z1.d, p6, z26.d, z6.d -; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: sel z2.d, p7, z26.d, z7.d -; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: sel z3.d, p1, z26.d, z5.d +; CHECK-NEXT: sel z1.d, p2, z4.d, z6.d +; CHECK-NEXT: sel z2.d, p3, z4.d, z7.d +; CHECK-NEXT: sel z3.d, p5, z4.d, z24.d ; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: mov z0.d, p2/m, #0 // 
=0x0 -; CHECK-NEXT: mov z1.d, p3/m, #0 // =0x0 -; CHECK-NEXT: mov z2.d, p4/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, p4/m, #0 // =0x0 ; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z1.d, p6/m, #0 // =0x0 +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z2.d, p1/m, #0 // =0x0 ; CHECK-NEXT: mov z3.d, p0/m, #0 // =0x0 ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -1368,7 +1181,7 @@ define @lrint_v16f64( %x) { ; CHECK-LABEL: lrint_v16f64: ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-2 +; CHECK-NEXT: addvl sp, sp, #-3 ; CHECK-NEXT: str p10, [sp, #1, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p9, [sp, #2, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Folded Spill @@ -1376,109 +1189,93 @@ define @lrint_v16f64( %x) { ; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG +; CHECK-NEXT: str z9, [sp, #1, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z8, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov x8, #-4332462841530417152 // =0xc3e0000000000000 -; CHECK-NEXT: mov z24.d, 
#0x7fffffffffffffff -; CHECK-NEXT: mov z25.d, x8 +; CHECK-NEXT: mov z26.d, #0x8000000000000000 +; CHECK-NEXT: mov z24.d, x8 ; CHECK-NEXT: mov x8, #4890909195324358655 // =0x43dfffffffffffff -; CHECK-NEXT: movprfx z26, z0 -; CHECK-NEXT: frintx z26.d, p0/m, z0.d -; CHECK-NEXT: movprfx z27, z1 -; CHECK-NEXT: frintx z27.d, p0/m, z1.d +; CHECK-NEXT: mov z27.d, #0x8000000000000000 +; CHECK-NEXT: frintx z0.d, p0/m, z0.d +; CHECK-NEXT: frintx z1.d, p0/m, z1.d +; CHECK-NEXT: movprfx z25, z4 +; CHECK-NEXT: frintx z25.d, p0/m, z4.d ; CHECK-NEXT: frintx z2.d, p0/m, z2.d -; CHECK-NEXT: mov z0.d, #0x8000000000000000 -; CHECK-NEXT: mov z1.d, x8 ; CHECK-NEXT: frintx z3.d, p0/m, z3.d -; CHECK-NEXT: movprfx z28, z4 -; CHECK-NEXT: frintx z28.d, p0/m, z4.d ; CHECK-NEXT: frintx z5.d, p0/m, z5.d ; CHECK-NEXT: frintx z6.d, p0/m, z6.d +; CHECK-NEXT: mov z30.d, x8 +; CHECK-NEXT: mov z4.d, #0x8000000000000000 ; CHECK-NEXT: frintx z7.d, p0/m, z7.d -; CHECK-NEXT: fcmge p1.d, p0/z, z26.d, z25.d -; CHECK-NEXT: fcmge p2.d, p0/z, z27.d, z25.d -; CHECK-NEXT: movprfx z4, z26 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z26.d -; CHECK-NEXT: fcmge p5.d, p0/z, z2.d, z25.d -; CHECK-NEXT: movprfx z29, z27 -; CHECK-NEXT: fcvtzs z29.d, p0/m, z27.d -; CHECK-NEXT: fcmgt p3.d, p0/z, z26.d, z1.d -; CHECK-NEXT: fcmge p6.d, p0/z, z3.d, z25.d -; CHECK-NEXT: fcmge p8.d, p0/z, z5.d, z25.d -; CHECK-NEXT: fcmgt p7.d, p0/z, z27.d, z1.d -; CHECK-NEXT: fcmge p9.d, p0/z, z6.d, z25.d -; CHECK-NEXT: movprfx z30, z28 -; CHECK-NEXT: fcvtzs z30.d, p0/m, z28.d -; CHECK-NEXT: fcmge p10.d, p0/z, z7.d, z25.d -; CHECK-NEXT: not p4.b, p0/z, p1.b -; CHECK-NEXT: fcmuo p1.d, p0/z, z26.d, z26.d -; CHECK-NEXT: movprfx z26, z2 -; CHECK-NEXT: fcvtzs z26.d, p0/m, z2.d -; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: movprfx z31, z6 -; CHECK-NEXT: fcvtzs z31.d, p0/m, z6.d -; CHECK-NEXT: movprfx z8, z7 -; CHECK-NEXT: fcvtzs z8.d, p0/m, z7.d -; CHECK-NEXT: mov z4.d, p4/m, z0.d -; CHECK-NEXT: fcmge p4.d, p0/z, z28.d, z25.d -; CHECK-NEXT: not p5.b, 
p0/z, p5.b -; CHECK-NEXT: mov z29.d, p2/m, z0.d -; CHECK-NEXT: fcmuo p2.d, p0/z, z27.d, z27.d -; CHECK-NEXT: movprfx z27, z3 -; CHECK-NEXT: fcvtzs z27.d, p0/m, z3.d -; CHECK-NEXT: sel z25.d, p5, z0.d, z26.d -; CHECK-NEXT: movprfx z26, z5 -; CHECK-NEXT: fcvtzs z26.d, p0/m, z5.d -; CHECK-NEXT: not p6.b, p0/z, p6.b -; CHECK-NEXT: not p5.b, p0/z, p8.b -; CHECK-NEXT: fcmgt p8.d, p0/z, z2.d, z1.d -; CHECK-NEXT: not p4.b, p0/z, p4.b -; CHECK-NEXT: mov z27.d, p6/m, z0.d -; CHECK-NEXT: not p6.b, p0/z, p9.b +; CHECK-NEXT: mov z28.d, #0x8000000000000000 +; CHECK-NEXT: mov z29.d, #0x8000000000000000 +; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, z24.d +; CHECK-NEXT: fcmge p2.d, p0/z, z1.d, z24.d +; CHECK-NEXT: fcmge p5.d, p0/z, z25.d, z24.d +; CHECK-NEXT: fcmge p3.d, p0/z, z2.d, z24.d +; CHECK-NEXT: fcmge p4.d, p0/z, z3.d, z24.d +; CHECK-NEXT: fcmge p7.d, p0/z, z5.d, z24.d +; CHECK-NEXT: mov z31.d, #0x8000000000000000 +; CHECK-NEXT: fcmge p6.d, p0/z, z6.d, z24.d +; CHECK-NEXT: mov z8.d, #0x8000000000000000 +; CHECK-NEXT: mov z9.d, #0x7fffffffffffffff +; CHECK-NEXT: fcmgt p8.d, p0/z, z25.d, z30.d +; CHECK-NEXT: fcmgt p10.d, p0/z, z6.d, z30.d +; CHECK-NEXT: fcvtzs z26.d, p1/m, z0.d +; CHECK-NEXT: fcmgt p1.d, p0/z, z0.d, z30.d +; CHECK-NEXT: fcvtzs z4.d, p2/m, z1.d +; CHECK-NEXT: fcmge p2.d, p0/z, z7.d, z24.d +; CHECK-NEXT: mov z24.d, #0x8000000000000000 +; CHECK-NEXT: fcvtzs z27.d, p3/m, z2.d +; CHECK-NEXT: fcvtzs z28.d, p4/m, z3.d +; CHECK-NEXT: fcvtzs z29.d, p5/m, z25.d +; CHECK-NEXT: fcvtzs z31.d, p7/m, z5.d +; CHECK-NEXT: fcmgt p4.d, p0/z, z1.d, z30.d +; CHECK-NEXT: fcmgt p5.d, p0/z, z2.d, z30.d +; CHECK-NEXT: fcmgt p7.d, p0/z, z3.d, z30.d +; CHECK-NEXT: fcvtzs z8.d, p6/m, z6.d +; CHECK-NEXT: fcmuo p3.d, p0/z, z0.d, z0.d +; CHECK-NEXT: sel z0.d, p1, z9.d, z26.d +; CHECK-NEXT: fcmgt p1.d, p0/z, z5.d, z30.d +; CHECK-NEXT: fcvtzs z24.d, p2/m, z7.d +; CHECK-NEXT: fcmgt p2.d, p0/z, z7.d, z30.d +; CHECK-NEXT: fcmuo p6.d, p0/z, z1.d, z1.d ; CHECK-NEXT: fcmuo p9.d, p0/z, z2.d, z2.d -; 
CHECK-NEXT: mov z30.d, p4/m, z0.d -; CHECK-NEXT: not p4.b, p0/z, p10.b -; CHECK-NEXT: fcmgt p10.d, p0/z, z3.d, z1.d -; CHECK-NEXT: mov z26.d, p5/m, z0.d -; CHECK-NEXT: fcmgt p5.d, p0/z, z28.d, z1.d -; CHECK-NEXT: mov z31.d, p6/m, z0.d -; CHECK-NEXT: mov z8.d, p4/m, z0.d -; CHECK-NEXT: sel z0.d, p3, z24.d, z4.d -; CHECK-NEXT: fcmgt p3.d, p0/z, z5.d, z1.d -; CHECK-NEXT: fcmgt p4.d, p0/z, z6.d, z1.d -; CHECK-NEXT: fcmgt p6.d, p0/z, z7.d, z1.d -; CHECK-NEXT: sel z1.d, p7, z24.d, z29.d -; CHECK-NEXT: fcmuo p7.d, p0/z, z3.d, z3.d -; CHECK-NEXT: sel z2.d, p8, z24.d, z25.d -; CHECK-NEXT: sel z3.d, p10, z24.d, z27.d -; CHECK-NEXT: sel z4.d, p5, z24.d, z30.d -; CHECK-NEXT: fcmuo p5.d, p0/z, z28.d, z28.d -; CHECK-NEXT: fcmuo p8.d, p0/z, z5.d, z5.d -; CHECK-NEXT: fcmuo p10.d, p0/z, z6.d, z6.d -; CHECK-NEXT: sel z5.d, p3, z24.d, z26.d +; CHECK-NEXT: sel z1.d, p4, z9.d, z4.d +; CHECK-NEXT: fcmuo p4.d, p0/z, z3.d, z3.d +; CHECK-NEXT: sel z2.d, p5, z9.d, z27.d +; CHECK-NEXT: sel z3.d, p7, z9.d, z28.d +; CHECK-NEXT: sel z4.d, p8, z9.d, z29.d +; CHECK-NEXT: fcmuo p5.d, p0/z, z25.d, z25.d +; CHECK-NEXT: fcmuo p7.d, p0/z, z5.d, z5.d +; CHECK-NEXT: fcmuo p8.d, p0/z, z6.d, z6.d +; CHECK-NEXT: sel z5.d, p1, z9.d, z31.d +; CHECK-NEXT: sel z6.d, p10, z9.d, z8.d +; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #1, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: fcmuo p0.d, p0/z, z7.d, z7.d -; CHECK-NEXT: sel z6.d, p4, z24.d, z31.d -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: sel z7.d, p6, z24.d, z8.d -; CHECK-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: sel z7.d, p2, z9.d, z24.d +; CHECK-NEXT: ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: mov z1.d, p6/m, #0 // =0x0 ; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: mov z2.d, p9/m, #0 // =0x0 ; CHECK-NEXT: ldr p9, [sp, #2, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: mov z3.d, p7/m, #0 // =0x0 
+; CHECK-NEXT: mov z3.d, p4/m, #0 // =0x0 ; CHECK-NEXT: mov z4.d, p5/m, #0 // =0x0 -; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: mov z5.d, p8/m, #0 // =0x0 -; CHECK-NEXT: mov z6.d, p10/m, #0 // =0x0 -; CHECK-NEXT: ldr p10, [sp, #1, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 -; CHECK-NEXT: mov z1.d, p2/m, #0 // =0x0 +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z5.d, p7/m, #0 // =0x0 +; CHECK-NEXT: mov z6.d, p8/m, #0 // =0x0 ; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z0.d, p3/m, #0 // =0x0 ; CHECK-NEXT: mov z7.d, p0/m, #0 // =0x0 -; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: addvl sp, sp, #2 +; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #3 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %a = call @llvm.lrint.nxv16iXLen.nxv16f64( %x) @@ -1491,6 +1288,8 @@ define @lrint_v32f64( %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-17 +; CHECK-NEXT: str p11, [sp] // 2-byte Folded Spill +; CHECK-NEXT: str p10, [sp, #1, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p9, [sp, #2, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill @@ -1513,8 +1312,8 @@ define @lrint_v32f64( %x) { ; CHECK-NEXT: str z10, [sp, #14, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z9, [sp, #15, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z8, [sp, #16, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-3 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xa0, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 160 * VG +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 144 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG ; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG @@ -1527,219 +1326,176 @@ define @lrint_v32f64( %x) { ; CHECK-NEXT: ldr z0, [x0] ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: ldr z2, [x0, #2, mul vl] -; CHECK-NEXT: mov x9, #-4332462841530417152 // =0xc3e0000000000000 -; CHECK-NEXT: ldr z24, [x0, #6, mul vl] ; CHECK-NEXT: ldr z1, [x0, #1, mul vl] -; CHECK-NEXT: mov z7.d, x9 -; CHECK-NEXT: mov z26.d, #0x8000000000000000 -; CHECK-NEXT: ldr z3, [x0, #3, mul vl] +; CHECK-NEXT: ldr z6, [x0, #4, mul vl] +; CHECK-NEXT: mov x9, #-4332462841530417152 // =0xc3e0000000000000 +; CHECK-NEXT: ldr z5, [x0, #3, mul vl] +; CHECK-NEXT: mov z25.d, x9 +; CHECK-NEXT: mov z28.d, #0x8000000000000000 ; CHECK-NEXT: frintx z0.d, p0/m, z0.d -; CHECK-NEXT: movprfx z30, z2 -; CHECK-NEXT: frintx z30.d, p0/m, z2.d -; CHECK-NEXT: ldr 
z6, [x0, #5, mul vl] -; CHECK-NEXT: movprfx z25, z24 -; CHECK-NEXT: frintx z25.d, p0/m, z24.d -; CHECK-NEXT: movprfx z12, z1 -; CHECK-NEXT: frintx z12.d, p0/m, z1.d -; CHECK-NEXT: ldr z5, [x0, #4, mul vl] -; CHECK-NEXT: frintx z3.d, p0/m, z3.d -; CHECK-NEXT: mov x9, #4890909195324358655 // =0x43dfffffffffffff +; CHECK-NEXT: movprfx z4, z2 +; CHECK-NEXT: frintx z4.d, p0/m, z2.d +; CHECK-NEXT: mov z27.d, #0x8000000000000000 +; CHECK-NEXT: frintx z1.d, p0/m, z1.d ; CHECK-NEXT: frintx z6.d, p0/m, z6.d -; CHECK-NEXT: mov z4.d, x9 -; CHECK-NEXT: fcmge p3.d, p0/z, z0.d, z7.d -; CHECK-NEXT: movprfx z24, z0 -; CHECK-NEXT: fcvtzs z24.d, p0/m, z0.d -; CHECK-NEXT: fcmge p5.d, p0/z, z30.d, z7.d -; CHECK-NEXT: movprfx z28, z30 -; CHECK-NEXT: fcvtzs z28.d, p0/m, z30.d -; CHECK-NEXT: str z0, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: mov z30.d, #0x8000000000000000 ; CHECK-NEXT: frintx z5.d, p0/m, z5.d -; CHECK-NEXT: fcmge p4.d, p0/z, z12.d, z7.d -; CHECK-NEXT: ldr z8, [x0, #7, mul vl] -; CHECK-NEXT: ldr z9, [x0, #15, mul vl] -; CHECK-NEXT: movprfx z27, z12 -; CHECK-NEXT: fcvtzs z27.d, p0/m, z12.d -; CHECK-NEXT: fcmge p6.d, p0/z, z3.d, z7.d -; CHECK-NEXT: fcmge p9.d, p0/z, z6.d, z7.d -; CHECK-NEXT: not p7.b, p0/z, p3.b -; CHECK-NEXT: movprfx z31, z3 -; CHECK-NEXT: fcvtzs z31.d, p0/m, z3.d -; CHECK-NEXT: movprfx z15, z6 -; CHECK-NEXT: fcvtzs z15.d, p0/m, z6.d -; CHECK-NEXT: not p5.b, p0/z, p5.b -; CHECK-NEXT: fcmge p8.d, p0/z, z5.d, z7.d -; CHECK-NEXT: movprfx z13, z5 -; CHECK-NEXT: fcvtzs z13.d, p0/m, z5.d -; CHECK-NEXT: sel z0.d, p7, z26.d, z24.d -; CHECK-NEXT: not p4.b, p0/z, p4.b -; CHECK-NEXT: movprfx z17, z25 -; CHECK-NEXT: fcvtzs z17.d, p0/m, z25.d -; CHECK-NEXT: not p3.b, p0/z, p6.b -; CHECK-NEXT: fcmge p6.d, p0/z, z25.d, z7.d -; CHECK-NEXT: movprfx z22, z9 -; CHECK-NEXT: frintx z22.d, p0/m, z9.d -; CHECK-NEXT: sel z29.d, p4, z26.d, z27.d -; CHECK-NEXT: movprfx z27, z8 -; CHECK-NEXT: frintx z27.d, p0/m, z8.d -; CHECK-NEXT: fcmgt p1.d, p0/z, z12.d, z4.d -; 
CHECK-NEXT: str z0, [sp, #1, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: sel z0.d, p5, z26.d, z28.d -; CHECK-NEXT: not p4.b, p0/z, p8.b -; CHECK-NEXT: ldr z10, [x0, #8, mul vl] -; CHECK-NEXT: not p5.b, p0/z, p9.b -; CHECK-NEXT: sel z24.d, p3, z26.d, z31.d -; CHECK-NEXT: not p3.b, p0/z, p6.b -; CHECK-NEXT: movprfx z2, z22 -; CHECK-NEXT: fcvtzs z2.d, p0/m, z22.d -; CHECK-NEXT: fcmgt p2.d, p0/z, z30.d, z4.d +; CHECK-NEXT: mov z26.d, #0x8000000000000000 +; CHECK-NEXT: mov z2.d, #0x8000000000000000 +; CHECK-NEXT: mov z13.d, #0x8000000000000000 +; CHECK-NEXT: mov z12.d, #0x8000000000000000 +; CHECK-NEXT: mov x10, #4890909195324358655 // =0x43dfffffffffffff ; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: fcmge p7.d, p0/z, z27.d, z7.d -; CHECK-NEXT: sel z31.d, p5, z26.d, z15.d -; CHECK-NEXT: ldr z11, [x0, #9, mul vl] -; CHECK-NEXT: movprfx z28, z10 -; CHECK-NEXT: frintx z28.d, p0/m, z10.d -; CHECK-NEXT: ldr z10, [x0, #10, mul vl] -; CHECK-NEXT: ldr z18, [x0, #11, mul vl] -; CHECK-NEXT: ldr z16, [x0, #13, mul vl] -; CHECK-NEXT: ldr z14, [x0, #14, mul vl] -; CHECK-NEXT: ldr z19, [x0, #12, mul vl] -; CHECK-NEXT: mov z17.d, p3/m, z26.d -; CHECK-NEXT: fcmgt p9.d, p0/z, z3.d, z4.d -; CHECK-NEXT: movprfx z8, z11 -; CHECK-NEXT: frintx z8.d, p0/m, z11.d -; CHECK-NEXT: sel z11.d, p4, z26.d, z13.d -; CHECK-NEXT: frintx z10.d, p0/m, z10.d -; CHECK-NEXT: movprfx z13, z18 -; CHECK-NEXT: frintx z13.d, p0/m, z18.d -; CHECK-NEXT: fcmge p5.d, p0/z, z28.d, z7.d -; CHECK-NEXT: movprfx z18, z27 -; CHECK-NEXT: fcvtzs z18.d, p0/m, z27.d +; CHECK-NEXT: fcmge p3.d, p0/z, z4.d, z25.d +; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, z25.d +; CHECK-NEXT: ldr z29, [x0, #7, mul vl] +; CHECK-NEXT: ldr z24, [x0, #6, mul vl] +; CHECK-NEXT: ldr z10, [x0, #9, mul vl] +; CHECK-NEXT: ldr z8, [x0, #8, mul vl] +; CHECK-NEXT: ldr z7, [x0, #5, mul vl] +; CHECK-NEXT: ldr z14, [x0, #15, mul vl] +; CHECK-NEXT: fcmge p2.d, p0/z, z1.d, z25.d +; CHECK-NEXT: fcmge p5.d, p0/z, z6.d, z25.d +; CHECK-NEXT: ldr 
z15, [x0, #14, mul vl] +; CHECK-NEXT: frintx z29.d, p0/m, z29.d +; CHECK-NEXT: frintx z24.d, p0/m, z24.d +; CHECK-NEXT: movprfx z11, z10 +; CHECK-NEXT: frintx z11.d, p0/m, z10.d +; CHECK-NEXT: fcmge p4.d, p0/z, z5.d, z25.d +; CHECK-NEXT: movprfx z9, z8 +; CHECK-NEXT: frintx z9.d, p0/m, z8.d +; CHECK-NEXT: ldr z16, [x0, #11, mul vl] +; CHECK-NEXT: ldr z20, [x0, #13, mul vl] +; CHECK-NEXT: frintx z7.d, p0/m, z7.d +; CHECK-NEXT: fcvtzs z28.d, p3/m, z4.d +; CHECK-NEXT: mov z10.d, #0x8000000000000000 +; CHECK-NEXT: ldr z18, [x0, #12, mul vl] +; CHECK-NEXT: movprfx z19, z14 +; CHECK-NEXT: frintx z19.d, p0/m, z14.d +; CHECK-NEXT: fcmge p3.d, p0/z, z29.d, z25.d +; CHECK-NEXT: ldr z17, [x0, #10, mul vl] +; CHECK-NEXT: frintx z15.d, p0/m, z15.d +; CHECK-NEXT: fcvtzs z27.d, p2/m, z1.d +; CHECK-NEXT: fcvtzs z30.d, p5/m, z6.d +; CHECK-NEXT: fcmge p2.d, p0/z, z24.d, z25.d +; CHECK-NEXT: fcmge p5.d, p0/z, z11.d, z25.d +; CHECK-NEXT: mov z14.d, #0x8000000000000000 ; CHECK-NEXT: frintx z16.d, p0/m, z16.d -; CHECK-NEXT: movprfx z15, z19 -; CHECK-NEXT: frintx z15.d, p0/m, z19.d -; CHECK-NEXT: movprfx z19, z28 -; CHECK-NEXT: fcvtzs z19.d, p0/m, z28.d -; CHECK-NEXT: movprfx z21, z14 -; CHECK-NEXT: frintx z21.d, p0/m, z14.d -; CHECK-NEXT: not p4.b, p0/z, p7.b -; CHECK-NEXT: fcmge p6.d, p0/z, z8.d, z7.d -; CHECK-NEXT: movprfx z20, z8 -; CHECK-NEXT: fcvtzs z20.d, p0/m, z8.d -; CHECK-NEXT: fcmge p7.d, p0/z, z10.d, z7.d -; CHECK-NEXT: fcmge p8.d, p0/z, z13.d, z7.d -; CHECK-NEXT: not p5.b, p0/z, p5.b -; CHECK-NEXT: sel z9.d, p4, z26.d, z18.d -; CHECK-NEXT: fcmge p4.d, p0/z, z16.d, z7.d -; CHECK-NEXT: fcmge p3.d, p0/z, z15.d, z7.d -; CHECK-NEXT: movprfx z0, z16 -; CHECK-NEXT: fcvtzs z0.d, p0/m, z16.d -; CHECK-NEXT: sel z14.d, p5, z26.d, z19.d -; CHECK-NEXT: movprfx z19, z10 -; CHECK-NEXT: fcvtzs z19.d, p0/m, z10.d -; CHECK-NEXT: movprfx z1, z21 -; CHECK-NEXT: fcvtzs z1.d, p0/m, z21.d -; CHECK-NEXT: not p6.b, p0/z, p6.b -; CHECK-NEXT: movprfx z23, z15 -; CHECK-NEXT: fcvtzs z23.d, p0/m, z15.d 
-; CHECK-NEXT: not p5.b, p0/z, p7.b -; CHECK-NEXT: sel z18.d, p6, z26.d, z20.d -; CHECK-NEXT: fcmge p6.d, p0/z, z21.d, z7.d -; CHECK-NEXT: not p7.b, p0/z, p8.b -; CHECK-NEXT: fcmge p8.d, p0/z, z22.d, z7.d -; CHECK-NEXT: movprfx z20, z13 -; CHECK-NEXT: fcvtzs z20.d, p0/m, z13.d -; CHECK-NEXT: not p4.b, p0/z, p4.b -; CHECK-NEXT: mov z7.d, #0x7fffffffffffffff -; CHECK-NEXT: mov z19.d, p5/m, z26.d -; CHECK-NEXT: not p3.b, p0/z, p3.b -; CHECK-NEXT: mov z0.d, p4/m, z26.d -; CHECK-NEXT: fcmgt p4.d, p0/z, z21.d, z4.d -; CHECK-NEXT: not p5.b, p0/z, p6.b -; CHECK-NEXT: mov z23.d, p3/m, z26.d -; CHECK-NEXT: fcmgt p3.d, p0/z, z22.d, z4.d -; CHECK-NEXT: not p6.b, p0/z, p8.b -; CHECK-NEXT: mov z20.d, p7/m, z26.d -; CHECK-NEXT: fcmuo p8.d, p0/z, z22.d, z22.d -; CHECK-NEXT: mov z1.d, p5/m, z26.d -; CHECK-NEXT: fcmuo p5.d, p0/z, z21.d, z21.d -; CHECK-NEXT: fcmgt p7.d, p0/z, z25.d, z4.d -; CHECK-NEXT: mov z2.d, p6/m, z26.d -; CHECK-NEXT: sel z26.d, p1, z7.d, z29.d -; CHECK-NEXT: fcmgt p1.d, p0/z, z16.d, z4.d -; CHECK-NEXT: ldr z29, [sp] // 16-byte Folded Reload -; CHECK-NEXT: fcmgt p6.d, p0/z, z5.d, z4.d -; CHECK-NEXT: mov z24.d, p9/m, z7.d -; CHECK-NEXT: mov z1.d, p4/m, z7.d -; CHECK-NEXT: fcmuo p4.d, p0/z, z16.d, z16.d -; CHECK-NEXT: mov z2.d, p3/m, z7.d -; CHECK-NEXT: fcmgt p3.d, p0/z, z15.d, z4.d -; CHECK-NEXT: mov z17.d, p7/m, z7.d -; CHECK-NEXT: mov z29.d, p2/m, z7.d -; CHECK-NEXT: fcmgt p2.d, p0/z, z13.d, z4.d -; CHECK-NEXT: mov z0.d, p1/m, z7.d -; CHECK-NEXT: fcmgt p1.d, p0/z, z10.d, z4.d -; CHECK-NEXT: mov z1.d, p5/m, #0 // =0x0 -; CHECK-NEXT: mov z11.d, p6/m, z7.d +; CHECK-NEXT: frintx z20.d, p0/m, z20.d +; CHECK-NEXT: fcvtzs z26.d, p4/m, z5.d +; CHECK-NEXT: fcmge p4.d, p0/z, z9.d, z25.d +; CHECK-NEXT: frintx z18.d, p0/m, z18.d +; CHECK-NEXT: mov z31.d, #0x8000000000000000 +; CHECK-NEXT: fcvtzs z2.d, p1/m, z0.d +; CHECK-NEXT: fcmge p1.d, p0/z, z7.d, z25.d +; CHECK-NEXT: mov z8.d, #0x8000000000000000 +; CHECK-NEXT: frintx z17.d, p0/m, z17.d +; CHECK-NEXT: fcvtzs z10.d, 
p3/m, z29.d +; CHECK-NEXT: fcmge p3.d, p0/z, z19.d, z25.d +; CHECK-NEXT: mov z3.d, x10 +; CHECK-NEXT: fcmge p6.d, p0/z, z15.d, z25.d +; CHECK-NEXT: mov z21.d, #0x8000000000000000 +; CHECK-NEXT: fcvtzs z13.d, p2/m, z24.d +; CHECK-NEXT: fcvtzs z14.d, p5/m, z11.d +; CHECK-NEXT: fcmge p2.d, p0/z, z16.d, z25.d +; CHECK-NEXT: mov z22.d, #0x8000000000000000 +; CHECK-NEXT: fcmge p5.d, p0/z, z20.d, z25.d +; CHECK-NEXT: mov z0.d, #0x8000000000000000 +; CHECK-NEXT: fcvtzs z12.d, p4/m, z9.d +; CHECK-NEXT: fcmge p4.d, p0/z, z18.d, z25.d +; CHECK-NEXT: mov z23.d, #0x8000000000000000 +; CHECK-NEXT: fcvtzs z8.d, p1/m, z7.d +; CHECK-NEXT: fcmge p1.d, p0/z, z17.d, z25.d +; CHECK-NEXT: fcvtzs z31.d, p3/m, z19.d +; CHECK-NEXT: mov z25.d, #0x7fffffffffffffff +; CHECK-NEXT: fcmgt p11.d, p0/z, z19.d, z3.d +; CHECK-NEXT: fcvtzs z21.d, p6/m, z15.d +; CHECK-NEXT: fcmgt p3.d, p0/z, z15.d, z3.d ; CHECK-NEXT: fcmuo p6.d, p0/z, z15.d, z15.d -; CHECK-NEXT: fcmgt p5.d, p0/z, z8.d, z4.d -; CHECK-NEXT: mov z2.d, p8/m, #0 // =0x0 -; CHECK-NEXT: sel z16.d, p3, z7.d, z23.d -; CHECK-NEXT: fcmuo p3.d, p0/z, z10.d, z10.d -; CHECK-NEXT: mov z0.d, p4/m, #0 // =0x0 -; CHECK-NEXT: sel z15.d, p2, z7.d, z20.d -; CHECK-NEXT: fcmuo p2.d, p0/z, z13.d, z13.d -; CHECK-NEXT: str z1, [x8, #14, mul vl] -; CHECK-NEXT: sel z1.d, p1, z7.d, z19.d -; CHECK-NEXT: fcmgt p1.d, p0/z, z28.d, z4.d -; CHECK-NEXT: fcmgt p4.d, p0/z, z27.d, z4.d -; CHECK-NEXT: str z2, [x8, #15, mul vl] -; CHECK-NEXT: sel z2.d, p5, z7.d, z18.d -; CHECK-NEXT: mov z16.d, p6/m, #0 // =0x0 -; CHECK-NEXT: fcmuo p5.d, p0/z, z8.d, z8.d +; CHECK-NEXT: mov z15.d, #0x8000000000000000 +; CHECK-NEXT: fcmgt p7.d, p0/z, z1.d, z3.d +; CHECK-NEXT: fcvtzs z22.d, p2/m, z16.d +; CHECK-NEXT: fcvtzs z0.d, p5/m, z20.d +; CHECK-NEXT: fcmgt p2.d, p0/z, z24.d, z3.d +; CHECK-NEXT: fcmgt p5.d, p0/z, z20.d, z3.d +; CHECK-NEXT: fcvtzs z23.d, p4/m, z18.d +; CHECK-NEXT: fcmuo p4.d, p0/z, z19.d, z19.d +; CHECK-NEXT: mov z31.d, p11/m, z25.d +; CHECK-NEXT: sel z19.d, p3, z25.d, z21.d 
+; CHECK-NEXT: fcmgt p3.d, p0/z, z18.d, z3.d +; CHECK-NEXT: fcvtzs z15.d, p1/m, z17.d +; CHECK-NEXT: fcmuo p1.d, p0/z, z20.d, z20.d +; CHECK-NEXT: mov z27.d, p7/m, z25.d +; CHECK-NEXT: fcmgt p7.d, p0/z, z16.d, z3.d +; CHECK-NEXT: mov z13.d, p2/m, z25.d +; CHECK-NEXT: fcmgt p2.d, p0/z, z17.d, z3.d +; CHECK-NEXT: mov z0.d, p5/m, z25.d +; CHECK-NEXT: mov z31.d, p4/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p4.d, p0/z, z18.d, z18.d +; CHECK-NEXT: sel z20.d, p3, z25.d, z23.d +; CHECK-NEXT: fcmuo p3.d, p0/z, z16.d, z16.d +; CHECK-NEXT: fcmgt p5.d, p0/z, z11.d, z3.d +; CHECK-NEXT: mov z19.d, p6/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p10.d, p0/z, z6.d, z3.d +; CHECK-NEXT: fcmgt p8.d, p0/z, z4.d, z3.d +; CHECK-NEXT: str z31, [x8, #15, mul vl] +; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p1.d, p0/z, z17.d, z17.d +; CHECK-NEXT: sel z18.d, p7, z25.d, z22.d +; CHECK-NEXT: sel z31.d, p2, z25.d, z15.d +; CHECK-NEXT: fcmgt p2.d, p0/z, z9.d, z3.d +; CHECK-NEXT: str z19, [x8, #14, mul vl] +; CHECK-NEXT: mov z20.d, p4/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p4.d, p0/z, z11.d, z11.d ; CHECK-NEXT: str z0, [x8, #13, mul vl] -; CHECK-NEXT: mov z15.d, p2/m, #0 // =0x0 -; CHECK-NEXT: fcmuo p2.d, p0/z, z28.d, z28.d -; CHECK-NEXT: mov z1.d, p3/m, #0 // =0x0 -; CHECK-NEXT: fcmgt p3.d, p0/z, z6.d, z4.d -; CHECK-NEXT: sel z0.d, p1, z7.d, z14.d -; CHECK-NEXT: fcmuo p1.d, p0/z, z27.d, z27.d -; CHECK-NEXT: sel z27.d, p4, z7.d, z9.d -; CHECK-NEXT: str z16, [x8, #12, mul vl] -; CHECK-NEXT: fcmuo p4.d, p0/z, z25.d, z25.d -; CHECK-NEXT: str z15, [x8, #11, mul vl] -; CHECK-NEXT: mov z2.d, p5/m, #0 // =0x0 -; CHECK-NEXT: fcmuo p5.d, p0/z, z6.d, z6.d -; CHECK-NEXT: str z1, [x8, #10, mul vl] -; CHECK-NEXT: mov z0.d, p2/m, #0 // =0x0 -; CHECK-NEXT: sel z1.d, p3, z7.d, z31.d -; CHECK-NEXT: fcmuo p3.d, p0/z, z5.d, z5.d -; CHECK-NEXT: ldr z5, [sp, #2, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: mov z27.d, p1/m, #0 // =0x0 -; CHECK-NEXT: str z2, [x8, #9, mul vl] -; CHECK-NEXT: fcmuo p1.d, p0/z, z3.d, z3.d 
+; CHECK-NEXT: mov z14.d, p5/m, z25.d +; CHECK-NEXT: fcmgt p5.d, p0/z, z29.d, z3.d +; CHECK-NEXT: mov z18.d, p3/m, #0 // =0x0 +; CHECK-NEXT: mov z31.d, p1/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p1.d, p0/z, z7.d, z3.d +; CHECK-NEXT: str z20, [x8, #12, mul vl] +; CHECK-NEXT: fcmuo p3.d, p0/z, z9.d, z9.d +; CHECK-NEXT: sel z0.d, p2, z25.d, z12.d +; CHECK-NEXT: mov z14.d, p4/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p4.d, p0/z, z7.d, z7.d +; CHECK-NEXT: fcmuo p2.d, p0/z, z29.d, z29.d +; CHECK-NEXT: str z18, [x8, #11, mul vl] +; CHECK-NEXT: sel z29.d, p5, z25.d, z10.d +; CHECK-NEXT: fcmuo p5.d, p0/z, z24.d, z24.d +; CHECK-NEXT: str z31, [x8, #10, mul vl] +; CHECK-NEXT: sel z7.d, p1, z25.d, z8.d +; CHECK-NEXT: fcmuo p1.d, p0/z, z6.d, z6.d +; CHECK-NEXT: ldr z6, [sp] // 16-byte Folded Reload +; CHECK-NEXT: str z14, [x8, #9, mul vl] +; CHECK-NEXT: fcmgt p9.d, p0/z, z5.d, z3.d +; CHECK-NEXT: mov z0.d, p3/m, #0 // =0x0 +; CHECK-NEXT: mov z29.d, p2/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p2.d, p0/z, z5.d, z5.d +; CHECK-NEXT: mov z13.d, p5/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p5.d, p0/z, z4.d, z4.d +; CHECK-NEXT: mov z7.d, p4/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p3.d, p0/z, z6.d, z3.d +; CHECK-NEXT: fcmuo p4.d, p0/z, z1.d, z1.d +; CHECK-NEXT: fcmuo p0.d, p0/z, z6.d, z6.d ; CHECK-NEXT: str z0, [x8, #8, mul vl] -; CHECK-NEXT: mov z17.d, p4/m, #0 // =0x0 -; CHECK-NEXT: fcmuo p4.d, p0/z, z30.d, z30.d -; CHECK-NEXT: fcmgt p2.d, p0/z, z5.d, z4.d -; CHECK-NEXT: mov z1.d, p5/m, #0 // =0x0 -; CHECK-NEXT: fcmuo p5.d, p0/z, z12.d, z12.d -; CHECK-NEXT: str z27, [x8, #7, mul vl] -; CHECK-NEXT: fcmuo p0.d, p0/z, z5.d, z5.d -; CHECK-NEXT: mov z11.d, p3/m, #0 // =0x0 -; CHECK-NEXT: ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: mov z24.d, p1/m, #0 // =0x0 -; CHECK-NEXT: str z17, [x8, #6, mul vl] -; CHECK-NEXT: mov z29.d, p4/m, #0 // =0x0 -; CHECK-NEXT: str z1, [x8, #5, mul vl] -; CHECK-NEXT: mov z26.d, p5/m, #0 // =0x0 -; CHECK-NEXT: str z11, [x8, #4, mul vl] -; CHECK-NEXT: mov z0.d, p2/m, z7.d 
-; CHECK-NEXT: str z24, [x8, #3, mul vl] -; CHECK-NEXT: str z29, [x8, #2, mul vl] -; CHECK-NEXT: str z26, [x8, #1, mul vl] +; CHECK-NEXT: mov z28.d, p8/m, z25.d +; CHECK-NEXT: mov z26.d, p9/m, z25.d +; CHECK-NEXT: str z29, [x8, #7, mul vl] +; CHECK-NEXT: mov z30.d, p10/m, z25.d +; CHECK-NEXT: str z13, [x8, #6, mul vl] +; CHECK-NEXT: str z7, [x8, #5, mul vl] +; CHECK-NEXT: sel z0.d, p3, z25.d, z2.d +; CHECK-NEXT: mov z26.d, p2/m, #0 // =0x0 +; CHECK-NEXT: mov z30.d, p1/m, #0 // =0x0 +; CHECK-NEXT: mov z28.d, p5/m, #0 // =0x0 +; CHECK-NEXT: mov z27.d, p4/m, #0 // =0x0 +; CHECK-NEXT: str z26, [x8, #3, mul vl] ; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 +; CHECK-NEXT: str z30, [x8, #4, mul vl] +; CHECK-NEXT: str z28, [x8, #2, mul vl] +; CHECK-NEXT: str z27, [x8, #1, mul vl] ; CHECK-NEXT: str z0, [x8] -; CHECK-NEXT: addvl sp, sp, #3 +; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -1756,6 +1512,8 @@ define @lrint_v32f64( %x) { ; CHECK-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p11, [sp] // 2-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #1, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p9, [sp, #2, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/sve-pred-selectop2.ll b/llvm/test/CodeGen/AArch64/sve-pred-selectop2.ll index bbc94f568dd0a..0c0762da5bba2 100644 --- a/llvm/test/CodeGen/AArch64/sve-pred-selectop2.ll +++ b/llvm/test/CodeGen/AArch64/sve-pred-selectop2.ll @@ -989,9 +989,9 @@ define @fadd_nxv4f32_x( %x, %n, zeroinitializer @@ -1004,9 +1004,9 @@ define @fadd_nxv8f16_x( %x, %n, 
zeroinitializer @@ -1019,9 +1019,9 @@ define @fadd_nxv2f64_x( %x, %n, zeroinitializer @@ -1034,9 +1034,9 @@ define @fsub_nxv4f32_x( %x, %n, zeroinitializer @@ -1049,9 +1049,9 @@ define @fsub_nxv8f16_x( %x, %n, zeroinitializer @@ -1064,9 +1064,9 @@ define @fsub_nxv2f64_x( %x, %n, zeroinitializer @@ -1079,9 +1079,9 @@ define @fmul_nxv4f32_x( %x, %n, zeroinitializer @@ -1094,9 +1094,9 @@ define @fmul_nxv8f16_x( %x, %n, zeroinitializer @@ -1109,9 +1109,9 @@ define @fmul_nxv2f64_x( %x, %n, zeroinitializer @@ -1125,9 +1125,8 @@ define @fdiv_nxv4f32_x( %x, %n, zeroinitializer @@ -1141,9 +1140,8 @@ define @fdiv_nxv8f16_x( %x, %n, zeroinitializer @@ -1157,9 +1155,8 @@ define @fdiv_nxv2f64_x( %x, %n, zeroinitializer @@ -1173,8 +1170,8 @@ define @minnum_nxv4f32_x( %x, %n, zeroinitializer @@ -1188,8 +1185,8 @@ define @minnum_nxv8f16_x( %x, %n, zeroinitializer @@ -1203,8 +1200,8 @@ define @minnum_nxv2f64_x( %x, %n, zeroinitializer @@ -1218,8 +1215,8 @@ define @maxnum_nxv4f32_x( %x, %n, zeroinitializer @@ -1233,8 +1230,8 @@ define @maxnum_nxv8f16_x( %x, %n, zeroinitializer @@ -1248,8 +1245,8 @@ define @maxnum_nxv2f64_x( %x, %n, zeroinitializer @@ -1263,8 +1260,8 @@ define @minimum_nxv4f32_x( %x, %n, zeroinitializer @@ -1278,8 +1275,8 @@ define @minimum_nxv8f16_x( %x, %n, zeroinitializer @@ -1293,8 +1290,8 @@ define @minimum_nxv2f64_x( %x, %n, zeroinitializer @@ -1308,8 +1305,8 @@ define @maximum_nxv4f32_x( %x, %n, zeroinitializer @@ -1323,8 +1320,8 @@ define @maximum_nxv8f16_x( %x, %n, zeroinitializer @@ -1338,8 +1335,8 @@ define @maximum_nxv2f64_x( %x, %n, zeroinitializer @@ -1353,8 +1350,8 @@ define @fmai_nxv4f32_x( %x, %n, zeroinitializer @@ -1368,8 +1365,8 @@ define @fmai_nxv8f16_x( %x, %n, zeroinitializer @@ -1383,8 +1380,8 @@ define @fmai_nxv2f64_x( %x, %n, zeroinitializer @@ -1398,8 +1395,8 @@ define @fma_nxv4f32_x( %x, %n, zeroinitializer @@ -1414,8 +1411,8 @@ define @fma_nxv8f16_x( %x, %n, zeroinitializer @@ -1430,8 +1427,8 @@ define @fma_nxv2f64_x( %x, %n, 
zeroinitializer @@ -2470,9 +2467,8 @@ define @fadd_nxv4f32_y( %x, %n, zeroinitializer @@ -2486,9 +2482,8 @@ define @fadd_nxv8f16_y( %x, %n, zeroinitializer @@ -2502,9 +2497,8 @@ define @fadd_nxv2f64_y( %x, %n, zeroinitializer @@ -2517,10 +2511,9 @@ define @fsub_nxv4f32_y( %x, %n, zeroinitializer @@ -2533,10 +2526,9 @@ define @fsub_nxv8f16_y( %x, %n, zeroinitializer @@ -2549,10 +2541,9 @@ define @fsub_nxv2f64_y( %x, %n, zeroinitializer @@ -2566,9 +2557,8 @@ define @fmul_nxv4f32_y( %x, %n, zeroinitializer @@ -2582,9 +2572,8 @@ define @fmul_nxv8f16_y( %x, %n, zeroinitializer @@ -2598,9 +2587,8 @@ define @fmul_nxv2f64_y( %x, %n, zeroinitializer @@ -2614,9 +2602,8 @@ define @fdiv_nxv4f32_y( %x, %n, zeroinitializer @@ -2630,9 +2617,8 @@ define @fdiv_nxv8f16_y( %x, %n, zeroinitializer @@ -2646,9 +2632,8 @@ define @fdiv_nxv2f64_y( %x, %n, zeroinitializer @@ -2662,9 +2647,8 @@ define @minnum_nxv4f32_y( %x, %n, zeroinitializer @@ -2678,9 +2662,8 @@ define @minnum_nxv8f16_y( %x, %n, zeroinitializer @@ -2694,9 +2677,8 @@ define @minnum_nxv2f64_y( %x, %n, zeroinitializer @@ -2710,9 +2692,8 @@ define @maxnum_nxv4f32_y( %x, %n, zeroinitializer @@ -2726,9 +2707,8 @@ define @maxnum_nxv8f16_y( %x, %n, zeroinitializer @@ -2742,9 +2722,8 @@ define @maxnum_nxv2f64_y( %x, %n, zeroinitializer @@ -2758,9 +2737,8 @@ define @minimum_nxv4f32_y( %x, %n, zeroinitializer @@ -2774,9 +2752,8 @@ define @minimum_nxv8f16_y( %x, %n, zeroinitializer @@ -2790,9 +2767,8 @@ define @minimum_nxv2f64_y( %x, %n, zeroinitializer @@ -2806,9 +2782,8 @@ define @maximum_nxv4f32_y( %x, %n, zeroinitializer @@ -2822,9 +2797,8 @@ define @maximum_nxv8f16_y( %x, %n, zeroinitializer @@ -2838,9 +2812,8 @@ define @maximum_nxv2f64_y( %x, %n, zeroinitializer @@ -2855,8 +2828,7 @@ define @fmai_nxv4f32_y( %x, %n, zeroinitializer @@ -2871,8 +2843,7 @@ define @fmai_nxv8f16_y( %x, %n, zeroinitializer @@ -2887,8 +2858,7 @@ define @fmai_nxv2f64_y( %x, %n, zeroinitializer @@ -2903,8 +2873,7 @@ define @fma_nxv4f32_y( %x, %n, 
zeroinitializer @@ -2920,8 +2889,7 @@ define @fma_nxv8f16_y( %x, %n, zeroinitializer @@ -2937,8 +2905,7 @@ define @fma_nxv2f64_y( %x, %n, zeroinitializer diff --git a/llvm/test/CodeGen/AArch64/sve-pred-selectop3.ll b/llvm/test/CodeGen/AArch64/sve-pred-selectop3.ll index 66dece82a0ac5..58d6149b94d3a 100644 --- a/llvm/test/CodeGen/AArch64/sve-pred-selectop3.ll +++ b/llvm/test/CodeGen/AArch64/sve-pred-selectop3.ll @@ -641,9 +641,9 @@ define @fadd_nxv4f32_x( %x, %n, zeroinitializer @@ -656,9 +656,9 @@ define @fadd_nxv8f16_x( %x, %n, zeroinitializer @@ -671,9 +671,9 @@ define @fadd_nxv2f64_x( %x, %n, zeroinitializer @@ -686,9 +686,9 @@ define @fsub_nxv4f32_x( %x, %n, zeroinitializer @@ -701,9 +701,9 @@ define @fsub_nxv8f16_x( %x, %n, zeroinitializer @@ -716,9 +716,9 @@ define @fsub_nxv2f64_x( %x, %n, zeroinitializer @@ -731,9 +731,9 @@ define @fmul_nxv4f32_x( %x, %n, zeroinitializer @@ -746,9 +746,9 @@ define @fmul_nxv8f16_x( %x, %n, zeroinitializer @@ -761,9 +761,9 @@ define @fmul_nxv2f64_x( %x, %n, zeroinitializer @@ -777,9 +777,8 @@ define @fdiv_nxv4f32_x( %x, %n, zeroinitializer @@ -793,9 +792,8 @@ define @fdiv_nxv8f16_x( %x, %n, zeroinitializer @@ -809,9 +807,8 @@ define @fdiv_nxv2f64_x( %x, %n, zeroinitializer @@ -825,8 +822,8 @@ define @fma_nxv4f32_x( %x, %n, zeroinitializer @@ -841,8 +838,8 @@ define @fma_nxv8f16_x( %x, %n, zeroinitializer @@ -857,8 +854,8 @@ define @fma_nxv2f64_x( %x, %n, zeroinitializer @@ -1540,10 +1537,9 @@ define @fadd_nxv4f32_y( %x, %n, zeroinitializer @@ -1556,10 +1552,9 @@ define @fadd_nxv8f16_y( %x, %n, zeroinitializer @@ -1572,10 +1567,9 @@ define @fadd_nxv2f64_y( %x, %n, zeroinitializer @@ -1588,10 +1582,9 @@ define @fsub_nxv4f32_y( %x, %n, zeroinitializer @@ -1604,10 +1597,9 @@ define @fsub_nxv8f16_y( %x, %n, zeroinitializer @@ -1620,10 +1612,9 @@ define @fsub_nxv2f64_y( %x, %n, zeroinitializer @@ -1636,10 +1627,9 @@ define @fmul_nxv4f32_y( %x, %n, zeroinitializer @@ -1652,10 +1642,9 @@ define @fmul_nxv8f16_y( %x, %n, zeroinitializer 
@@ -1668,10 +1657,9 @@ define @fmul_nxv2f64_y( %x, %n, zeroinitializer @@ -1685,9 +1673,8 @@ define @fdiv_nxv4f32_y( %x, %n, zeroinitializer @@ -1701,9 +1688,8 @@ define @fdiv_nxv8f16_y( %x, %n, zeroinitializer @@ -1717,9 +1703,8 @@ define @fdiv_nxv2f64_y( %x, %n, zeroinitializer @@ -1734,8 +1719,7 @@ define @fmai_nxv4f32_y( %x, %n, zeroinitializer @@ -1750,8 +1734,7 @@ define @fmai_nxv8f16_y( %x, %n, zeroinitializer @@ -1766,8 +1749,7 @@ define @fmai_nxv2f64_y( %x, %n, zeroinitializer @@ -1782,8 +1764,7 @@ define @fma_nxv4f32_y( %x, %n, zeroinitializer @@ -1799,8 +1780,7 @@ define @fma_nxv8f16_y( %x, %n, zeroinitializer @@ -1816,8 +1796,7 @@ define @fma_nxv2f64_y( %x, %n, zeroinitializer diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll index 2e993a85760c6..8a84d3ca2328c 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll @@ -27,9 +27,6 @@ define half @add_v2HalfH(<2 x half> %bin.rdx) { ; CHECK-SD-FP16-LABEL: add_v2HalfH: ; CHECK-SD-FP16: // %bb.0: ; CHECK-SD-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-FP16-NEXT: mov v0.h[2], wzr -; CHECK-SD-FP16-NEXT: mov v0.h[3], wzr -; CHECK-SD-FP16-NEXT: faddp v0.4h, v0.4h, v0.4h ; CHECK-SD-FP16-NEXT: faddp h0, v0.2h ; CHECK-SD-FP16-NEXT: ret ; diff --git a/llvm/test/CodeGen/AMDGPU/fmuladd.f64.ll b/llvm/test/CodeGen/AMDGPU/fmuladd.f64.ll index ff89cc21b56da..43f7cd96a3b48 100644 --- a/llvm/test/CodeGen/AMDGPU/fmuladd.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/fmuladd.f64.ll @@ -1,14 +1,67 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=verde -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti 
-fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=verde -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI %s - -; GCN-LABEL: {{^}}fmuladd_f64: -; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} -define amdgpu_kernel void @fmuladd_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1, - ptr addrspace(1) %in2, ptr addrspace(1) %in3) #0 { +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=SI,SI-STRICT %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=verde -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=SI,SI-STRICT %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=SI,SI-CONTRACT %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=verde -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=SI,SI-CONTRACT %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,VI-STRICT %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global 
-fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,VI-CONTRACT %s + +define amdgpu_kernel void @fmuladd_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2, ptr addrspace(1) %in3) #0 { +; SI-LABEL: fmuladd_f64: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 +; SI-NEXT: s_mov_b32 s11, 0xf000 +; SI-NEXT: s_mov_b32 s10, -1 +; SI-NEXT: s_mov_b32 s14, s10 +; SI-NEXT: s_mov_b32 s15, s11 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b32 s12, s2 +; SI-NEXT: s_mov_b32 s13, s3 +; SI-NEXT: s_mov_b32 s16, s4 +; SI-NEXT: s_mov_b32 s17, s5 +; SI-NEXT: s_mov_b32 s18, s10 +; SI-NEXT: s_mov_b32 s19, s11 +; SI-NEXT: s_mov_b32 s4, s6 +; SI-NEXT: s_mov_b32 s5, s7 +; SI-NEXT: s_mov_b32 s6, s10 +; SI-NEXT: s_mov_b32 s7, s11 +; SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[12:15], 0 +; SI-NEXT: buffer_load_dwordx2 v[2:3], off, s[16:19], 0 +; SI-NEXT: buffer_load_dwordx2 v[4:5], off, s[4:7], 0 +; SI-NEXT: s_mov_b32 s8, s0 +; SI-NEXT: s_mov_b32 s9, s1 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] +; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: fmuladd_f64: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 +; VI-NEXT: s_mov_b32 s11, 0xf000 +; VI-NEXT: s_mov_b32 s10, -1 +; VI-NEXT: s_mov_b32 s14, s10 +; VI-NEXT: s_mov_b32 s15, s11 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_mov_b32 s12, s2 +; VI-NEXT: s_mov_b32 s13, s3 +; VI-NEXT: s_mov_b32 s16, s4 +; VI-NEXT: s_mov_b32 s17, s5 +; VI-NEXT: s_mov_b32 s18, s10 +; VI-NEXT: s_mov_b32 s19, s11 +; VI-NEXT: s_mov_b32 s4, s6 +; VI-NEXT: s_mov_b32 s5, s7 +; VI-NEXT: s_mov_b32 s6, s10 +; VI-NEXT: s_mov_b32 s7, s11 +; VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[12:15], 0 +; VI-NEXT: buffer_load_dwordx2 v[2:3], off, s[16:19], 0 +; VI-NEXT: buffer_load_dwordx2 v[4:5], off, s[4:7], 0 +; VI-NEXT: s_mov_b32 s8, s0 +; VI-NEXT: s_mov_b32 s9, s1 +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: v_fma_f64 
v[0:1], v[0:1], v[2:3], v[4:5] +; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0 +; VI-NEXT: s_endpgm %r0 = load double, ptr addrspace(1) %in1 %r1 = load double, ptr addrspace(1) %in2 %r2 = load double, ptr addrspace(1) %in3 @@ -17,13 +70,122 @@ define amdgpu_kernel void @fmuladd_f64(ptr addrspace(1) %out, ptr addrspace(1) % ret void } -; GCN-LABEL: {{^}}fmul_fadd_f64: -; GCN-CONTRACT: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} - -; GCN-STRICT: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} -; GCN-STRICT: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} -define amdgpu_kernel void @fmul_fadd_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1, - ptr addrspace(1) %in2, ptr addrspace(1) %in3) #0 { +define amdgpu_kernel void @fmul_fadd_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2, ptr addrspace(1) %in3) #0 { +; SI-STRICT-LABEL: fmul_fadd_f64: +; SI-STRICT: ; %bb.0: +; SI-STRICT-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 +; SI-STRICT-NEXT: s_mov_b32 s11, 0xf000 +; SI-STRICT-NEXT: s_mov_b32 s10, -1 +; SI-STRICT-NEXT: s_mov_b32 s14, s10 +; SI-STRICT-NEXT: s_mov_b32 s15, s11 +; SI-STRICT-NEXT: s_waitcnt lgkmcnt(0) +; SI-STRICT-NEXT: s_mov_b32 s12, s2 +; SI-STRICT-NEXT: s_mov_b32 s13, s3 +; SI-STRICT-NEXT: s_mov_b32 s16, s4 +; SI-STRICT-NEXT: s_mov_b32 s17, s5 +; SI-STRICT-NEXT: s_mov_b32 s18, s10 +; SI-STRICT-NEXT: s_mov_b32 s19, s11 +; SI-STRICT-NEXT: buffer_load_dwordx2 v[0:1], off, s[12:15], 0 +; SI-STRICT-NEXT: buffer_load_dwordx2 v[2:3], off, s[16:19], 0 +; SI-STRICT-NEXT: s_mov_b32 s4, s6 +; SI-STRICT-NEXT: s_mov_b32 s5, s7 +; SI-STRICT-NEXT: s_mov_b32 s6, s10 +; SI-STRICT-NEXT: s_mov_b32 s7, s11 +; SI-STRICT-NEXT: buffer_load_dwordx2 v[4:5], off, s[4:7], 0 +; SI-STRICT-NEXT: s_mov_b32 s8, s0 +; SI-STRICT-NEXT: s_mov_b32 s9, s1 +; SI-STRICT-NEXT: s_waitcnt vmcnt(1) +; SI-STRICT-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; SI-STRICT-NEXT: 
s_waitcnt vmcnt(0) +; SI-STRICT-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5] +; SI-STRICT-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0 +; SI-STRICT-NEXT: s_endpgm +; +; SI-CONTRACT-LABEL: fmul_fadd_f64: +; SI-CONTRACT: ; %bb.0: +; SI-CONTRACT-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 +; SI-CONTRACT-NEXT: s_mov_b32 s11, 0xf000 +; SI-CONTRACT-NEXT: s_mov_b32 s10, -1 +; SI-CONTRACT-NEXT: s_mov_b32 s14, s10 +; SI-CONTRACT-NEXT: s_mov_b32 s15, s11 +; SI-CONTRACT-NEXT: s_waitcnt lgkmcnt(0) +; SI-CONTRACT-NEXT: s_mov_b32 s12, s2 +; SI-CONTRACT-NEXT: s_mov_b32 s13, s3 +; SI-CONTRACT-NEXT: s_mov_b32 s16, s4 +; SI-CONTRACT-NEXT: s_mov_b32 s17, s5 +; SI-CONTRACT-NEXT: s_mov_b32 s18, s10 +; SI-CONTRACT-NEXT: s_mov_b32 s19, s11 +; SI-CONTRACT-NEXT: s_mov_b32 s4, s6 +; SI-CONTRACT-NEXT: s_mov_b32 s5, s7 +; SI-CONTRACT-NEXT: s_mov_b32 s6, s10 +; SI-CONTRACT-NEXT: s_mov_b32 s7, s11 +; SI-CONTRACT-NEXT: buffer_load_dwordx2 v[0:1], off, s[12:15], 0 +; SI-CONTRACT-NEXT: buffer_load_dwordx2 v[2:3], off, s[16:19], 0 +; SI-CONTRACT-NEXT: buffer_load_dwordx2 v[4:5], off, s[4:7], 0 +; SI-CONTRACT-NEXT: s_mov_b32 s8, s0 +; SI-CONTRACT-NEXT: s_mov_b32 s9, s1 +; SI-CONTRACT-NEXT: s_waitcnt vmcnt(0) +; SI-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] +; SI-CONTRACT-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0 +; SI-CONTRACT-NEXT: s_endpgm +; +; VI-STRICT-LABEL: fmul_fadd_f64: +; VI-STRICT: ; %bb.0: +; VI-STRICT-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 +; VI-STRICT-NEXT: s_mov_b32 s11, 0xf000 +; VI-STRICT-NEXT: s_mov_b32 s10, -1 +; VI-STRICT-NEXT: s_mov_b32 s14, s10 +; VI-STRICT-NEXT: s_mov_b32 s15, s11 +; VI-STRICT-NEXT: s_waitcnt lgkmcnt(0) +; VI-STRICT-NEXT: s_mov_b32 s12, s2 +; VI-STRICT-NEXT: s_mov_b32 s13, s3 +; VI-STRICT-NEXT: s_mov_b32 s16, s4 +; VI-STRICT-NEXT: s_mov_b32 s17, s5 +; VI-STRICT-NEXT: s_mov_b32 s18, s10 +; VI-STRICT-NEXT: s_mov_b32 s19, s11 +; VI-STRICT-NEXT: buffer_load_dwordx2 v[0:1], off, s[12:15], 0 +; VI-STRICT-NEXT: buffer_load_dwordx2 v[2:3], off, 
s[16:19], 0 +; VI-STRICT-NEXT: s_mov_b32 s4, s6 +; VI-STRICT-NEXT: s_mov_b32 s5, s7 +; VI-STRICT-NEXT: s_mov_b32 s6, s10 +; VI-STRICT-NEXT: s_mov_b32 s7, s11 +; VI-STRICT-NEXT: buffer_load_dwordx2 v[4:5], off, s[4:7], 0 +; VI-STRICT-NEXT: s_mov_b32 s8, s0 +; VI-STRICT-NEXT: s_mov_b32 s9, s1 +; VI-STRICT-NEXT: s_waitcnt vmcnt(1) +; VI-STRICT-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; VI-STRICT-NEXT: s_waitcnt vmcnt(0) +; VI-STRICT-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5] +; VI-STRICT-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0 +; VI-STRICT-NEXT: s_endpgm +; +; VI-CONTRACT-LABEL: fmul_fadd_f64: +; VI-CONTRACT: ; %bb.0: +; VI-CONTRACT-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 +; VI-CONTRACT-NEXT: s_mov_b32 s11, 0xf000 +; VI-CONTRACT-NEXT: s_mov_b32 s10, -1 +; VI-CONTRACT-NEXT: s_mov_b32 s14, s10 +; VI-CONTRACT-NEXT: s_mov_b32 s15, s11 +; VI-CONTRACT-NEXT: s_waitcnt lgkmcnt(0) +; VI-CONTRACT-NEXT: s_mov_b32 s12, s2 +; VI-CONTRACT-NEXT: s_mov_b32 s13, s3 +; VI-CONTRACT-NEXT: s_mov_b32 s16, s4 +; VI-CONTRACT-NEXT: s_mov_b32 s17, s5 +; VI-CONTRACT-NEXT: s_mov_b32 s18, s10 +; VI-CONTRACT-NEXT: s_mov_b32 s19, s11 +; VI-CONTRACT-NEXT: s_mov_b32 s4, s6 +; VI-CONTRACT-NEXT: s_mov_b32 s5, s7 +; VI-CONTRACT-NEXT: s_mov_b32 s6, s10 +; VI-CONTRACT-NEXT: s_mov_b32 s7, s11 +; VI-CONTRACT-NEXT: buffer_load_dwordx2 v[0:1], off, s[12:15], 0 +; VI-CONTRACT-NEXT: buffer_load_dwordx2 v[2:3], off, s[16:19], 0 +; VI-CONTRACT-NEXT: buffer_load_dwordx2 v[4:5], off, s[4:7], 0 +; VI-CONTRACT-NEXT: s_mov_b32 s8, s0 +; VI-CONTRACT-NEXT: s_mov_b32 s9, s1 +; VI-CONTRACT-NEXT: s_waitcnt vmcnt(0) +; VI-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] +; VI-CONTRACT-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0 +; VI-CONTRACT-NEXT: s_endpgm %r0 = load double, ptr addrspace(1) %in1 %r1 = load double, ptr addrspace(1) %in2 %r2 = load double, ptr addrspace(1) %in3 @@ -33,11 +195,62 @@ define amdgpu_kernel void @fmul_fadd_f64(ptr addrspace(1) %out, ptr addrspace(1) ret void } -; GCN-LABEL: 
{{^}}fmul_fadd_contract_f64: -; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} - -define amdgpu_kernel void @fmul_fadd_contract_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1, - ptr addrspace(1) %in2, ptr addrspace(1) %in3) #0 { +define amdgpu_kernel void @fmul_fadd_contract_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2, ptr addrspace(1) %in3) #0 { +; SI-LABEL: fmul_fadd_contract_f64: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 +; SI-NEXT: s_mov_b32 s11, 0xf000 +; SI-NEXT: s_mov_b32 s10, -1 +; SI-NEXT: s_mov_b32 s14, s10 +; SI-NEXT: s_mov_b32 s15, s11 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b32 s12, s2 +; SI-NEXT: s_mov_b32 s13, s3 +; SI-NEXT: s_mov_b32 s16, s4 +; SI-NEXT: s_mov_b32 s17, s5 +; SI-NEXT: s_mov_b32 s18, s10 +; SI-NEXT: s_mov_b32 s19, s11 +; SI-NEXT: s_mov_b32 s4, s6 +; SI-NEXT: s_mov_b32 s5, s7 +; SI-NEXT: s_mov_b32 s6, s10 +; SI-NEXT: s_mov_b32 s7, s11 +; SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[12:15], 0 +; SI-NEXT: buffer_load_dwordx2 v[2:3], off, s[16:19], 0 +; SI-NEXT: buffer_load_dwordx2 v[4:5], off, s[4:7], 0 +; SI-NEXT: s_mov_b32 s8, s0 +; SI-NEXT: s_mov_b32 s9, s1 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] +; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: fmul_fadd_contract_f64: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 +; VI-NEXT: s_mov_b32 s11, 0xf000 +; VI-NEXT: s_mov_b32 s10, -1 +; VI-NEXT: s_mov_b32 s14, s10 +; VI-NEXT: s_mov_b32 s15, s11 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_mov_b32 s12, s2 +; VI-NEXT: s_mov_b32 s13, s3 +; VI-NEXT: s_mov_b32 s16, s4 +; VI-NEXT: s_mov_b32 s17, s5 +; VI-NEXT: s_mov_b32 s18, s10 +; VI-NEXT: s_mov_b32 s19, s11 +; VI-NEXT: s_mov_b32 s4, s6 +; VI-NEXT: s_mov_b32 s5, s7 +; VI-NEXT: s_mov_b32 s6, s10 +; VI-NEXT: s_mov_b32 s7, s11 +; VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[12:15], 0 
+; VI-NEXT: buffer_load_dwordx2 v[2:3], off, s[16:19], 0 +; VI-NEXT: buffer_load_dwordx2 v[4:5], off, s[4:7], 0 +; VI-NEXT: s_mov_b32 s8, s0 +; VI-NEXT: s_mov_b32 s9, s1 +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] +; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0 +; VI-NEXT: s_endpgm %r0 = load double, ptr addrspace(1) %in1 %r1 = load double, ptr addrspace(1) %in2 %r2 = load double, ptr addrspace(1) %in3 @@ -47,20 +260,76 @@ define amdgpu_kernel void @fmul_fadd_contract_f64(ptr addrspace(1) %out, ptr add ret void } -; GCN-LABEL: {{^}}fadd_a_a_b_f64: -; GCN: {{buffer|flat}}_load_dwordx2 [[R1:v\[[0-9]+:[0-9]+\]]], -; GCN: {{buffer|flat}}_load_dwordx2 [[R2:v\[[0-9]+:[0-9]+\]]], - -; GCN-STRICT: v_add_f64 [[TMP:v\[[0-9]+:[0-9]+\]]], [[R1]], [[R1]] -; GCN-STRICT: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[TMP]], [[R2]] - -; GCN-CONTRACT: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[R1]], 2.0, [[R2]] - -; SI: buffer_store_dwordx2 [[RESULT]] -; VI: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] -define amdgpu_kernel void @fadd_a_a_b_f64(ptr addrspace(1) %out, - ptr addrspace(1) %in1, - ptr addrspace(1) %in2) #0 { +define amdgpu_kernel void @fadd_a_a_b_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) #0 { +; SI-STRICT-LABEL: fadd_a_a_b_f64: +; SI-STRICT: ; %bb.0: +; SI-STRICT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-STRICT-NEXT: s_mov_b32 s3, 0xf000 +; SI-STRICT-NEXT: s_mov_b32 s2, 0 +; SI-STRICT-NEXT: v_lshlrev_b32_e32 v0, 3, v0 +; SI-STRICT-NEXT: v_mov_b32_e32 v1, 0 +; SI-STRICT-NEXT: s_waitcnt lgkmcnt(0) +; SI-STRICT-NEXT: buffer_load_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64 glc +; SI-STRICT-NEXT: s_waitcnt vmcnt(0) +; SI-STRICT-NEXT: buffer_load_dwordx2 v[4:5], v[0:1], s[0:3], 0 addr64 offset:8 glc +; SI-STRICT-NEXT: s_waitcnt vmcnt(0) +; SI-STRICT-NEXT: v_add_f64 v[2:3], v[2:3], v[2:3] +; SI-STRICT-NEXT: v_add_f64 v[2:3], v[2:3], v[4:5] +; SI-STRICT-NEXT: buffer_store_dwordx2 v[2:3], 
v[0:1], s[0:3], 0 addr64 +; SI-STRICT-NEXT: s_endpgm +; +; SI-CONTRACT-LABEL: fadd_a_a_b_f64: +; SI-CONTRACT: ; %bb.0: +; SI-CONTRACT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-CONTRACT-NEXT: s_mov_b32 s3, 0xf000 +; SI-CONTRACT-NEXT: s_mov_b32 s2, 0 +; SI-CONTRACT-NEXT: v_lshlrev_b32_e32 v0, 3, v0 +; SI-CONTRACT-NEXT: v_mov_b32_e32 v1, 0 +; SI-CONTRACT-NEXT: s_waitcnt lgkmcnt(0) +; SI-CONTRACT-NEXT: buffer_load_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64 glc +; SI-CONTRACT-NEXT: s_waitcnt vmcnt(0) +; SI-CONTRACT-NEXT: buffer_load_dwordx2 v[4:5], v[0:1], s[0:3], 0 addr64 offset:8 glc +; SI-CONTRACT-NEXT: s_waitcnt vmcnt(0) +; SI-CONTRACT-NEXT: v_fma_f64 v[2:3], v[2:3], 2.0, v[4:5] +; SI-CONTRACT-NEXT: buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64 +; SI-CONTRACT-NEXT: s_endpgm +; +; VI-STRICT-LABEL: fadd_a_a_b_f64: +; VI-STRICT: ; %bb.0: +; VI-STRICT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-STRICT-NEXT: v_lshlrev_b32_e32 v0, 3, v0 +; VI-STRICT-NEXT: s_waitcnt lgkmcnt(0) +; VI-STRICT-NEXT: v_mov_b32_e32 v1, s1 +; VI-STRICT-NEXT: v_add_u32_e32 v0, vcc, s0, v0 +; VI-STRICT-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; VI-STRICT-NEXT: flat_load_dwordx2 v[2:3], v[0:1] glc +; VI-STRICT-NEXT: s_waitcnt vmcnt(0) +; VI-STRICT-NEXT: v_add_u32_e32 v4, vcc, 8, v0 +; VI-STRICT-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc +; VI-STRICT-NEXT: flat_load_dwordx2 v[4:5], v[4:5] glc +; VI-STRICT-NEXT: s_waitcnt vmcnt(0) +; VI-STRICT-NEXT: v_add_f64 v[2:3], v[2:3], v[2:3] +; VI-STRICT-NEXT: v_add_f64 v[2:3], v[2:3], v[4:5] +; VI-STRICT-NEXT: flat_store_dwordx2 v[0:1], v[2:3] +; VI-STRICT-NEXT: s_endpgm +; +; VI-CONTRACT-LABEL: fadd_a_a_b_f64: +; VI-CONTRACT: ; %bb.0: +; VI-CONTRACT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-CONTRACT-NEXT: v_lshlrev_b32_e32 v0, 3, v0 +; VI-CONTRACT-NEXT: s_waitcnt lgkmcnt(0) +; VI-CONTRACT-NEXT: v_mov_b32_e32 v1, s1 +; VI-CONTRACT-NEXT: v_add_u32_e32 v0, vcc, s0, v0 +; VI-CONTRACT-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; 
VI-CONTRACT-NEXT: v_add_u32_e32 v2, vcc, 8, v0 +; VI-CONTRACT-NEXT: v_addc_u32_e32 v3, vcc, 0, v1, vcc +; VI-CONTRACT-NEXT: flat_load_dwordx2 v[4:5], v[0:1] glc +; VI-CONTRACT-NEXT: s_waitcnt vmcnt(0) +; VI-CONTRACT-NEXT: flat_load_dwordx2 v[2:3], v[2:3] glc +; VI-CONTRACT-NEXT: s_waitcnt vmcnt(0) +; VI-CONTRACT-NEXT: v_fma_f64 v[2:3], v[4:5], 2.0, v[2:3] +; VI-CONTRACT-NEXT: flat_store_dwordx2 v[0:1], v[2:3] +; VI-CONTRACT-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr double, ptr addrspace(1) %out, i32 %tid %gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1 @@ -75,20 +344,76 @@ define amdgpu_kernel void @fadd_a_a_b_f64(ptr addrspace(1) %out, ret void } -; GCN-LABEL: {{^}}fadd_b_a_a_f64: -; GCN: {{buffer|flat}}_load_dwordx2 [[R1:v\[[0-9]+:[0-9]+\]]], -; GCN: {{buffer|flat}}_load_dwordx2 [[R2:v\[[0-9]+:[0-9]+\]]], - -; GCN-STRICT: v_add_f64 [[TMP:v\[[0-9]+:[0-9]+\]]], [[R1]], [[R1]] -; GCN-STRICT: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[R2]], [[TMP]] - -; GCN-CONTRACT: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[R1]], 2.0, [[R2]] - -; SI: buffer_store_dwordx2 [[RESULT]] -; VI: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] -define amdgpu_kernel void @fadd_b_a_a_f64(ptr addrspace(1) %out, - ptr addrspace(1) %in1, - ptr addrspace(1) %in2) #0 { +define amdgpu_kernel void @fadd_b_a_a_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) #0 { +; SI-STRICT-LABEL: fadd_b_a_a_f64: +; SI-STRICT: ; %bb.0: +; SI-STRICT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-STRICT-NEXT: s_mov_b32 s3, 0xf000 +; SI-STRICT-NEXT: s_mov_b32 s2, 0 +; SI-STRICT-NEXT: v_lshlrev_b32_e32 v0, 3, v0 +; SI-STRICT-NEXT: v_mov_b32_e32 v1, 0 +; SI-STRICT-NEXT: s_waitcnt lgkmcnt(0) +; SI-STRICT-NEXT: buffer_load_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64 glc +; SI-STRICT-NEXT: s_waitcnt vmcnt(0) +; SI-STRICT-NEXT: buffer_load_dwordx2 v[4:5], v[0:1], s[0:3], 0 addr64 offset:8 glc +; SI-STRICT-NEXT: s_waitcnt 
vmcnt(0) +; SI-STRICT-NEXT: v_add_f64 v[2:3], v[2:3], v[2:3] +; SI-STRICT-NEXT: v_add_f64 v[2:3], v[4:5], v[2:3] +; SI-STRICT-NEXT: buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64 +; SI-STRICT-NEXT: s_endpgm +; +; SI-CONTRACT-LABEL: fadd_b_a_a_f64: +; SI-CONTRACT: ; %bb.0: +; SI-CONTRACT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-CONTRACT-NEXT: s_mov_b32 s3, 0xf000 +; SI-CONTRACT-NEXT: s_mov_b32 s2, 0 +; SI-CONTRACT-NEXT: v_lshlrev_b32_e32 v0, 3, v0 +; SI-CONTRACT-NEXT: v_mov_b32_e32 v1, 0 +; SI-CONTRACT-NEXT: s_waitcnt lgkmcnt(0) +; SI-CONTRACT-NEXT: buffer_load_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64 glc +; SI-CONTRACT-NEXT: s_waitcnt vmcnt(0) +; SI-CONTRACT-NEXT: buffer_load_dwordx2 v[4:5], v[0:1], s[0:3], 0 addr64 offset:8 glc +; SI-CONTRACT-NEXT: s_waitcnt vmcnt(0) +; SI-CONTRACT-NEXT: v_fma_f64 v[2:3], v[2:3], 2.0, v[4:5] +; SI-CONTRACT-NEXT: buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64 +; SI-CONTRACT-NEXT: s_endpgm +; +; VI-STRICT-LABEL: fadd_b_a_a_f64: +; VI-STRICT: ; %bb.0: +; VI-STRICT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-STRICT-NEXT: v_lshlrev_b32_e32 v0, 3, v0 +; VI-STRICT-NEXT: s_waitcnt lgkmcnt(0) +; VI-STRICT-NEXT: v_mov_b32_e32 v1, s1 +; VI-STRICT-NEXT: v_add_u32_e32 v0, vcc, s0, v0 +; VI-STRICT-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; VI-STRICT-NEXT: flat_load_dwordx2 v[2:3], v[0:1] glc +; VI-STRICT-NEXT: s_waitcnt vmcnt(0) +; VI-STRICT-NEXT: v_add_u32_e32 v4, vcc, 8, v0 +; VI-STRICT-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc +; VI-STRICT-NEXT: flat_load_dwordx2 v[4:5], v[4:5] glc +; VI-STRICT-NEXT: s_waitcnt vmcnt(0) +; VI-STRICT-NEXT: v_add_f64 v[2:3], v[2:3], v[2:3] +; VI-STRICT-NEXT: v_add_f64 v[2:3], v[4:5], v[2:3] +; VI-STRICT-NEXT: flat_store_dwordx2 v[0:1], v[2:3] +; VI-STRICT-NEXT: s_endpgm +; +; VI-CONTRACT-LABEL: fadd_b_a_a_f64: +; VI-CONTRACT: ; %bb.0: +; VI-CONTRACT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-CONTRACT-NEXT: v_lshlrev_b32_e32 v0, 3, v0 +; VI-CONTRACT-NEXT: s_waitcnt lgkmcnt(0) +; 
VI-CONTRACT-NEXT: v_mov_b32_e32 v1, s1 +; VI-CONTRACT-NEXT: v_add_u32_e32 v0, vcc, s0, v0 +; VI-CONTRACT-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; VI-CONTRACT-NEXT: v_add_u32_e32 v2, vcc, 8, v0 +; VI-CONTRACT-NEXT: v_addc_u32_e32 v3, vcc, 0, v1, vcc +; VI-CONTRACT-NEXT: flat_load_dwordx2 v[4:5], v[0:1] glc +; VI-CONTRACT-NEXT: s_waitcnt vmcnt(0) +; VI-CONTRACT-NEXT: flat_load_dwordx2 v[2:3], v[2:3] glc +; VI-CONTRACT-NEXT: s_waitcnt vmcnt(0) +; VI-CONTRACT-NEXT: v_fma_f64 v[2:3], v[4:5], 2.0, v[2:3] +; VI-CONTRACT-NEXT: flat_store_dwordx2 v[0:1], v[2:3] +; VI-CONTRACT-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr double, ptr addrspace(1) %out, i32 %tid %gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1 @@ -103,12 +428,98 @@ define amdgpu_kernel void @fadd_b_a_a_f64(ptr addrspace(1) %out, ret void } -; GCN-LABEL: {{^}}mad_sub_f64: -; GCN-STRICT: v_mul_f64 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} -; GCN-STRICT: v_add_f64 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, -v{{\[[0-9]+:[0-9]+\]}} - -; GCN-CONTRACT: v_fma_f64 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, -v{{\[[0-9]+:[0-9]+\]}} define amdgpu_kernel void @mad_sub_f64(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture readonly %ptr) #1 { +; SI-STRICT-LABEL: mad_sub_f64: +; SI-STRICT: ; %bb.0: +; SI-STRICT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; SI-STRICT-NEXT: s_mov_b32 s7, 0xf000 +; SI-STRICT-NEXT: s_mov_b32 s6, 0 +; SI-STRICT-NEXT: v_lshlrev_b32_e32 v0, 3, v0 +; SI-STRICT-NEXT: v_mov_b32_e32 v1, 0 +; SI-STRICT-NEXT: s_waitcnt lgkmcnt(0) +; SI-STRICT-NEXT: s_mov_b64 s[4:5], s[2:3] +; SI-STRICT-NEXT: buffer_load_dwordx2 v[2:3], v[0:1], s[4:7], 0 addr64 glc +; SI-STRICT-NEXT: s_waitcnt vmcnt(0) +; SI-STRICT-NEXT: buffer_load_dwordx2 v[4:5], v[0:1], s[4:7], 0 addr64 offset:8 glc +; SI-STRICT-NEXT: s_waitcnt vmcnt(0) +; SI-STRICT-NEXT: buffer_load_dwordx2 
v[6:7], v[0:1], s[4:7], 0 addr64 offset:16 glc +; SI-STRICT-NEXT: s_waitcnt vmcnt(0) +; SI-STRICT-NEXT: s_mov_b64 s[2:3], s[6:7] +; SI-STRICT-NEXT: v_mul_f64 v[2:3], v[2:3], v[4:5] +; SI-STRICT-NEXT: v_add_f64 v[2:3], v[2:3], -v[6:7] +; SI-STRICT-NEXT: buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64 +; SI-STRICT-NEXT: s_endpgm +; +; SI-CONTRACT-LABEL: mad_sub_f64: +; SI-CONTRACT: ; %bb.0: +; SI-CONTRACT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; SI-CONTRACT-NEXT: s_mov_b32 s7, 0xf000 +; SI-CONTRACT-NEXT: s_mov_b32 s6, 0 +; SI-CONTRACT-NEXT: v_lshlrev_b32_e32 v0, 3, v0 +; SI-CONTRACT-NEXT: v_mov_b32_e32 v1, 0 +; SI-CONTRACT-NEXT: s_waitcnt lgkmcnt(0) +; SI-CONTRACT-NEXT: s_mov_b64 s[4:5], s[2:3] +; SI-CONTRACT-NEXT: buffer_load_dwordx2 v[2:3], v[0:1], s[4:7], 0 addr64 glc +; SI-CONTRACT-NEXT: s_waitcnt vmcnt(0) +; SI-CONTRACT-NEXT: buffer_load_dwordx2 v[4:5], v[0:1], s[4:7], 0 addr64 offset:8 glc +; SI-CONTRACT-NEXT: s_waitcnt vmcnt(0) +; SI-CONTRACT-NEXT: buffer_load_dwordx2 v[6:7], v[0:1], s[4:7], 0 addr64 offset:16 glc +; SI-CONTRACT-NEXT: s_waitcnt vmcnt(0) +; SI-CONTRACT-NEXT: s_mov_b64 s[2:3], s[6:7] +; SI-CONTRACT-NEXT: v_fma_f64 v[2:3], v[2:3], v[4:5], -v[6:7] +; SI-CONTRACT-NEXT: buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64 +; SI-CONTRACT-NEXT: s_endpgm +; +; VI-STRICT-LABEL: mad_sub_f64: +; VI-STRICT: ; %bb.0: +; VI-STRICT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; VI-STRICT-NEXT: v_lshlrev_b32_e32 v6, 3, v0 +; VI-STRICT-NEXT: s_waitcnt lgkmcnt(0) +; VI-STRICT-NEXT: v_mov_b32_e32 v1, s3 +; VI-STRICT-NEXT: v_add_u32_e32 v0, vcc, s2, v6 +; VI-STRICT-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; VI-STRICT-NEXT: v_add_u32_e32 v2, vcc, 8, v0 +; VI-STRICT-NEXT: v_addc_u32_e32 v3, vcc, 0, v1, vcc +; VI-STRICT-NEXT: flat_load_dwordx2 v[4:5], v[0:1] glc +; VI-STRICT-NEXT: s_waitcnt vmcnt(0) +; VI-STRICT-NEXT: flat_load_dwordx2 v[2:3], v[2:3] glc +; VI-STRICT-NEXT: s_waitcnt vmcnt(0) +; VI-STRICT-NEXT: v_add_u32_e32 v0, vcc, 16, v0 +; VI-STRICT-NEXT: 
v_addc_u32_e32 v1, vcc, 0, v1, vcc +; VI-STRICT-NEXT: flat_load_dwordx2 v[0:1], v[0:1] glc +; VI-STRICT-NEXT: s_waitcnt vmcnt(0) +; VI-STRICT-NEXT: v_mul_f64 v[2:3], v[4:5], v[2:3] +; VI-STRICT-NEXT: v_add_f64 v[0:1], v[2:3], -v[0:1] +; VI-STRICT-NEXT: v_mov_b32_e32 v3, s1 +; VI-STRICT-NEXT: v_add_u32_e32 v2, vcc, s0, v6 +; VI-STRICT-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; VI-STRICT-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; VI-STRICT-NEXT: s_endpgm +; +; VI-CONTRACT-LABEL: mad_sub_f64: +; VI-CONTRACT: ; %bb.0: +; VI-CONTRACT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; VI-CONTRACT-NEXT: v_lshlrev_b32_e32 v6, 3, v0 +; VI-CONTRACT-NEXT: s_waitcnt lgkmcnt(0) +; VI-CONTRACT-NEXT: v_mov_b32_e32 v1, s3 +; VI-CONTRACT-NEXT: v_add_u32_e32 v0, vcc, s2, v6 +; VI-CONTRACT-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; VI-CONTRACT-NEXT: v_add_u32_e32 v2, vcc, 8, v0 +; VI-CONTRACT-NEXT: v_addc_u32_e32 v3, vcc, 0, v1, vcc +; VI-CONTRACT-NEXT: v_add_u32_e32 v4, vcc, 16, v0 +; VI-CONTRACT-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc +; VI-CONTRACT-NEXT: flat_load_dwordx2 v[0:1], v[0:1] glc +; VI-CONTRACT-NEXT: s_waitcnt vmcnt(0) +; VI-CONTRACT-NEXT: flat_load_dwordx2 v[2:3], v[2:3] glc +; VI-CONTRACT-NEXT: s_waitcnt vmcnt(0) +; VI-CONTRACT-NEXT: flat_load_dwordx2 v[4:5], v[4:5] glc +; VI-CONTRACT-NEXT: s_waitcnt vmcnt(0) +; VI-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -v[4:5] +; VI-CONTRACT-NEXT: v_mov_b32_e32 v3, s1 +; VI-CONTRACT-NEXT: v_add_u32_e32 v2, vcc, s0, v6 +; VI-CONTRACT-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; VI-CONTRACT-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; VI-CONTRACT-NEXT: s_endpgm %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %tid.ext = sext i32 %tid to i64 %gep0 = getelementptr double, ptr addrspace(1) %ptr, i64 %tid.ext @@ -126,14 +537,76 @@ define amdgpu_kernel void @mad_sub_f64(ptr addrspace(1) noalias nocapture %out, ret void } -; GCN-LABEL: {{^}}fadd_a_a_b_f64_fast_add0: -; GCN-STRICT: v_add_f64 -; GCN-STRICT: v_add_f64 - -; GCN-CONTRACT: 
v_fma_f64 -define amdgpu_kernel void @fadd_a_a_b_f64_fast_add0(ptr addrspace(1) %out, - ptr addrspace(1) %in1, - ptr addrspace(1) %in2) #0 { +define amdgpu_kernel void @fadd_a_a_b_f64_fast_add0(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) #0 { +; SI-STRICT-LABEL: fadd_a_a_b_f64_fast_add0: +; SI-STRICT: ; %bb.0: +; SI-STRICT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-STRICT-NEXT: s_mov_b32 s3, 0xf000 +; SI-STRICT-NEXT: s_mov_b32 s2, 0 +; SI-STRICT-NEXT: v_lshlrev_b32_e32 v0, 3, v0 +; SI-STRICT-NEXT: v_mov_b32_e32 v1, 0 +; SI-STRICT-NEXT: s_waitcnt lgkmcnt(0) +; SI-STRICT-NEXT: buffer_load_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64 glc +; SI-STRICT-NEXT: s_waitcnt vmcnt(0) +; SI-STRICT-NEXT: buffer_load_dwordx2 v[4:5], v[0:1], s[0:3], 0 addr64 offset:8 glc +; SI-STRICT-NEXT: s_waitcnt vmcnt(0) +; SI-STRICT-NEXT: v_add_f64 v[2:3], v[2:3], v[2:3] +; SI-STRICT-NEXT: v_add_f64 v[2:3], v[2:3], v[4:5] +; SI-STRICT-NEXT: buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64 +; SI-STRICT-NEXT: s_endpgm +; +; SI-CONTRACT-LABEL: fadd_a_a_b_f64_fast_add0: +; SI-CONTRACT: ; %bb.0: +; SI-CONTRACT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-CONTRACT-NEXT: s_mov_b32 s3, 0xf000 +; SI-CONTRACT-NEXT: s_mov_b32 s2, 0 +; SI-CONTRACT-NEXT: v_lshlrev_b32_e32 v0, 3, v0 +; SI-CONTRACT-NEXT: v_mov_b32_e32 v1, 0 +; SI-CONTRACT-NEXT: s_waitcnt lgkmcnt(0) +; SI-CONTRACT-NEXT: buffer_load_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64 glc +; SI-CONTRACT-NEXT: s_waitcnt vmcnt(0) +; SI-CONTRACT-NEXT: buffer_load_dwordx2 v[4:5], v[0:1], s[0:3], 0 addr64 offset:8 glc +; SI-CONTRACT-NEXT: s_waitcnt vmcnt(0) +; SI-CONTRACT-NEXT: v_fma_f64 v[2:3], v[2:3], 2.0, v[4:5] +; SI-CONTRACT-NEXT: buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64 +; SI-CONTRACT-NEXT: s_endpgm +; +; VI-STRICT-LABEL: fadd_a_a_b_f64_fast_add0: +; VI-STRICT: ; %bb.0: +; VI-STRICT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-STRICT-NEXT: v_lshlrev_b32_e32 v0, 3, v0 +; VI-STRICT-NEXT: s_waitcnt lgkmcnt(0) 
+; VI-STRICT-NEXT: v_mov_b32_e32 v1, s1 +; VI-STRICT-NEXT: v_add_u32_e32 v0, vcc, s0, v0 +; VI-STRICT-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; VI-STRICT-NEXT: flat_load_dwordx2 v[2:3], v[0:1] glc +; VI-STRICT-NEXT: s_waitcnt vmcnt(0) +; VI-STRICT-NEXT: v_add_u32_e32 v4, vcc, 8, v0 +; VI-STRICT-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc +; VI-STRICT-NEXT: flat_load_dwordx2 v[4:5], v[4:5] glc +; VI-STRICT-NEXT: s_waitcnt vmcnt(0) +; VI-STRICT-NEXT: v_add_f64 v[2:3], v[2:3], v[2:3] +; VI-STRICT-NEXT: v_add_f64 v[2:3], v[2:3], v[4:5] +; VI-STRICT-NEXT: flat_store_dwordx2 v[0:1], v[2:3] +; VI-STRICT-NEXT: s_endpgm +; +; VI-CONTRACT-LABEL: fadd_a_a_b_f64_fast_add0: +; VI-CONTRACT: ; %bb.0: +; VI-CONTRACT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-CONTRACT-NEXT: v_lshlrev_b32_e32 v0, 3, v0 +; VI-CONTRACT-NEXT: s_waitcnt lgkmcnt(0) +; VI-CONTRACT-NEXT: v_mov_b32_e32 v1, s1 +; VI-CONTRACT-NEXT: v_add_u32_e32 v0, vcc, s0, v0 +; VI-CONTRACT-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; VI-CONTRACT-NEXT: v_add_u32_e32 v2, vcc, 8, v0 +; VI-CONTRACT-NEXT: v_addc_u32_e32 v3, vcc, 0, v1, vcc +; VI-CONTRACT-NEXT: flat_load_dwordx2 v[4:5], v[0:1] glc +; VI-CONTRACT-NEXT: s_waitcnt vmcnt(0) +; VI-CONTRACT-NEXT: flat_load_dwordx2 v[2:3], v[2:3] glc +; VI-CONTRACT-NEXT: s_waitcnt vmcnt(0) +; VI-CONTRACT-NEXT: v_fma_f64 v[2:3], v[4:5], 2.0, v[2:3] +; VI-CONTRACT-NEXT: flat_store_dwordx2 v[0:1], v[2:3] +; VI-CONTRACT-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr double, ptr addrspace(1) %out, i32 %tid %gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1 @@ -148,14 +621,76 @@ define amdgpu_kernel void @fadd_a_a_b_f64_fast_add0(ptr addrspace(1) %out, ret void } -; GCN-LABEL: {{^}}fadd_a_a_b_f64_fast_add1: -; GCN-STRICT: v_add_f64 -; GCN-STRICT: v_add_f64 - -; GCN-CONTRACT: v_fma_f64 -define amdgpu_kernel void @fadd_a_a_b_f64_fast_add1(ptr addrspace(1) %out, - ptr addrspace(1) %in1, - ptr addrspace(1) %in2) #0 { +define 
amdgpu_kernel void @fadd_a_a_b_f64_fast_add1(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) #0 { +; SI-STRICT-LABEL: fadd_a_a_b_f64_fast_add1: +; SI-STRICT: ; %bb.0: +; SI-STRICT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-STRICT-NEXT: s_mov_b32 s3, 0xf000 +; SI-STRICT-NEXT: s_mov_b32 s2, 0 +; SI-STRICT-NEXT: v_lshlrev_b32_e32 v0, 3, v0 +; SI-STRICT-NEXT: v_mov_b32_e32 v1, 0 +; SI-STRICT-NEXT: s_waitcnt lgkmcnt(0) +; SI-STRICT-NEXT: buffer_load_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64 glc +; SI-STRICT-NEXT: s_waitcnt vmcnt(0) +; SI-STRICT-NEXT: buffer_load_dwordx2 v[4:5], v[0:1], s[0:3], 0 addr64 offset:8 glc +; SI-STRICT-NEXT: s_waitcnt vmcnt(0) +; SI-STRICT-NEXT: v_add_f64 v[2:3], v[2:3], v[2:3] +; SI-STRICT-NEXT: v_add_f64 v[2:3], v[2:3], v[4:5] +; SI-STRICT-NEXT: buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64 +; SI-STRICT-NEXT: s_endpgm +; +; SI-CONTRACT-LABEL: fadd_a_a_b_f64_fast_add1: +; SI-CONTRACT: ; %bb.0: +; SI-CONTRACT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-CONTRACT-NEXT: s_mov_b32 s3, 0xf000 +; SI-CONTRACT-NEXT: s_mov_b32 s2, 0 +; SI-CONTRACT-NEXT: v_lshlrev_b32_e32 v0, 3, v0 +; SI-CONTRACT-NEXT: v_mov_b32_e32 v1, 0 +; SI-CONTRACT-NEXT: s_waitcnt lgkmcnt(0) +; SI-CONTRACT-NEXT: buffer_load_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64 glc +; SI-CONTRACT-NEXT: s_waitcnt vmcnt(0) +; SI-CONTRACT-NEXT: buffer_load_dwordx2 v[4:5], v[0:1], s[0:3], 0 addr64 offset:8 glc +; SI-CONTRACT-NEXT: s_waitcnt vmcnt(0) +; SI-CONTRACT-NEXT: v_fma_f64 v[2:3], v[2:3], 2.0, v[4:5] +; SI-CONTRACT-NEXT: buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64 +; SI-CONTRACT-NEXT: s_endpgm +; +; VI-STRICT-LABEL: fadd_a_a_b_f64_fast_add1: +; VI-STRICT: ; %bb.0: +; VI-STRICT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-STRICT-NEXT: v_lshlrev_b32_e32 v0, 3, v0 +; VI-STRICT-NEXT: s_waitcnt lgkmcnt(0) +; VI-STRICT-NEXT: v_mov_b32_e32 v1, s1 +; VI-STRICT-NEXT: v_add_u32_e32 v0, vcc, s0, v0 +; VI-STRICT-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; 
VI-STRICT-NEXT: flat_load_dwordx2 v[2:3], v[0:1] glc +; VI-STRICT-NEXT: s_waitcnt vmcnt(0) +; VI-STRICT-NEXT: v_add_u32_e32 v4, vcc, 8, v0 +; VI-STRICT-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc +; VI-STRICT-NEXT: flat_load_dwordx2 v[4:5], v[4:5] glc +; VI-STRICT-NEXT: s_waitcnt vmcnt(0) +; VI-STRICT-NEXT: v_add_f64 v[2:3], v[2:3], v[2:3] +; VI-STRICT-NEXT: v_add_f64 v[2:3], v[2:3], v[4:5] +; VI-STRICT-NEXT: flat_store_dwordx2 v[0:1], v[2:3] +; VI-STRICT-NEXT: s_endpgm +; +; VI-CONTRACT-LABEL: fadd_a_a_b_f64_fast_add1: +; VI-CONTRACT: ; %bb.0: +; VI-CONTRACT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-CONTRACT-NEXT: v_lshlrev_b32_e32 v0, 3, v0 +; VI-CONTRACT-NEXT: s_waitcnt lgkmcnt(0) +; VI-CONTRACT-NEXT: v_mov_b32_e32 v1, s1 +; VI-CONTRACT-NEXT: v_add_u32_e32 v0, vcc, s0, v0 +; VI-CONTRACT-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; VI-CONTRACT-NEXT: v_add_u32_e32 v2, vcc, 8, v0 +; VI-CONTRACT-NEXT: v_addc_u32_e32 v3, vcc, 0, v1, vcc +; VI-CONTRACT-NEXT: flat_load_dwordx2 v[4:5], v[0:1] glc +; VI-CONTRACT-NEXT: s_waitcnt vmcnt(0) +; VI-CONTRACT-NEXT: flat_load_dwordx2 v[2:3], v[2:3] glc +; VI-CONTRACT-NEXT: s_waitcnt vmcnt(0) +; VI-CONTRACT-NEXT: v_fma_f64 v[2:3], v[4:5], 2.0, v[2:3] +; VI-CONTRACT-NEXT: flat_store_dwordx2 v[0:1], v[2:3] +; VI-CONTRACT-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr double, ptr addrspace(1) %out, i32 %tid %gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1 @@ -170,11 +705,40 @@ define amdgpu_kernel void @fadd_a_a_b_f64_fast_add1(ptr addrspace(1) %out, ret void } -; GCN-LABEL: {{^}}fadd_a_a_b_f64_fast: -; GCN: v_fma_f64 -define amdgpu_kernel void @fadd_a_a_b_f64_fast(ptr addrspace(1) %out, - ptr addrspace(1) %in1, - ptr addrspace(1) %in2) #0 { +define amdgpu_kernel void @fadd_a_a_b_f64_fast(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) #0 { +; SI-LABEL: fadd_a_a_b_f64_fast: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; 
SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, 0 +; SI-NEXT: v_lshlrev_b32_e32 v0, 3, v0 +; SI-NEXT: v_mov_b32_e32 v1, 0 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: buffer_load_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64 glc +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: buffer_load_dwordx2 v[4:5], v[0:1], s[0:3], 0 addr64 offset:8 glc +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_fma_f64 v[2:3], v[2:3], 2.0, v[4:5] +; SI-NEXT: buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64 +; SI-NEXT: s_endpgm +; +; VI-LABEL: fadd_a_a_b_f64_fast: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-NEXT: v_lshlrev_b32_e32 v0, 3, v0 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v1, s1 +; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v0 +; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; VI-NEXT: v_add_u32_e32 v2, vcc, 8, v0 +; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v1, vcc +; VI-NEXT: flat_load_dwordx2 v[4:5], v[0:1] glc +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: flat_load_dwordx2 v[2:3], v[2:3] glc +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: v_fma_f64 v[2:3], v[4:5], 2.0, v[2:3] +; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] +; VI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr double, ptr addrspace(1) %out, i32 %tid %gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1 diff --git a/llvm/test/CodeGen/AMDGPU/gds-atomic.ll b/llvm/test/CodeGen/AMDGPU/gds-atomic.ll index 8d44330b1b973..6f6ff96a1a196 100644 --- a/llvm/test/CodeGen/AMDGPU/gds-atomic.ll +++ b/llvm/test/CodeGen/AMDGPU/gds-atomic.ll @@ -1,4 +1,3 @@ -; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s ; RUN: llc -mtriple=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s ; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s 
| FileCheck -check-prefixes=GCN,FUNC %s diff --git a/llvm/test/CodeGen/AMDGPU/global-load-xcnt.ll b/llvm/test/CodeGen/AMDGPU/global-load-xcnt.ll index 70ea0688c8a49..d8079651787ad 100644 --- a/llvm/test/CodeGen/AMDGPU/global-load-xcnt.ll +++ b/llvm/test/CodeGen/AMDGPU/global-load-xcnt.ll @@ -23,9 +23,9 @@ define void @test_i8load_v4i8store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt ; GCN-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GCN-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 ; GCN-SDAG-NEXT: v_or_b32_e32 v1, v3, v2 -; GCN-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GCN-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GCN-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) ; GCN-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GCN-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GCN-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GCN-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 ; GCN-SDAG-NEXT: global_store_b32 v[8:9], v0, off @@ -41,8 +41,7 @@ define void @test_i8load_v4i8store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt ; GCN-GISEL-NEXT: s_wait_loadcnt 0x1 ; GCN-GISEL-NEXT: v_lshl_or_b32 v0, v1, 8, v0 ; GCN-GISEL-NEXT: s_wait_loadcnt 0x0 -; GCN-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v2 -; GCN-GISEL-NEXT: v_lshlrev_b32_e32 v2, 24, v2 +; GCN-GISEL-NEXT: v_dual_lshlrev_b32 v1, 16, v2 :: v_dual_lshlrev_b32 v2, 24, v2 ; GCN-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GCN-GISEL-NEXT: v_or3_b32 v0, v0, v1, v2 ; GCN-GISEL-NEXT: global_store_b32 v[8:9], v0, off @@ -66,17 +65,15 @@ define i16 @test_v7i16_load_store(ptr addrspace(1) %ptr1, ptr addrspace(1) %ptr2 ; GCN-SDAG-NEXT: s_wait_kmcnt 0x0 ; GCN-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off ; GCN-SDAG-NEXT: global_load_b128 v[0:3], v[2:3], off -; GCN-SDAG-NEXT: v_mov_b32_e32 v8, 0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v9, 0 ; GCN-SDAG-NEXT: s_wait_loadcnt 0x0 ; GCN-SDAG-NEXT: v_pk_add_u16 v10, v6, v2 ; GCN-SDAG-NEXT: v_pk_add_u16 v11, v7, v3 ; 
GCN-SDAG-NEXT: s_wait_xcnt 0x0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 12 -; GCN-SDAG-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_mov_b32 v6, 8 +; GCN-SDAG-NEXT: v_dual_mov_b32 v2, 12 :: v_dual_mov_b32 v6, 8 +; GCN-SDAG-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_mov_b32 v7, 0 +; GCN-SDAG-NEXT: v_mov_b32_e32 v8, 0 ; GCN-SDAG-NEXT: v_pk_add_u16 v4, v4, v0 -; GCN-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v10 -; GCN-SDAG-NEXT: v_mov_b32_e32 v7, 0 +; GCN-SDAG-NEXT: v_dual_mov_b32 v9, 0 :: v_dual_lshrrev_b32 v0, 16, v10 ; GCN-SDAG-NEXT: v_pk_add_u16 v5, v5, v1 ; GCN-SDAG-NEXT: s_clause 0x2 ; GCN-SDAG-NEXT: global_store_b16 v[2:3], v11, off @@ -90,18 +87,19 @@ define i16 @test_v7i16_load_store(ptr addrspace(1) %ptr1, ptr addrspace(1) %ptr2 ; GCN-GISEL-NEXT: s_wait_kmcnt 0x0 ; GCN-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off ; GCN-GISEL-NEXT: global_load_b128 v[0:3], v[2:3], off -; GCN-GISEL-NEXT: v_mov_b32_e32 v8, 0 -; GCN-GISEL-NEXT: v_dual_mov_b32 v9, 0 :: v_dual_mov_b32 v10, 2 -; GCN-GISEL-NEXT: v_dual_mov_b32 v11, 0 :: v_dual_mov_b32 v12, 4 -; GCN-GISEL-NEXT: v_dual_mov_b32 v13, 0 :: v_dual_mov_b32 v14, 6 -; GCN-GISEL-NEXT: v_dual_mov_b32 v15, 0 :: v_dual_mov_b32 v16, 8 -; GCN-GISEL-NEXT: v_dual_mov_b32 v17, 0 :: v_dual_mov_b32 v18, 10 -; GCN-GISEL-NEXT: v_dual_mov_b32 v20, 12 :: v_dual_mov_b32 v19, 0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v21, 0 +; GCN-GISEL-NEXT: v_dual_mov_b32 v8, 0 :: v_dual_mov_b32 v10, 2 +; GCN-GISEL-NEXT: v_dual_mov_b32 v9, 0 :: v_dual_mov_b32 v11, 0 +; GCN-GISEL-NEXT: v_dual_mov_b32 v12, 4 :: v_dual_mov_b32 v14, 6 +; GCN-GISEL-NEXT: v_dual_mov_b32 v13, 0 :: v_dual_mov_b32 v15, 0 +; GCN-GISEL-NEXT: v_dual_mov_b32 v16, 8 :: v_dual_mov_b32 v18, 10 +; GCN-GISEL-NEXT: v_dual_mov_b32 v17, 0 :: v_dual_mov_b32 v19, 0 ; GCN-GISEL-NEXT: s_wait_loadcnt 0x0 ; GCN-GISEL-NEXT: v_pk_add_u16 v2, v6, v2 ; GCN-GISEL-NEXT: v_pk_add_u16 v4, v4, v0 +; GCN-GISEL-NEXT: v_mov_b32_e32 v20, 12 ; GCN-GISEL-NEXT: v_pk_add_u16 v1, v5, v1 +; GCN-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) +; 
GCN-GISEL-NEXT: v_dual_mov_b32 v21, 0 :: v_dual_lshrrev_b32 v0, 16, v2 ; GCN-GISEL-NEXT: v_pk_add_u16 v3, v7, v3 ; GCN-GISEL-NEXT: s_clause 0x6 ; GCN-GISEL-NEXT: global_store_b16 v[8:9], v4, off @@ -111,7 +109,6 @@ define i16 @test_v7i16_load_store(ptr addrspace(1) %ptr1, ptr addrspace(1) %ptr2 ; GCN-GISEL-NEXT: global_store_b16 v[16:17], v2, off ; GCN-GISEL-NEXT: global_store_d16_hi_b16 v[18:19], v2, off ; GCN-GISEL-NEXT: global_store_b16 v[20:21], v3, off -; GCN-GISEL-NEXT: v_lshrrev_b32_e32 v0, 16, v2 ; GCN-GISEL-NEXT: s_set_pc_i64 s[30:31] %vec1 = load <7 x i16>, ptr addrspace(1) %ptr1 %insert = insertelement <7 x i16> %vec1, i16 20, i32 4 @@ -319,14 +316,13 @@ define i64 @test_v16i64_load_store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt ; GCN-SDAG-NEXT: v_mov_b32_e32 v16, 0x70 ; GCN-SDAG-NEXT: v_dual_mov_b32 v17, 0 :: v_dual_mov_b32 v50, 0x60 ; GCN-SDAG-NEXT: v_dual_mov_b32 v51, 0 :: v_dual_mov_b32 v52, 48 -; GCN-SDAG-NEXT: v_dual_mov_b32 v38, 0x50 :: v_dual_mov_b32 v53, 0 ; GCN-SDAG-NEXT: v_mov_b32_e32 v54, 32 -; GCN-SDAG-NEXT: v_dual_mov_b32 v14, 0xc8 :: v_dual_mov_b32 v15, 0 -; GCN-SDAG-NEXT: v_dual_mov_b32 v39, 0 :: v_dual_mov_b32 v48, 64 -; GCN-SDAG-NEXT: v_dual_mov_b32 v55, 0 :: v_dual_mov_b32 v40, 16 -; GCN-SDAG-NEXT: v_mov_b32_e32 v49, 0 -; GCN-SDAG-NEXT: v_dual_mov_b32 v41, 0 :: v_dual_mov_b32 v42, 0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v43, 0 +; GCN-SDAG-NEXT: v_dual_mov_b32 v38, 0x50 :: v_dual_mov_b32 v53, 0 +; GCN-SDAG-NEXT: v_dual_mov_b32 v55, 0 :: v_dual_mov_b32 v14, 0xc8 +; GCN-SDAG-NEXT: v_dual_mov_b32 v15, 0 :: v_dual_mov_b32 v39, 0 +; GCN-SDAG-NEXT: v_dual_mov_b32 v48, 64 :: v_dual_mov_b32 v40, 16 +; GCN-SDAG-NEXT: v_dual_mov_b32 v42, 0 :: v_dual_mov_b32 v49, 0 +; GCN-SDAG-NEXT: v_dual_mov_b32 v41, 0 :: v_dual_mov_b32 v43, 0 ; GCN-SDAG-NEXT: s_wait_loadcnt 0x7 ; GCN-SDAG-NEXT: global_store_b128 v[16:17], v[6:9], off ; GCN-SDAG-NEXT: s_wait_loadcnt 0x6 @@ -408,15 +404,16 @@ define i64 @test_v16i64_load_store(ptr addrspace(1) %ptr_a, ptr 
addrspace(1) %pt ; GCN-GISEL-NEXT: v_mov_b32_e32 v34, 0xc8 ; GCN-GISEL-NEXT: v_dual_mov_b32 v35, 0 :: v_dual_mov_b32 v38, 0 ; GCN-GISEL-NEXT: v_dual_mov_b32 v39, 0 :: v_dual_mov_b32 v48, 16 -; GCN-GISEL-NEXT: v_dual_mov_b32 v49, 0 :: v_dual_mov_b32 v50, 32 -; GCN-GISEL-NEXT: v_dual_mov_b32 v52, 48 :: v_dual_mov_b32 v51, 0 -; GCN-GISEL-NEXT: v_dual_mov_b32 v53, 0 :: v_dual_mov_b32 v54, 64 -; GCN-GISEL-NEXT: v_dual_mov_b32 v40, 0x50 :: v_dual_mov_b32 v55, 0 -; GCN-GISEL-NEXT: v_dual_mov_b32 v41, 0 :: v_dual_mov_b32 v42, 0x60 +; GCN-GISEL-NEXT: v_dual_mov_b32 v50, 32 :: v_dual_mov_b32 v49, 0 +; GCN-GISEL-NEXT: v_dual_mov_b32 v51, 0 :: v_dual_mov_b32 v52, 48 +; GCN-GISEL-NEXT: v_mov_b32_e32 v54, 64 +; GCN-GISEL-NEXT: v_dual_mov_b32 v40, 0x50 :: v_dual_mov_b32 v53, 0 +; GCN-GISEL-NEXT: v_dual_mov_b32 v55, 0 :: v_dual_mov_b32 v41, 0 +; GCN-GISEL-NEXT: v_mov_b32_e32 v42, 0x60 ; GCN-GISEL-NEXT: v_dual_mov_b32 v44, 0x70 :: v_dual_mov_b32 v43, 0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v45, 0 ; GCN-GISEL-NEXT: s_wait_loadcnt 0x7 -; GCN-GISEL-NEXT: v_dual_mov_b32 v37, v9 :: v_dual_mov_b32 v36, v8 +; GCN-GISEL-NEXT: v_dual_mov_b32 v45, 0 :: v_dual_mov_b32 v37, v9 +; GCN-GISEL-NEXT: v_mov_b32_e32 v36, v8 ; GCN-GISEL-NEXT: v_lshl_add_u64 v[6:7], v[6:7], 0, 0xc8 ; GCN-GISEL-NEXT: v_lshl_add_u64 v[8:9], v[8:9], 0, v[8:9] ; GCN-GISEL-NEXT: s_wait_loadcnt 0x6 @@ -491,12 +488,11 @@ define amdgpu_kernel void @test_v7i16_load_store_kernel(ptr addrspace(1) %ptr1, ; GCN-SDAG: ; %bb.0: ; GCN-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 ; GCN-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GCN-SDAG-NEXT: s_wait_xcnt 0x0 +; GCN-SDAG-NEXT: s_load_b64 s[4:5], s[4:5], 0x10 ; GCN-SDAG-NEXT: v_mov_b32_e32 v8, 12 ; GCN-SDAG-NEXT: v_dual_mov_b32 v9, 0 :: v_dual_mov_b32 v10, 8 -; GCN-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) ; GCN-SDAG-NEXT: v_dual_mov_b32 v11, 0 :: v_dual_lshlrev_b32 v4, 4, v0 -; GCN-SDAG-NEXT: s_wait_xcnt 0x0 -; GCN-SDAG-NEXT: s_load_b64 s[4:5], s[4:5], 0x10 ; GCN-SDAG-NEXT: v_mov_b32_e32 v12, 
0 ; GCN-SDAG-NEXT: v_mov_b32_e32 v13, 0 ; GCN-SDAG-NEXT: s_wait_kmcnt 0x0 @@ -520,17 +516,16 @@ define amdgpu_kernel void @test_v7i16_load_store_kernel(ptr addrspace(1) %ptr1, ; GCN-GISEL: ; %bb.0: ; GCN-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 ; GCN-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GCN-GISEL-NEXT: s_wait_xcnt 0x0 +; GCN-GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x10 ; GCN-GISEL-NEXT: v_mov_b32_e32 v8, 0 ; GCN-GISEL-NEXT: v_dual_mov_b32 v9, 0 :: v_dual_mov_b32 v10, 2 -; GCN-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) ; GCN-GISEL-NEXT: v_dual_mov_b32 v11, 0 :: v_dual_lshlrev_b32 v4, 4, v0 -; GCN-GISEL-NEXT: s_wait_xcnt 0x0 -; GCN-GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x10 ; GCN-GISEL-NEXT: v_mov_b32_e32 v12, 4 ; GCN-GISEL-NEXT: v_dual_mov_b32 v13, 0 :: v_dual_mov_b32 v14, 6 ; GCN-GISEL-NEXT: v_dual_mov_b32 v15, 0 :: v_dual_mov_b32 v16, 8 ; GCN-GISEL-NEXT: v_dual_mov_b32 v17, 0 :: v_dual_mov_b32 v18, 10 -; GCN-GISEL-NEXT: v_dual_mov_b32 v20, 12 :: v_dual_mov_b32 v19, 0 +; GCN-GISEL-NEXT: v_dual_mov_b32 v19, 0 :: v_dual_mov_b32 v20, 12 ; GCN-GISEL-NEXT: v_mov_b32_e32 v21, 0 ; GCN-GISEL-NEXT: s_wait_kmcnt 0x0 ; GCN-GISEL-NEXT: s_clause 0x1 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll index da537e9676ca9..bdec2c8545c7b 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll @@ -1,5 +1,3 @@ -; RUN: llc -global-isel=0 -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tonga 
-verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.swap.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.swap.ll index 234014fac9f5e..79288d76b414a 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.swap.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.swap.ll @@ -1,5 +1,3 @@ -; RUN: llc -global-isel=0 -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.wait.gfx1250.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.wait.gfx1250.ll new file mode 100644 index 0000000000000..2173d07baa57e --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.wait.gfx1250.ll @@ -0,0 +1,24 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck %s -check-prefix=GFX12 +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck %s -check-prefix=GFX12 + +define amdgpu_ps void @test_asynccnt() { +; GFX12-LABEL: test_asynccnt: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_asynccnt 0x0 +; GFX12-NEXT: s_endpgm + call void @llvm.amdgcn.s.wait.asynccnt(i16 0) + ret void +} + +define amdgpu_ps void @test_tensorcnt() { +; GFX12-LABEL: test_tensorcnt: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_tensorcnt 0x0 +; GFX12-NEXT: s_endpgm + call void @llvm.amdgcn.s.wait.tensorcnt(i16 0) + ret void +} + +declare void 
@llvm.amdgcn.s.wait.asynccnt(i16) +declare void @llvm.amdgcn.s.wait.tensorcnt(i16) diff --git a/llvm/test/CodeGen/AMDGPU/load-local-i16.ll b/llvm/test/CodeGen/AMDGPU/load-local-i16.ll index 1dd08c561b2ab..8b7102582c2d0 100644 --- a/llvm/test/CodeGen/AMDGPU/load-local-i16.ll +++ b/llvm/test/CodeGen/AMDGPU/load-local-i16.ll @@ -1,246 +1,872 @@ -; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,SI,SICIVI,FUNC %s -; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,SICIVI,GFX89,FUNC %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX9,GFX89,FUNC %s -; RUN: llc -mtriple=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=SI %s +; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=VI,VI-NO-DS128 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GFX9,GFX9-NO-DS128 %s +; RUN: llc -mtriple=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG %s ; Testing for ds_read/write_b128 -; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=+enable-ds128 < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=CIVI,FUNC %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=+enable-ds128 < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=CIVI,FUNC %s +; RUN: llc -mtriple=amdgcn -mcpu=tonga 
-mattr=+enable-ds128 < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=VI,VI-DS128 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=+enable-ds128 < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GFX9,GFX9-DS128 %s -; FUNC-LABEL: {{^}}local_load_i16: -; GFX9-NOT: m0 -; SICIVI: s_mov_b32 m0 - -; GCN: ds_read_u16 v{{[0-9]+}} - -; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z -; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]] -; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP -; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y -; EG: LDS_SHORT_WRITE {{\*?}} [[TO]], [[DATA]] define amdgpu_kernel void @local_load_i16(ptr addrspace(3) %out, ptr addrspace(3) %in) { +; SI-LABEL: local_load_i16: +; SI: ; %bb.0: ; %entry +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v0, s1 +; SI-NEXT: s_mov_b32 m0, -1 +; SI-NEXT: ds_read_u16 v0, v0 +; SI-NEXT: v_mov_b32_e32 v1, s0 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: ds_write_b16 v1, v0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: local_load_i16: +; VI: ; %bb.0: ; %entry +; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-NEXT: s_mov_b32 m0, -1 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v0, s1 +; VI-NEXT: ds_read_u16 v0, v0 +; VI-NEXT: v_mov_b32_e32 v1, s0 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: ds_write_b16 v1, v0 +; VI-NEXT: s_endpgm +; +; GFX9-LABEL: local_load_i16: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-NEXT: ds_read_u16 v0, v0 +; GFX9-NEXT: v_mov_b32_e32 v1, s0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: ds_write_b16 v1, v0 +; GFX9-NEXT: s_endpgm +; +; EG-LABEL: local_load_i16: +; EG: ; %bb.0: ; %entry +; EG-NEXT: ALU 4, @0, KC0[CB0:0-32], KC1[] +; EG-NEXT: MOV * T0.W, KC0[2].Z, +; EG-NEXT: LDS_USHORT_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.X, OQAP, +; EG-NEXT: MOV * T0.W, KC0[2].Y, +; EG-NEXT: 
LDS_SHORT_WRITE * T0.W, T0.X, +; EG-NEXT: RETURN entry: %ld = load i16, ptr addrspace(3) %in store i16 %ld, ptr addrspace(3) %out ret void } -; FUNC-LABEL: {{^}}local_load_v2i16: -; GFX9-NOT: m0 -; SICIVI: s_mov_b32 m0 - -; GCN: ds_read_b32 - -; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z -; EG: LDS_READ_RET {{.*}} [[FROM]] -; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP -; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y -; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]] define amdgpu_kernel void @local_load_v2i16(ptr addrspace(3) %out, ptr addrspace(3) %in) { +; SI-LABEL: local_load_v2i16: +; SI: ; %bb.0: ; %entry +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v0, s1 +; SI-NEXT: s_mov_b32 m0, -1 +; SI-NEXT: ds_read_b32 v0, v0 +; SI-NEXT: v_mov_b32_e32 v1, s0 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: ds_write_b32 v1, v0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: local_load_v2i16: +; VI: ; %bb.0: ; %entry +; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-NEXT: s_mov_b32 m0, -1 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v0, s1 +; VI-NEXT: ds_read_b32 v0, v0 +; VI-NEXT: v_mov_b32_e32 v1, s0 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: ds_write_b32 v1, v0 +; VI-NEXT: s_endpgm +; +; GFX9-LABEL: local_load_v2i16: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-NEXT: ds_read_b32 v0, v0 +; GFX9-NEXT: v_mov_b32_e32 v1, s0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: ds_write_b32 v1, v0 +; GFX9-NEXT: s_endpgm +; +; EG-LABEL: local_load_v2i16: +; EG: ; %bb.0: ; %entry +; EG-NEXT: ALU 4, @1, KC0[CB0:0-32], KC1[] +; EG-NEXT: MOV * T0.W, KC0[2].Z, +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.X, OQAP, +; EG-NEXT: MOV * T0.W, KC0[2].Y, +; EG-NEXT: LDS_WRITE * T0.W, T0.X, +; EG-NEXT: RETURN entry: %ld = load <2 x i16>, ptr addrspace(3) %in store <2 x i16> %ld, ptr 
addrspace(3) %out ret void } -; FUNC-LABEL: {{^}}local_load_v3i16: -; GFX9-NOT: m0 -; SICIVI: s_mov_b32 m0 - -; GCN: ds_read_b64 -; GCN-DAG: ds_write_b32 -; GCN-DAG: ds_write_b16 - -; EG-DAG: LDS_USHORT_READ_RET -; EG-DAG: LDS_USHORT_READ_RET define amdgpu_kernel void @local_load_v3i16(ptr addrspace(3) %out, ptr addrspace(3) %in) { +; SI-LABEL: local_load_v3i16: +; SI: ; %bb.0: ; %entry +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v0, s1 +; SI-NEXT: s_mov_b32 m0, -1 +; SI-NEXT: ds_read_b64 v[0:1], v0 +; SI-NEXT: v_mov_b32_e32 v2, s0 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: ds_write_b32 v2, v0 +; SI-NEXT: ds_write_b16 v2, v1 offset:4 +; SI-NEXT: s_endpgm +; +; VI-LABEL: local_load_v3i16: +; VI: ; %bb.0: ; %entry +; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-NEXT: s_mov_b32 m0, -1 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v0, s1 +; VI-NEXT: ds_read_b64 v[0:1], v0 +; VI-NEXT: v_mov_b32_e32 v2, s0 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: ds_write_b16 v2, v1 offset:4 +; VI-NEXT: ds_write_b32 v2, v0 +; VI-NEXT: s_endpgm +; +; GFX9-LABEL: local_load_v3i16: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-NEXT: ds_read_b64 v[0:1], v0 +; GFX9-NEXT: v_mov_b32_e32 v2, s0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: ds_write_b16 v2, v1 offset:4 +; GFX9-NEXT: ds_write_b32 v2, v0 +; GFX9-NEXT: s_endpgm +; +; EG-LABEL: local_load_v3i16: +; EG: ; %bb.0: ; %entry +; EG-NEXT: ALU 11, @2, KC0[CB0:0-32], KC1[] +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) +; EG-NEXT: LDS_USHORT_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.X, OQAP, +; EG-NEXT: MOV * T0.W, KC0[2].Z, +; EG-NEXT: LDS_USHORT_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.Y, OQAP, +; EG-NEXT: MOV * T0.W, KC0[2].Y, +; EG-NEXT: LDS_WRITE * T0.W, T0.Y, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, 
literal.x, +; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) +; EG-NEXT: LDS_SHORT_WRITE * T0.W, T0.X, +; EG-NEXT: RETURN entry: %ld = load <3 x i16>, ptr addrspace(3) %in store <3 x i16> %ld, ptr addrspace(3) %out ret void } -; FUNC-LABEL: {{^}}local_load_v4i16: -; GFX9-NOT: m0 -; SICIVI: s_mov_b32 m0 - -; GCN: ds_read_b64 - -; EG: LDS_READ_RET -; EG: LDS_READ_RET define amdgpu_kernel void @local_load_v4i16(ptr addrspace(3) %out, ptr addrspace(3) %in) { +; SI-LABEL: local_load_v4i16: +; SI: ; %bb.0: ; %entry +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v0, s1 +; SI-NEXT: s_mov_b32 m0, -1 +; SI-NEXT: ds_read_b64 v[0:1], v0 +; SI-NEXT: v_mov_b32_e32 v2, s0 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: ds_write_b64 v2, v[0:1] +; SI-NEXT: s_endpgm +; +; VI-LABEL: local_load_v4i16: +; VI: ; %bb.0: ; %entry +; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-NEXT: s_mov_b32 m0, -1 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v0, s1 +; VI-NEXT: ds_read_b64 v[0:1], v0 +; VI-NEXT: v_mov_b32_e32 v2, s0 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: ds_write_b64 v2, v[0:1] +; VI-NEXT: s_endpgm +; +; GFX9-LABEL: local_load_v4i16: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-NEXT: ds_read_b64 v[0:1], v0 +; GFX9-NEXT: v_mov_b32_e32 v2, s0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: ds_write_b64 v2, v[0:1] +; GFX9-NEXT: s_endpgm +; +; EG-LABEL: local_load_v4i16: +; EG: ; %bb.0: ; %entry +; EG-NEXT: ALU 11, @3, KC0[CB0:0-32], KC1[] +; EG-NEXT: MOV * T0.W, KC0[2].Z, +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.X, OQAP, +; EG-NEXT: MOV * T0.W, KC0[2].Y, +; EG-NEXT: LDS_WRITE * T0.W, T0.X, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.X, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, 
literal.x, +; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T0.X, +; EG-NEXT: RETURN entry: %ld = load <4 x i16>, ptr addrspace(3) %in store <4 x i16> %ld, ptr addrspace(3) %out ret void } -; FUNC-LABEL: {{^}}local_load_v8i16: -; GFX9-NOT: m0 -; SICIVI: s_mov_b32 m0 - -; GCN: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} - -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET define amdgpu_kernel void @local_load_v8i16(ptr addrspace(3) %out, ptr addrspace(3) %in) { +; SI-LABEL: local_load_v8i16: +; SI: ; %bb.0: ; %entry +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v0, s1 +; SI-NEXT: s_mov_b32 m0, -1 +; SI-NEXT: ds_read2_b64 v[0:3], v0 offset1:1 +; SI-NEXT: v_mov_b32_e32 v4, s0 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: ds_write2_b64 v4, v[0:1], v[2:3] offset1:1 +; SI-NEXT: s_endpgm +; +; VI-NO-DS128-LABEL: local_load_v8i16: +; VI-NO-DS128: ; %bb.0: ; %entry +; VI-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-NO-DS128-NEXT: s_mov_b32 m0, -1 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-NO-DS128-NEXT: v_mov_b32_e32 v0, s1 +; VI-NO-DS128-NEXT: ds_read2_b64 v[0:3], v0 offset1:1 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v4, s0 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-NO-DS128-NEXT: ds_write2_b64 v4, v[0:1], v[2:3] offset1:1 +; VI-NO-DS128-NEXT: s_endpgm +; +; GFX9-NO-DS128-LABEL: local_load_v8i16: +; GFX9-NO-DS128: ; %bb.0: ; %entry +; GFX9-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-NO-DS128-NEXT: ds_read2_b64 v[0:3], v0 offset1:1 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v4, s0 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NO-DS128-NEXT: ds_write2_b64 v4, v[0:1], v[2:3] offset1:1 +; GFX9-NO-DS128-NEXT: s_endpgm +; +; EG-LABEL: local_load_v8i16: +; EG: ; %bb.0: ; %entry +; EG-NEXT: ALU 25, @4, KC0[CB0:0-32], KC1[] +; 
EG-NEXT: MOV * T0.W, KC0[2].Z, +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.X, OQAP, +; EG-NEXT: MOV * T0.W, KC0[2].Y, +; EG-NEXT: LDS_WRITE * T0.W, T0.X, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.X, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T0.X, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.X, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T0.X, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 12(1.681558e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.X, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 12(1.681558e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T0.X, +; EG-NEXT: RETURN +; +; VI-DS128-LABEL: local_load_v8i16: +; VI-DS128: ; %bb.0: ; %entry +; VI-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-DS128-NEXT: s_mov_b32 m0, -1 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-DS128-NEXT: v_mov_b32_e32 v0, s1 +; VI-DS128-NEXT: ds_read_b128 v[0:3], v0 +; VI-DS128-NEXT: v_mov_b32_e32 v4, s0 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-DS128-NEXT: ds_write_b128 v4, v[0:3] +; VI-DS128-NEXT: s_endpgm +; +; GFX9-DS128-LABEL: local_load_v8i16: +; GFX9-DS128: ; %bb.0: ; %entry +; GFX9-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DS128-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-DS128-NEXT: ds_read_b128 v[0:3], v0 +; GFX9-DS128-NEXT: v_mov_b32_e32 v4, s0 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DS128-NEXT: ds_write_b128 v4, v[0:3] +; GFX9-DS128-NEXT: s_endpgm entry: %ld = load <8 x i16>, ptr addrspace(3) %in store <8 x i16> %ld, ptr addrspace(3) %out ret 
void } -; FUNC-LABEL: {{^}}local_load_v16i16: -; GFX9-NOT: m0 -; SICIVI: s_mov_b32 m0 - -; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}} -; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} - - -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET - -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET define amdgpu_kernel void @local_load_v16i16(ptr addrspace(3) %out, ptr addrspace(3) %in) { +; SI-LABEL: local_load_v16i16: +; SI: ; %bb.0: ; %entry +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v4, s1 +; SI-NEXT: s_mov_b32 m0, -1 +; SI-NEXT: ds_read2_b64 v[0:3], v4 offset0:2 offset1:3 +; SI-NEXT: ds_read2_b64 v[4:7], v4 offset1:1 +; SI-NEXT: v_mov_b32_e32 v8, s0 +; SI-NEXT: s_waitcnt lgkmcnt(1) +; SI-NEXT: ds_write2_b64 v8, v[0:1], v[2:3] offset0:2 offset1:3 +; SI-NEXT: s_waitcnt lgkmcnt(1) +; SI-NEXT: ds_write2_b64 v8, v[4:5], v[6:7] offset1:1 +; SI-NEXT: s_endpgm +; +; VI-NO-DS128-LABEL: local_load_v16i16: +; VI-NO-DS128: ; %bb.0: ; %entry +; VI-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-NO-DS128-NEXT: s_mov_b32 m0, -1 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-NO-DS128-NEXT: v_mov_b32_e32 v4, s1 +; VI-NO-DS128-NEXT: ds_read2_b64 v[0:3], v4 offset0:2 offset1:3 +; VI-NO-DS128-NEXT: ds_read2_b64 v[4:7], v4 offset1:1 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v8, s0 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(1) +; VI-NO-DS128-NEXT: ds_write2_b64 v8, v[0:1], v[2:3] offset0:2 offset1:3 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(1) +; VI-NO-DS128-NEXT: ds_write2_b64 v8, v[4:5], v[6:7] offset1:1 +; VI-NO-DS128-NEXT: s_endpgm +; +; GFX9-NO-DS128-LABEL: local_load_v16i16: +; GFX9-NO-DS128: ; %bb.0: ; %entry +; GFX9-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v4, s1 +; GFX9-NO-DS128-NEXT: ds_read2_b64 v[0:3], v4 
offset0:2 offset1:3 +; GFX9-NO-DS128-NEXT: ds_read2_b64 v[4:7], v4 offset1:1 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v8, s0 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(1) +; GFX9-NO-DS128-NEXT: ds_write2_b64 v8, v[0:1], v[2:3] offset0:2 offset1:3 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(1) +; GFX9-NO-DS128-NEXT: ds_write2_b64 v8, v[4:5], v[6:7] offset1:1 +; GFX9-NO-DS128-NEXT: s_endpgm +; +; EG-LABEL: local_load_v16i16: +; EG: ; %bb.0: ; %entry +; EG-NEXT: ALU 53, @5, KC0[CB0:0-32], KC1[] +; EG-NEXT: MOV * T0.W, KC0[2].Z, +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.X, OQAP, +; EG-NEXT: MOV * T0.W, KC0[2].Y, +; EG-NEXT: LDS_WRITE * T0.W, T0.X, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.X, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T0.X, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.X, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T0.X, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 12(1.681558e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.X, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 12(1.681558e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T0.X, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.X, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T0.X, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 20(2.802597e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.X, OQAP, +; EG-NEXT: 
ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 20(2.802597e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T0.X, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.X, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T0.X, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 28(3.923636e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.X, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 28(3.923636e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T0.X, +; EG-NEXT: RETURN +; +; VI-DS128-LABEL: local_load_v16i16: +; VI-DS128: ; %bb.0: ; %entry +; VI-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-DS128-NEXT: s_mov_b32 m0, -1 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-DS128-NEXT: v_mov_b32_e32 v4, s1 +; VI-DS128-NEXT: ds_read_b128 v[0:3], v4 offset:16 +; VI-DS128-NEXT: ds_read_b128 v[4:7], v4 +; VI-DS128-NEXT: v_mov_b32_e32 v8, s0 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(1) +; VI-DS128-NEXT: ds_write_b128 v8, v[0:3] offset:16 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(1) +; VI-DS128-NEXT: ds_write_b128 v8, v[4:7] +; VI-DS128-NEXT: s_endpgm +; +; GFX9-DS128-LABEL: local_load_v16i16: +; GFX9-DS128: ; %bb.0: ; %entry +; GFX9-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DS128-NEXT: v_mov_b32_e32 v4, s1 +; GFX9-DS128-NEXT: ds_read_b128 v[0:3], v4 offset:16 +; GFX9-DS128-NEXT: ds_read_b128 v[4:7], v4 +; GFX9-DS128-NEXT: v_mov_b32_e32 v8, s0 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(1) +; GFX9-DS128-NEXT: ds_write_b128 v8, v[0:3] offset:16 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(1) +; GFX9-DS128-NEXT: ds_write_b128 v8, v[4:7] +; GFX9-DS128-NEXT: s_endpgm entry: %ld = load <16 x i16>, ptr addrspace(3) %in store <16 x i16> %ld, ptr addrspace(3) %out ret void } -; FUNC-LABEL: 
{{^}}local_zextload_i16_to_i32: -; GFX9-NOT: m0 -; SICIVI: s_mov_b32 m0 - -; GCN: ds_read_u16 -; GCN: ds_write_b32 - -; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z -; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]] -; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP -; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y -; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]] define amdgpu_kernel void @local_zextload_i16_to_i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 { +; SI-LABEL: local_zextload_i16_to_i32: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v0, s1 +; SI-NEXT: s_mov_b32 m0, -1 +; SI-NEXT: ds_read_u16 v0, v0 +; SI-NEXT: v_mov_b32_e32 v1, s0 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: ds_write_b32 v1, v0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: local_zextload_i16_to_i32: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-NEXT: s_mov_b32 m0, -1 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v0, s1 +; VI-NEXT: ds_read_u16 v0, v0 +; VI-NEXT: v_mov_b32_e32 v1, s0 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: ds_write_b32 v1, v0 +; VI-NEXT: s_endpgm +; +; GFX9-LABEL: local_zextload_i16_to_i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-NEXT: ds_read_u16 v0, v0 +; GFX9-NEXT: v_mov_b32_e32 v1, s0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: ds_write_b32 v1, v0 +; GFX9-NEXT: s_endpgm +; +; EG-LABEL: local_zextload_i16_to_i32: +; EG: ; %bb.0: +; EG-NEXT: ALU 4, @6, KC0[CB0:0-32], KC1[] +; EG-NEXT: MOV * T0.W, KC0[2].Z, +; EG-NEXT: LDS_USHORT_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.X, OQAP, +; EG-NEXT: MOV * T0.W, KC0[2].Y, +; EG-NEXT: LDS_WRITE * T0.W, T0.X, +; EG-NEXT: RETURN %a = load i16, ptr addrspace(3) %in %ext = zext i16 %a to i32 store i32 %ext, ptr addrspace(3) %out ret void } -; FUNC-LABEL: {{^}}local_sextload_i16_to_i32: -; GCN-NOT: s_wqm_b64 - -; 
GFX9-NOT: m0 -; SICIVI: s_mov_b32 m0 - -; GCN: ds_read_i16 - -; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z -; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]] -; EG-DAG: MOV {{[* ]*}}[[TMP:T[0-9]+\.[XYZW]]], OQAP -; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y -; EG-DAG: BFE_INT {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], {{.*}}, 0.0, literal -; EG: 16 -; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]] define amdgpu_kernel void @local_sextload_i16_to_i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 { +; SI-LABEL: local_sextload_i16_to_i32: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v0, s1 +; SI-NEXT: s_mov_b32 m0, -1 +; SI-NEXT: ds_read_i16 v0, v0 +; SI-NEXT: v_mov_b32_e32 v1, s0 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: ds_write_b32 v1, v0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: local_sextload_i16_to_i32: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-NEXT: s_mov_b32 m0, -1 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v0, s1 +; VI-NEXT: ds_read_i16 v0, v0 +; VI-NEXT: v_mov_b32_e32 v1, s0 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: ds_write_b32 v1, v0 +; VI-NEXT: s_endpgm +; +; GFX9-LABEL: local_sextload_i16_to_i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-NEXT: ds_read_i16 v0, v0 +; GFX9-NEXT: v_mov_b32_e32 v1, s0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: ds_write_b32 v1, v0 +; GFX9-NEXT: s_endpgm +; +; EG-LABEL: local_sextload_i16_to_i32: +; EG: ; %bb.0: +; EG-NEXT: ALU 6, @7, KC0[CB0:0-32], KC1[] +; EG-NEXT: MOV * T0.W, KC0[2].Z, +; EG-NEXT: LDS_USHORT_READ_RET * OQAP, T0.W +; EG-NEXT: MOV * T0.X, OQAP, +; EG-NEXT: BFE_INT T0.W, PV.X, 0.0, literal.x, +; EG-NEXT: MOV * T1.W, KC0[2].Y, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: RETURN %a = load i16, ptr addrspace(3) %in %ext = sext i16 %a to i32 
store i32 %ext, ptr addrspace(3) %out ret void } -; FUNC-LABEL: {{^}}local_zextload_v1i16_to_v1i32: -; GFX9-NOT: m0 -; SICIVI: s_mov_b32 m0 - -; GCN: ds_read_u16 - -; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z -; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]] -; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP -; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y -; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]] define amdgpu_kernel void @local_zextload_v1i16_to_v1i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 { +; SI-LABEL: local_zextload_v1i16_to_v1i32: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v0, s1 +; SI-NEXT: s_mov_b32 m0, -1 +; SI-NEXT: ds_read_u16 v0, v0 +; SI-NEXT: v_mov_b32_e32 v1, s0 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: ds_write_b32 v1, v0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: local_zextload_v1i16_to_v1i32: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-NEXT: s_mov_b32 m0, -1 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v0, s1 +; VI-NEXT: ds_read_u16 v0, v0 +; VI-NEXT: v_mov_b32_e32 v1, s0 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: ds_write_b32 v1, v0 +; VI-NEXT: s_endpgm +; +; GFX9-LABEL: local_zextload_v1i16_to_v1i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-NEXT: ds_read_u16 v0, v0 +; GFX9-NEXT: v_mov_b32_e32 v1, s0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: ds_write_b32 v1, v0 +; GFX9-NEXT: s_endpgm +; +; EG-LABEL: local_zextload_v1i16_to_v1i32: +; EG: ; %bb.0: +; EG-NEXT: ALU 4, @8, KC0[CB0:0-32], KC1[] +; EG-NEXT: MOV * T0.W, KC0[2].Z, +; EG-NEXT: LDS_USHORT_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.X, OQAP, +; EG-NEXT: MOV * T0.W, KC0[2].Y, +; EG-NEXT: LDS_WRITE * T0.W, T0.X, +; EG-NEXT: RETURN %load = load <1 x i16>, ptr addrspace(3) %in %ext = zext <1 x i16> %load to <1 x i32> store <1 x i32> %ext, ptr 
addrspace(3) %out ret void } -; FUNC-LABEL: {{^}}local_sextload_v1i16_to_v1i32: -; GFX9-NOT: m0 -; SICIVI: s_mov_b32 m0 - -; GCN: ds_read_i16 - -; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z -; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]] -; EG-DAG: MOV {{[* ]*}}[[TMP:T[0-9]+\.[XYZW]]], OQAP -; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y -; EG-DAG: BFE_INT {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], {{.*}}, 0.0, literal -; EG: 16 -; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]] define amdgpu_kernel void @local_sextload_v1i16_to_v1i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 { +; SI-LABEL: local_sextload_v1i16_to_v1i32: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v0, s1 +; SI-NEXT: s_mov_b32 m0, -1 +; SI-NEXT: ds_read_i16 v0, v0 +; SI-NEXT: v_mov_b32_e32 v1, s0 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: ds_write_b32 v1, v0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: local_sextload_v1i16_to_v1i32: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-NEXT: s_mov_b32 m0, -1 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v0, s1 +; VI-NEXT: ds_read_i16 v0, v0 +; VI-NEXT: v_mov_b32_e32 v1, s0 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: ds_write_b32 v1, v0 +; VI-NEXT: s_endpgm +; +; GFX9-LABEL: local_sextload_v1i16_to_v1i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-NEXT: ds_read_i16 v0, v0 +; GFX9-NEXT: v_mov_b32_e32 v1, s0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: ds_write_b32 v1, v0 +; GFX9-NEXT: s_endpgm +; +; EG-LABEL: local_sextload_v1i16_to_v1i32: +; EG: ; %bb.0: +; EG-NEXT: ALU 6, @9, KC0[CB0:0-32], KC1[] +; EG-NEXT: MOV * T0.W, KC0[2].Z, +; EG-NEXT: LDS_USHORT_READ_RET * OQAP, T0.W +; EG-NEXT: MOV * T0.X, OQAP, +; EG-NEXT: BFE_INT T0.W, PV.X, 0.0, literal.x, +; EG-NEXT: MOV * T1.W, KC0[2].Y, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: 
LDS_WRITE * T1.W, T0.W, +; EG-NEXT: RETURN %load = load <1 x i16>, ptr addrspace(3) %in %ext = sext <1 x i16> %load to <1 x i32> store <1 x i32> %ext, ptr addrspace(3) %out ret void } -; FUNC-LABEL: {{^}}local_zextload_v2i16_to_v2i32: -; GCN-NOT: s_wqm_b64 -; GFX9-NOT: m0 -; SICIVI: s_mov_b32 m0 - -; GCN: ds_read_b32 - -; EG: LDS_READ_RET define amdgpu_kernel void @local_zextload_v2i16_to_v2i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 { +; SI-LABEL: local_zextload_v2i16_to_v2i32: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v0, s1 +; SI-NEXT: s_mov_b32 m0, -1 +; SI-NEXT: ds_read_b32 v0, v0 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; SI-NEXT: v_mov_b32_e32 v2, s0 +; SI-NEXT: ds_write_b64 v2, v[0:1] +; SI-NEXT: s_endpgm +; +; VI-LABEL: local_zextload_v2i16_to_v2i32: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-NEXT: s_mov_b32 m0, -1 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v0, s1 +; VI-NEXT: ds_read_b32 v0, v0 +; VI-NEXT: v_mov_b32_e32 v2, s0 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; VI-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; VI-NEXT: ds_write_b64 v2, v[0:1] +; VI-NEXT: s_endpgm +; +; GFX9-LABEL: local_zextload_v2i16_to_v2i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-NEXT: ds_read_b32 v0, v0 +; GFX9-NEXT: v_mov_b32_e32 v2, s0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX9-NEXT: ds_write_b64 v2, v[0:1] +; GFX9-NEXT: s_endpgm +; +; EG-LABEL: local_zextload_v2i16_to_v2i32: +; EG: ; %bb.0: +; EG-NEXT: ALU 10, @10, KC0[CB0:0-32], KC1[] +; EG-NEXT: MOV * T0.W, KC0[2].Z, +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV * T0.Y, OQAP, +; EG-NEXT: AND_INT 
T0.W, PV.Y, literal.x, +; EG-NEXT: MOV * T1.W, KC0[2].Y, +; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: LSHR T0.W, T0.Y, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 4(5.605194e-45) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: RETURN %load = load <2 x i16>, ptr addrspace(3) %in %ext = zext <2 x i16> %load to <2 x i32> store <2 x i32> %ext, ptr addrspace(3) %out ret void } -; FUNC-LABEL: {{^}}local_sextload_v2i16_to_v2i32: -; GCN-NOT: s_wqm_b64 -; GFX9-NOT: m0 -; SICIVI: s_mov_b32 m0 - -; GCN: ds_read_b32 - -; EG: LDS_READ_RET -; EG: BFE_INT -; EG: BFE_INT define amdgpu_kernel void @local_sextload_v2i16_to_v2i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 { +; SI-LABEL: local_sextload_v2i16_to_v2i32: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v0, s1 +; SI-NEXT: s_mov_b32 m0, -1 +; SI-NEXT: ds_read_b32 v0, v0 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_ashrrev_i32_e32 v1, 16, v0 +; SI-NEXT: v_bfe_i32 v0, v0, 0, 16 +; SI-NEXT: v_mov_b32_e32 v2, s0 +; SI-NEXT: ds_write_b64 v2, v[0:1] +; SI-NEXT: s_endpgm +; +; VI-LABEL: local_sextload_v2i16_to_v2i32: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-NEXT: s_mov_b32 m0, -1 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v0, s1 +; VI-NEXT: ds_read_b32 v0, v0 +; VI-NEXT: v_mov_b32_e32 v2, s0 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_ashrrev_i32_e32 v1, 16, v0 +; VI-NEXT: v_bfe_i32 v0, v0, 0, 16 +; VI-NEXT: ds_write_b64 v2, v[0:1] +; VI-NEXT: s_endpgm +; +; GFX9-LABEL: local_sextload_v2i16_to_v2i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-NEXT: ds_read_b32 v0, v0 +; GFX9-NEXT: v_mov_b32_e32 v2, s0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_ashrrev_i32_e32 v1, 16, v0 +; GFX9-NEXT: v_bfe_i32 v0, 
v0, 0, 16 +; GFX9-NEXT: ds_write_b64 v2, v[0:1] +; GFX9-NEXT: s_endpgm +; +; EG-LABEL: local_sextload_v2i16_to_v2i32: +; EG: ; %bb.0: +; EG-NEXT: ALU 12, @11, KC0[CB0:0-32], KC1[] +; EG-NEXT: MOV * T0.W, KC0[2].Z, +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV * T0.Y, OQAP, +; EG-NEXT: LSHR * T0.W, PV.Y, literal.x, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 4(5.605194e-45) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: BFE_INT T0.W, T0.Y, 0.0, literal.x, +; EG-NEXT: MOV * T1.W, KC0[2].Y, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: RETURN %load = load <2 x i16>, ptr addrspace(3) %in %ext = sext <2 x i16> %load to <2 x i32> store <2 x i32> %ext, ptr addrspace(3) %out ret void } -; FUNC-LABEL: {{^}}local_local_zextload_v3i16_to_v3i32: -; GFX9-NOT: m0 -; SICIVI: s_mov_b32 m0 - -; GCN: ds_read_b64 -; SI-DAG: ds_write_b32 -; SI-DAG: ds_write_b64 -; CIVI-DAG: ds_write_b96 -; GFX9-DAG: ds_write_b96 - -; EG: LDS_USHORT_READ_RET -; EG: LDS_USHORT_READ_RET -; EG: LDS_USHORT_READ_RET define amdgpu_kernel void @local_local_zextload_v3i16_to_v3i32(ptr addrspace(3) %out, ptr addrspace(3) %in) { +; SI-LABEL: local_local_zextload_v3i16_to_v3i32: +; SI: ; %bb.0: ; %entry +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v0, s1 +; SI-NEXT: s_mov_b32 m0, -1 +; SI-NEXT: ds_read_b64 v[0:1], v0 +; SI-NEXT: v_mov_b32_e32 v4, s0 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; SI-NEXT: v_and_b32_e32 v2, 0xffff, v0 +; SI-NEXT: v_and_b32_e32 v0, 0xffff, v1 +; SI-NEXT: ds_write_b32 v4, v0 offset:8 +; SI-NEXT: ds_write_b64 v4, v[2:3] +; SI-NEXT: s_endpgm +; +; VI-LABEL: local_local_zextload_v3i16_to_v3i32: +; VI: ; %bb.0: ; %entry +; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-NEXT: s_mov_b32 m0, -1 +; VI-NEXT: 
s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v0, s1 +; VI-NEXT: ds_read_b64 v[0:1], v0 +; VI-NEXT: v_mov_b32_e32 v3, s0 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_and_b32_e32 v2, 0xffff, v1 +; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; VI-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; VI-NEXT: ds_write_b96 v3, v[0:2] +; VI-NEXT: s_endpgm +; +; GFX9-LABEL: local_local_zextload_v3i16_to_v3i32: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-NEXT: ds_read_b64 v[0:1], v0 +; GFX9-NEXT: v_mov_b32_e32 v3, s0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_and_b32_e32 v2, 0xffff, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX9-NEXT: ds_write_b96 v3, v[0:2] +; GFX9-NEXT: s_endpgm +; +; EG-LABEL: local_local_zextload_v3i16_to_v3i32: +; EG: ; %bb.0: ; %entry +; EG-NEXT: ALU 18, @12, KC0[CB0:0-32], KC1[] +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; EG-NEXT: LDS_USHORT_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.X, OQAP, +; EG-NEXT: MOV * T0.W, KC0[2].Z, +; EG-NEXT: LDS_USHORT_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.Y, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) +; EG-NEXT: LDS_USHORT_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.Z, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T0.Z, +; EG-NEXT: MOV * T0.W, KC0[2].Y, +; EG-NEXT: LDS_WRITE * T0.W, T0.Y, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T0.X, +; EG-NEXT: RETURN entry: %ld = load <3 x i16>, ptr addrspace(3) %in %ext = zext <3 x i16> %ld to <3 x i32> @@ -248,23 +874,79 @@ entry: ret void } -; FUNC-LABEL: {{^}}local_local_sextload_v3i16_to_v3i32: -; GFX9-NOT: m0 -; SICIVI: s_mov_b32 m0 - -; GCN: ds_read_b64 -; 
SI-DAG: ds_write_b32 -; SI-DAG: ds_write_b64 -; CIVI-DAG: ds_write_b96 -; GFX9-DAG: ds_write_b96 - -; EG: LDS_USHORT_READ_RET -; EG: LDS_USHORT_READ_RET -; EG: LDS_USHORT_READ_RET -; EG-DAG: BFE_INT -; EG-DAG: BFE_INT -; EG-DAG: BFE_INT define amdgpu_kernel void @local_local_sextload_v3i16_to_v3i32(ptr addrspace(3) %out, ptr addrspace(3) %in) { +; SI-LABEL: local_local_sextload_v3i16_to_v3i32: +; SI: ; %bb.0: ; %entry +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v0, s1 +; SI-NEXT: s_mov_b32 m0, -1 +; SI-NEXT: ds_read_b64 v[0:1], v0 +; SI-NEXT: v_mov_b32_e32 v4, s0 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_ashrrev_i32_e32 v3, 16, v0 +; SI-NEXT: v_bfe_i32 v2, v0, 0, 16 +; SI-NEXT: v_bfe_i32 v0, v1, 0, 16 +; SI-NEXT: ds_write_b32 v4, v0 offset:8 +; SI-NEXT: ds_write_b64 v4, v[2:3] +; SI-NEXT: s_endpgm +; +; VI-LABEL: local_local_sextload_v3i16_to_v3i32: +; VI: ; %bb.0: ; %entry +; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-NEXT: s_mov_b32 m0, -1 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v0, s1 +; VI-NEXT: ds_read_b64 v[3:4], v0 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_ashrrev_i32_e32 v1, 16, v3 +; VI-NEXT: v_bfe_i32 v2, v4, 0, 16 +; VI-NEXT: v_bfe_i32 v0, v3, 0, 16 +; VI-NEXT: v_mov_b32_e32 v3, s0 +; VI-NEXT: ds_write_b96 v3, v[0:2] +; VI-NEXT: s_endpgm +; +; GFX9-LABEL: local_local_sextload_v3i16_to_v3i32: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-NEXT: ds_read_b64 v[3:4], v0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_ashrrev_i32_e32 v1, 16, v3 +; GFX9-NEXT: v_bfe_i32 v2, v4, 0, 16 +; GFX9-NEXT: v_bfe_i32 v0, v3, 0, 16 +; GFX9-NEXT: v_mov_b32_e32 v3, s0 +; GFX9-NEXT: ds_write_b96 v3, v[0:2] +; GFX9-NEXT: s_endpgm +; +; EG-LABEL: local_local_sextload_v3i16_to_v3i32: +; EG: ; %bb.0: ; %entry +; EG-NEXT: ALU 22, @13, KC0[CB0:0-32], KC1[] +; EG-NEXT: MOV * 
T0.W, KC0[2].Z, +; EG-NEXT: LDS_USHORT_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.X, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) +; EG-NEXT: LDS_USHORT_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.Y, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; EG-NEXT: LDS_USHORT_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.Z, OQAP, +; EG-NEXT: BFE_INT T0.W, T0.Y, 0.0, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 8(1.121039e-44) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: BFE_INT T0.W, T0.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 4(5.605194e-45) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: BFE_INT T0.W, T0.X, 0.0, literal.x, +; EG-NEXT: MOV * T1.W, KC0[2].Y, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: RETURN entry: %ld = load <3 x i16>, ptr addrspace(3) %in %ext = sext <3 x i16> %ld to <3 x i32> @@ -272,659 +954,7978 @@ entry: ret void } -; FUNC-LABEL: {{^}}local_local_zextload_v4i16_to_v4i32: -; GCN-NOT: s_wqm_b64 -; GFX9-NOT: m0 -; SICIVI: s_mov_b32 m0 - -; GCN: ds_read_b64 - -; EG: LDS_READ_RET -; EG: LDS_READ_RET define amdgpu_kernel void @local_local_zextload_v4i16_to_v4i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 { +; SI-LABEL: local_local_zextload_v4i16_to_v4i32: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v0, s1 +; SI-NEXT: s_mov_b32 m0, -1 +; SI-NEXT: ds_read_b64 v[0:1], v0 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; SI-NEXT: v_lshrrev_b32_e32 v5, 16, v1 +; SI-NEXT: v_and_b32_e32 v2, 0xffff, v0 +; SI-NEXT: v_and_b32_e32 v4, 0xffff, v1 +; SI-NEXT: v_mov_b32_e32 v0, s0 +; SI-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1 +; SI-NEXT: s_endpgm +; +; VI-NO-DS128-LABEL: 
local_local_zextload_v4i16_to_v4i32: +; VI-NO-DS128: ; %bb.0: +; VI-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-NO-DS128-NEXT: s_mov_b32 m0, -1 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-NO-DS128-NEXT: v_mov_b32_e32 v0, s1 +; VI-NO-DS128-NEXT: ds_read_b64 v[0:1], v0 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; VI-NO-DS128-NEXT: v_and_b32_e32 v2, 0xffff, v0 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v5, 16, v1 +; VI-NO-DS128-NEXT: v_and_b32_e32 v4, 0xffff, v1 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v0, s0 +; VI-NO-DS128-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1 +; VI-NO-DS128-NEXT: s_endpgm +; +; GFX9-NO-DS128-LABEL: local_local_zextload_v4i16_to_v4i32: +; GFX9-NO-DS128: ; %bb.0: +; GFX9-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-NO-DS128-NEXT: ds_read_b64 v[0:1], v0 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v2, 0xffff, v0 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v5, 16, v1 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v4, 0xffff, v1 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1 +; GFX9-NO-DS128-NEXT: s_endpgm +; +; EG-LABEL: local_local_zextload_v4i16_to_v4i32: +; EG: ; %bb.0: +; EG-NEXT: ALU 22, @14, KC0[CB0:0-32], KC1[] +; EG-NEXT: MOV * T0.W, KC0[2].Z, +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.Y, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.Z, OQAP, +; EG-NEXT: AND_INT T0.W, T0.Y, literal.x, +; EG-NEXT: MOV * T1.W, KC0[2].Y, +; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: LSHR T0.W, T0.Y, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 
4(5.605194e-45) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: AND_INT T0.W, T0.Z, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 8(1.121039e-44) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: LSHR T0.W, T0.Z, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 12(1.681558e-44) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: RETURN +; +; VI-DS128-LABEL: local_local_zextload_v4i16_to_v4i32: +; VI-DS128: ; %bb.0: +; VI-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-DS128-NEXT: s_mov_b32 m0, -1 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-DS128-NEXT: v_mov_b32_e32 v0, s1 +; VI-DS128-NEXT: ds_read_b64 v[0:1], v0 +; VI-DS128-NEXT: v_mov_b32_e32 v4, s0 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-DS128-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; VI-DS128-NEXT: v_and_b32_e32 v2, 0xffff, v1 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; VI-DS128-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; VI-DS128-NEXT: ds_write_b128 v4, v[0:3] +; VI-DS128-NEXT: s_endpgm +; +; GFX9-DS128-LABEL: local_local_zextload_v4i16_to_v4i32: +; GFX9-DS128: ; %bb.0: +; GFX9-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DS128-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-DS128-NEXT: ds_read_b64 v[0:1], v0 +; GFX9-DS128-NEXT: v_mov_b32_e32 v4, s0 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; GFX9-DS128-NEXT: v_and_b32_e32 v2, 0xffff, v1 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX9-DS128-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX9-DS128-NEXT: ds_write_b128 v4, v[0:3] +; GFX9-DS128-NEXT: s_endpgm %load = load <4 x i16>, ptr addrspace(3) %in %ext = zext <4 x i16> %load to <4 x i32> store <4 x i32> %ext, ptr addrspace(3) %out ret void } -; FUNC-LABEL: {{^}}local_sextload_v4i16_to_v4i32: -; GCN-NOT: s_wqm_b64 -; GFX9-NOT: m0 -; SICIVI: s_mov_b32 m0 - -; GCN: ds_read_b64 - -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG-DAG: BFE_INT 
-; EG-DAG: BFE_INT -; EG-DAG: BFE_INT -; EG-DAG: BFE_INT define amdgpu_kernel void @local_sextload_v4i16_to_v4i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 { +; SI-LABEL: local_sextload_v4i16_to_v4i32: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v0, s1 +; SI-NEXT: s_mov_b32 m0, -1 +; SI-NEXT: ds_read_b64 v[0:1], v0 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_ashrrev_i32_e32 v2, 16, v1 +; SI-NEXT: v_ashrrev_i32_e32 v4, 16, v0 +; SI-NEXT: v_bfe_i32 v1, v1, 0, 16 +; SI-NEXT: v_bfe_i32 v3, v0, 0, 16 +; SI-NEXT: v_mov_b32_e32 v0, s0 +; SI-NEXT: ds_write2_b64 v0, v[3:4], v[1:2] offset1:1 +; SI-NEXT: s_endpgm +; +; VI-NO-DS128-LABEL: local_sextload_v4i16_to_v4i32: +; VI-NO-DS128: ; %bb.0: +; VI-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-NO-DS128-NEXT: s_mov_b32 m0, -1 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-NO-DS128-NEXT: v_mov_b32_e32 v0, s1 +; VI-NO-DS128-NEXT: ds_read_b64 v[0:1], v0 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v3, 16, v0 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v5, 16, v1 +; VI-NO-DS128-NEXT: v_bfe_i32 v2, v0, 0, 16 +; VI-NO-DS128-NEXT: v_bfe_i32 v4, v1, 0, 16 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v0, s0 +; VI-NO-DS128-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1 +; VI-NO-DS128-NEXT: s_endpgm +; +; GFX9-NO-DS128-LABEL: local_sextload_v4i16_to_v4i32: +; GFX9-NO-DS128: ; %bb.0: +; GFX9-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-NO-DS128-NEXT: ds_read_b64 v[0:1], v0 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v3, 16, v0 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v5, 16, v1 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v2, v0, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v4, v1, 0, 16 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1 
+; GFX9-NO-DS128-NEXT: s_endpgm +; +; EG-LABEL: local_sextload_v4i16_to_v4i32: +; EG: ; %bb.0: +; EG-NEXT: ALU 25, @15, KC0[CB0:0-32], KC1[] +; EG-NEXT: MOV * T0.W, KC0[2].Z, +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.Y, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.Z, OQAP, +; EG-NEXT: LSHR * T0.W, T0.Y, literal.x, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: LSHR T1.Z, PV.Z, literal.x, +; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 4(5.605194e-45) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: BFE_INT T0.W, T1.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 12(1.681558e-44) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: BFE_INT T0.W, T0.Y, 0.0, literal.x, +; EG-NEXT: MOV * T1.W, KC0[2].Y, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: BFE_INT T0.W, T0.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 8(1.121039e-44) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: RETURN +; +; VI-DS128-LABEL: local_sextload_v4i16_to_v4i32: +; VI-DS128: ; %bb.0: +; VI-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-DS128-NEXT: s_mov_b32 m0, -1 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-DS128-NEXT: v_mov_b32_e32 v0, s1 +; VI-DS128-NEXT: ds_read_b64 v[4:5], v0 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-DS128-NEXT: v_ashrrev_i32_e32 v3, 16, v5 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v1, 16, v4 +; VI-DS128-NEXT: v_bfe_i32 v2, v5, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v0, v4, 0, 16 +; VI-DS128-NEXT: v_mov_b32_e32 v4, s0 +; VI-DS128-NEXT: ds_write_b128 v4, v[0:3] +; VI-DS128-NEXT: s_endpgm +; +; GFX9-DS128-LABEL: local_sextload_v4i16_to_v4i32: +; GFX9-DS128: ; %bb.0: +; GFX9-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; 
GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DS128-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-DS128-NEXT: ds_read_b64 v[4:5], v0 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v3, 16, v5 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v1, 16, v4 +; GFX9-DS128-NEXT: v_bfe_i32 v2, v5, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v0, v4, 0, 16 +; GFX9-DS128-NEXT: v_mov_b32_e32 v4, s0 +; GFX9-DS128-NEXT: ds_write_b128 v4, v[0:3] +; GFX9-DS128-NEXT: s_endpgm %load = load <4 x i16>, ptr addrspace(3) %in %ext = sext <4 x i16> %load to <4 x i32> store <4 x i32> %ext, ptr addrspace(3) %out ret void } -; FUNC-LABEL: {{^}}local_zextload_v8i16_to_v8i32: -; GFX9-NOT: m0 -; SICIVI: s_mov_b32 m0 - -; GCN: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} - -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET define amdgpu_kernel void @local_zextload_v8i16_to_v8i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 { +; SI-LABEL: local_zextload_v8i16_to_v8i32: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v0, s1 +; SI-NEXT: s_mov_b32 m0, -1 +; SI-NEXT: ds_read2_b64 v[0:3], v0 offset1:1 +; SI-NEXT: v_mov_b32_e32 v12, s0 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_lshrrev_b32_e32 v5, 16, v0 +; SI-NEXT: v_lshrrev_b32_e32 v7, 16, v1 +; SI-NEXT: v_lshrrev_b32_e32 v9, 16, v2 +; SI-NEXT: v_lshrrev_b32_e32 v11, 16, v3 +; SI-NEXT: v_and_b32_e32 v4, 0xffff, v0 +; SI-NEXT: v_and_b32_e32 v6, 0xffff, v1 +; SI-NEXT: v_and_b32_e32 v8, 0xffff, v2 +; SI-NEXT: v_and_b32_e32 v10, 0xffff, v3 +; SI-NEXT: ds_write2_b64 v12, v[8:9], v[10:11] offset0:2 offset1:3 +; SI-NEXT: ds_write2_b64 v12, v[4:5], v[6:7] offset1:1 +; SI-NEXT: s_endpgm +; +; VI-NO-DS128-LABEL: local_zextload_v8i16_to_v8i32: +; VI-NO-DS128: ; %bb.0: +; VI-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-NO-DS128-NEXT: s_mov_b32 m0, -1 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-NO-DS128-NEXT: 
v_mov_b32_e32 v0, s1 +; VI-NO-DS128-NEXT: ds_read2_b64 v[0:3], v0 offset1:1 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v5, 16, v0 +; VI-NO-DS128-NEXT: v_and_b32_e32 v4, 0xffff, v0 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v1 +; VI-NO-DS128-NEXT: v_and_b32_e32 v6, 0xffff, v1 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; VI-NO-DS128-NEXT: v_and_b32_e32 v0, 0xffff, v2 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v9, 16, v3 +; VI-NO-DS128-NEXT: v_and_b32_e32 v8, 0xffff, v3 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v2, s0 +; VI-NO-DS128-NEXT: ds_write2_b64 v2, v[0:1], v[8:9] offset0:2 offset1:3 +; VI-NO-DS128-NEXT: ds_write2_b64 v2, v[4:5], v[6:7] offset1:1 +; VI-NO-DS128-NEXT: s_endpgm +; +; GFX9-NO-DS128-LABEL: local_zextload_v8i16_to_v8i32: +; GFX9-NO-DS128: ; %bb.0: +; GFX9-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-NO-DS128-NEXT: ds_read2_b64 v[0:3], v0 offset1:1 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v10, s0 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v5, 16, v0 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v4, 0xffff, v0 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v1 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v6, 0xffff, v1 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v0, 0xffff, v2 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v9, 16, v3 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v8, 0xffff, v3 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v10, v[0:1], v[8:9] offset0:2 offset1:3 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v10, v[4:5], v[6:7] offset1:1 +; GFX9-NO-DS128-NEXT: s_endpgm +; +; EG-LABEL: local_zextload_v8i16_to_v8i32: +; EG: ; %bb.0: +; EG-NEXT: ALU 46, @16, KC0[CB0:0-32], KC1[] +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.Y, OQAP, +; EG-NEXT: ADD_INT * T0.W, 
KC0[2].Z, literal.x, +; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.Z, OQAP, +; EG-NEXT: MOV * T0.W, KC0[2].Z, +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.W, OQAP, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Z, literal.x, +; EG-NEXT: 12(1.681558e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T1.W +; EG-NEXT: MOV T1.Y, OQAP, +; EG-NEXT: AND_INT T1.W, T0.W, literal.x, +; EG-NEXT: MOV * T2.W, KC0[2].Y, +; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: LSHR T0.W, T0.W, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 4(5.605194e-45) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: AND_INT T0.W, T0.Z, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 8(1.121039e-44) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: LSHR T0.W, T0.Z, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 12(1.681558e-44) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: AND_INT T0.W, T0.Y, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: LSHR T0.W, T0.Y, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 20(2.802597e-44) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: AND_INT T0.W, T1.Y, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 24(3.363116e-44) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: LSHR T0.W, T1.Y, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 28(3.923636e-44) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: RETURN +; +; VI-DS128-LABEL: local_zextload_v8i16_to_v8i32: +; VI-DS128: ; %bb.0: +; VI-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-DS128-NEXT: s_mov_b32 m0, -1 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(0) +; 
VI-DS128-NEXT: v_mov_b32_e32 v0, s1 +; VI-DS128-NEXT: ds_read_b128 v[0:3], v0 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-DS128-NEXT: v_lshrrev_b32_e32 v5, 16, v0 +; VI-DS128-NEXT: v_and_b32_e32 v4, 0xffff, v0 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v11, 16, v3 +; VI-DS128-NEXT: v_and_b32_e32 v10, 0xffff, v3 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v9, 16, v2 +; VI-DS128-NEXT: v_and_b32_e32 v8, 0xffff, v2 +; VI-DS128-NEXT: v_mov_b32_e32 v0, s0 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v1 +; VI-DS128-NEXT: v_and_b32_e32 v6, 0xffff, v1 +; VI-DS128-NEXT: ds_write_b128 v0, v[8:11] offset:16 +; VI-DS128-NEXT: ds_write_b128 v0, v[4:7] +; VI-DS128-NEXT: s_endpgm +; +; GFX9-DS128-LABEL: local_zextload_v8i16_to_v8i32: +; GFX9-DS128: ; %bb.0: +; GFX9-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DS128-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-DS128-NEXT: ds_read_b128 v[0:3], v0 +; GFX9-DS128-NEXT: v_mov_b32_e32 v12, s0 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v11, 16, v3 +; GFX9-DS128-NEXT: v_and_b32_e32 v10, 0xffff, v3 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v9, 16, v2 +; GFX9-DS128-NEXT: v_and_b32_e32 v8, 0xffff, v2 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v1 +; GFX9-DS128-NEXT: v_and_b32_e32 v6, 0xffff, v1 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v5, 16, v0 +; GFX9-DS128-NEXT: v_and_b32_e32 v4, 0xffff, v0 +; GFX9-DS128-NEXT: ds_write_b128 v12, v[8:11] offset:16 +; GFX9-DS128-NEXT: ds_write_b128 v12, v[4:7] +; GFX9-DS128-NEXT: s_endpgm %load = load <8 x i16>, ptr addrspace(3) %in %ext = zext <8 x i16> %load to <8 x i32> store <8 x i32> %ext, ptr addrspace(3) %out ret void } -; FUNC-LABEL: {{^}}local_sextload_v8i16_to_v8i32: -; GFX9-NOT: m0 -; SICIVI: s_mov_b32 m0 - -; GCN: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} - -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG-DAG: BFE_INT -; EG-DAG: BFE_INT -; EG-DAG: BFE_INT -; EG-DAG: BFE_INT -; 
EG-DAG: BFE_INT -; EG-DAG: BFE_INT -; EG-DAG: BFE_INT -; EG-DAG: BFE_INT define amdgpu_kernel void @local_sextload_v8i16_to_v8i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 { +; SI-LABEL: local_sextload_v8i16_to_v8i32: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v0, s1 +; SI-NEXT: s_mov_b32 m0, -1 +; SI-NEXT: ds_read2_b64 v[0:3], v0 offset1:1 +; SI-NEXT: v_mov_b32_e32 v12, s0 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_ashrrev_i32_e32 v5, 16, v0 +; SI-NEXT: v_ashrrev_i32_e32 v7, 16, v1 +; SI-NEXT: v_ashrrev_i32_e32 v9, 16, v2 +; SI-NEXT: v_ashrrev_i32_e32 v11, 16, v3 +; SI-NEXT: v_bfe_i32 v4, v0, 0, 16 +; SI-NEXT: v_bfe_i32 v6, v1, 0, 16 +; SI-NEXT: v_bfe_i32 v8, v2, 0, 16 +; SI-NEXT: v_bfe_i32 v10, v3, 0, 16 +; SI-NEXT: ds_write2_b64 v12, v[8:9], v[10:11] offset0:2 offset1:3 +; SI-NEXT: ds_write2_b64 v12, v[4:5], v[6:7] offset1:1 +; SI-NEXT: s_endpgm +; +; VI-NO-DS128-LABEL: local_sextload_v8i16_to_v8i32: +; VI-NO-DS128: ; %bb.0: +; VI-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-NO-DS128-NEXT: s_mov_b32 m0, -1 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-NO-DS128-NEXT: v_mov_b32_e32 v0, s1 +; VI-NO-DS128-NEXT: ds_read2_b64 v[0:3], v0 offset1:1 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v5, 16, v0 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v9, 16, v2 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v11, 16, v3 +; VI-NO-DS128-NEXT: v_bfe_i32 v4, v0, 0, 16 +; VI-NO-DS128-NEXT: v_bfe_i32 v8, v2, 0, 16 +; VI-NO-DS128-NEXT: v_bfe_i32 v10, v3, 0, 16 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v0, s0 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v7, 16, v1 +; VI-NO-DS128-NEXT: v_bfe_i32 v6, v1, 0, 16 +; VI-NO-DS128-NEXT: ds_write2_b64 v0, v[8:9], v[10:11] offset0:2 offset1:3 +; VI-NO-DS128-NEXT: ds_write2_b64 v0, v[4:5], v[6:7] offset1:1 +; VI-NO-DS128-NEXT: s_endpgm +; +; GFX9-NO-DS128-LABEL: local_sextload_v8i16_to_v8i32: +; GFX9-NO-DS128: ; %bb.0: +; 
GFX9-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-NO-DS128-NEXT: ds_read2_b64 v[0:3], v0 offset1:1 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v12, s0 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v9, 16, v2 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v11, 16, v3 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v8, v2, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v10, v3, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v5, 16, v0 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v7, 16, v1 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v4, v0, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v6, v1, 0, 16 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v12, v[8:9], v[10:11] offset0:2 offset1:3 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v12, v[4:5], v[6:7] offset1:1 +; GFX9-NO-DS128-NEXT: s_endpgm +; +; EG-LABEL: local_sextload_v8i16_to_v8i32: +; EG: ; %bb.0: +; EG-NEXT: ALU 51, @17, KC0[CB0:0-32], KC1[] +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.Y, OQAP, +; EG-NEXT: MOV * T0.W, KC0[2].Z, +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.Z, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.W, OQAP, +; EG-NEXT: LSHR * T1.W, T0.Z, literal.x, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: ADD_INT * T2.W, KC0[2].Z, literal.x, +; EG-NEXT: 12(1.681558e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T2.W +; EG-NEXT: MOV T1.Y, OQAP, +; EG-NEXT: LSHR T1.Z, T0.W, literal.x, +; EG-NEXT: BFE_INT T1.W, T1.W, 0.0, literal.x, BS:VEC_120/SCL_212 +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 4(5.605194e-45) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: LSHR T2.Z, T0.Y, literal.x, +; EG-NEXT: BFE_INT T1.W, T1.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T2.W, 
KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 12(1.681558e-44) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: LSHR T1.Z, T1.Y, literal.x, +; EG-NEXT: BFE_INT T1.W, T2.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 20(2.802597e-44) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: BFE_INT T1.W, T1.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 28(3.923636e-44) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: BFE_INT T1.W, T0.Z, 0.0, literal.x, +; EG-NEXT: MOV * T2.W, KC0[2].Y, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: BFE_INT T0.W, T0.W, 0.0, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 8(1.121039e-44) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: BFE_INT T0.W, T0.Y, 0.0, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.x, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: BFE_INT T0.W, T1.Y, 0.0, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 24(3.363116e-44) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: RETURN +; +; VI-DS128-LABEL: local_sextload_v8i16_to_v8i32: +; VI-DS128: ; %bb.0: +; VI-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-DS128-NEXT: s_mov_b32 m0, -1 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-DS128-NEXT: v_mov_b32_e32 v0, s1 +; VI-DS128-NEXT: ds_read_b128 v[0:3], v0 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-DS128-NEXT: v_ashrrev_i32_e32 v5, 16, v0 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v11, 16, v3 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v9, 16, v2 +; VI-DS128-NEXT: v_bfe_i32 v4, v0, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v10, v3, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v8, v2, 0, 16 +; VI-DS128-NEXT: v_mov_b32_e32 v0, s0 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v7, 16, v1 +; VI-DS128-NEXT: v_bfe_i32 v6, v1, 0, 16 +; VI-DS128-NEXT: ds_write_b128 v0, v[8:11] offset:16 
+; VI-DS128-NEXT: ds_write_b128 v0, v[4:7] +; VI-DS128-NEXT: s_endpgm +; +; GFX9-DS128-LABEL: local_sextload_v8i16_to_v8i32: +; GFX9-DS128: ; %bb.0: +; GFX9-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DS128-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-DS128-NEXT: ds_read_b128 v[0:3], v0 +; GFX9-DS128-NEXT: v_mov_b32_e32 v12, s0 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v11, 16, v3 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v9, 16, v2 +; GFX9-DS128-NEXT: v_bfe_i32 v10, v3, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v8, v2, 0, 16 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v7, 16, v1 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v5, 16, v0 +; GFX9-DS128-NEXT: v_bfe_i32 v6, v1, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v4, v0, 0, 16 +; GFX9-DS128-NEXT: ds_write_b128 v12, v[8:11] offset:16 +; GFX9-DS128-NEXT: ds_write_b128 v12, v[4:7] +; GFX9-DS128-NEXT: s_endpgm %load = load <8 x i16>, ptr addrspace(3) %in %ext = sext <8 x i16> %load to <8 x i32> store <8 x i32> %ext, ptr addrspace(3) %out ret void } -; FUNC-LABEL: {{^}}local_zextload_v16i16_to_v16i32: -; GFX9-NOT: m0 -; SICIVI: s_mov_b32 m0 - -; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} -; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}} - -; GCN: ds_write2_b64 -; GCN: ds_write2_b64 -; GCN: ds_write2_b64 -; GCN: ds_write2_b64 - -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET define amdgpu_kernel void @local_zextload_v16i16_to_v16i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 { +; SI-LABEL: local_zextload_v16i16_to_v16i32: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v4, s1 +; SI-NEXT: s_mov_b32 m0, -1 +; SI-NEXT: ds_read2_b64 v[0:3], v4 offset1:1 +; SI-NEXT: ds_read2_b64 v[4:7], v4 offset0:2 offset1:3 +; SI-NEXT: 
s_waitcnt lgkmcnt(1) +; SI-NEXT: v_lshrrev_b32_e32 v9, 16, v1 +; SI-NEXT: v_lshrrev_b32_e32 v11, 16, v0 +; SI-NEXT: v_lshrrev_b32_e32 v13, 16, v3 +; SI-NEXT: v_lshrrev_b32_e32 v15, 16, v2 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_lshrrev_b32_e32 v17, 16, v5 +; SI-NEXT: v_lshrrev_b32_e32 v19, 16, v4 +; SI-NEXT: v_and_b32_e32 v8, 0xffff, v1 +; SI-NEXT: v_and_b32_e32 v10, 0xffff, v0 +; SI-NEXT: v_and_b32_e32 v12, 0xffff, v3 +; SI-NEXT: v_and_b32_e32 v14, 0xffff, v2 +; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v7 +; SI-NEXT: v_lshrrev_b32_e32 v3, 16, v6 +; SI-NEXT: v_and_b32_e32 v16, 0xffff, v5 +; SI-NEXT: v_and_b32_e32 v18, 0xffff, v4 +; SI-NEXT: v_and_b32_e32 v0, 0xffff, v7 +; SI-NEXT: v_and_b32_e32 v2, 0xffff, v6 +; SI-NEXT: v_mov_b32_e32 v4, s0 +; SI-NEXT: ds_write2_b64 v4, v[2:3], v[0:1] offset0:6 offset1:7 +; SI-NEXT: ds_write2_b64 v4, v[18:19], v[16:17] offset0:4 offset1:5 +; SI-NEXT: ds_write2_b64 v4, v[14:15], v[12:13] offset0:2 offset1:3 +; SI-NEXT: ds_write2_b64 v4, v[10:11], v[8:9] offset1:1 +; SI-NEXT: s_endpgm +; +; VI-NO-DS128-LABEL: local_zextload_v16i16_to_v16i32: +; VI-NO-DS128: ; %bb.0: +; VI-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-NO-DS128-NEXT: s_mov_b32 m0, -1 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-NO-DS128-NEXT: v_mov_b32_e32 v4, s1 +; VI-NO-DS128-NEXT: ds_read2_b64 v[0:3], v4 offset1:1 +; VI-NO-DS128-NEXT: ds_read2_b64 v[4:7], v4 offset0:2 offset1:3 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v16, s0 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(1) +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v9, 16, v1 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v15, 16, v7 +; VI-NO-DS128-NEXT: v_and_b32_e32 v14, 0xffff, v7 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v6 +; VI-NO-DS128-NEXT: v_and_b32_e32 v6, 0xffff, v6 +; VI-NO-DS128-NEXT: v_and_b32_e32 v8, 0xffff, v1 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; VI-NO-DS128-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v11, 16, v3 +; 
VI-NO-DS128-NEXT: v_and_b32_e32 v10, 0xffff, v3 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; VI-NO-DS128-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v13, 16, v5 +; VI-NO-DS128-NEXT: v_and_b32_e32 v12, 0xffff, v5 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v5, 16, v4 +; VI-NO-DS128-NEXT: v_and_b32_e32 v4, 0xffff, v4 +; VI-NO-DS128-NEXT: ds_write2_b64 v16, v[6:7], v[14:15] offset0:6 offset1:7 +; VI-NO-DS128-NEXT: ds_write2_b64 v16, v[4:5], v[12:13] offset0:4 offset1:5 +; VI-NO-DS128-NEXT: ds_write2_b64 v16, v[2:3], v[10:11] offset0:2 offset1:3 +; VI-NO-DS128-NEXT: ds_write2_b64 v16, v[0:1], v[8:9] offset1:1 +; VI-NO-DS128-NEXT: s_endpgm +; +; GFX9-NO-DS128-LABEL: local_zextload_v16i16_to_v16i32: +; GFX9-NO-DS128: ; %bb.0: +; GFX9-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v4, s1 +; GFX9-NO-DS128-NEXT: ds_read2_b64 v[0:3], v4 offset1:1 +; GFX9-NO-DS128-NEXT: ds_read2_b64 v[4:7], v4 offset0:2 offset1:3 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v16, s0 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(1) +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v9, 16, v1 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v15, 16, v7 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v14, 0xffff, v7 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v6 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v6, 0xffff, v6 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v8, 0xffff, v1 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v11, 16, v3 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v10, 0xffff, v3 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v13, 16, v5 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v12, 0xffff, v5 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v5, 16, v4 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v4, 
0xffff, v4 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v16, v[6:7], v[14:15] offset0:6 offset1:7 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v16, v[4:5], v[12:13] offset0:4 offset1:5 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v16, v[2:3], v[10:11] offset0:2 offset1:3 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v16, v[0:1], v[8:9] offset1:1 +; GFX9-NO-DS128-NEXT: s_endpgm +; +; EG-LABEL: local_zextload_v16i16_to_v16i32: +; EG: ; %bb.0: +; EG-NEXT: ALU 94, @18, KC0[CB0:0-32], KC1[] +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.Y, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 28(3.923636e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.Z, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.W, OQAP, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Z, literal.x, +; EG-NEXT: 20(2.802597e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T1.W +; EG-NEXT: MOV T1.Y, OQAP, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Z, literal.x, +; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T1.W +; EG-NEXT: MOV T1.Z, OQAP, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Z, literal.x, +; EG-NEXT: 12(1.681558e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T1.W +; EG-NEXT: MOV T1.W, OQAP, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Z, literal.x, +; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T2.W +; EG-NEXT: MOV T2.Y, OQAP, +; EG-NEXT: MOV * T2.W, KC0[2].Z, +; EG-NEXT: LDS_READ_RET * OQAP, T2.W +; EG-NEXT: MOV T2.Z, OQAP, +; EG-NEXT: LSHR T2.W, T2.Y, literal.x, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 12(1.681558e-44) +; EG-NEXT: LDS_WRITE * T3.W, T2.W, +; EG-NEXT: AND_INT T2.W, T2.Y, literal.x, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 8(1.121039e-44) +; 
EG-NEXT: LDS_WRITE * T3.W, T2.W, +; EG-NEXT: LSHR T2.W, T2.Z, literal.x, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 4(5.605194e-45) +; EG-NEXT: LDS_WRITE * T3.W, T2.W, +; EG-NEXT: AND_INT T2.W, T2.Z, literal.x, +; EG-NEXT: MOV * T3.W, KC0[2].Y, +; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T3.W, T2.W, +; EG-NEXT: LSHR T2.W, T1.W, literal.x, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 28(3.923636e-44) +; EG-NEXT: LDS_WRITE * T3.W, T2.W, +; EG-NEXT: AND_INT T1.W, T1.W, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 24(3.363116e-44) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: LSHR T1.W, T1.Z, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 20(2.802597e-44) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: AND_INT T1.W, T1.Z, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: LSHR T1.W, T1.Y, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 44(6.165713e-44) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: AND_INT T1.W, T1.Y, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 40(5.605194e-44) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: LSHR T1.W, T0.W, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 36(5.044674e-44) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: AND_INT T0.W, T0.W, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 32(4.484155e-44) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: LSHR T0.W, T0.Z, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 60(8.407791e-44) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: AND_INT T0.W, T0.Z, literal.x, +; EG-NEXT: 
ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 56(7.847271e-44) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: LSHR T0.W, T0.Y, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 52(7.286752e-44) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: AND_INT T0.W, T0.Y, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 48(6.726233e-44) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: RETURN +; +; VI-DS128-LABEL: local_zextload_v16i16_to_v16i32: +; VI-DS128: ; %bb.0: +; VI-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-DS128-NEXT: s_mov_b32 m0, -1 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-DS128-NEXT: v_mov_b32_e32 v4, s1 +; VI-DS128-NEXT: ds_read_b128 v[0:3], v4 +; VI-DS128-NEXT: ds_read_b128 v[4:7], v4 offset:16 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(1) +; VI-DS128-NEXT: v_lshrrev_b32_e32 v11, 16, v1 +; VI-DS128-NEXT: v_and_b32_e32 v10, 0xffff, v1 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v9, 16, v0 +; VI-DS128-NEXT: v_and_b32_e32 v8, 0xffff, v0 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v4 +; VI-DS128-NEXT: v_and_b32_e32 v0, 0xffff, v4 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v19, 16, v7 +; VI-DS128-NEXT: v_and_b32_e32 v18, 0xffff, v7 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v17, 16, v6 +; VI-DS128-NEXT: v_and_b32_e32 v16, 0xffff, v6 +; VI-DS128-NEXT: v_mov_b32_e32 v4, s0 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v15, 16, v3 +; VI-DS128-NEXT: v_and_b32_e32 v14, 0xffff, v3 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v13, 16, v2 +; VI-DS128-NEXT: v_and_b32_e32 v12, 0xffff, v2 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v3, 16, v5 +; VI-DS128-NEXT: v_and_b32_e32 v2, 0xffff, v5 +; VI-DS128-NEXT: ds_write_b128 v4, v[16:19] offset:48 +; VI-DS128-NEXT: ds_write_b128 v4, v[0:3] offset:32 +; VI-DS128-NEXT: ds_write_b128 v4, v[12:15] offset:16 +; VI-DS128-NEXT: ds_write_b128 v4, v[8:11] +; VI-DS128-NEXT: s_endpgm +; +; GFX9-DS128-LABEL: local_zextload_v16i16_to_v16i32: 
+; GFX9-DS128: ; %bb.0: +; GFX9-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DS128-NEXT: v_mov_b32_e32 v4, s1 +; GFX9-DS128-NEXT: ds_read_b128 v[0:3], v4 +; GFX9-DS128-NEXT: ds_read_b128 v[4:7], v4 offset:16 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(1) +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v11, 16, v1 +; GFX9-DS128-NEXT: v_and_b32_e32 v10, 0xffff, v1 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v9, 16, v0 +; GFX9-DS128-NEXT: v_and_b32_e32 v8, 0xffff, v0 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v4 +; GFX9-DS128-NEXT: v_and_b32_e32 v0, 0xffff, v4 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v19, 16, v7 +; GFX9-DS128-NEXT: v_and_b32_e32 v18, 0xffff, v7 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v17, 16, v6 +; GFX9-DS128-NEXT: v_and_b32_e32 v16, 0xffff, v6 +; GFX9-DS128-NEXT: v_mov_b32_e32 v4, s0 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v15, 16, v3 +; GFX9-DS128-NEXT: v_and_b32_e32 v14, 0xffff, v3 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v13, 16, v2 +; GFX9-DS128-NEXT: v_and_b32_e32 v12, 0xffff, v2 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v3, 16, v5 +; GFX9-DS128-NEXT: v_and_b32_e32 v2, 0xffff, v5 +; GFX9-DS128-NEXT: ds_write_b128 v4, v[16:19] offset:48 +; GFX9-DS128-NEXT: ds_write_b128 v4, v[0:3] offset:32 +; GFX9-DS128-NEXT: ds_write_b128 v4, v[12:15] offset:16 +; GFX9-DS128-NEXT: ds_write_b128 v4, v[8:11] +; GFX9-DS128-NEXT: s_endpgm %load = load <16 x i16>, ptr addrspace(3) %in %ext = zext <16 x i16> %load to <16 x i32> store <16 x i32> %ext, ptr addrspace(3) %out ret void } -; FUNC-LABEL: {{^}}local_sextload_v16i16_to_v16i32: -; GFX9-NOT: m0 -; SICIVI: s_mov_b32 m0 - - -; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} -; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}} - -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG-DAG: 
BFE_INT -; EG-DAG: BFE_INT -; EG-DAG: BFE_INT -; EG-DAG: BFE_INT -; EG-DAG: BFE_INT -; EG-DAG: BFE_INT -; EG-DAG: BFE_INT -; EG-DAG: BFE_INT -; EG-DAG: BFE_INT -; EG-DAG: BFE_INT -; EG-DAG: BFE_INT -; EG-DAG: BFE_INT -; EG-DAG: BFE_INT -; EG-DAG: BFE_INT -; EG-DAG: BFE_INT -; EG-DAG: BFE_INT define amdgpu_kernel void @local_sextload_v16i16_to_v16i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 { +; SI-LABEL: local_sextload_v16i16_to_v16i32: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v4, s1 +; SI-NEXT: s_mov_b32 m0, -1 +; SI-NEXT: ds_read2_b64 v[0:3], v4 offset1:1 +; SI-NEXT: ds_read2_b64 v[4:7], v4 offset0:2 offset1:3 +; SI-NEXT: s_waitcnt lgkmcnt(1) +; SI-NEXT: v_ashrrev_i32_e32 v9, 16, v1 +; SI-NEXT: v_ashrrev_i32_e32 v11, 16, v0 +; SI-NEXT: v_ashrrev_i32_e32 v13, 16, v3 +; SI-NEXT: v_ashrrev_i32_e32 v15, 16, v2 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_ashrrev_i32_e32 v17, 16, v5 +; SI-NEXT: v_ashrrev_i32_e32 v19, 16, v4 +; SI-NEXT: v_bfe_i32 v8, v1, 0, 16 +; SI-NEXT: v_bfe_i32 v10, v0, 0, 16 +; SI-NEXT: v_bfe_i32 v12, v3, 0, 16 +; SI-NEXT: v_bfe_i32 v14, v2, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v1, 16, v7 +; SI-NEXT: v_ashrrev_i32_e32 v3, 16, v6 +; SI-NEXT: v_bfe_i32 v16, v5, 0, 16 +; SI-NEXT: v_bfe_i32 v18, v4, 0, 16 +; SI-NEXT: v_bfe_i32 v0, v7, 0, 16 +; SI-NEXT: v_bfe_i32 v2, v6, 0, 16 +; SI-NEXT: v_mov_b32_e32 v4, s0 +; SI-NEXT: ds_write2_b64 v4, v[2:3], v[0:1] offset0:6 offset1:7 +; SI-NEXT: ds_write2_b64 v4, v[18:19], v[16:17] offset0:4 offset1:5 +; SI-NEXT: ds_write2_b64 v4, v[14:15], v[12:13] offset0:2 offset1:3 +; SI-NEXT: ds_write2_b64 v4, v[10:11], v[8:9] offset1:1 +; SI-NEXT: s_endpgm +; +; VI-NO-DS128-LABEL: local_sextload_v16i16_to_v16i32: +; VI-NO-DS128: ; %bb.0: +; VI-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-NO-DS128-NEXT: s_mov_b32 m0, -1 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-NO-DS128-NEXT: v_mov_b32_e32 v4, s1 +; VI-NO-DS128-NEXT: 
ds_read2_b64 v[0:3], v4 offset1:1 +; VI-NO-DS128-NEXT: ds_read2_b64 v[4:7], v4 offset0:2 offset1:3 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(1) +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v9, 16, v1 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v11, 16, v0 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v13, 16, v3 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v15, 16, v2 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v19, 16, v4 +; VI-NO-DS128-NEXT: v_bfe_i32 v8, v1, 0, 16 +; VI-NO-DS128-NEXT: v_bfe_i32 v10, v0, 0, 16 +; VI-NO-DS128-NEXT: v_bfe_i32 v12, v3, 0, 16 +; VI-NO-DS128-NEXT: v_bfe_i32 v14, v2, 0, 16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v1, 16, v7 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v3, 16, v6 +; VI-NO-DS128-NEXT: v_bfe_i32 v18, v4, 0, 16 +; VI-NO-DS128-NEXT: v_bfe_i32 v0, v7, 0, 16 +; VI-NO-DS128-NEXT: v_bfe_i32 v2, v6, 0, 16 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v4, s0 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v17, 16, v5 +; VI-NO-DS128-NEXT: v_bfe_i32 v16, v5, 0, 16 +; VI-NO-DS128-NEXT: ds_write2_b64 v4, v[2:3], v[0:1] offset0:6 offset1:7 +; VI-NO-DS128-NEXT: ds_write2_b64 v4, v[18:19], v[16:17] offset0:4 offset1:5 +; VI-NO-DS128-NEXT: ds_write2_b64 v4, v[14:15], v[12:13] offset0:2 offset1:3 +; VI-NO-DS128-NEXT: ds_write2_b64 v4, v[10:11], v[8:9] offset1:1 +; VI-NO-DS128-NEXT: s_endpgm +; +; GFX9-NO-DS128-LABEL: local_sextload_v16i16_to_v16i32: +; GFX9-NO-DS128: ; %bb.0: +; GFX9-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v4, s1 +; GFX9-NO-DS128-NEXT: ds_read2_b64 v[0:3], v4 offset1:1 +; GFX9-NO-DS128-NEXT: ds_read2_b64 v[4:7], v4 offset0:2 offset1:3 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(1) +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v9, 16, v1 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v11, 16, v0 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v13, 16, v3 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v15, 16, v2 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; 
GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v19, 16, v4 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v8, v1, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v10, v0, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v12, v3, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v14, v2, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v1, 16, v7 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v3, 16, v6 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v18, v4, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v0, v7, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v2, v6, 0, 16 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v4, s0 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v17, 16, v5 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v16, v5, 0, 16 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v4, v[2:3], v[0:1] offset0:6 offset1:7 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v4, v[18:19], v[16:17] offset0:4 offset1:5 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v4, v[14:15], v[12:13] offset0:2 offset1:3 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v4, v[10:11], v[8:9] offset1:1 +; GFX9-NO-DS128-NEXT: s_endpgm +; +; EG-LABEL: local_sextload_v16i16_to_v16i32: +; EG: ; %bb.0: +; EG-NEXT: ALU 95, @19, KC0[CB0:0-32], KC1[] +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 12(1.681558e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.Y, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.Z, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 20(2.802597e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.W, OQAP, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Z, literal.x, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T1.W +; EG-NEXT: MOV T1.Y, OQAP, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Z, literal.x, +; EG-NEXT: 28(3.923636e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T1.W +; EG-NEXT: MOV T1.Z, OQAP, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Z, literal.x, +; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) +; EG-NEXT: 
LDS_READ_RET * OQAP, T1.W +; EG-NEXT: MOV T1.W, OQAP, +; EG-NEXT: MOV * T2.W, KC0[2].Z, +; EG-NEXT: LDS_READ_RET * OQAP, T2.W +; EG-NEXT: MOV T2.Y, OQAP, +; EG-NEXT: LSHR T2.W, T1.W, literal.x, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Z, literal.y, +; EG-NEXT: 16(2.242078e-44), 24(3.363116e-44) +; EG-NEXT: LDS_READ_RET * OQAP, T3.W +; EG-NEXT: MOV T2.Z, OQAP, +; EG-NEXT: LSHR * T3.Z, T2.Y, literal.x, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: BFE_INT T2.W, T2.W, 0.0, literal.x, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 12(1.681558e-44) +; EG-NEXT: LDS_WRITE * T3.W, T2.W, +; EG-NEXT: LSHR T4.Z, T0.Y, literal.x, +; EG-NEXT: BFE_INT T2.W, T3.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 4(5.605194e-45) +; EG-NEXT: LDS_WRITE * T3.W, T2.W, +; EG-NEXT: LSHR T3.Z, T0.Z, literal.x, +; EG-NEXT: BFE_INT T2.W, T4.Z, 0.0, literal.x, BS:VEC_120/SCL_212 +; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 28(3.923636e-44) +; EG-NEXT: LDS_WRITE * T3.W, T2.W, +; EG-NEXT: LSHR T4.Z, T0.W, literal.x, +; EG-NEXT: BFE_INT T2.W, T3.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 20(2.802597e-44) +; EG-NEXT: LDS_WRITE * T3.W, T2.W, +; EG-NEXT: LSHR T3.Z, T1.Y, literal.x, +; EG-NEXT: BFE_INT T2.W, T4.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 44(6.165713e-44) +; EG-NEXT: LDS_WRITE * T3.W, T2.W, +; EG-NEXT: LSHR T4.Z, T1.Z, literal.x, +; EG-NEXT: BFE_INT T2.W, T3.Z, 0.0, literal.x, BS:VEC_120/SCL_212 +; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 36(5.044674e-44) +; EG-NEXT: LDS_WRITE * T3.W, T2.W, +; EG-NEXT: LSHR T3.Z, T2.Z, literal.x, +; EG-NEXT: BFE_INT T2.W, T4.Z, 0.0, literal.x, BS:VEC_120/SCL_212 +; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 60(8.407791e-44) +; EG-NEXT: LDS_WRITE * T3.W, T2.W, 
+; EG-NEXT: BFE_INT T2.W, T3.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 52(7.286752e-44) +; EG-NEXT: LDS_WRITE * T3.W, T2.W, +; EG-NEXT: BFE_INT T1.W, T1.W, 0.0, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 8(1.121039e-44) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: BFE_INT T1.W, T2.Y, 0.0, literal.x, +; EG-NEXT: MOV * T2.W, KC0[2].Y, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: BFE_INT T1.W, T0.Y, 0.0, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 24(3.363116e-44) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: BFE_INT T1.W, T0.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.x, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: BFE_INT T0.W, T0.W, 0.0, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 40(5.605194e-44) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: BFE_INT T0.W, T1.Y, 0.0, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 32(4.484155e-44) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: ALU 7, @20, KC0[CB0:0-32], KC1[] +; EG-NEXT: BFE_INT T0.W, T1.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 56(7.847271e-44) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: BFE_INT T0.W, T2.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 48(6.726233e-44) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: RETURN +; +; VI-DS128-LABEL: local_sextload_v16i16_to_v16i32: +; VI-DS128: ; %bb.0: +; VI-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-DS128-NEXT: s_mov_b32 m0, -1 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-DS128-NEXT: v_mov_b32_e32 v4, s1 +; VI-DS128-NEXT: ds_read_b128 v[0:3], v4 +; VI-DS128-NEXT: ds_read_b128 v[4:7], v4 
offset:16 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(1) +; VI-DS128-NEXT: v_ashrrev_i32_e32 v11, 16, v1 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v9, 16, v0 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v15, 16, v3 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v13, 16, v2 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-DS128-NEXT: v_ashrrev_i32_e32 v17, 16, v4 +; VI-DS128-NEXT: v_bfe_i32 v10, v1, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v8, v0, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v14, v3, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v12, v2, 0, 16 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v3, 16, v7 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v1, 16, v6 +; VI-DS128-NEXT: v_bfe_i32 v16, v4, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v2, v7, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v0, v6, 0, 16 +; VI-DS128-NEXT: v_mov_b32_e32 v4, s0 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v19, 16, v5 +; VI-DS128-NEXT: v_bfe_i32 v18, v5, 0, 16 +; VI-DS128-NEXT: ds_write_b128 v4, v[0:3] offset:48 +; VI-DS128-NEXT: ds_write_b128 v4, v[16:19] offset:32 +; VI-DS128-NEXT: ds_write_b128 v4, v[12:15] offset:16 +; VI-DS128-NEXT: ds_write_b128 v4, v[8:11] +; VI-DS128-NEXT: s_endpgm +; +; GFX9-DS128-LABEL: local_sextload_v16i16_to_v16i32: +; GFX9-DS128: ; %bb.0: +; GFX9-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DS128-NEXT: v_mov_b32_e32 v4, s1 +; GFX9-DS128-NEXT: ds_read_b128 v[0:3], v4 +; GFX9-DS128-NEXT: ds_read_b128 v[4:7], v4 offset:16 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(1) +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v11, 16, v1 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v9, 16, v0 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v15, 16, v3 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v13, 16, v2 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v17, 16, v4 +; GFX9-DS128-NEXT: v_bfe_i32 v10, v1, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v8, v0, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v14, v3, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v12, v2, 0, 16 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v3, 16, v7 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 
v1, 16, v6 +; GFX9-DS128-NEXT: v_bfe_i32 v16, v4, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v2, v7, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v0, v6, 0, 16 +; GFX9-DS128-NEXT: v_mov_b32_e32 v4, s0 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v19, 16, v5 +; GFX9-DS128-NEXT: v_bfe_i32 v18, v5, 0, 16 +; GFX9-DS128-NEXT: ds_write_b128 v4, v[0:3] offset:48 +; GFX9-DS128-NEXT: ds_write_b128 v4, v[16:19] offset:32 +; GFX9-DS128-NEXT: ds_write_b128 v4, v[12:15] offset:16 +; GFX9-DS128-NEXT: ds_write_b128 v4, v[8:11] +; GFX9-DS128-NEXT: s_endpgm %load = load <16 x i16>, ptr addrspace(3) %in %ext = sext <16 x i16> %load to <16 x i32> store <16 x i32> %ext, ptr addrspace(3) %out ret void } -; FUNC-LABEL: {{^}}local_zextload_v32i16_to_v32i32: -; GFX9-NOT: m0 -; SICIVI: s_mov_b32 m0 - -; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} -; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3 -; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:4 offset1:5 -; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:6 offset1:7 - -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET define amdgpu_kernel void @local_zextload_v32i16_to_v32i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 { +; SI-LABEL: local_zextload_v32i16_to_v32i32: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v12, s1 +; SI-NEXT: s_mov_b32 m0, -1 +; SI-NEXT: ds_read2_b64 v[0:3], v12 offset1:1 +; SI-NEXT: ds_read2_b64 v[4:7], v12 offset0:2 offset1:3 +; SI-NEXT: ds_read2_b64 v[8:11], v12 offset0:4 offset1:5 +; SI-NEXT: ds_read2_b64 v[12:15], v12 offset0:6 offset1:7 +; SI-NEXT: s_waitcnt lgkmcnt(3) +; SI-NEXT: 
v_lshrrev_b32_e32 v17, 16, v1 +; SI-NEXT: v_lshrrev_b32_e32 v19, 16, v0 +; SI-NEXT: v_lshrrev_b32_e32 v21, 16, v3 +; SI-NEXT: v_lshrrev_b32_e32 v23, 16, v2 +; SI-NEXT: v_and_b32_e32 v16, 0xffff, v1 +; SI-NEXT: v_and_b32_e32 v18, 0xffff, v0 +; SI-NEXT: v_and_b32_e32 v20, 0xffff, v3 +; SI-NEXT: v_and_b32_e32 v22, 0xffff, v2 +; SI-NEXT: s_waitcnt lgkmcnt(2) +; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v5 +; SI-NEXT: v_lshrrev_b32_e32 v3, 16, v4 +; SI-NEXT: v_and_b32_e32 v0, 0xffff, v5 +; SI-NEXT: v_and_b32_e32 v2, 0xffff, v4 +; SI-NEXT: v_lshrrev_b32_e32 v5, 16, v7 +; SI-NEXT: v_and_b32_e32 v4, 0xffff, v7 +; SI-NEXT: v_lshrrev_b32_e32 v7, 16, v6 +; SI-NEXT: v_and_b32_e32 v6, 0xffff, v6 +; SI-NEXT: s_waitcnt lgkmcnt(1) +; SI-NEXT: v_lshrrev_b32_e32 v25, 16, v9 +; SI-NEXT: v_and_b32_e32 v24, 0xffff, v9 +; SI-NEXT: v_lshrrev_b32_e32 v9, 16, v8 +; SI-NEXT: v_and_b32_e32 v8, 0xffff, v8 +; SI-NEXT: v_lshrrev_b32_e32 v27, 16, v11 +; SI-NEXT: v_and_b32_e32 v26, 0xffff, v11 +; SI-NEXT: v_lshrrev_b32_e32 v11, 16, v10 +; SI-NEXT: v_and_b32_e32 v10, 0xffff, v10 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_lshrrev_b32_e32 v29, 16, v13 +; SI-NEXT: v_and_b32_e32 v28, 0xffff, v13 +; SI-NEXT: v_lshrrev_b32_e32 v13, 16, v12 +; SI-NEXT: v_and_b32_e32 v12, 0xffff, v12 +; SI-NEXT: v_lshrrev_b32_e32 v31, 16, v15 +; SI-NEXT: v_and_b32_e32 v30, 0xffff, v15 +; SI-NEXT: v_lshrrev_b32_e32 v15, 16, v14 +; SI-NEXT: v_and_b32_e32 v14, 0xffff, v14 +; SI-NEXT: v_mov_b32_e32 v32, s0 +; SI-NEXT: ds_write2_b64 v32, v[14:15], v[30:31] offset0:14 offset1:15 +; SI-NEXT: ds_write2_b64 v32, v[12:13], v[28:29] offset0:12 offset1:13 +; SI-NEXT: ds_write2_b64 v32, v[10:11], v[26:27] offset0:10 offset1:11 +; SI-NEXT: ds_write2_b64 v32, v[8:9], v[24:25] offset0:8 offset1:9 +; SI-NEXT: ds_write2_b64 v32, v[6:7], v[4:5] offset0:6 offset1:7 +; SI-NEXT: ds_write2_b64 v32, v[2:3], v[0:1] offset0:4 offset1:5 +; SI-NEXT: ds_write2_b64 v32, v[22:23], v[20:21] offset0:2 offset1:3 +; SI-NEXT: ds_write2_b64 v32, v[18:19], 
v[16:17] offset1:1 +; SI-NEXT: s_endpgm +; +; VI-NO-DS128-LABEL: local_zextload_v32i16_to_v32i32: +; VI-NO-DS128: ; %bb.0: +; VI-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-NO-DS128-NEXT: s_mov_b32 m0, -1 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-NO-DS128-NEXT: v_mov_b32_e32 v24, s1 +; VI-NO-DS128-NEXT: ds_read2_b64 v[0:3], v24 offset1:1 +; VI-NO-DS128-NEXT: ds_read2_b64 v[4:7], v24 offset0:2 offset1:3 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v32, s0 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(1) +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v9, 16, v3 +; VI-NO-DS128-NEXT: v_and_b32_e32 v8, 0xffff, v3 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v11, 16, v2 +; VI-NO-DS128-NEXT: v_and_b32_e32 v10, 0xffff, v2 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v13, 16, v1 +; VI-NO-DS128-NEXT: v_and_b32_e32 v12, 0xffff, v1 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v15, 16, v0 +; VI-NO-DS128-NEXT: v_and_b32_e32 v14, 0xffff, v0 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v17, 16, v7 +; VI-NO-DS128-NEXT: v_and_b32_e32 v16, 0xffff, v7 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v19, 16, v6 +; VI-NO-DS128-NEXT: v_and_b32_e32 v18, 0xffff, v6 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v21, 16, v5 +; VI-NO-DS128-NEXT: ds_read2_b64 v[0:3], v24 offset0:4 offset1:5 +; VI-NO-DS128-NEXT: v_and_b32_e32 v20, 0xffff, v5 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v23, 16, v4 +; VI-NO-DS128-NEXT: v_and_b32_e32 v22, 0xffff, v4 +; VI-NO-DS128-NEXT: ds_read2_b64 v[4:7], v24 offset0:6 offset1:7 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(1) +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v25, 16, v1 +; VI-NO-DS128-NEXT: v_and_b32_e32 v24, 0xffff, v1 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; VI-NO-DS128-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v31, 16, v5 +; VI-NO-DS128-NEXT: v_and_b32_e32 v30, 0xffff, v5 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v5, 16, v4 +; VI-NO-DS128-NEXT: v_and_b32_e32 v4, 0xffff, v4 +; 
VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v27, 16, v3 +; VI-NO-DS128-NEXT: v_and_b32_e32 v26, 0xffff, v3 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; VI-NO-DS128-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v29, 16, v7 +; VI-NO-DS128-NEXT: v_and_b32_e32 v28, 0xffff, v7 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v6 +; VI-NO-DS128-NEXT: v_and_b32_e32 v6, 0xffff, v6 +; VI-NO-DS128-NEXT: ds_write2_b64 v32, v[4:5], v[30:31] offset0:12 offset1:13 +; VI-NO-DS128-NEXT: ds_write2_b64 v32, v[6:7], v[28:29] offset0:14 offset1:15 +; VI-NO-DS128-NEXT: ds_write2_b64 v32, v[2:3], v[26:27] offset0:10 offset1:11 +; VI-NO-DS128-NEXT: ds_write2_b64 v32, v[0:1], v[24:25] offset0:8 offset1:9 +; VI-NO-DS128-NEXT: ds_write2_b64 v32, v[22:23], v[20:21] offset0:4 offset1:5 +; VI-NO-DS128-NEXT: ds_write2_b64 v32, v[18:19], v[16:17] offset0:6 offset1:7 +; VI-NO-DS128-NEXT: ds_write2_b64 v32, v[14:15], v[12:13] offset1:1 +; VI-NO-DS128-NEXT: ds_write2_b64 v32, v[10:11], v[8:9] offset0:2 offset1:3 +; VI-NO-DS128-NEXT: s_endpgm +; +; GFX9-NO-DS128-LABEL: local_zextload_v32i16_to_v32i32: +; GFX9-NO-DS128: ; %bb.0: +; GFX9-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v24, s1 +; GFX9-NO-DS128-NEXT: ds_read2_b64 v[0:3], v24 offset1:1 +; GFX9-NO-DS128-NEXT: ds_read2_b64 v[4:7], v24 offset0:2 offset1:3 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v32, s0 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(1) +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v9, 16, v3 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v8, 0xffff, v3 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v11, 16, v2 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v10, 0xffff, v2 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v13, 16, v1 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v12, 0xffff, v1 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v15, 16, v0 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v14, 0xffff, v0 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NO-DS128-NEXT: 
v_lshrrev_b32_e32 v17, 16, v7 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v16, 0xffff, v7 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v19, 16, v6 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v18, 0xffff, v6 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v21, 16, v5 +; GFX9-NO-DS128-NEXT: ds_read2_b64 v[0:3], v24 offset0:4 offset1:5 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v20, 0xffff, v5 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v23, 16, v4 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v22, 0xffff, v4 +; GFX9-NO-DS128-NEXT: ds_read2_b64 v[4:7], v24 offset0:6 offset1:7 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(1) +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v25, 16, v1 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v24, 0xffff, v1 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v31, 16, v5 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v30, 0xffff, v5 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v5, 16, v4 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v4, 0xffff, v4 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v27, 16, v3 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v26, 0xffff, v3 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v29, 16, v7 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v28, 0xffff, v7 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v6 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v6, 0xffff, v6 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v32, v[4:5], v[30:31] offset0:12 offset1:13 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v32, v[6:7], v[28:29] offset0:14 offset1:15 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v32, v[2:3], v[26:27] offset0:10 offset1:11 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v32, v[0:1], v[24:25] offset0:8 offset1:9 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v32, v[22:23], v[20:21] offset0:4 offset1:5 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v32, v[18:19], v[16:17] offset0:6 offset1:7 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v32, 
v[14:15], v[12:13] offset1:1 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v32, v[10:11], v[8:9] offset0:2 offset1:3 +; GFX9-NO-DS128-NEXT: s_endpgm +; +; EG-LABEL: local_zextload_v32i16_to_v32i32: +; EG: ; %bb.0: +; EG-NEXT: ALU 105, @21, KC0[CB0:0-32], KC1[] +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.Y, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 52(7.286752e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.Z, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 56(7.847271e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.W, OQAP, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Z, literal.x, +; EG-NEXT: 60(8.407791e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T1.W +; EG-NEXT: MOV T1.Y, OQAP, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Z, literal.x, +; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T1.W +; EG-NEXT: MOV T1.Z, OQAP, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Z, literal.x, +; EG-NEXT: 36(5.044674e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T1.W +; EG-NEXT: MOV T1.W, OQAP, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Z, literal.x, +; EG-NEXT: 40(5.605194e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T2.W +; EG-NEXT: MOV T2.Y, OQAP, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Z, literal.x, +; EG-NEXT: 44(6.165713e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T2.W +; EG-NEXT: MOV T2.Z, OQAP, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Z, literal.x, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T2.W +; EG-NEXT: MOV T2.W, OQAP, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Z, literal.x, +; EG-NEXT: 20(2.802597e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T3.W +; EG-NEXT: MOV T3.Y, OQAP, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Z, literal.x, +; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T3.W +; 
EG-NEXT: MOV T3.Z, OQAP, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Z, literal.x, +; EG-NEXT: 28(3.923636e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T3.W +; EG-NEXT: MOV T3.W, OQAP, +; EG-NEXT: MOV * T4.W, KC0[2].Z, +; EG-NEXT: LDS_READ_RET * OQAP, T4.W +; EG-NEXT: MOV T4.Y, OQAP, +; EG-NEXT: ADD_INT * T4.W, KC0[2].Z, literal.x, +; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T4.W +; EG-NEXT: MOV T4.Z, OQAP, +; EG-NEXT: ADD_INT * T4.W, KC0[2].Z, literal.x, +; EG-NEXT: 12(1.681558e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T4.W +; EG-NEXT: MOV T4.W, OQAP, +; EG-NEXT: ADD_INT * T5.W, KC0[2].Z, literal.x, +; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T5.W +; EG-NEXT: MOV T5.Y, OQAP, +; EG-NEXT: LSHR T5.W, T4.W, literal.x, +; EG-NEXT: ADD_INT * T6.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 28(3.923636e-44) +; EG-NEXT: LDS_WRITE * T6.W, T5.W, +; EG-NEXT: AND_INT T4.W, T4.W, literal.x, +; EG-NEXT: ADD_INT * T5.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 24(3.363116e-44) +; EG-NEXT: LDS_WRITE * T5.W, T4.W, +; EG-NEXT: LSHR T4.W, T5.Y, literal.x, +; EG-NEXT: ADD_INT * T5.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 20(2.802597e-44) +; EG-NEXT: LDS_WRITE * T5.W, T4.W, +; EG-NEXT: AND_INT T4.W, T5.Y, literal.x, +; EG-NEXT: ADD_INT * T5.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) +; EG-NEXT: LDS_WRITE * T5.W, T4.W, +; EG-NEXT: LSHR T4.W, T4.Z, literal.x, +; EG-NEXT: ADD_INT * T5.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 12(1.681558e-44) +; EG-NEXT: LDS_WRITE * T5.W, T4.W, +; EG-NEXT: AND_INT T4.W, T4.Z, literal.x, +; EG-NEXT: ADD_INT * T5.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 8(1.121039e-44) +; EG-NEXT: LDS_WRITE * T5.W, T4.W, +; EG-NEXT: LSHR T4.W, T4.Y, literal.x, +; EG-NEXT: ADD_INT * T5.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 4(5.605194e-45) +; EG-NEXT: LDS_WRITE * T5.W, T4.W, +; EG-NEXT: 
AND_INT T4.W, T4.Y, literal.x, +; EG-NEXT: MOV * T5.W, KC0[2].Y, +; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T5.W, T4.W, +; EG-NEXT: LSHR T4.W, T3.W, literal.x, +; EG-NEXT: ADD_INT * T5.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 60(8.407791e-44) +; EG-NEXT: LDS_WRITE * T5.W, T4.W, +; EG-NEXT: AND_INT T3.W, T3.W, literal.x, +; EG-NEXT: ADD_INT * T4.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 56(7.847271e-44) +; EG-NEXT: LDS_WRITE * T4.W, T3.W, +; EG-NEXT: LSHR T3.W, T3.Z, literal.x, +; EG-NEXT: ADD_INT * T4.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 52(7.286752e-44) +; EG-NEXT: ALU 84, @22, KC0[CB0:0-32], KC1[] +; EG-NEXT: LDS_WRITE * T4.W, T3.W, +; EG-NEXT: AND_INT T3.W, T3.Z, literal.x, +; EG-NEXT: ADD_INT * T4.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 48(6.726233e-44) +; EG-NEXT: LDS_WRITE * T4.W, T3.W, +; EG-NEXT: LSHR T3.W, T3.Y, literal.x, +; EG-NEXT: ADD_INT * T4.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 44(6.165713e-44) +; EG-NEXT: LDS_WRITE * T4.W, T3.W, +; EG-NEXT: AND_INT T3.W, T3.Y, literal.x, +; EG-NEXT: ADD_INT * T4.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 40(5.605194e-44) +; EG-NEXT: LDS_WRITE * T4.W, T3.W, +; EG-NEXT: LSHR T3.W, T2.W, literal.x, +; EG-NEXT: ADD_INT * T4.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 36(5.044674e-44) +; EG-NEXT: LDS_WRITE * T4.W, T3.W, +; EG-NEXT: AND_INT T2.W, T2.W, literal.x, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 32(4.484155e-44) +; EG-NEXT: LDS_WRITE * T3.W, T2.W, +; EG-NEXT: LSHR T2.W, T2.Z, literal.x, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 92(1.289195e-43) +; EG-NEXT: LDS_WRITE * T3.W, T2.W, +; EG-NEXT: AND_INT T2.W, T2.Z, literal.x, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 88(1.233143e-43) +; EG-NEXT: LDS_WRITE * T3.W, T2.W, +; EG-NEXT: LSHR T2.W, T2.Y, literal.x, +; EG-NEXT: 
ADD_INT * T3.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 84(1.177091e-43) +; EG-NEXT: LDS_WRITE * T3.W, T2.W, +; EG-NEXT: AND_INT T2.W, T2.Y, literal.x, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 80(1.121039e-43) +; EG-NEXT: LDS_WRITE * T3.W, T2.W, +; EG-NEXT: LSHR T2.W, T1.W, literal.x, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 76(1.064987e-43) +; EG-NEXT: LDS_WRITE * T3.W, T2.W, +; EG-NEXT: AND_INT T1.W, T1.W, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 72(1.008935e-43) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: LSHR T1.W, T1.Z, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 68(9.528830e-44) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: AND_INT T1.W, T1.Z, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 64(8.968310e-44) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: LSHR T1.W, T1.Y, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 124(1.737610e-43) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: AND_INT T1.W, T1.Y, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 120(1.681558e-43) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: LSHR T1.W, T0.W, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 116(1.625506e-43) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: AND_INT T0.W, T0.W, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 112(1.569454e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: LSHR T0.W, T0.Z, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 108(1.513402e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: AND_INT T0.W, T0.Z, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 
104(1.457350e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: LSHR T0.W, T0.Y, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 100(1.401298e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: AND_INT T0.W, T0.Y, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 96(1.345247e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: RETURN +; +; VI-DS128-LABEL: local_zextload_v32i16_to_v32i32: +; VI-DS128: ; %bb.0: +; VI-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-DS128-NEXT: s_mov_b32 m0, -1 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-DS128-NEXT: v_mov_b32_e32 v20, s1 +; VI-DS128-NEXT: ds_read_b128 v[0:3], v20 +; VI-DS128-NEXT: ds_read_b128 v[4:7], v20 offset:16 +; VI-DS128-NEXT: ds_read_b128 v[16:19], v20 offset:32 +; VI-DS128-NEXT: ds_read_b128 v[20:23], v20 offset:48 +; VI-DS128-NEXT: v_mov_b32_e32 v32, s0 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(3) +; VI-DS128-NEXT: v_lshrrev_b32_e32 v11, 16, v3 +; VI-DS128-NEXT: v_and_b32_e32 v10, 0xffff, v3 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v9, 16, v2 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-DS128-NEXT: v_lshrrev_b32_e32 v31, 16, v23 +; VI-DS128-NEXT: v_and_b32_e32 v30, 0xffff, v23 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v29, 16, v22 +; VI-DS128-NEXT: v_and_b32_e32 v28, 0xffff, v22 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v23, 16, v21 +; VI-DS128-NEXT: v_and_b32_e32 v22, 0xffff, v21 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v21, 16, v20 +; VI-DS128-NEXT: v_and_b32_e32 v20, 0xffff, v20 +; VI-DS128-NEXT: v_and_b32_e32 v8, 0xffff, v2 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; VI-DS128-NEXT: v_and_b32_e32 v2, 0xffff, v1 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; VI-DS128-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v15, 16, v7 +; VI-DS128-NEXT: v_and_b32_e32 v14, 0xffff, v7 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v13, 16, v6 +; VI-DS128-NEXT: v_and_b32_e32 v12, 0xffff, v6 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, 
v5 +; VI-DS128-NEXT: v_and_b32_e32 v6, 0xffff, v5 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v5, 16, v4 +; VI-DS128-NEXT: v_and_b32_e32 v4, 0xffff, v4 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v27, 16, v19 +; VI-DS128-NEXT: v_and_b32_e32 v26, 0xffff, v19 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v25, 16, v18 +; VI-DS128-NEXT: v_and_b32_e32 v24, 0xffff, v18 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v19, 16, v17 +; VI-DS128-NEXT: v_and_b32_e32 v18, 0xffff, v17 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v17, 16, v16 +; VI-DS128-NEXT: v_and_b32_e32 v16, 0xffff, v16 +; VI-DS128-NEXT: ds_write_b128 v32, v[20:23] offset:96 +; VI-DS128-NEXT: ds_write_b128 v32, v[28:31] offset:112 +; VI-DS128-NEXT: ds_write_b128 v32, v[16:19] offset:64 +; VI-DS128-NEXT: ds_write_b128 v32, v[24:27] offset:80 +; VI-DS128-NEXT: ds_write_b128 v32, v[4:7] offset:32 +; VI-DS128-NEXT: ds_write_b128 v32, v[12:15] offset:48 +; VI-DS128-NEXT: ds_write_b128 v32, v[0:3] +; VI-DS128-NEXT: ds_write_b128 v32, v[8:11] offset:16 +; VI-DS128-NEXT: s_endpgm +; +; GFX9-DS128-LABEL: local_zextload_v32i16_to_v32i32: +; GFX9-DS128: ; %bb.0: +; GFX9-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DS128-NEXT: v_mov_b32_e32 v20, s1 +; GFX9-DS128-NEXT: ds_read_b128 v[0:3], v20 +; GFX9-DS128-NEXT: ds_read_b128 v[4:7], v20 offset:16 +; GFX9-DS128-NEXT: ds_read_b128 v[16:19], v20 offset:32 +; GFX9-DS128-NEXT: ds_read_b128 v[20:23], v20 offset:48 +; GFX9-DS128-NEXT: v_mov_b32_e32 v32, s0 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(3) +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v11, 16, v3 +; GFX9-DS128-NEXT: v_and_b32_e32 v10, 0xffff, v3 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v9, 16, v2 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v31, 16, v23 +; GFX9-DS128-NEXT: v_and_b32_e32 v30, 0xffff, v23 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v29, 16, v22 +; GFX9-DS128-NEXT: v_and_b32_e32 v28, 0xffff, v22 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v23, 16, v21 +; GFX9-DS128-NEXT: v_and_b32_e32 v22, 
0xffff, v21 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v21, 16, v20 +; GFX9-DS128-NEXT: v_and_b32_e32 v20, 0xffff, v20 +; GFX9-DS128-NEXT: v_and_b32_e32 v8, 0xffff, v2 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; GFX9-DS128-NEXT: v_and_b32_e32 v2, 0xffff, v1 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX9-DS128-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v15, 16, v7 +; GFX9-DS128-NEXT: v_and_b32_e32 v14, 0xffff, v7 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v13, 16, v6 +; GFX9-DS128-NEXT: v_and_b32_e32 v12, 0xffff, v6 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v5 +; GFX9-DS128-NEXT: v_and_b32_e32 v6, 0xffff, v5 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v5, 16, v4 +; GFX9-DS128-NEXT: v_and_b32_e32 v4, 0xffff, v4 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v27, 16, v19 +; GFX9-DS128-NEXT: v_and_b32_e32 v26, 0xffff, v19 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v25, 16, v18 +; GFX9-DS128-NEXT: v_and_b32_e32 v24, 0xffff, v18 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v19, 16, v17 +; GFX9-DS128-NEXT: v_and_b32_e32 v18, 0xffff, v17 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v17, 16, v16 +; GFX9-DS128-NEXT: v_and_b32_e32 v16, 0xffff, v16 +; GFX9-DS128-NEXT: ds_write_b128 v32, v[20:23] offset:96 +; GFX9-DS128-NEXT: ds_write_b128 v32, v[28:31] offset:112 +; GFX9-DS128-NEXT: ds_write_b128 v32, v[16:19] offset:64 +; GFX9-DS128-NEXT: ds_write_b128 v32, v[24:27] offset:80 +; GFX9-DS128-NEXT: ds_write_b128 v32, v[4:7] offset:32 +; GFX9-DS128-NEXT: ds_write_b128 v32, v[12:15] offset:48 +; GFX9-DS128-NEXT: ds_write_b128 v32, v[0:3] +; GFX9-DS128-NEXT: ds_write_b128 v32, v[8:11] offset:16 +; GFX9-DS128-NEXT: s_endpgm %load = load <32 x i16>, ptr addrspace(3) %in %ext = zext <32 x i16> %load to <32 x i32> store <32 x i32> %ext, ptr addrspace(3) %out ret void } -; FUNC-LABEL: {{^}}local_sextload_v32i16_to_v32i32: -; GFX9-NOT: m0 -; SICIVI: s_mov_b32 m0 - -; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}} -; GCN-DAG: ds_read2_b64 
v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:4 offset1:5 -; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} -; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:6 offset1:7 -; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:14 offset1:15 -; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:12 offset1:13 -; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:10 offset1:11 -; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:8 offset1:9 -; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:6 offset1:7 -; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:4 offset1:5 -; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:2 offset1:3 -; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset1:1 - -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET define amdgpu_kernel void @local_sextload_v32i16_to_v32i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 { +; SI-LABEL: local_sextload_v32i16_to_v32i32: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v12, s1 +; SI-NEXT: s_mov_b32 m0, -1 +; SI-NEXT: ds_read2_b64 v[0:3], v12 offset1:1 +; SI-NEXT: ds_read2_b64 v[4:7], v12 offset0:2 offset1:3 +; SI-NEXT: ds_read2_b64 v[8:11], v12 offset0:4 offset1:5 +; SI-NEXT: ds_read2_b64 v[12:15], v12 offset0:6 offset1:7 +; SI-NEXT: s_waitcnt lgkmcnt(3) +; SI-NEXT: v_ashrrev_i32_e32 v17, 16, v1 +; 
SI-NEXT: v_ashrrev_i32_e32 v19, 16, v0 +; SI-NEXT: v_ashrrev_i32_e32 v21, 16, v3 +; SI-NEXT: v_ashrrev_i32_e32 v23, 16, v2 +; SI-NEXT: v_bfe_i32 v16, v1, 0, 16 +; SI-NEXT: v_bfe_i32 v18, v0, 0, 16 +; SI-NEXT: v_bfe_i32 v20, v3, 0, 16 +; SI-NEXT: v_bfe_i32 v22, v2, 0, 16 +; SI-NEXT: s_waitcnt lgkmcnt(2) +; SI-NEXT: v_ashrrev_i32_e32 v1, 16, v5 +; SI-NEXT: v_ashrrev_i32_e32 v3, 16, v4 +; SI-NEXT: v_bfe_i32 v0, v5, 0, 16 +; SI-NEXT: v_bfe_i32 v2, v4, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v5, 16, v7 +; SI-NEXT: v_bfe_i32 v4, v7, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v7, 16, v6 +; SI-NEXT: v_bfe_i32 v6, v6, 0, 16 +; SI-NEXT: s_waitcnt lgkmcnt(1) +; SI-NEXT: v_ashrrev_i32_e32 v25, 16, v9 +; SI-NEXT: v_bfe_i32 v24, v9, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v9, 16, v8 +; SI-NEXT: v_bfe_i32 v8, v8, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v27, 16, v11 +; SI-NEXT: v_bfe_i32 v26, v11, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v11, 16, v10 +; SI-NEXT: v_bfe_i32 v10, v10, 0, 16 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_ashrrev_i32_e32 v29, 16, v13 +; SI-NEXT: v_bfe_i32 v28, v13, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v13, 16, v12 +; SI-NEXT: v_bfe_i32 v12, v12, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v31, 16, v15 +; SI-NEXT: v_bfe_i32 v30, v15, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v15, 16, v14 +; SI-NEXT: v_bfe_i32 v14, v14, 0, 16 +; SI-NEXT: v_mov_b32_e32 v32, s0 +; SI-NEXT: ds_write2_b64 v32, v[14:15], v[30:31] offset0:14 offset1:15 +; SI-NEXT: ds_write2_b64 v32, v[12:13], v[28:29] offset0:12 offset1:13 +; SI-NEXT: ds_write2_b64 v32, v[10:11], v[26:27] offset0:10 offset1:11 +; SI-NEXT: ds_write2_b64 v32, v[8:9], v[24:25] offset0:8 offset1:9 +; SI-NEXT: ds_write2_b64 v32, v[6:7], v[4:5] offset0:6 offset1:7 +; SI-NEXT: ds_write2_b64 v32, v[2:3], v[0:1] offset0:4 offset1:5 +; SI-NEXT: ds_write2_b64 v32, v[22:23], v[20:21] offset0:2 offset1:3 +; SI-NEXT: ds_write2_b64 v32, v[18:19], v[16:17] offset1:1 +; SI-NEXT: s_endpgm +; +; VI-NO-DS128-LABEL: local_sextload_v32i16_to_v32i32: +; VI-NO-DS128: ; 
%bb.0: +; VI-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-NO-DS128-NEXT: s_mov_b32 m0, -1 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-NO-DS128-NEXT: v_mov_b32_e32 v24, s1 +; VI-NO-DS128-NEXT: ds_read2_b64 v[0:3], v24 offset1:1 +; VI-NO-DS128-NEXT: ds_read2_b64 v[4:7], v24 offset0:2 offset1:3 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v32, s0 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(1) +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v9, 16, v3 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v11, 16, v2 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v13, 16, v1 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v15, 16, v0 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v17, 16, v7 +; VI-NO-DS128-NEXT: v_bfe_i32 v8, v3, 0, 16 +; VI-NO-DS128-NEXT: v_bfe_i32 v10, v2, 0, 16 +; VI-NO-DS128-NEXT: v_bfe_i32 v12, v1, 0, 16 +; VI-NO-DS128-NEXT: v_bfe_i32 v14, v0, 0, 16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v19, 16, v6 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v21, 16, v5 +; VI-NO-DS128-NEXT: v_bfe_i32 v16, v7, 0, 16 +; VI-NO-DS128-NEXT: v_bfe_i32 v18, v6, 0, 16 +; VI-NO-DS128-NEXT: ds_read2_b64 v[0:3], v24 offset0:4 offset1:5 +; VI-NO-DS128-NEXT: v_bfe_i32 v20, v5, 0, 16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v23, 16, v4 +; VI-NO-DS128-NEXT: v_bfe_i32 v22, v4, 0, 16 +; VI-NO-DS128-NEXT: ds_read2_b64 v[4:7], v24 offset0:6 offset1:7 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(1) +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v25, 16, v1 +; VI-NO-DS128-NEXT: v_bfe_i32 v24, v1, 0, 16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v1, 16, v0 +; VI-NO-DS128-NEXT: v_bfe_i32 v0, v0, 0, 16 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v31, 16, v5 +; VI-NO-DS128-NEXT: v_bfe_i32 v30, v5, 0, 16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v5, 16, v4 +; VI-NO-DS128-NEXT: v_bfe_i32 v4, v4, 0, 16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v27, 16, v3 +; VI-NO-DS128-NEXT: v_bfe_i32 v26, v3, 0, 16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v3, 16, v2 +; VI-NO-DS128-NEXT: v_bfe_i32 v2, v2, 0, 
16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v29, 16, v7 +; VI-NO-DS128-NEXT: v_bfe_i32 v28, v7, 0, 16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v7, 16, v6 +; VI-NO-DS128-NEXT: v_bfe_i32 v6, v6, 0, 16 +; VI-NO-DS128-NEXT: ds_write2_b64 v32, v[4:5], v[30:31] offset0:12 offset1:13 +; VI-NO-DS128-NEXT: ds_write2_b64 v32, v[6:7], v[28:29] offset0:14 offset1:15 +; VI-NO-DS128-NEXT: ds_write2_b64 v32, v[2:3], v[26:27] offset0:10 offset1:11 +; VI-NO-DS128-NEXT: ds_write2_b64 v32, v[0:1], v[24:25] offset0:8 offset1:9 +; VI-NO-DS128-NEXT: ds_write2_b64 v32, v[22:23], v[20:21] offset0:4 offset1:5 +; VI-NO-DS128-NEXT: ds_write2_b64 v32, v[18:19], v[16:17] offset0:6 offset1:7 +; VI-NO-DS128-NEXT: ds_write2_b64 v32, v[14:15], v[12:13] offset1:1 +; VI-NO-DS128-NEXT: ds_write2_b64 v32, v[10:11], v[8:9] offset0:2 offset1:3 +; VI-NO-DS128-NEXT: s_endpgm +; +; GFX9-NO-DS128-LABEL: local_sextload_v32i16_to_v32i32: +; GFX9-NO-DS128: ; %bb.0: +; GFX9-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v24, s1 +; GFX9-NO-DS128-NEXT: ds_read2_b64 v[0:3], v24 offset1:1 +; GFX9-NO-DS128-NEXT: ds_read2_b64 v[4:7], v24 offset0:2 offset1:3 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v32, s0 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(1) +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v9, 16, v3 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v11, 16, v2 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v13, 16, v1 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v15, 16, v0 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v17, 16, v7 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v8, v3, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v10, v2, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v12, v1, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v14, v0, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v19, 16, v6 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v21, 16, v5 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v16, v7, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v18, v6, 0, 16 +; 
GFX9-NO-DS128-NEXT: ds_read2_b64 v[0:3], v24 offset0:4 offset1:5 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v20, v5, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v23, 16, v4 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v22, v4, 0, 16 +; GFX9-NO-DS128-NEXT: ds_read2_b64 v[4:7], v24 offset0:6 offset1:7 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(1) +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v25, 16, v1 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v24, v1, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v1, 16, v0 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v0, v0, 0, 16 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v31, 16, v5 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v30, v5, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v5, 16, v4 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v4, v4, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v27, 16, v3 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v26, v3, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v3, 16, v2 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v2, v2, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v29, 16, v7 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v28, v7, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v7, 16, v6 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v6, v6, 0, 16 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v32, v[4:5], v[30:31] offset0:12 offset1:13 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v32, v[6:7], v[28:29] offset0:14 offset1:15 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v32, v[2:3], v[26:27] offset0:10 offset1:11 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v32, v[0:1], v[24:25] offset0:8 offset1:9 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v32, v[22:23], v[20:21] offset0:4 offset1:5 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v32, v[18:19], v[16:17] offset0:6 offset1:7 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v32, v[14:15], v[12:13] offset1:1 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v32, v[10:11], v[8:9] offset0:2 offset1:3 +; GFX9-NO-DS128-NEXT: s_endpgm +; +; EG-LABEL: local_sextload_v32i16_to_v32i32: +; EG: ; %bb.0: +; EG-NEXT: ALU 101, @23, KC0[CB0:0-32], KC1[] +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; 
EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.Y, OQAP, +; EG-NEXT: MOV * T0.W, KC0[2].Z, +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.Z, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 28(3.923636e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.W, OQAP, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Z, literal.x, +; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T1.W +; EG-NEXT: MOV T1.Y, OQAP, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Z, literal.x, +; EG-NEXT: 20(2.802597e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T1.W +; EG-NEXT: MOV T1.Z, OQAP, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Z, literal.x, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T1.W +; EG-NEXT: MOV T1.W, OQAP, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Z, literal.x, +; EG-NEXT: 44(6.165713e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T2.W +; EG-NEXT: MOV T2.Y, OQAP, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Z, literal.x, +; EG-NEXT: 40(5.605194e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T2.W +; EG-NEXT: MOV T2.Z, OQAP, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Z, literal.x, +; EG-NEXT: 36(5.044674e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T2.W +; EG-NEXT: MOV T2.W, OQAP, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Z, literal.x, +; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T3.W +; EG-NEXT: MOV T3.Y, OQAP, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Z, literal.x, +; EG-NEXT: 60(8.407791e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T3.W +; EG-NEXT: MOV T3.Z, OQAP, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Z, literal.x, +; EG-NEXT: 56(7.847271e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T3.W +; EG-NEXT: MOV T3.W, OQAP, +; EG-NEXT: ADD_INT * T4.W, KC0[2].Z, literal.x, +; EG-NEXT: 52(7.286752e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T4.W +; EG-NEXT: MOV T4.Y, OQAP, +; EG-NEXT: ADD_INT 
* T4.W, KC0[2].Z, literal.x, +; EG-NEXT: 12(1.681558e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T4.W +; EG-NEXT: MOV T4.Z, OQAP, +; EG-NEXT: ADD_INT * T4.W, KC0[2].Z, literal.x, +; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T4.W +; EG-NEXT: MOV T4.W, OQAP, +; EG-NEXT: LSHR * T5.W, T4.Z, literal.x, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: ADD_INT * T6.W, KC0[2].Z, literal.x, +; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T6.W +; EG-NEXT: MOV T5.Y, OQAP, +; EG-NEXT: LSHR T5.Z, T4.W, literal.x, +; EG-NEXT: BFE_INT T5.W, T5.W, 0.0, literal.x, BS:VEC_120/SCL_212 +; EG-NEXT: ADD_INT * T6.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 28(3.923636e-44) +; EG-NEXT: LDS_WRITE * T6.W, T5.W, +; EG-NEXT: LSHR T6.Z, T0.Y, literal.x, +; EG-NEXT: BFE_INT T5.W, T5.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T6.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 20(2.802597e-44) +; EG-NEXT: LDS_WRITE * T6.W, T5.W, +; EG-NEXT: LSHR T5.Z, T0.Z, literal.x, +; EG-NEXT: BFE_INT T5.W, T6.Z, 0.0, literal.x, BS:VEC_120/SCL_212 +; EG-NEXT: ADD_INT * T6.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 12(1.681558e-44) +; EG-NEXT: LDS_WRITE * T6.W, T5.W, +; EG-NEXT: LSHR T6.Z, T0.W, literal.x, +; EG-NEXT: BFE_INT T5.W, T5.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T6.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 4(5.605194e-45) +; EG-NEXT: LDS_WRITE * T6.W, T5.W, +; EG-NEXT: LSHR T5.Z, T1.Y, literal.x, +; EG-NEXT: BFE_INT T5.W, T6.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T6.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 60(8.407791e-44) +; EG-NEXT: LDS_WRITE * T6.W, T5.W, +; EG-NEXT: LSHR T6.Z, T1.Z, literal.x, +; EG-NEXT: BFE_INT T5.W, T5.Z, 0.0, literal.x, BS:VEC_120/SCL_212 +; EG-NEXT: ADD_INT * T6.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 52(7.286752e-44) +; EG-NEXT: LDS_WRITE * T6.W, T5.W, +; EG-NEXT: LSHR T5.Z, T1.W, literal.x, +; EG-NEXT: BFE_INT T5.W, T6.Z, 
0.0, literal.x, +; EG-NEXT: ADD_INT * T6.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 44(6.165713e-44) +; EG-NEXT: LDS_WRITE * T6.W, T5.W, +; EG-NEXT: LSHR * T6.Z, T2.Y, literal.x, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: ALU 89, @24, KC0[CB0:0-32], KC1[] +; EG-NEXT: BFE_INT T5.W, T5.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T6.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 36(5.044674e-44) +; EG-NEXT: LDS_WRITE * T6.W, T5.W, +; EG-NEXT: LSHR T5.Z, T2.Z, literal.x, +; EG-NEXT: BFE_INT T5.W, T6.Z, 0.0, literal.x, BS:VEC_120/SCL_212 +; EG-NEXT: ADD_INT * T6.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 92(1.289195e-43) +; EG-NEXT: LDS_WRITE * T6.W, T5.W, +; EG-NEXT: LSHR T6.Z, T2.W, literal.x, +; EG-NEXT: BFE_INT T5.W, T5.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T6.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 84(1.177091e-43) +; EG-NEXT: LDS_WRITE * T6.W, T5.W, +; EG-NEXT: LSHR T5.Z, T3.Y, literal.x, +; EG-NEXT: BFE_INT T5.W, T6.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T6.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 76(1.064987e-43) +; EG-NEXT: LDS_WRITE * T6.W, T5.W, +; EG-NEXT: LSHR T6.Z, T3.Z, literal.x, +; EG-NEXT: BFE_INT T5.W, T5.Z, 0.0, literal.x, BS:VEC_120/SCL_212 +; EG-NEXT: ADD_INT * T6.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 68(9.528830e-44) +; EG-NEXT: LDS_WRITE * T6.W, T5.W, +; EG-NEXT: LSHR T5.Z, T3.W, literal.x, +; EG-NEXT: BFE_INT T5.W, T6.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T6.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 124(1.737610e-43) +; EG-NEXT: LDS_WRITE * T6.W, T5.W, +; EG-NEXT: LSHR T6.Z, T4.Y, literal.x, +; EG-NEXT: BFE_INT T5.W, T5.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T6.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 116(1.625506e-43) +; EG-NEXT: LDS_WRITE * T6.W, T5.W, +; EG-NEXT: LSHR T5.Z, T5.Y, literal.x, +; EG-NEXT: BFE_INT T5.W, T6.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T6.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 
108(1.513402e-43) +; EG-NEXT: LDS_WRITE * T6.W, T5.W, +; EG-NEXT: BFE_INT T5.W, T5.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T6.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 100(1.401298e-43) +; EG-NEXT: LDS_WRITE * T6.W, T5.W, +; EG-NEXT: BFE_INT T5.W, T4.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T6.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 24(3.363116e-44) +; EG-NEXT: LDS_WRITE * T6.W, T5.W, +; EG-NEXT: BFE_INT T4.W, T4.W, 0.0, literal.x, +; EG-NEXT: ADD_INT * T5.W, KC0[2].Y, literal.x, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T5.W, T4.W, +; EG-NEXT: BFE_INT T4.W, T0.Y, 0.0, literal.x, +; EG-NEXT: ADD_INT * T5.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 8(1.121039e-44) +; EG-NEXT: LDS_WRITE * T5.W, T4.W, +; EG-NEXT: BFE_INT T4.W, T0.Z, 0.0, literal.x, +; EG-NEXT: MOV * T5.W, KC0[2].Y, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T5.W, T4.W, +; EG-NEXT: BFE_INT T0.W, T0.W, 0.0, literal.x, +; EG-NEXT: ADD_INT * T4.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 56(7.847271e-44) +; EG-NEXT: LDS_WRITE * T4.W, T0.W, +; EG-NEXT: BFE_INT T0.W, T1.Y, 0.0, literal.x, +; EG-NEXT: ADD_INT * T4.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 48(6.726233e-44) +; EG-NEXT: LDS_WRITE * T4.W, T0.W, +; EG-NEXT: BFE_INT T0.W, T1.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T4.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 40(5.605194e-44) +; EG-NEXT: LDS_WRITE * T4.W, T0.W, +; EG-NEXT: BFE_INT T0.W, T1.W, 0.0, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 32(4.484155e-44) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: BFE_INT T0.W, T2.Y, 0.0, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 88(1.233143e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: BFE_INT T0.W, T2.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 80(1.121039e-43) +; EG-NEXT: 
LDS_WRITE * T1.W, T0.W, +; EG-NEXT: BFE_INT T0.W, T2.W, 0.0, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 72(1.008935e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: BFE_INT T0.W, T3.Y, 0.0, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 64(8.968310e-44) +; EG-NEXT: ALU 16, @25, KC0[CB0:0-32], KC1[] +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: BFE_INT T0.W, T3.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 120(1.681558e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: BFE_INT T0.W, T3.W, 0.0, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 112(1.569454e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: BFE_INT T0.W, T4.Y, 0.0, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 104(1.457350e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: BFE_INT T0.W, T5.Y, 0.0, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 96(1.345247e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: RETURN +; +; VI-DS128-LABEL: local_sextload_v32i16_to_v32i32: +; VI-DS128: ; %bb.0: +; VI-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-DS128-NEXT: s_mov_b32 m0, -1 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-DS128-NEXT: v_mov_b32_e32 v24, s1 +; VI-DS128-NEXT: ds_read_b128 v[0:3], v24 +; VI-DS128-NEXT: ds_read_b128 v[4:7], v24 offset:16 +; VI-DS128-NEXT: ds_read_b128 v[20:23], v24 offset:32 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(2) +; VI-DS128-NEXT: v_ashrrev_i32_e32 v11, 16, v3 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v9, 16, v2 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v15, 16, v1 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v13, 16, v0 +; VI-DS128-NEXT: v_bfe_i32 v10, v3, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v8, v2, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v14, v1, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v12, v0, 0, 16 +; VI-DS128-NEXT: s_waitcnt 
lgkmcnt(1) +; VI-DS128-NEXT: v_ashrrev_i32_e32 v3, 16, v7 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v1, 16, v6 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v19, 16, v5 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v17, 16, v4 +; VI-DS128-NEXT: v_bfe_i32 v2, v7, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v0, v6, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v18, v5, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v16, v4, 0, 16 +; VI-DS128-NEXT: ds_read_b128 v[4:7], v24 offset:48 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(1) +; VI-DS128-NEXT: v_ashrrev_i32_e32 v26, 16, v23 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v24, 16, v22 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v30, 16, v21 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v28, 16, v20 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-DS128-NEXT: v_ashrrev_i32_e32 v38, 16, v5 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v36, 16, v4 +; VI-DS128-NEXT: v_bfe_i32 v37, v5, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v35, v4, 0, 16 +; VI-DS128-NEXT: v_mov_b32_e32 v4, s0 +; VI-DS128-NEXT: v_bfe_i32 v25, v23, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v23, v22, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v29, v21, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v27, v20, 0, 16 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v34, 16, v7 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v32, 16, v6 +; VI-DS128-NEXT: v_bfe_i32 v33, v7, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v31, v6, 0, 16 +; VI-DS128-NEXT: ds_write_b128 v4, v[35:38] offset:96 +; VI-DS128-NEXT: ds_write_b128 v4, v[31:34] offset:112 +; VI-DS128-NEXT: ds_write_b128 v4, v[27:30] offset:64 +; VI-DS128-NEXT: ds_write_b128 v4, v[23:26] offset:80 +; VI-DS128-NEXT: ds_write_b128 v4, v[16:19] offset:32 +; VI-DS128-NEXT: ds_write_b128 v4, v[0:3] offset:48 +; VI-DS128-NEXT: ds_write_b128 v4, v[12:15] +; VI-DS128-NEXT: ds_write_b128 v4, v[8:11] offset:16 +; VI-DS128-NEXT: s_endpgm +; +; GFX9-DS128-LABEL: local_sextload_v32i16_to_v32i32: +; GFX9-DS128: ; %bb.0: +; GFX9-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DS128-NEXT: v_mov_b32_e32 v24, s1 +; GFX9-DS128-NEXT: ds_read_b128 v[0:3], v24 +; 
GFX9-DS128-NEXT: ds_read_b128 v[4:7], v24 offset:16 +; GFX9-DS128-NEXT: ds_read_b128 v[20:23], v24 offset:32 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(2) +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v11, 16, v3 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v9, 16, v2 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v15, 16, v1 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v13, 16, v0 +; GFX9-DS128-NEXT: v_bfe_i32 v10, v3, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v8, v2, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v14, v1, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v12, v0, 0, 16 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(1) +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v3, 16, v7 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v1, 16, v6 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v19, 16, v5 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v17, 16, v4 +; GFX9-DS128-NEXT: v_bfe_i32 v2, v7, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v0, v6, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v18, v5, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v16, v4, 0, 16 +; GFX9-DS128-NEXT: ds_read_b128 v[4:7], v24 offset:48 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(1) +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v26, 16, v23 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v24, 16, v22 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v30, 16, v21 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v28, 16, v20 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v38, 16, v5 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v36, 16, v4 +; GFX9-DS128-NEXT: v_bfe_i32 v37, v5, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v35, v4, 0, 16 +; GFX9-DS128-NEXT: v_mov_b32_e32 v4, s0 +; GFX9-DS128-NEXT: v_bfe_i32 v25, v23, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v23, v22, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v29, v21, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v27, v20, 0, 16 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v34, 16, v7 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v32, 16, v6 +; GFX9-DS128-NEXT: v_bfe_i32 v33, v7, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v31, v6, 0, 16 +; GFX9-DS128-NEXT: ds_write_b128 v4, v[35:38] offset:96 +; GFX9-DS128-NEXT: ds_write_b128 v4, v[31:34] 
offset:112 +; GFX9-DS128-NEXT: ds_write_b128 v4, v[27:30] offset:64 +; GFX9-DS128-NEXT: ds_write_b128 v4, v[23:26] offset:80 +; GFX9-DS128-NEXT: ds_write_b128 v4, v[16:19] offset:32 +; GFX9-DS128-NEXT: ds_write_b128 v4, v[0:3] offset:48 +; GFX9-DS128-NEXT: ds_write_b128 v4, v[12:15] +; GFX9-DS128-NEXT: ds_write_b128 v4, v[8:11] offset:16 +; GFX9-DS128-NEXT: s_endpgm %load = load <32 x i16>, ptr addrspace(3) %in %ext = sext <32 x i16> %load to <32 x i32> store <32 x i32> %ext, ptr addrspace(3) %out ret void } -; FUNC-LABEL: {{^}}local_zextload_v64i16_to_v64i32: -; GFX9-NOT: m0 -; SICIVI: s_mov_b32 m0 - -; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:14 offset1:15 -; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} -; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3 -; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:4 offset1:5 -; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:6 offset1:7 -; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:8 offset1:9 -; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:12 offset1:13 -; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:10 offset1:11 -; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:30 offset1:31 -; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:28 offset1:29 -; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:26 offset1:27 -; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:24 offset1:25 -; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:22 offset1:23 -; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:20 offset1:21 -; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} 
offset0:18 offset1:19 -; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:16 offset1:17 -; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:14 offset1:15 -; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:12 offset1:13 -; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:10 offset1:11 -; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:8 offset1:9 -; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:6 offset1:7 -; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:4 offset1:5 -; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:2 offset1:3 -; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset1:1 - -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET define amdgpu_kernel void @local_zextload_v64i16_to_v64i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 { +; SI-LABEL: local_zextload_v64i16_to_v64i32: +; SI: ; %bb.0: +; SI-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 +; SI-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 +; SI-NEXT: s_mov_b32 s14, -1 +; SI-NEXT: s_mov_b32 s15, 0xe8f000 +; SI-NEXT: s_add_u32 s12, s12, s11 +; SI-NEXT: 
s_addc_u32 s13, s13, 0 +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v24, s1 +; SI-NEXT: s_mov_b32 m0, -1 +; SI-NEXT: ds_read2_b64 v[0:3], v24 offset0:8 offset1:9 +; SI-NEXT: ds_read2_b64 v[4:7], v24 offset0:10 offset1:11 +; SI-NEXT: ds_read2_b64 v[12:15], v24 offset0:12 offset1:13 +; SI-NEXT: ds_read2_b64 v[8:11], v24 offset0:14 offset1:15 +; SI-NEXT: ds_read2_b64 v[20:23], v24 offset1:1 +; SI-NEXT: ds_read2_b64 v[16:19], v24 offset0:2 offset1:3 +; SI-NEXT: ds_read2_b64 v[34:37], v24 offset0:4 offset1:5 +; SI-NEXT: ds_read2_b64 v[38:41], v24 offset0:6 offset1:7 +; SI-NEXT: s_waitcnt lgkmcnt(7) +; SI-NEXT: v_lshrrev_b32_e32 v25, 16, v1 +; SI-NEXT: v_lshrrev_b32_e32 v27, 16, v0 +; SI-NEXT: v_lshrrev_b32_e32 v29, 16, v3 +; SI-NEXT: v_lshrrev_b32_e32 v31, 16, v2 +; SI-NEXT: s_waitcnt lgkmcnt(6) +; SI-NEXT: v_lshrrev_b32_e32 v33, 16, v5 +; SI-NEXT: v_and_b32_e32 v24, 0xffff, v1 +; SI-NEXT: buffer_store_dword v24, off, s[12:15], 0 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v25, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill +; SI-NEXT: v_and_b32_e32 v26, 0xffff, v0 +; SI-NEXT: v_and_b32_e32 v28, 0xffff, v3 +; SI-NEXT: v_and_b32_e32 v30, 0xffff, v2 +; SI-NEXT: s_waitcnt expcnt(0) +; SI-NEXT: v_lshrrev_b32_e32 v25, 16, v4 +; SI-NEXT: v_lshrrev_b32_e32 v3, 16, v7 +; SI-NEXT: v_and_b32_e32 v32, 0xffff, v5 +; SI-NEXT: v_and_b32_e32 v24, 0xffff, v4 +; SI-NEXT: v_and_b32_e32 v2, 0xffff, v7 +; SI-NEXT: v_lshrrev_b32_e32 v5, 16, v6 +; SI-NEXT: v_and_b32_e32 v4, 0xffff, v6 +; SI-NEXT: s_waitcnt lgkmcnt(5) +; SI-NEXT: v_lshrrev_b32_e32 v7, 16, v13 +; SI-NEXT: v_and_b32_e32 v6, 0xffff, v13 +; SI-NEXT: v_lshrrev_b32_e32 v13, 16, v12 +; SI-NEXT: v_and_b32_e32 v12, 0xffff, v12 +; SI-NEXT: v_lshrrev_b32_e32 v43, 16, v15 +; SI-NEXT: v_and_b32_e32 v42, 0xffff, v15 +; SI-NEXT: v_lshrrev_b32_e32 v15, 16, v14 +; SI-NEXT: v_and_b32_e32 v14, 0xffff, v14 +; SI-NEXT: s_waitcnt lgkmcnt(4) +; SI-NEXT: v_lshrrev_b32_e32 v45, 
16, v9 +; SI-NEXT: v_and_b32_e32 v44, 0xffff, v9 +; SI-NEXT: v_lshrrev_b32_e32 v9, 16, v8 +; SI-NEXT: v_and_b32_e32 v8, 0xffff, v8 +; SI-NEXT: v_lshrrev_b32_e32 v47, 16, v11 +; SI-NEXT: v_and_b32_e32 v46, 0xffff, v11 +; SI-NEXT: v_lshrrev_b32_e32 v11, 16, v10 +; SI-NEXT: v_and_b32_e32 v10, 0xffff, v10 +; SI-NEXT: s_waitcnt lgkmcnt(3) +; SI-NEXT: v_lshrrev_b32_e32 v49, 16, v21 +; SI-NEXT: v_and_b32_e32 v48, 0xffff, v21 +; SI-NEXT: v_lshrrev_b32_e32 v21, 16, v20 +; SI-NEXT: v_and_b32_e32 v20, 0xffff, v20 +; SI-NEXT: v_lshrrev_b32_e32 v51, 16, v23 +; SI-NEXT: v_and_b32_e32 v50, 0xffff, v23 +; SI-NEXT: v_lshrrev_b32_e32 v23, 16, v22 +; SI-NEXT: v_and_b32_e32 v22, 0xffff, v22 +; SI-NEXT: s_waitcnt lgkmcnt(2) +; SI-NEXT: v_lshrrev_b32_e32 v53, 16, v17 +; SI-NEXT: v_and_b32_e32 v52, 0xffff, v17 +; SI-NEXT: v_lshrrev_b32_e32 v17, 16, v16 +; SI-NEXT: v_and_b32_e32 v16, 0xffff, v16 +; SI-NEXT: v_lshrrev_b32_e32 v55, 16, v19 +; SI-NEXT: v_and_b32_e32 v54, 0xffff, v19 +; SI-NEXT: v_lshrrev_b32_e32 v19, 16, v18 +; SI-NEXT: v_and_b32_e32 v18, 0xffff, v18 +; SI-NEXT: s_waitcnt lgkmcnt(1) +; SI-NEXT: v_lshrrev_b32_e32 v57, 16, v35 +; SI-NEXT: v_and_b32_e32 v56, 0xffff, v35 +; SI-NEXT: v_lshrrev_b32_e32 v35, 16, v34 +; SI-NEXT: v_and_b32_e32 v34, 0xffff, v34 +; SI-NEXT: v_lshrrev_b32_e32 v59, 16, v37 +; SI-NEXT: v_and_b32_e32 v58, 0xffff, v37 +; SI-NEXT: v_lshrrev_b32_e32 v37, 16, v36 +; SI-NEXT: v_and_b32_e32 v36, 0xffff, v36 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_lshrrev_b32_e32 v61, 16, v39 +; SI-NEXT: v_and_b32_e32 v60, 0xffff, v39 +; SI-NEXT: v_lshrrev_b32_e32 v39, 16, v38 +; SI-NEXT: v_and_b32_e32 v38, 0xffff, v38 +; SI-NEXT: v_lshrrev_b32_e32 v63, 16, v41 +; SI-NEXT: v_and_b32_e32 v62, 0xffff, v41 +; SI-NEXT: v_lshrrev_b32_e32 v41, 16, v40 +; SI-NEXT: v_and_b32_e32 v40, 0xffff, v40 +; SI-NEXT: v_mov_b32_e32 v0, s0 +; SI-NEXT: ds_write2_b64 v0, v[40:41], v[62:63] offset0:14 offset1:15 +; SI-NEXT: ds_write2_b64 v0, v[38:39], v[60:61] offset0:12 offset1:13 +; SI-NEXT: 
ds_write2_b64 v0, v[36:37], v[58:59] offset0:10 offset1:11 +; SI-NEXT: ds_write2_b64 v0, v[34:35], v[56:57] offset0:8 offset1:9 +; SI-NEXT: ds_write2_b64 v0, v[18:19], v[54:55] offset0:6 offset1:7 +; SI-NEXT: ds_write2_b64 v0, v[16:17], v[52:53] offset0:4 offset1:5 +; SI-NEXT: ds_write2_b64 v0, v[22:23], v[50:51] offset0:2 offset1:3 +; SI-NEXT: ds_write2_b64 v0, v[20:21], v[48:49] offset1:1 +; SI-NEXT: ds_write2_b64 v0, v[10:11], v[46:47] offset0:30 offset1:31 +; SI-NEXT: ds_write2_b64 v0, v[8:9], v[44:45] offset0:28 offset1:29 +; SI-NEXT: ds_write2_b64 v0, v[14:15], v[42:43] offset0:26 offset1:27 +; SI-NEXT: ds_write2_b64 v0, v[12:13], v[6:7] offset0:24 offset1:25 +; SI-NEXT: ds_write2_b64 v0, v[4:5], v[2:3] offset0:22 offset1:23 +; SI-NEXT: ds_write2_b64 v0, v[24:25], v[32:33] offset0:20 offset1:21 +; SI-NEXT: ds_write2_b64 v0, v[30:31], v[28:29] offset0:18 offset1:19 +; SI-NEXT: buffer_load_dword v1, off, s[12:15], 0 ; 4-byte Folded Reload +; SI-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: ds_write2_b64 v0, v[26:27], v[1:2] offset0:16 offset1:17 +; SI-NEXT: s_endpgm +; +; VI-NO-DS128-LABEL: local_zextload_v64i16_to_v64i32: +; VI-NO-DS128: ; %bb.0: +; VI-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-NO-DS128-NEXT: s_mov_b32 s88, SCRATCH_RSRC_DWORD0 +; VI-NO-DS128-NEXT: s_mov_b32 m0, -1 +; VI-NO-DS128-NEXT: s_mov_b32 s89, SCRATCH_RSRC_DWORD1 +; VI-NO-DS128-NEXT: s_mov_b32 s90, -1 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-NO-DS128-NEXT: v_mov_b32_e32 v16, s1 +; VI-NO-DS128-NEXT: ds_read2_b64 v[10:13], v16 offset1:1 +; VI-NO-DS128-NEXT: ds_read2_b64 v[17:20], v16 offset0:2 offset1:3 +; VI-NO-DS128-NEXT: s_mov_b32 s91, 0xe80000 +; VI-NO-DS128-NEXT: s_add_u32 s88, s88, s11 +; VI-NO-DS128-NEXT: s_addc_u32 s89, s89, 0 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(1) +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v11 +; VI-NO-DS128-NEXT: v_and_b32_e32 v0, 0xffff, v11 +; VI-NO-DS128-NEXT: 
buffer_store_dword v0, off, s[88:91], 0 ; 4-byte Folded Spill +; VI-NO-DS128-NEXT: buffer_store_dword v1, off, s[88:91], 0 offset:4 ; 4-byte Folded Spill +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v3, 16, v10 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v5, 16, v13 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v12 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v9, 16, v18 +; VI-NO-DS128-NEXT: v_and_b32_e32 v2, 0xffff, v10 +; VI-NO-DS128-NEXT: v_and_b32_e32 v4, 0xffff, v13 +; VI-NO-DS128-NEXT: v_and_b32_e32 v6, 0xffff, v12 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v11, 16, v17 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v13, 16, v20 +; VI-NO-DS128-NEXT: v_and_b32_e32 v8, 0xffff, v18 +; VI-NO-DS128-NEXT: v_and_b32_e32 v10, 0xffff, v17 +; VI-NO-DS128-NEXT: ds_read2_b64 v[21:24], v16 offset0:4 offset1:5 +; VI-NO-DS128-NEXT: v_and_b32_e32 v12, 0xffff, v20 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v15, 16, v19 +; VI-NO-DS128-NEXT: v_and_b32_e32 v14, 0xffff, v19 +; VI-NO-DS128-NEXT: ds_read2_b64 v[17:20], v16 offset0:6 offset1:7 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(1) +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v26, 16, v22 +; VI-NO-DS128-NEXT: v_and_b32_e32 v25, 0xffff, v22 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v28, 16, v21 +; VI-NO-DS128-NEXT: v_and_b32_e32 v27, 0xffff, v21 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v30, 16, v24 +; VI-NO-DS128-NEXT: v_and_b32_e32 v29, 0xffff, v24 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v32, 16, v23 +; VI-NO-DS128-NEXT: v_and_b32_e32 v31, 0xffff, v23 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v34, 16, v18 +; VI-NO-DS128-NEXT: v_and_b32_e32 v33, 0xffff, v18 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v36, 16, v17 +; VI-NO-DS128-NEXT: v_and_b32_e32 v35, 0xffff, v17 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v38, 16, v20 +; VI-NO-DS128-NEXT: ds_read2_b64 v[21:24], v16 offset0:8 offset1:9 +; VI-NO-DS128-NEXT: v_and_b32_e32 v37, 0xffff, v20 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v40, 16, v19 +; 
VI-NO-DS128-NEXT: v_and_b32_e32 v39, 0xffff, v19 +; VI-NO-DS128-NEXT: ds_read2_b64 v[17:20], v16 offset0:10 offset1:11 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(1) +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v42, 16, v22 +; VI-NO-DS128-NEXT: v_and_b32_e32 v41, 0xffff, v22 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v44, 16, v21 +; VI-NO-DS128-NEXT: v_and_b32_e32 v43, 0xffff, v21 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v46, 16, v24 +; VI-NO-DS128-NEXT: v_and_b32_e32 v45, 0xffff, v24 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v48, 16, v23 +; VI-NO-DS128-NEXT: v_and_b32_e32 v47, 0xffff, v23 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v50, 16, v18 +; VI-NO-DS128-NEXT: v_and_b32_e32 v49, 0xffff, v18 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v52, 16, v17 +; VI-NO-DS128-NEXT: v_and_b32_e32 v51, 0xffff, v17 +; VI-NO-DS128-NEXT: ds_read2_b64 v[21:24], v16 offset0:12 offset1:13 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v56, 16, v19 +; VI-NO-DS128-NEXT: v_and_b32_e32 v55, 0xffff, v19 +; VI-NO-DS128-NEXT: ds_read2_b64 v[16:19], v16 offset0:14 offset1:15 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v54, 16, v20 +; VI-NO-DS128-NEXT: v_and_b32_e32 v53, 0xffff, v20 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(1) +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v58, 16, v22 +; VI-NO-DS128-NEXT: v_and_b32_e32 v57, 0xffff, v22 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v20, 16, v19 +; VI-NO-DS128-NEXT: v_and_b32_e32 v19, 0xffff, v19 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v18 +; VI-NO-DS128-NEXT: v_and_b32_e32 v0, 0xffff, v18 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v18, s0 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v22, 16, v21 +; VI-NO-DS128-NEXT: v_and_b32_e32 v21, 0xffff, v21 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v60, 16, v24 +; VI-NO-DS128-NEXT: v_and_b32_e32 v59, 0xffff, v24 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v24, 16, v23 +; VI-NO-DS128-NEXT: v_and_b32_e32 v23, 0xffff, v23 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v62, 16, v17 +; VI-NO-DS128-NEXT: 
v_and_b32_e32 v61, 0xffff, v17 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v17, 16, v16 +; VI-NO-DS128-NEXT: v_and_b32_e32 v16, 0xffff, v16 +; VI-NO-DS128-NEXT: ds_write2_b64 v18, v[0:1], v[19:20] offset0:30 offset1:31 +; VI-NO-DS128-NEXT: ds_write2_b64 v18, v[16:17], v[61:62] offset0:28 offset1:29 +; VI-NO-DS128-NEXT: ds_write2_b64 v18, v[23:24], v[59:60] offset0:26 offset1:27 +; VI-NO-DS128-NEXT: ds_write2_b64 v18, v[21:22], v[57:58] offset0:24 offset1:25 +; VI-NO-DS128-NEXT: ds_write2_b64 v18, v[55:56], v[53:54] offset0:22 offset1:23 +; VI-NO-DS128-NEXT: ds_write2_b64 v18, v[51:52], v[49:50] offset0:20 offset1:21 +; VI-NO-DS128-NEXT: ds_write2_b64 v18, v[47:48], v[45:46] offset0:18 offset1:19 +; VI-NO-DS128-NEXT: ds_write2_b64 v18, v[43:44], v[41:42] offset0:16 offset1:17 +; VI-NO-DS128-NEXT: ds_write2_b64 v18, v[39:40], v[37:38] offset0:14 offset1:15 +; VI-NO-DS128-NEXT: ds_write2_b64 v18, v[35:36], v[33:34] offset0:12 offset1:13 +; VI-NO-DS128-NEXT: ds_write2_b64 v18, v[31:32], v[29:30] offset0:10 offset1:11 +; VI-NO-DS128-NEXT: ds_write2_b64 v18, v[27:28], v[25:26] offset0:8 offset1:9 +; VI-NO-DS128-NEXT: ds_write2_b64 v18, v[14:15], v[12:13] offset0:6 offset1:7 +; VI-NO-DS128-NEXT: ds_write2_b64 v18, v[10:11], v[8:9] offset0:4 offset1:5 +; VI-NO-DS128-NEXT: ds_write2_b64 v18, v[6:7], v[4:5] offset0:2 offset1:3 +; VI-NO-DS128-NEXT: buffer_load_dword v0, off, s[88:91], 0 ; 4-byte Folded Reload +; VI-NO-DS128-NEXT: buffer_load_dword v1, off, s[88:91], 0 offset:4 ; 4-byte Folded Reload +; VI-NO-DS128-NEXT: s_waitcnt vmcnt(0) +; VI-NO-DS128-NEXT: ds_write2_b64 v18, v[2:3], v[0:1] offset1:1 +; VI-NO-DS128-NEXT: s_endpgm +; +; GFX9-NO-DS128-LABEL: local_zextload_v64i16_to_v64i32: +; GFX9-NO-DS128: ; %bb.0: +; GFX9-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-NO-DS128-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 +; GFX9-NO-DS128-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 +; GFX9-NO-DS128-NEXT: s_mov_b32 s14, -1 +; GFX9-NO-DS128-NEXT: s_mov_b32 s15, 0xe00000 +; 
GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v56, s1 +; GFX9-NO-DS128-NEXT: ds_read2_b64 v[10:13], v56 offset1:1 +; GFX9-NO-DS128-NEXT: ds_read2_b64 v[14:17], v56 offset0:2 offset1:3 +; GFX9-NO-DS128-NEXT: s_add_u32 s12, s12, s11 +; GFX9-NO-DS128-NEXT: s_addc_u32 s13, s13, 0 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(1) +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v11 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v0, 0xffff, v11 +; GFX9-NO-DS128-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill +; GFX9-NO-DS128-NEXT: s_nop 0 +; GFX9-NO-DS128-NEXT: buffer_store_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill +; GFX9-NO-DS128-NEXT: ds_read2_b64 v[18:21], v56 offset0:4 offset1:5 +; GFX9-NO-DS128-NEXT: ds_read2_b64 v[22:25], v56 offset0:6 offset1:7 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v3, 16, v10 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v5, 16, v13 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v12 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(2) +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v9, 16, v15 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v2, 0xffff, v10 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v4, 0xffff, v13 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v6, 0xffff, v12 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v11, 16, v14 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v13, 16, v17 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v8, 0xffff, v15 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v10, 0xffff, v14 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v12, 0xffff, v17 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v15, 16, v16 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v14, 0xffff, v16 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(1) +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v27, 16, v19 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v26, 0xffff, v19 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v29, 16, v18 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v28, 0xffff, v18 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v31, 16, v21 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v30, 0xffff, v21 +; GFX9-NO-DS128-NEXT: 
v_lshrrev_b32_e32 v33, 16, v20 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v32, 0xffff, v20 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v35, 16, v23 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v34, 0xffff, v23 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v37, 16, v22 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v36, 0xffff, v22 +; GFX9-NO-DS128-NEXT: ds_read2_b64 v[16:19], v56 offset0:8 offset1:9 +; GFX9-NO-DS128-NEXT: ds_read2_b64 v[20:23], v56 offset0:10 offset1:11 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v39, 16, v25 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v38, 0xffff, v25 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(1) +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v41, 16, v17 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v40, 0xffff, v17 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v43, 16, v16 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v42, 0xffff, v16 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v45, 16, v19 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v44, 0xffff, v19 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v47, 16, v18 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v46, 0xffff, v18 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v49, 16, v21 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v48, 0xffff, v21 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v51, 16, v20 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v50, 0xffff, v20 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v53, 16, v23 +; GFX9-NO-DS128-NEXT: ds_read2_b64 v[16:19], v56 offset0:12 offset1:13 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v52, 0xffff, v23 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v55, 16, v22 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v54, 0xffff, v22 +; GFX9-NO-DS128-NEXT: ds_read2_b64 v[20:23], v56 offset0:14 offset1:15 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v25, 16, v24 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v24, 0xffff, v24 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(1) +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v57, 16, v17 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v56, 0xffff, v17 +; 
GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v63, 16, v23 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v62, 0xffff, v23 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v23, 16, v22 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v22, 0xffff, v22 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v17, 16, v16 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v16, 0xffff, v16 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v59, 16, v19 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v58, 0xffff, v19 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v19, 16, v18 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v18, 0xffff, v18 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v61, 16, v21 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v60, 0xffff, v21 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v21, 16, v20 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v20, 0xffff, v20 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v0, v[22:23], v[62:63] offset0:30 offset1:31 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v0, v[20:21], v[60:61] offset0:28 offset1:29 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v0, v[18:19], v[58:59] offset0:26 offset1:27 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v0, v[16:17], v[56:57] offset0:24 offset1:25 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v0, v[54:55], v[52:53] offset0:22 offset1:23 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v0, v[50:51], v[48:49] offset0:20 offset1:21 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v0, v[46:47], v[44:45] offset0:18 offset1:19 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v0, v[42:43], v[40:41] offset0:16 offset1:17 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v0, v[24:25], v[38:39] offset0:14 offset1:15 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v0, v[36:37], v[34:35] offset0:12 offset1:13 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v0, v[32:33], v[30:31] offset0:10 offset1:11 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v0, v[28:29], v[26:27] offset0:8 offset1:9 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v0, v[14:15], v[12:13] offset0:6 offset1:7 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v0, v[10:11], v[8:9] offset0:4 offset1:5 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v0, v[6:7], v[4:5] 
offset0:2 offset1:3 +; GFX9-NO-DS128-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload +; GFX9-NO-DS128-NEXT: buffer_load_dword v5, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload +; GFX9-NO-DS128-NEXT: s_waitcnt vmcnt(0) +; GFX9-NO-DS128-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1 +; GFX9-NO-DS128-NEXT: s_endpgm +; +; EG-LABEL: local_zextload_v64i16_to_v64i32: +; EG: ; %bb.0: +; EG-NEXT: ALU 116, @26, KC0[CB0:0-32], KC1[] +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 112(1.569454e-43), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.Y, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 116(1.625506e-43), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.Z, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 120(1.681558e-43), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.W, OQAP, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Z, literal.x, +; EG-NEXT: 124(1.737610e-43), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T1.W +; EG-NEXT: MOV T1.Y, OQAP, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Z, literal.x, +; EG-NEXT: 96(1.345247e-43), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T1.W +; EG-NEXT: MOV T1.Z, OQAP, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Z, literal.x, +; EG-NEXT: 100(1.401298e-43), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T1.W +; EG-NEXT: MOV T1.W, OQAP, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Z, literal.x, +; EG-NEXT: 104(1.457350e-43), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T2.W +; EG-NEXT: MOV T2.Y, OQAP, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Z, literal.x, +; EG-NEXT: 108(1.513402e-43), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T2.W +; EG-NEXT: MOV T2.Z, OQAP, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Z, literal.x, +; EG-NEXT: 80(1.121039e-43), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T2.W +; EG-NEXT: MOV T2.W, OQAP, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Z, literal.x, +; EG-NEXT: 84(1.177091e-43), 
0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T3.W +; EG-NEXT: MOV T3.Y, OQAP, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Z, literal.x, +; EG-NEXT: 88(1.233143e-43), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T3.W +; EG-NEXT: MOV T3.Z, OQAP, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Z, literal.x, +; EG-NEXT: 92(1.289195e-43), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T3.W +; EG-NEXT: MOV T3.W, OQAP, +; EG-NEXT: ADD_INT * T4.W, KC0[2].Z, literal.x, +; EG-NEXT: 64(8.968310e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T4.W +; EG-NEXT: MOV T4.Y, OQAP, +; EG-NEXT: ADD_INT * T4.W, KC0[2].Z, literal.x, +; EG-NEXT: 68(9.528830e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T4.W +; EG-NEXT: MOV T4.Z, OQAP, +; EG-NEXT: ADD_INT * T4.W, KC0[2].Z, literal.x, +; EG-NEXT: 72(1.008935e-43), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T4.W +; EG-NEXT: MOV T4.W, OQAP, +; EG-NEXT: ADD_INT * T5.W, KC0[2].Z, literal.x, +; EG-NEXT: 76(1.064987e-43), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T5.W +; EG-NEXT: MOV T5.Y, OQAP, +; EG-NEXT: ADD_INT * T5.W, KC0[2].Z, literal.x, +; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T5.W +; EG-NEXT: MOV T5.Z, OQAP, +; EG-NEXT: ADD_INT * T5.W, KC0[2].Z, literal.x, +; EG-NEXT: 52(7.286752e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T5.W +; EG-NEXT: MOV T5.W, OQAP, +; EG-NEXT: ADD_INT * T6.W, KC0[2].Z, literal.x, +; EG-NEXT: 56(7.847271e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T6.W +; EG-NEXT: MOV T6.Y, OQAP, +; EG-NEXT: ADD_INT * T6.W, KC0[2].Z, literal.x, +; EG-NEXT: 60(8.407791e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T6.W +; EG-NEXT: MOV T6.Z, OQAP, +; EG-NEXT: ADD_INT * T6.W, KC0[2].Z, literal.x, +; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T6.W +; EG-NEXT: MOV T6.W, OQAP, +; EG-NEXT: ADD_INT * T7.W, KC0[2].Z, literal.x, +; EG-NEXT: 36(5.044674e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T7.W +; EG-NEXT: 
MOV T7.Y, OQAP, +; EG-NEXT: ADD_INT * T7.W, KC0[2].Z, literal.x, +; EG-NEXT: 40(5.605194e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T7.W +; EG-NEXT: MOV T7.Z, OQAP, +; EG-NEXT: ADD_INT * T7.W, KC0[2].Z, literal.x, +; EG-NEXT: 44(6.165713e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T7.W +; EG-NEXT: MOV T7.W, OQAP, +; EG-NEXT: ADD_INT * T8.W, KC0[2].Z, literal.x, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T8.W +; EG-NEXT: MOV T8.Y, OQAP, +; EG-NEXT: ADD_INT * T8.W, KC0[2].Z, literal.x, +; EG-NEXT: 20(2.802597e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T8.W +; EG-NEXT: MOV T8.Z, OQAP, +; EG-NEXT: ADD_INT * T8.W, KC0[2].Z, literal.x, +; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T8.W +; EG-NEXT: MOV T8.W, OQAP, +; EG-NEXT: ADD_INT * T9.W, KC0[2].Z, literal.x, +; EG-NEXT: 28(3.923636e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T9.W +; EG-NEXT: MOV T9.Y, OQAP, +; EG-NEXT: MOV * T9.W, KC0[2].Z, +; EG-NEXT: LDS_READ_RET * OQAP, T9.W +; EG-NEXT: MOV T9.Z, OQAP, +; EG-NEXT: ADD_INT * T9.W, KC0[2].Z, literal.x, +; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) +; EG-NEXT: ALU 95, @27, KC0[CB0:0-32], KC1[] +; EG-NEXT: LDS_READ_RET * OQAP, T9.W +; EG-NEXT: MOV T9.W, OQAP, +; EG-NEXT: ADD_INT * T10.W, KC0[2].Z, literal.x, +; EG-NEXT: 12(1.681558e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T10.W +; EG-NEXT: MOV T10.Y, OQAP, +; EG-NEXT: ADD_INT * T10.W, KC0[2].Z, literal.x, +; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T10.W +; EG-NEXT: MOV T10.Z, OQAP, +; EG-NEXT: LSHR T10.W, T10.Y, literal.x, +; EG-NEXT: ADD_INT * T11.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 28(3.923636e-44) +; EG-NEXT: LDS_WRITE * T11.W, T10.W, +; EG-NEXT: AND_INT T10.W, T10.Y, literal.x, +; EG-NEXT: ADD_INT * T11.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 24(3.363116e-44) +; EG-NEXT: LDS_WRITE * T11.W, T10.W, +; EG-NEXT: LSHR T10.W, 
T10.Z, literal.x, +; EG-NEXT: ADD_INT * T11.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 20(2.802597e-44) +; EG-NEXT: LDS_WRITE * T11.W, T10.W, +; EG-NEXT: AND_INT T10.W, T10.Z, literal.x, +; EG-NEXT: ADD_INT * T11.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) +; EG-NEXT: LDS_WRITE * T11.W, T10.W, +; EG-NEXT: LSHR T10.W, T9.W, literal.x, +; EG-NEXT: ADD_INT * T11.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 12(1.681558e-44) +; EG-NEXT: LDS_WRITE * T11.W, T10.W, +; EG-NEXT: AND_INT T9.W, T9.W, literal.x, +; EG-NEXT: ADD_INT * T10.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 8(1.121039e-44) +; EG-NEXT: LDS_WRITE * T10.W, T9.W, +; EG-NEXT: LSHR T9.W, T9.Z, literal.x, +; EG-NEXT: ADD_INT * T10.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 4(5.605194e-45) +; EG-NEXT: LDS_WRITE * T10.W, T9.W, +; EG-NEXT: AND_INT T9.W, T9.Z, literal.x, +; EG-NEXT: MOV * T10.W, KC0[2].Y, +; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T10.W, T9.W, +; EG-NEXT: LSHR T9.W, T9.Y, literal.x, +; EG-NEXT: ADD_INT * T10.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 60(8.407791e-44) +; EG-NEXT: LDS_WRITE * T10.W, T9.W, +; EG-NEXT: AND_INT T9.W, T9.Y, literal.x, +; EG-NEXT: ADD_INT * T10.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 56(7.847271e-44) +; EG-NEXT: LDS_WRITE * T10.W, T9.W, +; EG-NEXT: LSHR T9.W, T8.W, literal.x, +; EG-NEXT: ADD_INT * T10.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 52(7.286752e-44) +; EG-NEXT: LDS_WRITE * T10.W, T9.W, +; EG-NEXT: AND_INT T8.W, T8.W, literal.x, +; EG-NEXT: ADD_INT * T9.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 48(6.726233e-44) +; EG-NEXT: LDS_WRITE * T9.W, T8.W, +; EG-NEXT: LSHR T8.W, T8.Z, literal.x, +; EG-NEXT: ADD_INT * T9.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 44(6.165713e-44) +; EG-NEXT: LDS_WRITE * T9.W, T8.W, +; EG-NEXT: AND_INT T8.W, T8.Z, literal.x, +; EG-NEXT: ADD_INT * T9.W, KC0[2].Y, literal.y, 
+; EG-NEXT: 65535(9.183409e-41), 40(5.605194e-44) +; EG-NEXT: LDS_WRITE * T9.W, T8.W, +; EG-NEXT: LSHR T8.W, T8.Y, literal.x, +; EG-NEXT: ADD_INT * T9.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 36(5.044674e-44) +; EG-NEXT: LDS_WRITE * T9.W, T8.W, +; EG-NEXT: AND_INT T8.W, T8.Y, literal.x, +; EG-NEXT: ADD_INT * T9.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 32(4.484155e-44) +; EG-NEXT: LDS_WRITE * T9.W, T8.W, +; EG-NEXT: LSHR T8.W, T7.W, literal.x, +; EG-NEXT: ADD_INT * T9.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 92(1.289195e-43) +; EG-NEXT: LDS_WRITE * T9.W, T8.W, +; EG-NEXT: AND_INT T7.W, T7.W, literal.x, +; EG-NEXT: ADD_INT * T8.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 88(1.233143e-43) +; EG-NEXT: LDS_WRITE * T8.W, T7.W, +; EG-NEXT: LSHR T7.W, T7.Z, literal.x, +; EG-NEXT: ADD_INT * T8.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 84(1.177091e-43) +; EG-NEXT: LDS_WRITE * T8.W, T7.W, +; EG-NEXT: AND_INT T7.W, T7.Z, literal.x, +; EG-NEXT: ADD_INT * T8.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 80(1.121039e-43) +; EG-NEXT: LDS_WRITE * T8.W, T7.W, +; EG-NEXT: LSHR T7.W, T7.Y, literal.x, +; EG-NEXT: ADD_INT * T8.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 76(1.064987e-43) +; EG-NEXT: LDS_WRITE * T8.W, T7.W, +; EG-NEXT: AND_INT * T7.W, T7.Y, literal.x, +; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) +; EG-NEXT: ALU 93, @28, KC0[CB0:0-32], KC1[] +; EG-NEXT: ADD_INT * T8.W, KC0[2].Y, literal.x, +; EG-NEXT: 72(1.008935e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T8.W, T7.W, +; EG-NEXT: LSHR T7.W, T6.W, literal.x, +; EG-NEXT: ADD_INT * T8.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 68(9.528830e-44) +; EG-NEXT: LDS_WRITE * T8.W, T7.W, +; EG-NEXT: AND_INT T6.W, T6.W, literal.x, +; EG-NEXT: ADD_INT * T7.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 64(8.968310e-44) +; EG-NEXT: LDS_WRITE * T7.W, T6.W, +; EG-NEXT: LSHR T6.W, T6.Z, literal.x, +; EG-NEXT: ADD_INT * T7.W, 
KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 124(1.737610e-43) +; EG-NEXT: LDS_WRITE * T7.W, T6.W, +; EG-NEXT: AND_INT T6.W, T6.Z, literal.x, +; EG-NEXT: ADD_INT * T7.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 120(1.681558e-43) +; EG-NEXT: LDS_WRITE * T7.W, T6.W, +; EG-NEXT: LSHR T6.W, T6.Y, literal.x, +; EG-NEXT: ADD_INT * T7.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 116(1.625506e-43) +; EG-NEXT: LDS_WRITE * T7.W, T6.W, +; EG-NEXT: AND_INT T6.W, T6.Y, literal.x, +; EG-NEXT: ADD_INT * T7.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 112(1.569454e-43) +; EG-NEXT: LDS_WRITE * T7.W, T6.W, +; EG-NEXT: LSHR T6.W, T5.W, literal.x, +; EG-NEXT: ADD_INT * T7.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 108(1.513402e-43) +; EG-NEXT: LDS_WRITE * T7.W, T6.W, +; EG-NEXT: AND_INT T5.W, T5.W, literal.x, +; EG-NEXT: ADD_INT * T6.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 104(1.457350e-43) +; EG-NEXT: LDS_WRITE * T6.W, T5.W, +; EG-NEXT: LSHR T5.W, T5.Z, literal.x, +; EG-NEXT: ADD_INT * T6.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 100(1.401298e-43) +; EG-NEXT: LDS_WRITE * T6.W, T5.W, +; EG-NEXT: AND_INT T5.W, T5.Z, literal.x, +; EG-NEXT: ADD_INT * T6.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 96(1.345247e-43) +; EG-NEXT: LDS_WRITE * T6.W, T5.W, +; EG-NEXT: LSHR T5.W, T5.Y, literal.x, +; EG-NEXT: ADD_INT * T6.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 156(2.186026e-43) +; EG-NEXT: LDS_WRITE * T6.W, T5.W, +; EG-NEXT: AND_INT T5.W, T5.Y, literal.x, +; EG-NEXT: ADD_INT * T6.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 152(2.129974e-43) +; EG-NEXT: LDS_WRITE * T6.W, T5.W, +; EG-NEXT: LSHR T5.W, T4.W, literal.x, +; EG-NEXT: ADD_INT * T6.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 148(2.073922e-43) +; EG-NEXT: LDS_WRITE * T6.W, T5.W, +; EG-NEXT: AND_INT T4.W, T4.W, literal.x, +; EG-NEXT: ADD_INT * T5.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 
144(2.017870e-43) +; EG-NEXT: LDS_WRITE * T5.W, T4.W, +; EG-NEXT: LSHR T4.W, T4.Z, literal.x, +; EG-NEXT: ADD_INT * T5.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 140(1.961818e-43) +; EG-NEXT: LDS_WRITE * T5.W, T4.W, +; EG-NEXT: AND_INT T4.W, T4.Z, literal.x, +; EG-NEXT: ADD_INT * T5.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 136(1.905766e-43) +; EG-NEXT: LDS_WRITE * T5.W, T4.W, +; EG-NEXT: LSHR T4.W, T4.Y, literal.x, +; EG-NEXT: ADD_INT * T5.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 132(1.849714e-43) +; EG-NEXT: LDS_WRITE * T5.W, T4.W, +; EG-NEXT: AND_INT T4.W, T4.Y, literal.x, +; EG-NEXT: ADD_INT * T5.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 128(1.793662e-43) +; EG-NEXT: LDS_WRITE * T5.W, T4.W, +; EG-NEXT: LSHR T4.W, T3.W, literal.x, +; EG-NEXT: ADD_INT * T5.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 188(2.634441e-43) +; EG-NEXT: LDS_WRITE * T5.W, T4.W, +; EG-NEXT: AND_INT T3.W, T3.W, literal.x, +; EG-NEXT: ADD_INT * T4.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 184(2.578389e-43) +; EG-NEXT: LDS_WRITE * T4.W, T3.W, +; EG-NEXT: LSHR T3.W, T3.Z, literal.x, +; EG-NEXT: ADD_INT * T4.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 180(2.522337e-43) +; EG-NEXT: LDS_WRITE * T4.W, T3.W, +; EG-NEXT: AND_INT T3.W, T3.Z, literal.x, +; EG-NEXT: ADD_INT * T4.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 176(2.466285e-43) +; EG-NEXT: LDS_WRITE * T4.W, T3.W, +; EG-NEXT: LSHR T3.W, T3.Y, literal.x, +; EG-NEXT: ADD_INT * T4.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 172(2.410233e-43) +; EG-NEXT: ALU 76, @29, KC0[CB0:0-32], KC1[] +; EG-NEXT: LDS_WRITE * T4.W, T3.W, +; EG-NEXT: AND_INT T3.W, T3.Y, literal.x, +; EG-NEXT: ADD_INT * T4.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 168(2.354181e-43) +; EG-NEXT: LDS_WRITE * T4.W, T3.W, +; EG-NEXT: LSHR T3.W, T2.W, literal.x, +; EG-NEXT: ADD_INT * T4.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 164(2.298129e-43) +; 
EG-NEXT: LDS_WRITE * T4.W, T3.W, +; EG-NEXT: AND_INT T2.W, T2.W, literal.x, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 160(2.242078e-43) +; EG-NEXT: LDS_WRITE * T3.W, T2.W, +; EG-NEXT: LSHR T2.W, T2.Z, literal.x, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 220(3.082857e-43) +; EG-NEXT: LDS_WRITE * T3.W, T2.W, +; EG-NEXT: AND_INT T2.W, T2.Z, literal.x, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 216(3.026805e-43) +; EG-NEXT: LDS_WRITE * T3.W, T2.W, +; EG-NEXT: LSHR T2.W, T2.Y, literal.x, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 212(2.970753e-43) +; EG-NEXT: LDS_WRITE * T3.W, T2.W, +; EG-NEXT: AND_INT T2.W, T2.Y, literal.x, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 208(2.914701e-43) +; EG-NEXT: LDS_WRITE * T3.W, T2.W, +; EG-NEXT: LSHR T2.W, T1.W, literal.x, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 204(2.858649e-43) +; EG-NEXT: LDS_WRITE * T3.W, T2.W, +; EG-NEXT: AND_INT T1.W, T1.W, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 200(2.802597e-43) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: LSHR T1.W, T1.Z, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 196(2.746545e-43) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: AND_INT T1.W, T1.Z, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 192(2.690493e-43) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: LSHR T1.W, T1.Y, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 252(3.531272e-43) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: AND_INT T1.W, T1.Y, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 248(3.475220e-43) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: LSHR T1.W, 
T0.W, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 244(3.419168e-43) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: AND_INT T0.W, T0.W, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 240(3.363116e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: LSHR T0.W, T0.Z, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 236(3.307064e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: AND_INT T0.W, T0.Z, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 232(3.251012e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: LSHR T0.W, T0.Y, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 228(3.194960e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: AND_INT T0.W, T0.Y, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 224(3.138909e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: RETURN +; +; VI-DS128-LABEL: local_zextload_v64i16_to_v64i32: +; VI-DS128: ; %bb.0: +; VI-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-DS128-NEXT: s_mov_b32 m0, -1 +; VI-DS128-NEXT: s_mov_b32 s88, SCRATCH_RSRC_DWORD0 +; VI-DS128-NEXT: s_mov_b32 s89, SCRATCH_RSRC_DWORD1 +; VI-DS128-NEXT: s_mov_b32 s90, -1 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-DS128-NEXT: v_mov_b32_e32 v0, s1 +; VI-DS128-NEXT: ds_read_b128 v[8:11], v0 +; VI-DS128-NEXT: ds_read_b128 v[16:19], v0 offset:16 +; VI-DS128-NEXT: s_mov_b32 s91, 0xe80000 +; VI-DS128-NEXT: s_add_u32 s88, s88, s11 +; VI-DS128-NEXT: s_addc_u32 s89, s89, 0 +; VI-DS128-NEXT: ds_read_b128 v[20:23], v0 offset:32 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(1) +; VI-DS128-NEXT: v_lshrrev_b32_e32 v4, 16, v19 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v18 +; VI-DS128-NEXT: v_and_b32_e32 v3, 0xffff, v19 +; VI-DS128-NEXT: v_and_b32_e32 v1, 0xffff, v18 +; VI-DS128-NEXT: buffer_store_dword v1, off, 
s[88:91], 0 ; 4-byte Folded Spill +; VI-DS128-NEXT: buffer_store_dword v2, off, s[88:91], 0 offset:4 ; 4-byte Folded Spill +; VI-DS128-NEXT: buffer_store_dword v3, off, s[88:91], 0 offset:8 ; 4-byte Folded Spill +; VI-DS128-NEXT: buffer_store_dword v4, off, s[88:91], 0 offset:12 ; 4-byte Folded Spill +; VI-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v17 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v5, 16, v16 +; VI-DS128-NEXT: v_and_b32_e32 v6, 0xffff, v17 +; VI-DS128-NEXT: v_and_b32_e32 v4, 0xffff, v16 +; VI-DS128-NEXT: buffer_store_dword v4, off, s[88:91], 0 offset:16 ; 4-byte Folded Spill +; VI-DS128-NEXT: buffer_store_dword v5, off, s[88:91], 0 offset:20 ; 4-byte Folded Spill +; VI-DS128-NEXT: buffer_store_dword v6, off, s[88:91], 0 offset:24 ; 4-byte Folded Spill +; VI-DS128-NEXT: buffer_store_dword v7, off, s[88:91], 0 offset:28 ; 4-byte Folded Spill +; VI-DS128-NEXT: ds_read_b128 v[24:27], v0 offset:48 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(1) +; VI-DS128-NEXT: v_lshrrev_b32_e32 v4, 16, v23 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v22 +; VI-DS128-NEXT: v_and_b32_e32 v3, 0xffff, v23 +; VI-DS128-NEXT: v_and_b32_e32 v1, 0xffff, v22 +; VI-DS128-NEXT: buffer_store_dword v1, off, s[88:91], 0 offset:32 ; 4-byte Folded Spill +; VI-DS128-NEXT: buffer_store_dword v2, off, s[88:91], 0 offset:36 ; 4-byte Folded Spill +; VI-DS128-NEXT: buffer_store_dword v3, off, s[88:91], 0 offset:40 ; 4-byte Folded Spill +; VI-DS128-NEXT: buffer_store_dword v4, off, s[88:91], 0 offset:44 ; 4-byte Folded Spill +; VI-DS128-NEXT: v_lshrrev_b32_e32 v19, 16, v21 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v17, 16, v20 +; VI-DS128-NEXT: v_and_b32_e32 v18, 0xffff, v21 +; VI-DS128-NEXT: v_and_b32_e32 v16, 0xffff, v20 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-DS128-NEXT: v_lshrrev_b32_e32 v23, 16, v27 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v21, 16, v26 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v35, 16, v25 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v33, 16, v24 +; VI-DS128-NEXT: v_and_b32_e32 v22, 0xffff, v27 +; VI-DS128-NEXT: 
ds_read_b128 v[36:39], v0 offset:64 +; VI-DS128-NEXT: v_and_b32_e32 v20, 0xffff, v26 +; VI-DS128-NEXT: v_and_b32_e32 v34, 0xffff, v25 +; VI-DS128-NEXT: v_and_b32_e32 v32, 0xffff, v24 +; VI-DS128-NEXT: ds_read_b128 v[24:27], v0 offset:80 +; VI-DS128-NEXT: ds_read_b128 v[55:58], v0 offset:96 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v15, 16, v11 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v29, 16, v10 +; VI-DS128-NEXT: v_mov_b32_e32 v31, v15 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(1) +; VI-DS128-NEXT: v_lshrrev_b32_e32 v50, 16, v27 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v48, 16, v26 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v54, 16, v25 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v52, 16, v24 +; VI-DS128-NEXT: v_and_b32_e32 v49, 0xffff, v27 +; VI-DS128-NEXT: v_and_b32_e32 v47, 0xffff, v26 +; VI-DS128-NEXT: v_and_b32_e32 v53, 0xffff, v25 +; VI-DS128-NEXT: v_and_b32_e32 v51, 0xffff, v24 +; VI-DS128-NEXT: ds_read_b128 v[24:27], v0 offset:112 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v15, 16, v9 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v13, 16, v8 +; VI-DS128-NEXT: v_and_b32_e32 v30, 0xffff, v11 +; VI-DS128-NEXT: v_and_b32_e32 v28, 0xffff, v10 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-DS128-NEXT: v_lshrrev_b32_e32 v3, 16, v25 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v24 +; VI-DS128-NEXT: v_and_b32_e32 v2, 0xffff, v25 +; VI-DS128-NEXT: v_and_b32_e32 v0, 0xffff, v24 +; VI-DS128-NEXT: v_mov_b32_e32 v24, s0 +; VI-DS128-NEXT: v_and_b32_e32 v14, 0xffff, v9 +; VI-DS128-NEXT: v_and_b32_e32 v12, 0xffff, v8 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v42, 16, v39 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v40, 16, v38 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v46, 16, v37 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v44, 16, v36 +; VI-DS128-NEXT: v_and_b32_e32 v41, 0xffff, v39 +; VI-DS128-NEXT: v_and_b32_e32 v39, 0xffff, v38 +; VI-DS128-NEXT: v_and_b32_e32 v45, 0xffff, v37 +; VI-DS128-NEXT: v_and_b32_e32 v43, 0xffff, v36 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v61, 16, v58 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v59, 16, v57 +; VI-DS128-NEXT: 
v_lshrrev_b32_e32 v11, 16, v56 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v9, 16, v55 +; VI-DS128-NEXT: v_and_b32_e32 v60, 0xffff, v58 +; VI-DS128-NEXT: v_and_b32_e32 v58, 0xffff, v57 +; VI-DS128-NEXT: v_and_b32_e32 v10, 0xffff, v56 +; VI-DS128-NEXT: v_and_b32_e32 v8, 0xffff, v55 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v27 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v5, 16, v26 +; VI-DS128-NEXT: v_and_b32_e32 v6, 0xffff, v27 +; VI-DS128-NEXT: v_and_b32_e32 v4, 0xffff, v26 +; VI-DS128-NEXT: ds_write_b128 v24, v[0:3] offset:224 +; VI-DS128-NEXT: ds_write_b128 v24, v[4:7] offset:240 +; VI-DS128-NEXT: ds_write_b128 v24, v[8:11] offset:192 +; VI-DS128-NEXT: ds_write_b128 v24, v[58:61] offset:208 +; VI-DS128-NEXT: ds_write_b128 v24, v[51:54] offset:160 +; VI-DS128-NEXT: ds_write_b128 v24, v[47:50] offset:176 +; VI-DS128-NEXT: ds_write_b128 v24, v[43:46] offset:128 +; VI-DS128-NEXT: ds_write_b128 v24, v[39:42] offset:144 +; VI-DS128-NEXT: ds_write_b128 v24, v[32:35] offset:96 +; VI-DS128-NEXT: ds_write_b128 v24, v[20:23] offset:112 +; VI-DS128-NEXT: ds_write_b128 v24, v[16:19] offset:64 +; VI-DS128-NEXT: buffer_load_dword v0, off, s[88:91], 0 offset:32 ; 4-byte Folded Reload +; VI-DS128-NEXT: buffer_load_dword v1, off, s[88:91], 0 offset:36 ; 4-byte Folded Reload +; VI-DS128-NEXT: buffer_load_dword v2, off, s[88:91], 0 offset:40 ; 4-byte Folded Reload +; VI-DS128-NEXT: buffer_load_dword v3, off, s[88:91], 0 offset:44 ; 4-byte Folded Reload +; VI-DS128-NEXT: s_waitcnt vmcnt(0) +; VI-DS128-NEXT: ds_write_b128 v24, v[0:3] offset:80 +; VI-DS128-NEXT: buffer_load_dword v0, off, s[88:91], 0 offset:16 ; 4-byte Folded Reload +; VI-DS128-NEXT: buffer_load_dword v1, off, s[88:91], 0 offset:20 ; 4-byte Folded Reload +; VI-DS128-NEXT: buffer_load_dword v2, off, s[88:91], 0 offset:24 ; 4-byte Folded Reload +; VI-DS128-NEXT: buffer_load_dword v3, off, s[88:91], 0 offset:28 ; 4-byte Folded Reload +; VI-DS128-NEXT: s_waitcnt vmcnt(0) +; VI-DS128-NEXT: ds_write_b128 v24, v[0:3] offset:32 +; 
VI-DS128-NEXT: buffer_load_dword v0, off, s[88:91], 0 ; 4-byte Folded Reload +; VI-DS128-NEXT: buffer_load_dword v1, off, s[88:91], 0 offset:4 ; 4-byte Folded Reload +; VI-DS128-NEXT: buffer_load_dword v2, off, s[88:91], 0 offset:8 ; 4-byte Folded Reload +; VI-DS128-NEXT: buffer_load_dword v3, off, s[88:91], 0 offset:12 ; 4-byte Folded Reload +; VI-DS128-NEXT: s_waitcnt vmcnt(0) +; VI-DS128-NEXT: ds_write_b128 v24, v[0:3] offset:48 +; VI-DS128-NEXT: ds_write_b128 v24, v[12:15] +; VI-DS128-NEXT: ds_write_b128 v24, v[28:31] offset:16 +; VI-DS128-NEXT: s_endpgm +; +; GFX9-DS128-LABEL: local_zextload_v64i16_to_v64i32: +; GFX9-DS128: ; %bb.0: +; GFX9-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-DS128-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 +; GFX9-DS128-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 +; GFX9-DS128-NEXT: s_mov_b32 s14, -1 +; GFX9-DS128-NEXT: s_mov_b32 s15, 0xe00000 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DS128-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-DS128-NEXT: ds_read_b128 v[8:11], v0 +; GFX9-DS128-NEXT: ds_read_b128 v[16:19], v0 offset:16 +; GFX9-DS128-NEXT: s_add_u32 s12, s12, s11 +; GFX9-DS128-NEXT: s_addc_u32 s13, s13, 0 +; GFX9-DS128-NEXT: ds_read_b128 v[20:23], v0 offset:32 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(2) +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v15, 16, v11 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(1) +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v4, 16, v19 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v18 +; GFX9-DS128-NEXT: v_and_b32_e32 v3, 0xffff, v19 +; GFX9-DS128-NEXT: v_and_b32_e32 v1, 0xffff, v18 +; GFX9-DS128-NEXT: buffer_store_dword v1, off, s[12:15], 0 ; 4-byte Folded Spill +; GFX9-DS128-NEXT: s_nop 0 +; GFX9-DS128-NEXT: buffer_store_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill +; GFX9-DS128-NEXT: buffer_store_dword v3, off, s[12:15], 0 offset:8 ; 4-byte Folded Spill +; GFX9-DS128-NEXT: buffer_store_dword v4, off, s[12:15], 0 offset:12 ; 4-byte Folded Spill +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v17 +; 
GFX9-DS128-NEXT: v_lshrrev_b32_e32 v5, 16, v16 +; GFX9-DS128-NEXT: v_and_b32_e32 v6, 0xffff, v17 +; GFX9-DS128-NEXT: v_and_b32_e32 v4, 0xffff, v16 +; GFX9-DS128-NEXT: buffer_store_dword v4, off, s[12:15], 0 offset:16 ; 4-byte Folded Spill +; GFX9-DS128-NEXT: s_nop 0 +; GFX9-DS128-NEXT: buffer_store_dword v5, off, s[12:15], 0 offset:20 ; 4-byte Folded Spill +; GFX9-DS128-NEXT: buffer_store_dword v6, off, s[12:15], 0 offset:24 ; 4-byte Folded Spill +; GFX9-DS128-NEXT: buffer_store_dword v7, off, s[12:15], 0 offset:28 ; 4-byte Folded Spill +; GFX9-DS128-NEXT: ds_read_b128 v[24:27], v0 offset:48 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(1) +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v4, 16, v23 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v22 +; GFX9-DS128-NEXT: v_and_b32_e32 v3, 0xffff, v23 +; GFX9-DS128-NEXT: v_and_b32_e32 v1, 0xffff, v22 +; GFX9-DS128-NEXT: buffer_store_dword v1, off, s[12:15], 0 offset:32 ; 4-byte Folded Spill +; GFX9-DS128-NEXT: s_nop 0 +; GFX9-DS128-NEXT: buffer_store_dword v2, off, s[12:15], 0 offset:36 ; 4-byte Folded Spill +; GFX9-DS128-NEXT: buffer_store_dword v3, off, s[12:15], 0 offset:40 ; 4-byte Folded Spill +; GFX9-DS128-NEXT: buffer_store_dword v4, off, s[12:15], 0 offset:44 ; 4-byte Folded Spill +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v19, 16, v21 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v17, 16, v20 +; GFX9-DS128-NEXT: v_and_b32_e32 v18, 0xffff, v21 +; GFX9-DS128-NEXT: v_and_b32_e32 v16, 0xffff, v20 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v23, 16, v27 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v21, 16, v26 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v35, 16, v25 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v33, 16, v24 +; GFX9-DS128-NEXT: v_and_b32_e32 v22, 0xffff, v27 +; GFX9-DS128-NEXT: ds_read_b128 v[36:39], v0 offset:64 +; GFX9-DS128-NEXT: v_and_b32_e32 v20, 0xffff, v26 +; GFX9-DS128-NEXT: v_and_b32_e32 v34, 0xffff, v25 +; GFX9-DS128-NEXT: v_and_b32_e32 v32, 0xffff, v24 +; GFX9-DS128-NEXT: ds_read_b128 v[24:27], v0 
offset:80 +; GFX9-DS128-NEXT: ds_read_b128 v[55:58], v0 offset:96 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v29, 16, v10 +; GFX9-DS128-NEXT: v_mov_b32_e32 v31, v15 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v15, 16, v9 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(1) +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v50, 16, v27 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v48, 16, v26 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v54, 16, v25 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v52, 16, v24 +; GFX9-DS128-NEXT: v_and_b32_e32 v49, 0xffff, v27 +; GFX9-DS128-NEXT: v_and_b32_e32 v47, 0xffff, v26 +; GFX9-DS128-NEXT: v_and_b32_e32 v53, 0xffff, v25 +; GFX9-DS128-NEXT: v_and_b32_e32 v51, 0xffff, v24 +; GFX9-DS128-NEXT: ds_read_b128 v[24:27], v0 offset:112 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v13, 16, v8 +; GFX9-DS128-NEXT: v_and_b32_e32 v30, 0xffff, v11 +; GFX9-DS128-NEXT: v_and_b32_e32 v28, 0xffff, v10 +; GFX9-DS128-NEXT: v_and_b32_e32 v14, 0xffff, v9 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v3, 16, v25 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v24 +; GFX9-DS128-NEXT: v_and_b32_e32 v2, 0xffff, v25 +; GFX9-DS128-NEXT: v_and_b32_e32 v0, 0xffff, v24 +; GFX9-DS128-NEXT: v_mov_b32_e32 v24, s0 +; GFX9-DS128-NEXT: v_and_b32_e32 v12, 0xffff, v8 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v42, 16, v39 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v40, 16, v38 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v46, 16, v37 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v44, 16, v36 +; GFX9-DS128-NEXT: v_and_b32_e32 v41, 0xffff, v39 +; GFX9-DS128-NEXT: v_and_b32_e32 v39, 0xffff, v38 +; GFX9-DS128-NEXT: v_and_b32_e32 v45, 0xffff, v37 +; GFX9-DS128-NEXT: v_and_b32_e32 v43, 0xffff, v36 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v61, 16, v58 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v59, 16, v57 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v11, 16, v56 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v9, 16, v55 +; GFX9-DS128-NEXT: v_and_b32_e32 v60, 0xffff, v58 +; GFX9-DS128-NEXT: v_and_b32_e32 v58, 0xffff, v57 +; GFX9-DS128-NEXT: v_and_b32_e32 
v10, 0xffff, v56 +; GFX9-DS128-NEXT: v_and_b32_e32 v8, 0xffff, v55 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v27 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v5, 16, v26 +; GFX9-DS128-NEXT: v_and_b32_e32 v6, 0xffff, v27 +; GFX9-DS128-NEXT: v_and_b32_e32 v4, 0xffff, v26 +; GFX9-DS128-NEXT: ds_write_b128 v24, v[0:3] offset:224 +; GFX9-DS128-NEXT: ds_write_b128 v24, v[4:7] offset:240 +; GFX9-DS128-NEXT: ds_write_b128 v24, v[8:11] offset:192 +; GFX9-DS128-NEXT: ds_write_b128 v24, v[58:61] offset:208 +; GFX9-DS128-NEXT: ds_write_b128 v24, v[51:54] offset:160 +; GFX9-DS128-NEXT: ds_write_b128 v24, v[47:50] offset:176 +; GFX9-DS128-NEXT: ds_write_b128 v24, v[43:46] offset:128 +; GFX9-DS128-NEXT: ds_write_b128 v24, v[39:42] offset:144 +; GFX9-DS128-NEXT: ds_write_b128 v24, v[32:35] offset:96 +; GFX9-DS128-NEXT: ds_write_b128 v24, v[20:23] offset:112 +; GFX9-DS128-NEXT: ds_write_b128 v24, v[16:19] offset:64 +; GFX9-DS128-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:32 ; 4-byte Folded Reload +; GFX9-DS128-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:36 ; 4-byte Folded Reload +; GFX9-DS128-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:40 ; 4-byte Folded Reload +; GFX9-DS128-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:44 ; 4-byte Folded Reload +; GFX9-DS128-NEXT: s_waitcnt vmcnt(0) +; GFX9-DS128-NEXT: ds_write_b128 v24, v[0:3] offset:80 +; GFX9-DS128-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:16 ; 4-byte Folded Reload +; GFX9-DS128-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:20 ; 4-byte Folded Reload +; GFX9-DS128-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:24 ; 4-byte Folded Reload +; GFX9-DS128-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:28 ; 4-byte Folded Reload +; GFX9-DS128-NEXT: s_waitcnt vmcnt(0) +; GFX9-DS128-NEXT: ds_write_b128 v24, v[0:3] offset:32 +; GFX9-DS128-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload +; GFX9-DS128-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte 
Folded Reload +; GFX9-DS128-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload +; GFX9-DS128-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload +; GFX9-DS128-NEXT: s_waitcnt vmcnt(0) +; GFX9-DS128-NEXT: ds_write_b128 v24, v[0:3] offset:48 +; GFX9-DS128-NEXT: ds_write_b128 v24, v[12:15] +; GFX9-DS128-NEXT: ds_write_b128 v24, v[28:31] offset:16 +; GFX9-DS128-NEXT: s_endpgm %load = load <64 x i16>, ptr addrspace(3) %in %ext = zext <64 x i16> %load to <64 x i32> store <64 x i32> %ext, ptr addrspace(3) %out ret void } -; FUNC-LABEL: {{^}}local_sextload_v64i16_to_v64i32: -; GFX9-NOT: m0 -; SICIVI: s_mov_b32 m0 - -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET define amdgpu_kernel void @local_sextload_v64i16_to_v64i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 { +; SI-LABEL: local_sextload_v64i16_to_v64i32: +; SI: ; %bb.0: +; SI-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 +; SI-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 +; SI-NEXT: s_mov_b32 s14, -1 +; SI-NEXT: s_mov_b32 s15, 0xe8f000 +; SI-NEXT: s_add_u32 s12, s12, s11 +; SI-NEXT: s_addc_u32 s13, s13, 0 +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v20, s1 +; SI-NEXT: s_mov_b32 m0, -1 +; SI-NEXT: ds_read2_b64 v[4:7], v20 offset0:8 offset1:9 +; SI-NEXT: ds_read2_b64 v[0:3], v20 offset0:10 offset1:11 +; SI-NEXT: ds_read2_b64 
v[8:11], v20 offset0:12 offset1:13 +; SI-NEXT: ds_read2_b64 v[12:15], v20 offset0:14 offset1:15 +; SI-NEXT: ds_read2_b64 v[16:19], v20 offset1:1 +; SI-NEXT: ds_read2_b64 v[30:33], v20 offset0:2 offset1:3 +; SI-NEXT: ds_read2_b64 v[34:37], v20 offset0:4 offset1:5 +; SI-NEXT: ds_read2_b64 v[38:41], v20 offset0:6 offset1:7 +; SI-NEXT: s_waitcnt lgkmcnt(7) +; SI-NEXT: v_ashrrev_i32_e32 v21, 16, v5 +; SI-NEXT: v_ashrrev_i32_e32 v23, 16, v4 +; SI-NEXT: v_ashrrev_i32_e32 v25, 16, v7 +; SI-NEXT: v_ashrrev_i32_e32 v27, 16, v6 +; SI-NEXT: s_waitcnt lgkmcnt(6) +; SI-NEXT: v_ashrrev_i32_e32 v29, 16, v1 +; SI-NEXT: v_bfe_i32 v20, v5, 0, 16 +; SI-NEXT: buffer_store_dword v20, off, s[12:15], 0 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v21, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill +; SI-NEXT: v_bfe_i32 v22, v4, 0, 16 +; SI-NEXT: v_bfe_i32 v24, v7, 0, 16 +; SI-NEXT: v_bfe_i32 v26, v6, 0, 16 +; SI-NEXT: s_waitcnt expcnt(0) +; SI-NEXT: v_ashrrev_i32_e32 v21, 16, v0 +; SI-NEXT: v_ashrrev_i32_e32 v7, 16, v3 +; SI-NEXT: v_bfe_i32 v28, v1, 0, 16 +; SI-NEXT: v_bfe_i32 v20, v0, 0, 16 +; SI-NEXT: v_bfe_i32 v6, v3, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v5, 16, v2 +; SI-NEXT: v_bfe_i32 v4, v2, 0, 16 +; SI-NEXT: s_waitcnt lgkmcnt(5) +; SI-NEXT: v_ashrrev_i32_e32 v3, 16, v9 +; SI-NEXT: v_bfe_i32 v2, v9, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v9, 16, v8 +; SI-NEXT: v_bfe_i32 v8, v8, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v43, 16, v11 +; SI-NEXT: v_bfe_i32 v42, v11, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v11, 16, v10 +; SI-NEXT: v_bfe_i32 v10, v10, 0, 16 +; SI-NEXT: s_waitcnt lgkmcnt(4) +; SI-NEXT: v_ashrrev_i32_e32 v45, 16, v13 +; SI-NEXT: v_bfe_i32 v44, v13, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v13, 16, v12 +; SI-NEXT: v_bfe_i32 v12, v12, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v47, 16, v15 +; SI-NEXT: v_bfe_i32 v46, v15, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v15, 16, v14 +; SI-NEXT: v_bfe_i32 v14, v14, 0, 16 +; SI-NEXT: s_waitcnt lgkmcnt(3) +; SI-NEXT: v_ashrrev_i32_e32 v49, 16, v17 +; 
SI-NEXT: v_bfe_i32 v48, v17, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v17, 16, v16 +; SI-NEXT: v_bfe_i32 v16, v16, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v51, 16, v19 +; SI-NEXT: v_bfe_i32 v50, v19, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v19, 16, v18 +; SI-NEXT: v_bfe_i32 v18, v18, 0, 16 +; SI-NEXT: s_waitcnt lgkmcnt(2) +; SI-NEXT: v_ashrrev_i32_e32 v53, 16, v31 +; SI-NEXT: v_bfe_i32 v52, v31, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v31, 16, v30 +; SI-NEXT: v_bfe_i32 v30, v30, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v55, 16, v33 +; SI-NEXT: v_bfe_i32 v54, v33, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v33, 16, v32 +; SI-NEXT: v_bfe_i32 v32, v32, 0, 16 +; SI-NEXT: s_waitcnt lgkmcnt(1) +; SI-NEXT: v_ashrrev_i32_e32 v57, 16, v35 +; SI-NEXT: v_bfe_i32 v56, v35, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v35, 16, v34 +; SI-NEXT: v_bfe_i32 v34, v34, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v59, 16, v37 +; SI-NEXT: v_bfe_i32 v58, v37, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v37, 16, v36 +; SI-NEXT: v_bfe_i32 v36, v36, 0, 16 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_ashrrev_i32_e32 v61, 16, v39 +; SI-NEXT: v_bfe_i32 v60, v39, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v39, 16, v38 +; SI-NEXT: v_bfe_i32 v38, v38, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v63, 16, v41 +; SI-NEXT: v_bfe_i32 v62, v41, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v41, 16, v40 +; SI-NEXT: v_bfe_i32 v40, v40, 0, 16 +; SI-NEXT: v_mov_b32_e32 v0, s0 +; SI-NEXT: ds_write2_b64 v0, v[40:41], v[62:63] offset0:14 offset1:15 +; SI-NEXT: ds_write2_b64 v0, v[38:39], v[60:61] offset0:12 offset1:13 +; SI-NEXT: ds_write2_b64 v0, v[36:37], v[58:59] offset0:10 offset1:11 +; SI-NEXT: ds_write2_b64 v0, v[34:35], v[56:57] offset0:8 offset1:9 +; SI-NEXT: ds_write2_b64 v0, v[32:33], v[54:55] offset0:6 offset1:7 +; SI-NEXT: ds_write2_b64 v0, v[30:31], v[52:53] offset0:4 offset1:5 +; SI-NEXT: ds_write2_b64 v0, v[18:19], v[50:51] offset0:2 offset1:3 +; SI-NEXT: ds_write2_b64 v0, v[16:17], v[48:49] offset1:1 +; SI-NEXT: ds_write2_b64 v0, v[14:15], v[46:47] offset0:30 
offset1:31 +; SI-NEXT: ds_write2_b64 v0, v[12:13], v[44:45] offset0:28 offset1:29 +; SI-NEXT: ds_write2_b64 v0, v[10:11], v[42:43] offset0:26 offset1:27 +; SI-NEXT: ds_write2_b64 v0, v[8:9], v[2:3] offset0:24 offset1:25 +; SI-NEXT: ds_write2_b64 v0, v[4:5], v[6:7] offset0:22 offset1:23 +; SI-NEXT: ds_write2_b64 v0, v[20:21], v[28:29] offset0:20 offset1:21 +; SI-NEXT: ds_write2_b64 v0, v[26:27], v[24:25] offset0:18 offset1:19 +; SI-NEXT: buffer_load_dword v1, off, s[12:15], 0 ; 4-byte Folded Reload +; SI-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: ds_write2_b64 v0, v[22:23], v[1:2] offset0:16 offset1:17 +; SI-NEXT: s_endpgm +; +; VI-NO-DS128-LABEL: local_sextload_v64i16_to_v64i32: +; VI-NO-DS128: ; %bb.0: +; VI-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-NO-DS128-NEXT: s_mov_b32 s88, SCRATCH_RSRC_DWORD0 +; VI-NO-DS128-NEXT: s_mov_b32 m0, -1 +; VI-NO-DS128-NEXT: s_mov_b32 s89, SCRATCH_RSRC_DWORD1 +; VI-NO-DS128-NEXT: s_mov_b32 s90, -1 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-NO-DS128-NEXT: v_mov_b32_e32 v28, s1 +; VI-NO-DS128-NEXT: ds_read2_b64 v[10:13], v28 offset1:1 +; VI-NO-DS128-NEXT: ds_read2_b64 v[14:17], v28 offset0:2 offset1:3 +; VI-NO-DS128-NEXT: s_mov_b32 s91, 0xe80000 +; VI-NO-DS128-NEXT: s_add_u32 s88, s88, s11 +; VI-NO-DS128-NEXT: s_addc_u32 s89, s89, 0 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(1) +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v1, 16, v11 +; VI-NO-DS128-NEXT: v_bfe_i32 v0, v11, 0, 16 +; VI-NO-DS128-NEXT: buffer_store_dword v0, off, s[88:91], 0 ; 4-byte Folded Spill +; VI-NO-DS128-NEXT: buffer_store_dword v1, off, s[88:91], 0 offset:4 ; 4-byte Folded Spill +; VI-NO-DS128-NEXT: ds_read2_b64 v[20:23], v28 offset0:4 offset1:5 +; VI-NO-DS128-NEXT: ds_read2_b64 v[29:32], v28 offset0:6 offset1:7 +; VI-NO-DS128-NEXT: ds_read2_b64 v[33:36], v28 offset0:8 offset1:9 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v3, 16, v10 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v5, 16, 
v13 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v7, 16, v12 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(1) +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v25, 16, v30 +; VI-NO-DS128-NEXT: v_bfe_i32 v24, v30, 0, 16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v27, 16, v29 +; VI-NO-DS128-NEXT: v_bfe_i32 v26, v29, 0, 16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v38, 16, v32 +; VI-NO-DS128-NEXT: v_bfe_i32 v37, v32, 0, 16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v40, 16, v31 +; VI-NO-DS128-NEXT: v_bfe_i32 v39, v31, 0, 16 +; VI-NO-DS128-NEXT: ds_read2_b64 v[29:32], v28 offset0:10 offset1:11 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(1) +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v42, 16, v34 +; VI-NO-DS128-NEXT: v_bfe_i32 v41, v34, 0, 16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v44, 16, v33 +; VI-NO-DS128-NEXT: v_bfe_i32 v43, v33, 0, 16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v46, 16, v36 +; VI-NO-DS128-NEXT: v_bfe_i32 v45, v36, 0, 16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v48, 16, v35 +; VI-NO-DS128-NEXT: v_bfe_i32 v47, v35, 0, 16 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v50, 16, v30 +; VI-NO-DS128-NEXT: v_bfe_i32 v49, v30, 0, 16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v52, 16, v29 +; VI-NO-DS128-NEXT: v_bfe_i32 v51, v29, 0, 16 +; VI-NO-DS128-NEXT: ds_read2_b64 v[33:36], v28 offset0:12 offset1:13 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v56, 16, v31 +; VI-NO-DS128-NEXT: v_bfe_i32 v55, v31, 0, 16 +; VI-NO-DS128-NEXT: ds_read2_b64 v[28:31], v28 offset0:14 offset1:15 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v54, 16, v32 +; VI-NO-DS128-NEXT: v_bfe_i32 v53, v32, 0, 16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v9, 16, v15 +; VI-NO-DS128-NEXT: v_bfe_i32 v2, v10, 0, 16 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v32, 16, v31 +; VI-NO-DS128-NEXT: v_bfe_i32 v31, v31, 0, 16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v1, 16, v30 +; VI-NO-DS128-NEXT: v_bfe_i32 v0, v30, 0, 16 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v30, s0 +; VI-NO-DS128-NEXT: v_bfe_i32 v4, v13, 0, 
16 +; VI-NO-DS128-NEXT: v_bfe_i32 v6, v12, 0, 16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v11, 16, v14 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v13, 16, v17 +; VI-NO-DS128-NEXT: v_bfe_i32 v8, v15, 0, 16 +; VI-NO-DS128-NEXT: v_bfe_i32 v10, v14, 0, 16 +; VI-NO-DS128-NEXT: v_bfe_i32 v12, v17, 0, 16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v15, 16, v16 +; VI-NO-DS128-NEXT: v_bfe_i32 v14, v16, 0, 16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v17, 16, v21 +; VI-NO-DS128-NEXT: v_bfe_i32 v16, v21, 0, 16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v19, 16, v20 +; VI-NO-DS128-NEXT: v_bfe_i32 v18, v20, 0, 16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v21, 16, v23 +; VI-NO-DS128-NEXT: v_bfe_i32 v20, v23, 0, 16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v23, 16, v22 +; VI-NO-DS128-NEXT: v_bfe_i32 v22, v22, 0, 16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v58, 16, v34 +; VI-NO-DS128-NEXT: v_bfe_i32 v57, v34, 0, 16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v34, 16, v33 +; VI-NO-DS128-NEXT: v_bfe_i32 v33, v33, 0, 16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v60, 16, v36 +; VI-NO-DS128-NEXT: v_bfe_i32 v59, v36, 0, 16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v36, 16, v35 +; VI-NO-DS128-NEXT: v_bfe_i32 v35, v35, 0, 16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v62, 16, v29 +; VI-NO-DS128-NEXT: v_bfe_i32 v61, v29, 0, 16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v29, 16, v28 +; VI-NO-DS128-NEXT: v_bfe_i32 v28, v28, 0, 16 +; VI-NO-DS128-NEXT: ds_write2_b64 v30, v[0:1], v[31:32] offset0:30 offset1:31 +; VI-NO-DS128-NEXT: ds_write2_b64 v30, v[28:29], v[61:62] offset0:28 offset1:29 +; VI-NO-DS128-NEXT: ds_write2_b64 v30, v[35:36], v[59:60] offset0:26 offset1:27 +; VI-NO-DS128-NEXT: ds_write2_b64 v30, v[33:34], v[57:58] offset0:24 offset1:25 +; VI-NO-DS128-NEXT: ds_write2_b64 v30, v[55:56], v[53:54] offset0:22 offset1:23 +; VI-NO-DS128-NEXT: ds_write2_b64 v30, v[51:52], v[49:50] offset0:20 offset1:21 +; VI-NO-DS128-NEXT: ds_write2_b64 v30, v[47:48], v[45:46] offset0:18 offset1:19 +; VI-NO-DS128-NEXT: ds_write2_b64 v30, v[43:44], 
v[41:42] offset0:16 offset1:17 +; VI-NO-DS128-NEXT: ds_write2_b64 v30, v[39:40], v[37:38] offset0:14 offset1:15 +; VI-NO-DS128-NEXT: ds_write2_b64 v30, v[26:27], v[24:25] offset0:12 offset1:13 +; VI-NO-DS128-NEXT: ds_write2_b64 v30, v[22:23], v[20:21] offset0:10 offset1:11 +; VI-NO-DS128-NEXT: ds_write2_b64 v30, v[18:19], v[16:17] offset0:8 offset1:9 +; VI-NO-DS128-NEXT: ds_write2_b64 v30, v[14:15], v[12:13] offset0:6 offset1:7 +; VI-NO-DS128-NEXT: ds_write2_b64 v30, v[10:11], v[8:9] offset0:4 offset1:5 +; VI-NO-DS128-NEXT: ds_write2_b64 v30, v[6:7], v[4:5] offset0:2 offset1:3 +; VI-NO-DS128-NEXT: buffer_load_dword v0, off, s[88:91], 0 ; 4-byte Folded Reload +; VI-NO-DS128-NEXT: buffer_load_dword v1, off, s[88:91], 0 offset:4 ; 4-byte Folded Reload +; VI-NO-DS128-NEXT: s_waitcnt vmcnt(0) +; VI-NO-DS128-NEXT: ds_write2_b64 v30, v[2:3], v[0:1] offset1:1 +; VI-NO-DS128-NEXT: s_endpgm +; +; GFX9-NO-DS128-LABEL: local_sextload_v64i16_to_v64i32: +; GFX9-NO-DS128: ; %bb.0: +; GFX9-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-NO-DS128-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 +; GFX9-NO-DS128-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 +; GFX9-NO-DS128-NEXT: s_mov_b32 s14, -1 +; GFX9-NO-DS128-NEXT: s_mov_b32 s15, 0xe00000 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v28, s1 +; GFX9-NO-DS128-NEXT: ds_read2_b64 v[10:13], v28 offset1:1 +; GFX9-NO-DS128-NEXT: ds_read2_b64 v[14:17], v28 offset0:2 offset1:3 +; GFX9-NO-DS128-NEXT: s_add_u32 s12, s12, s11 +; GFX9-NO-DS128-NEXT: s_addc_u32 s13, s13, 0 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(1) +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v1, 16, v11 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v0, v11, 0, 16 +; GFX9-NO-DS128-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill +; GFX9-NO-DS128-NEXT: s_nop 0 +; GFX9-NO-DS128-NEXT: buffer_store_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill +; GFX9-NO-DS128-NEXT: ds_read2_b64 v[20:23], v28 offset0:4 offset1:5 +; 
GFX9-NO-DS128-NEXT: ds_read2_b64 v[29:32], v28 offset0:6 offset1:7 +; GFX9-NO-DS128-NEXT: ds_read2_b64 v[33:36], v28 offset0:8 offset1:9 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v3, 16, v10 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v5, 16, v13 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v7, 16, v12 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(1) +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v25, 16, v30 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v24, v30, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v27, 16, v29 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v26, v29, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v38, 16, v32 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v37, v32, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v40, 16, v31 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v39, v31, 0, 16 +; GFX9-NO-DS128-NEXT: ds_read2_b64 v[29:32], v28 offset0:10 offset1:11 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(1) +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v42, 16, v34 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v41, v34, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v44, 16, v33 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v43, v33, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v46, 16, v36 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v45, v36, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v48, 16, v35 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v47, v35, 0, 16 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v50, 16, v30 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v49, v30, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v52, 16, v29 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v51, v29, 0, 16 +; GFX9-NO-DS128-NEXT: ds_read2_b64 v[33:36], v28 offset0:12 offset1:13 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v56, 16, v31 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v55, v31, 0, 16 +; GFX9-NO-DS128-NEXT: ds_read2_b64 v[28:31], v28 offset0:14 offset1:15 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v54, 16, v32 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v53, v32, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v9, 16, v15 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v2, v10, 0, 16 +; 
GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v32, 16, v31 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v31, v31, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v1, 16, v30 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v0, v30, 0, 16 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v30, s0 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v4, v13, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v6, v12, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v11, 16, v14 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v13, 16, v17 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v8, v15, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v10, v14, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v12, v17, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v15, 16, v16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v14, v16, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v17, 16, v21 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v16, v21, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v19, 16, v20 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v18, v20, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v21, 16, v23 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v20, v23, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v23, 16, v22 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v22, v22, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v58, 16, v34 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v57, v34, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v34, 16, v33 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v33, v33, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v60, 16, v36 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v59, v36, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v36, 16, v35 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v35, v35, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v62, 16, v29 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v61, v29, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v29, 16, v28 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v28, v28, 0, 16 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v30, v[0:1], v[31:32] offset0:30 offset1:31 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v30, v[28:29], v[61:62] offset0:28 offset1:29 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v30, v[35:36], v[59:60] 
offset0:26 offset1:27 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v30, v[33:34], v[57:58] offset0:24 offset1:25 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v30, v[55:56], v[53:54] offset0:22 offset1:23 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v30, v[51:52], v[49:50] offset0:20 offset1:21 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v30, v[47:48], v[45:46] offset0:18 offset1:19 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v30, v[43:44], v[41:42] offset0:16 offset1:17 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v30, v[39:40], v[37:38] offset0:14 offset1:15 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v30, v[26:27], v[24:25] offset0:12 offset1:13 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v30, v[22:23], v[20:21] offset0:10 offset1:11 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v30, v[18:19], v[16:17] offset0:8 offset1:9 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v30, v[14:15], v[12:13] offset0:6 offset1:7 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v30, v[10:11], v[8:9] offset0:4 offset1:5 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v30, v[6:7], v[4:5] offset0:2 offset1:3 +; GFX9-NO-DS128-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload +; GFX9-NO-DS128-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload +; GFX9-NO-DS128-NEXT: s_waitcnt vmcnt(0) +; GFX9-NO-DS128-NEXT: ds_write2_b64 v30, v[2:3], v[0:1] offset1:1 +; GFX9-NO-DS128-NEXT: s_endpgm +; +; EG-LABEL: local_sextload_v64i16_to_v64i32: +; EG: ; %bb.0: +; EG-NEXT: ALU 116, @30, KC0[CB0:0-32], KC1[] +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.Y, OQAP, +; EG-NEXT: MOV * T0.W, KC0[2].Z, +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.Z, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 28(3.923636e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.W, OQAP, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Z, literal.x, +; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T1.W +; 
EG-NEXT: MOV T1.Y, OQAP, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Z, literal.x, +; EG-NEXT: 20(2.802597e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T1.W +; EG-NEXT: MOV T1.Z, OQAP, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Z, literal.x, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T1.W +; EG-NEXT: MOV T1.W, OQAP, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Z, literal.x, +; EG-NEXT: 44(6.165713e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T2.W +; EG-NEXT: MOV T2.Y, OQAP, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Z, literal.x, +; EG-NEXT: 40(5.605194e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T2.W +; EG-NEXT: MOV T2.Z, OQAP, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Z, literal.x, +; EG-NEXT: 36(5.044674e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T2.W +; EG-NEXT: MOV T2.W, OQAP, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Z, literal.x, +; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T3.W +; EG-NEXT: MOV T3.Y, OQAP, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Z, literal.x, +; EG-NEXT: 60(8.407791e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T3.W +; EG-NEXT: MOV T3.Z, OQAP, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Z, literal.x, +; EG-NEXT: 56(7.847271e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T3.W +; EG-NEXT: MOV T3.W, OQAP, +; EG-NEXT: ADD_INT * T4.W, KC0[2].Z, literal.x, +; EG-NEXT: 52(7.286752e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T4.W +; EG-NEXT: MOV T4.Y, OQAP, +; EG-NEXT: ADD_INT * T4.W, KC0[2].Z, literal.x, +; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T4.W +; EG-NEXT: MOV T4.Z, OQAP, +; EG-NEXT: ADD_INT * T4.W, KC0[2].Z, literal.x, +; EG-NEXT: 76(1.064987e-43), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T4.W +; EG-NEXT: MOV T4.W, OQAP, +; EG-NEXT: ADD_INT * T5.W, KC0[2].Z, literal.x, +; EG-NEXT: 72(1.008935e-43), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T5.W +; EG-NEXT: MOV T5.Y, OQAP, +; EG-NEXT: ADD_INT * T5.W, KC0[2].Z, 
literal.x, +; EG-NEXT: 68(9.528830e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T5.W +; EG-NEXT: MOV T5.Z, OQAP, +; EG-NEXT: ADD_INT * T5.W, KC0[2].Z, literal.x, +; EG-NEXT: 64(8.968310e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T5.W +; EG-NEXT: MOV T5.W, OQAP, +; EG-NEXT: ADD_INT * T6.W, KC0[2].Z, literal.x, +; EG-NEXT: 92(1.289195e-43), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T6.W +; EG-NEXT: MOV T6.Y, OQAP, +; EG-NEXT: ADD_INT * T6.W, KC0[2].Z, literal.x, +; EG-NEXT: 88(1.233143e-43), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T6.W +; EG-NEXT: MOV T6.Z, OQAP, +; EG-NEXT: ADD_INT * T6.W, KC0[2].Z, literal.x, +; EG-NEXT: 84(1.177091e-43), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T6.W +; EG-NEXT: MOV T6.W, OQAP, +; EG-NEXT: ADD_INT * T7.W, KC0[2].Z, literal.x, +; EG-NEXT: 80(1.121039e-43), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T7.W +; EG-NEXT: MOV T7.Y, OQAP, +; EG-NEXT: ADD_INT * T7.W, KC0[2].Z, literal.x, +; EG-NEXT: 108(1.513402e-43), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T7.W +; EG-NEXT: MOV T7.Z, OQAP, +; EG-NEXT: ADD_INT * T7.W, KC0[2].Z, literal.x, +; EG-NEXT: 104(1.457350e-43), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T7.W +; EG-NEXT: MOV T7.W, OQAP, +; EG-NEXT: ADD_INT * T8.W, KC0[2].Z, literal.x, +; EG-NEXT: 100(1.401298e-43), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T8.W +; EG-NEXT: MOV T8.Y, OQAP, +; EG-NEXT: ADD_INT * T8.W, KC0[2].Z, literal.x, +; EG-NEXT: 96(1.345247e-43), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T8.W +; EG-NEXT: MOV T8.Z, OQAP, +; EG-NEXT: ADD_INT * T8.W, KC0[2].Z, literal.x, +; EG-NEXT: 124(1.737610e-43), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T8.W +; EG-NEXT: MOV T8.W, OQAP, +; EG-NEXT: ADD_INT * T9.W, KC0[2].Z, literal.x, +; EG-NEXT: 120(1.681558e-43), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T9.W +; EG-NEXT: MOV T9.Y, OQAP, +; EG-NEXT: ADD_INT * T9.W, KC0[2].Z, literal.x, +; EG-NEXT: 116(1.625506e-43), 0(0.000000e+00) +; 
EG-NEXT: LDS_READ_RET * OQAP, T9.W +; EG-NEXT: MOV T9.Z, OQAP, +; EG-NEXT: ADD_INT * T9.W, KC0[2].Z, literal.x, +; EG-NEXT: 12(1.681558e-44), 0(0.000000e+00) +; EG-NEXT: ALU 85, @31, KC0[CB0:0-32], KC1[] +; EG-NEXT: LDS_READ_RET * OQAP, T9.W +; EG-NEXT: MOV T9.W, OQAP, +; EG-NEXT: ADD_INT * T10.W, KC0[2].Z, literal.x, +; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T10.W +; EG-NEXT: MOV T10.Y, OQAP, +; EG-NEXT: LSHR T10.W, T9.W, literal.x, +; EG-NEXT: ADD_INT * T11.W, KC0[2].Z, literal.y, +; EG-NEXT: 16(2.242078e-44), 112(1.569454e-43) +; EG-NEXT: LDS_READ_RET * OQAP, T11.W +; EG-NEXT: MOV T10.Z, OQAP, +; EG-NEXT: LSHR * T11.Z, T10.Y, literal.x, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: BFE_INT T10.W, T10.W, 0.0, literal.x, +; EG-NEXT: ADD_INT * T11.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 28(3.923636e-44) +; EG-NEXT: LDS_WRITE * T11.W, T10.W, +; EG-NEXT: LSHR T12.Z, T0.Y, literal.x, +; EG-NEXT: BFE_INT T10.W, T11.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T11.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 20(2.802597e-44) +; EG-NEXT: LDS_WRITE * T11.W, T10.W, +; EG-NEXT: LSHR T11.Z, T0.Z, literal.x, +; EG-NEXT: BFE_INT T10.W, T12.Z, 0.0, literal.x, BS:VEC_120/SCL_212 +; EG-NEXT: ADD_INT * T11.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 12(1.681558e-44) +; EG-NEXT: LDS_WRITE * T11.W, T10.W, +; EG-NEXT: LSHR T12.Z, T0.W, literal.x, +; EG-NEXT: BFE_INT T10.W, T11.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T11.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 4(5.605194e-45) +; EG-NEXT: LDS_WRITE * T11.W, T10.W, +; EG-NEXT: LSHR T11.Z, T1.Y, literal.x, +; EG-NEXT: BFE_INT T10.W, T12.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T11.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 60(8.407791e-44) +; EG-NEXT: LDS_WRITE * T11.W, T10.W, +; EG-NEXT: LSHR T12.Z, T1.Z, literal.x, +; EG-NEXT: BFE_INT T10.W, T11.Z, 0.0, literal.x, BS:VEC_120/SCL_212 +; EG-NEXT: ADD_INT * T11.W, KC0[2].Y, literal.y, +; 
EG-NEXT: 16(2.242078e-44), 52(7.286752e-44) +; EG-NEXT: LDS_WRITE * T11.W, T10.W, +; EG-NEXT: LSHR T11.Z, T1.W, literal.x, +; EG-NEXT: BFE_INT T10.W, T12.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T11.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 44(6.165713e-44) +; EG-NEXT: LDS_WRITE * T11.W, T10.W, +; EG-NEXT: LSHR T12.Z, T2.Y, literal.x, +; EG-NEXT: BFE_INT T10.W, T11.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T11.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 36(5.044674e-44) +; EG-NEXT: LDS_WRITE * T11.W, T10.W, +; EG-NEXT: LSHR T11.Z, T2.Z, literal.x, +; EG-NEXT: BFE_INT T10.W, T12.Z, 0.0, literal.x, BS:VEC_120/SCL_212 +; EG-NEXT: ADD_INT * T11.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 92(1.289195e-43) +; EG-NEXT: LDS_WRITE * T11.W, T10.W, +; EG-NEXT: LSHR T12.Z, T2.W, literal.x, +; EG-NEXT: BFE_INT T10.W, T11.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T11.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 84(1.177091e-43) +; EG-NEXT: LDS_WRITE * T11.W, T10.W, +; EG-NEXT: LSHR T11.Z, T3.Y, literal.x, +; EG-NEXT: BFE_INT T10.W, T12.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T11.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 76(1.064987e-43) +; EG-NEXT: LDS_WRITE * T11.W, T10.W, +; EG-NEXT: LSHR T12.Z, T3.Z, literal.x, +; EG-NEXT: BFE_INT T10.W, T11.Z, 0.0, literal.x, BS:VEC_120/SCL_212 +; EG-NEXT: ADD_INT * T11.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 68(9.528830e-44) +; EG-NEXT: LDS_WRITE * T11.W, T10.W, +; EG-NEXT: LSHR T11.Z, T3.W, literal.x, +; EG-NEXT: BFE_INT T10.W, T12.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T11.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 124(1.737610e-43) +; EG-NEXT: LDS_WRITE * T11.W, T10.W, +; EG-NEXT: LSHR T12.Z, T4.Y, literal.x, +; EG-NEXT: BFE_INT T10.W, T11.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T11.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 116(1.625506e-43) +; EG-NEXT: LDS_WRITE * T11.W, T10.W, +; EG-NEXT: LSHR T11.Z, T4.Z, literal.x, +; EG-NEXT: BFE_INT T10.W, 
T12.Z, 0.0, literal.x, BS:VEC_120/SCL_212 +; EG-NEXT: ADD_INT * T11.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 108(1.513402e-43) +; EG-NEXT: ALU 83, @32, KC0[CB0:0-32], KC1[] +; EG-NEXT: LDS_WRITE * T11.W, T10.W, +; EG-NEXT: LSHR T12.Z, T4.W, literal.x, +; EG-NEXT: BFE_INT T10.W, T11.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T11.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 100(1.401298e-43) +; EG-NEXT: LDS_WRITE * T11.W, T10.W, +; EG-NEXT: LSHR T11.Z, T5.Y, literal.x, +; EG-NEXT: BFE_INT T10.W, T12.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T11.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 156(2.186026e-43) +; EG-NEXT: LDS_WRITE * T11.W, T10.W, +; EG-NEXT: LSHR T12.Z, T5.Z, literal.x, +; EG-NEXT: BFE_INT T10.W, T11.Z, 0.0, literal.x, BS:VEC_120/SCL_212 +; EG-NEXT: ADD_INT * T11.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 148(2.073922e-43) +; EG-NEXT: LDS_WRITE * T11.W, T10.W, +; EG-NEXT: LSHR T11.Z, T5.W, literal.x, +; EG-NEXT: BFE_INT T10.W, T12.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T11.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 140(1.961818e-43) +; EG-NEXT: LDS_WRITE * T11.W, T10.W, +; EG-NEXT: LSHR T12.Z, T6.Y, literal.x, +; EG-NEXT: BFE_INT T10.W, T11.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T11.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 132(1.849714e-43) +; EG-NEXT: LDS_WRITE * T11.W, T10.W, +; EG-NEXT: LSHR T11.Z, T6.Z, literal.x, +; EG-NEXT: BFE_INT T10.W, T12.Z, 0.0, literal.x, BS:VEC_120/SCL_212 +; EG-NEXT: ADD_INT * T11.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 188(2.634441e-43) +; EG-NEXT: LDS_WRITE * T11.W, T10.W, +; EG-NEXT: LSHR T12.Z, T6.W, literal.x, +; EG-NEXT: BFE_INT T10.W, T11.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T11.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 180(2.522337e-43) +; EG-NEXT: LDS_WRITE * T11.W, T10.W, +; EG-NEXT: LSHR T11.Z, T7.Y, literal.x, +; EG-NEXT: BFE_INT T10.W, T12.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T11.W, KC0[2].Y, literal.y, +; 
EG-NEXT: 16(2.242078e-44), 172(2.410233e-43) +; EG-NEXT: LDS_WRITE * T11.W, T10.W, +; EG-NEXT: LSHR T12.Z, T7.Z, literal.x, +; EG-NEXT: BFE_INT T10.W, T11.Z, 0.0, literal.x, BS:VEC_120/SCL_212 +; EG-NEXT: ADD_INT * T11.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 164(2.298129e-43) +; EG-NEXT: LDS_WRITE * T11.W, T10.W, +; EG-NEXT: LSHR T11.Z, T7.W, literal.x, +; EG-NEXT: BFE_INT T10.W, T12.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T11.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 220(3.082857e-43) +; EG-NEXT: LDS_WRITE * T11.W, T10.W, +; EG-NEXT: LSHR T12.Z, T8.Y, literal.x, +; EG-NEXT: BFE_INT T10.W, T11.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T11.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 212(2.970753e-43) +; EG-NEXT: LDS_WRITE * T11.W, T10.W, +; EG-NEXT: LSHR T11.Z, T8.Z, literal.x, +; EG-NEXT: BFE_INT T10.W, T12.Z, 0.0, literal.x, BS:VEC_120/SCL_212 +; EG-NEXT: ADD_INT * T11.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 204(2.858649e-43) +; EG-NEXT: LDS_WRITE * T11.W, T10.W, +; EG-NEXT: LSHR T12.Z, T8.W, literal.x, +; EG-NEXT: BFE_INT T10.W, T11.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T11.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 196(2.746545e-43) +; EG-NEXT: LDS_WRITE * T11.W, T10.W, +; EG-NEXT: LSHR T11.Z, T9.Y, literal.x, +; EG-NEXT: BFE_INT T10.W, T12.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T11.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 252(3.531272e-43) +; EG-NEXT: LDS_WRITE * T11.W, T10.W, +; EG-NEXT: LSHR T12.Z, T9.Z, literal.x, +; EG-NEXT: BFE_INT T10.W, T11.Z, 0.0, literal.x, BS:VEC_120/SCL_212 +; EG-NEXT: ADD_INT * T11.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 244(3.419168e-43) +; EG-NEXT: LDS_WRITE * T11.W, T10.W, +; EG-NEXT: LSHR T11.Z, T10.Z, literal.x, +; EG-NEXT: BFE_INT T10.W, T12.Z, 0.0, literal.x, BS:VEC_120/SCL_212 +; EG-NEXT: ADD_INT * T11.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 236(3.307064e-43) +; EG-NEXT: LDS_WRITE * T11.W, T10.W, +; EG-NEXT: BFE_INT 
T10.W, T11.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T11.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 228(3.194960e-43) +; EG-NEXT: ALU 94, @33, KC0[CB0:0-32], KC1[] +; EG-NEXT: LDS_WRITE * T11.W, T10.W, +; EG-NEXT: BFE_INT T9.W, T9.W, 0.0, literal.x, +; EG-NEXT: ADD_INT * T10.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 24(3.363116e-44) +; EG-NEXT: LDS_WRITE * T10.W, T9.W, +; EG-NEXT: BFE_INT T9.W, T10.Y, 0.0, literal.x, +; EG-NEXT: ADD_INT * T10.W, KC0[2].Y, literal.x, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T10.W, T9.W, +; EG-NEXT: BFE_INT T9.W, T0.Y, 0.0, literal.x, +; EG-NEXT: ADD_INT * T10.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 8(1.121039e-44) +; EG-NEXT: LDS_WRITE * T10.W, T9.W, +; EG-NEXT: BFE_INT T9.W, T0.Z, 0.0, literal.x, +; EG-NEXT: MOV * T10.W, KC0[2].Y, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T10.W, T9.W, +; EG-NEXT: BFE_INT T0.W, T0.W, 0.0, literal.x, +; EG-NEXT: ADD_INT * T9.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 56(7.847271e-44) +; EG-NEXT: LDS_WRITE * T9.W, T0.W, +; EG-NEXT: BFE_INT T0.W, T1.Y, 0.0, literal.x, +; EG-NEXT: ADD_INT * T9.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 48(6.726233e-44) +; EG-NEXT: LDS_WRITE * T9.W, T0.W, +; EG-NEXT: BFE_INT T0.W, T1.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T9.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 40(5.605194e-44) +; EG-NEXT: LDS_WRITE * T9.W, T0.W, +; EG-NEXT: BFE_INT T0.W, T1.W, 0.0, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 32(4.484155e-44) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: BFE_INT T0.W, T2.Y, 0.0, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 88(1.233143e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: BFE_INT T0.W, T2.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 80(1.121039e-43) +; EG-NEXT: LDS_WRITE * T1.W, 
T0.W, +; EG-NEXT: BFE_INT T0.W, T2.W, 0.0, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 72(1.008935e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: BFE_INT T0.W, T3.Y, 0.0, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 64(8.968310e-44) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: BFE_INT T0.W, T3.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 120(1.681558e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: BFE_INT T0.W, T3.W, 0.0, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 112(1.569454e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: BFE_INT T0.W, T4.Y, 0.0, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 104(1.457350e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: BFE_INT T0.W, T4.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 96(1.345247e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: BFE_INT T0.W, T4.W, 0.0, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 152(2.129974e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: BFE_INT T0.W, T5.Y, 0.0, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 144(2.017870e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: BFE_INT T0.W, T5.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 136(1.905766e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: BFE_INT T0.W, T5.W, 0.0, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 128(1.793662e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: BFE_INT T0.W, T6.Y, 0.0, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 184(2.578389e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; 
EG-NEXT: BFE_INT T0.W, T6.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 176(2.466285e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: BFE_INT T0.W, T6.W, 0.0, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 168(2.354181e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: BFE_INT * T0.W, T7.Y, 0.0, literal.x, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: ALU 34, @34, KC0[CB0:0-32], KC1[] +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.x, +; EG-NEXT: 160(2.242078e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: BFE_INT T0.W, T7.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 216(3.026805e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: BFE_INT T0.W, T7.W, 0.0, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 208(2.914701e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: BFE_INT T0.W, T8.Y, 0.0, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 200(2.802597e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: BFE_INT T0.W, T8.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 192(2.690493e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: BFE_INT T0.W, T8.W, 0.0, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 248(3.475220e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: BFE_INT T0.W, T9.Y, 0.0, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 240(3.363116e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: BFE_INT T0.W, T9.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 232(3.251012e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: BFE_INT T0.W, T10.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, 
+; EG-NEXT: 16(2.242078e-44), 224(3.138909e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: RETURN +; +; VI-DS128-LABEL: local_sextload_v64i16_to_v64i32: +; VI-DS128: ; %bb.0: +; VI-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-DS128-NEXT: s_mov_b32 m0, -1 +; VI-DS128-NEXT: s_mov_b32 s88, SCRATCH_RSRC_DWORD0 +; VI-DS128-NEXT: s_mov_b32 s89, SCRATCH_RSRC_DWORD1 +; VI-DS128-NEXT: s_mov_b32 s90, -1 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-DS128-NEXT: v_mov_b32_e32 v32, s1 +; VI-DS128-NEXT: ds_read_b128 v[8:11], v32 +; VI-DS128-NEXT: ds_read_b128 v[16:19], v32 offset:16 +; VI-DS128-NEXT: s_mov_b32 s91, 0xe80000 +; VI-DS128-NEXT: s_add_u32 s88, s88, s11 +; VI-DS128-NEXT: s_addc_u32 s89, s89, 0 +; VI-DS128-NEXT: ds_read_b128 v[24:27], v32 offset:32 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(1) +; VI-DS128-NEXT: v_ashrrev_i32_e32 v3, 16, v19 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v1, 16, v18 +; VI-DS128-NEXT: v_bfe_i32 v2, v19, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v0, v18, 0, 16 +; VI-DS128-NEXT: buffer_store_dword v0, off, s[88:91], 0 ; 4-byte Folded Spill +; VI-DS128-NEXT: buffer_store_dword v1, off, s[88:91], 0 offset:4 ; 4-byte Folded Spill +; VI-DS128-NEXT: buffer_store_dword v2, off, s[88:91], 0 offset:8 ; 4-byte Folded Spill +; VI-DS128-NEXT: buffer_store_dword v3, off, s[88:91], 0 offset:12 ; 4-byte Folded Spill +; VI-DS128-NEXT: v_ashrrev_i32_e32 v6, 16, v17 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v4, 16, v16 +; VI-DS128-NEXT: v_bfe_i32 v5, v17, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v3, v16, 0, 16 +; VI-DS128-NEXT: buffer_store_dword v3, off, s[88:91], 0 offset:16 ; 4-byte Folded Spill +; VI-DS128-NEXT: buffer_store_dword v4, off, s[88:91], 0 offset:20 ; 4-byte Folded Spill +; VI-DS128-NEXT: buffer_store_dword v5, off, s[88:91], 0 offset:24 ; 4-byte Folded Spill +; VI-DS128-NEXT: buffer_store_dword v6, off, s[88:91], 0 offset:28 ; 4-byte Folded Spill +; VI-DS128-NEXT: ds_read_b128 v[33:36], v32 offset:48 +; VI-DS128-NEXT: ds_read_b128 v[40:43], v32 offset:80 +; 
VI-DS128-NEXT: s_waitcnt lgkmcnt(2) +; VI-DS128-NEXT: v_ashrrev_i32_e32 v19, 16, v27 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v17, 16, v26 +; VI-DS128-NEXT: v_bfe_i32 v18, v27, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v16, v26, 0, 16 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(1) +; VI-DS128-NEXT: v_ashrrev_i32_e32 v27, 16, v36 +; VI-DS128-NEXT: v_bfe_i32 v26, v36, 0, 16 +; VI-DS128-NEXT: ds_read_b128 v[36:39], v32 offset:64 +; VI-DS128-NEXT: ds_read_b128 v[56:59], v32 offset:96 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(2) +; VI-DS128-NEXT: v_ashrrev_i32_e32 v53, 16, v40 +; VI-DS128-NEXT: v_bfe_i32 v52, v40, 0, 16 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v15, 16, v11 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(1) +; VI-DS128-NEXT: v_ashrrev_i32_e32 v47, 16, v39 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v45, 16, v38 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v51, 16, v37 +; VI-DS128-NEXT: v_bfe_i32 v46, v39, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v44, v38, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v50, v37, 0, 16 +; VI-DS128-NEXT: ds_read_b128 v[37:40], v32 offset:112 +; VI-DS128-NEXT: v_mov_b32_e32 v32, s0 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v21, 16, v10 +; VI-DS128-NEXT: v_mov_b32_e32 v23, v15 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v15, 16, v9 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-DS128-NEXT: v_ashrrev_i32_e32 v3, 16, v38 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v1, 16, v37 +; VI-DS128-NEXT: v_bfe_i32 v2, v38, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v0, v37, 0, 16 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v13, 16, v8 +; VI-DS128-NEXT: v_bfe_i32 v22, v11, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v20, v10, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v14, v9, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v12, v8, 0, 16 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v11, 16, v25 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v9, 16, v24 +; VI-DS128-NEXT: v_bfe_i32 v10, v25, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v8, v24, 0, 16 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v25, 16, v35 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v31, 16, v34 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v29, 16, v33 +; VI-DS128-NEXT: v_bfe_i32 
v24, v35, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v30, v34, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v28, v33, 0, 16 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v49, 16, v36 +; VI-DS128-NEXT: v_bfe_i32 v48, v36, 0, 16 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v36, 16, v43 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v34, 16, v42 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v55, 16, v41 +; VI-DS128-NEXT: v_bfe_i32 v35, v43, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v33, v42, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v54, v41, 0, 16 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v62, 16, v59 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v60, 16, v58 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v7, 16, v57 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v5, 16, v56 +; VI-DS128-NEXT: v_bfe_i32 v61, v59, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v59, v58, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v6, v57, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v4, v56, 0, 16 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v43, 16, v40 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v41, 16, v39 +; VI-DS128-NEXT: v_bfe_i32 v42, v40, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v40, v39, 0, 16 +; VI-DS128-NEXT: ds_write_b128 v32, v[0:3] offset:224 +; VI-DS128-NEXT: ds_write_b128 v32, v[40:43] offset:240 +; VI-DS128-NEXT: ds_write_b128 v32, v[4:7] offset:192 +; VI-DS128-NEXT: ds_write_b128 v32, v[59:62] offset:208 +; VI-DS128-NEXT: ds_write_b128 v32, v[52:55] offset:160 +; VI-DS128-NEXT: ds_write_b128 v32, v[33:36] offset:176 +; VI-DS128-NEXT: ds_write_b128 v32, v[48:51] offset:128 +; VI-DS128-NEXT: ds_write_b128 v32, v[44:47] offset:144 +; VI-DS128-NEXT: ds_write_b128 v32, v[28:31] offset:96 +; VI-DS128-NEXT: ds_write_b128 v32, v[24:27] offset:112 +; VI-DS128-NEXT: ds_write_b128 v32, v[8:11] offset:64 +; VI-DS128-NEXT: ds_write_b128 v32, v[16:19] offset:80 +; VI-DS128-NEXT: buffer_load_dword v0, off, s[88:91], 0 offset:16 ; 4-byte Folded Reload +; VI-DS128-NEXT: buffer_load_dword v1, off, s[88:91], 0 offset:20 ; 4-byte Folded Reload +; VI-DS128-NEXT: buffer_load_dword v2, off, s[88:91], 0 offset:24 ; 4-byte Folded Reload +; VI-DS128-NEXT: 
buffer_load_dword v3, off, s[88:91], 0 offset:28 ; 4-byte Folded Reload +; VI-DS128-NEXT: s_waitcnt vmcnt(0) +; VI-DS128-NEXT: ds_write_b128 v32, v[0:3] offset:32 +; VI-DS128-NEXT: buffer_load_dword v0, off, s[88:91], 0 ; 4-byte Folded Reload +; VI-DS128-NEXT: buffer_load_dword v1, off, s[88:91], 0 offset:4 ; 4-byte Folded Reload +; VI-DS128-NEXT: buffer_load_dword v2, off, s[88:91], 0 offset:8 ; 4-byte Folded Reload +; VI-DS128-NEXT: buffer_load_dword v3, off, s[88:91], 0 offset:12 ; 4-byte Folded Reload +; VI-DS128-NEXT: s_waitcnt vmcnt(0) +; VI-DS128-NEXT: ds_write_b128 v32, v[0:3] offset:48 +; VI-DS128-NEXT: ds_write_b128 v32, v[12:15] +; VI-DS128-NEXT: ds_write_b128 v32, v[20:23] offset:16 +; VI-DS128-NEXT: s_endpgm +; +; GFX9-DS128-LABEL: local_sextload_v64i16_to_v64i32: +; GFX9-DS128: ; %bb.0: +; GFX9-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-DS128-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 +; GFX9-DS128-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 +; GFX9-DS128-NEXT: s_mov_b32 s14, -1 +; GFX9-DS128-NEXT: s_mov_b32 s15, 0xe00000 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DS128-NEXT: v_mov_b32_e32 v32, s1 +; GFX9-DS128-NEXT: ds_read_b128 v[8:11], v32 +; GFX9-DS128-NEXT: ds_read_b128 v[16:19], v32 offset:16 +; GFX9-DS128-NEXT: s_add_u32 s12, s12, s11 +; GFX9-DS128-NEXT: s_addc_u32 s13, s13, 0 +; GFX9-DS128-NEXT: ds_read_b128 v[24:27], v32 offset:32 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(2) +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v15, 16, v11 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(1) +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v3, 16, v19 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v1, 16, v18 +; GFX9-DS128-NEXT: v_bfe_i32 v2, v19, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v0, v18, 0, 16 +; GFX9-DS128-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill +; GFX9-DS128-NEXT: s_nop 0 +; GFX9-DS128-NEXT: buffer_store_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill +; GFX9-DS128-NEXT: buffer_store_dword v2, off, s[12:15], 0 offset:8 ; 4-byte Folded 
Spill +; GFX9-DS128-NEXT: buffer_store_dword v3, off, s[12:15], 0 offset:12 ; 4-byte Folded Spill +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v6, 16, v17 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v4, 16, v16 +; GFX9-DS128-NEXT: v_bfe_i32 v5, v17, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v3, v16, 0, 16 +; GFX9-DS128-NEXT: buffer_store_dword v3, off, s[12:15], 0 offset:16 ; 4-byte Folded Spill +; GFX9-DS128-NEXT: s_nop 0 +; GFX9-DS128-NEXT: buffer_store_dword v4, off, s[12:15], 0 offset:20 ; 4-byte Folded Spill +; GFX9-DS128-NEXT: buffer_store_dword v5, off, s[12:15], 0 offset:24 ; 4-byte Folded Spill +; GFX9-DS128-NEXT: buffer_store_dword v6, off, s[12:15], 0 offset:28 ; 4-byte Folded Spill +; GFX9-DS128-NEXT: ds_read_b128 v[33:36], v32 offset:48 +; GFX9-DS128-NEXT: ds_read_b128 v[40:43], v32 offset:80 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(2) +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v19, 16, v27 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v17, 16, v26 +; GFX9-DS128-NEXT: v_bfe_i32 v18, v27, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v16, v26, 0, 16 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(1) +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v27, 16, v36 +; GFX9-DS128-NEXT: v_bfe_i32 v26, v36, 0, 16 +; GFX9-DS128-NEXT: ds_read_b128 v[36:39], v32 offset:64 +; GFX9-DS128-NEXT: ds_read_b128 v[56:59], v32 offset:96 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(2) +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v53, 16, v40 +; GFX9-DS128-NEXT: v_bfe_i32 v52, v40, 0, 16 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v21, 16, v10 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(1) +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v47, 16, v39 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v45, 16, v38 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v51, 16, v37 +; GFX9-DS128-NEXT: v_bfe_i32 v46, v39, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v44, v38, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v50, v37, 0, 16 +; GFX9-DS128-NEXT: ds_read_b128 v[37:40], v32 offset:112 +; GFX9-DS128-NEXT: v_mov_b32_e32 v32, s0 +; GFX9-DS128-NEXT: v_mov_b32_e32 v23, v15 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v15, 16, v9 
+; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v13, 16, v8 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v3, 16, v38 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v1, 16, v37 +; GFX9-DS128-NEXT: v_bfe_i32 v2, v38, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v0, v37, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v22, v11, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v20, v10, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v14, v9, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v12, v8, 0, 16 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v11, 16, v25 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v9, 16, v24 +; GFX9-DS128-NEXT: v_bfe_i32 v10, v25, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v8, v24, 0, 16 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v25, 16, v35 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v31, 16, v34 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v29, 16, v33 +; GFX9-DS128-NEXT: v_bfe_i32 v24, v35, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v30, v34, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v28, v33, 0, 16 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v49, 16, v36 +; GFX9-DS128-NEXT: v_bfe_i32 v48, v36, 0, 16 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v36, 16, v43 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v34, 16, v42 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v55, 16, v41 +; GFX9-DS128-NEXT: v_bfe_i32 v35, v43, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v33, v42, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v54, v41, 0, 16 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v62, 16, v59 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v60, 16, v58 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v7, 16, v57 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v5, 16, v56 +; GFX9-DS128-NEXT: v_bfe_i32 v61, v59, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v59, v58, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v6, v57, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v4, v56, 0, 16 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v43, 16, v40 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v41, 16, v39 +; GFX9-DS128-NEXT: v_bfe_i32 v42, v40, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v40, v39, 0, 16 +; GFX9-DS128-NEXT: ds_write_b128 v32, v[0:3] offset:224 +; GFX9-DS128-NEXT: 
ds_write_b128 v32, v[40:43] offset:240 +; GFX9-DS128-NEXT: ds_write_b128 v32, v[4:7] offset:192 +; GFX9-DS128-NEXT: ds_write_b128 v32, v[59:62] offset:208 +; GFX9-DS128-NEXT: ds_write_b128 v32, v[52:55] offset:160 +; GFX9-DS128-NEXT: ds_write_b128 v32, v[33:36] offset:176 +; GFX9-DS128-NEXT: ds_write_b128 v32, v[48:51] offset:128 +; GFX9-DS128-NEXT: ds_write_b128 v32, v[44:47] offset:144 +; GFX9-DS128-NEXT: ds_write_b128 v32, v[28:31] offset:96 +; GFX9-DS128-NEXT: ds_write_b128 v32, v[24:27] offset:112 +; GFX9-DS128-NEXT: ds_write_b128 v32, v[8:11] offset:64 +; GFX9-DS128-NEXT: ds_write_b128 v32, v[16:19] offset:80 +; GFX9-DS128-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:16 ; 4-byte Folded Reload +; GFX9-DS128-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:20 ; 4-byte Folded Reload +; GFX9-DS128-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:24 ; 4-byte Folded Reload +; GFX9-DS128-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:28 ; 4-byte Folded Reload +; GFX9-DS128-NEXT: s_waitcnt vmcnt(0) +; GFX9-DS128-NEXT: ds_write_b128 v32, v[0:3] offset:32 +; GFX9-DS128-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload +; GFX9-DS128-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload +; GFX9-DS128-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload +; GFX9-DS128-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload +; GFX9-DS128-NEXT: s_waitcnt vmcnt(0) +; GFX9-DS128-NEXT: ds_write_b128 v32, v[0:3] offset:48 +; GFX9-DS128-NEXT: ds_write_b128 v32, v[12:15] +; GFX9-DS128-NEXT: ds_write_b128 v32, v[20:23] offset:16 +; GFX9-DS128-NEXT: s_endpgm %load = load <64 x i16>, ptr addrspace(3) %in %ext = sext <64 x i16> %load to <64 x i32> store <64 x i32> %ext, ptr addrspace(3) %out ret void } -; FUNC-LABEL: {{^}}local_zextload_i16_to_i64: -; GFX9-NOT: m0 -; SICIVI: s_mov_b32 m0 - -; GCN-DAG: ds_read_u16 v[[LO:[0-9]+]], -; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}} - -; 
GCN: ds_write_b64 v{{[0-9]+}}, v[[[LO]]:[[HI]]] - -; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z -; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]] -; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP -; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y -; EG-DAG: LDS_WRITE define amdgpu_kernel void @local_zextload_i16_to_i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 { +; SI-LABEL: local_zextload_i16_to_i64: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v0, s1 +; SI-NEXT: s_mov_b32 m0, -1 +; SI-NEXT: ds_read_u16 v0, v0 +; SI-NEXT: v_mov_b32_e32 v1, 0 +; SI-NEXT: v_mov_b32_e32 v2, s0 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: ds_write_b64 v2, v[0:1] +; SI-NEXT: s_endpgm +; +; VI-LABEL: local_zextload_i16_to_i64: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-NEXT: s_mov_b32 m0, -1 +; VI-NEXT: v_mov_b32_e32 v1, 0 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v0, s1 +; VI-NEXT: ds_read_u16 v0, v0 +; VI-NEXT: v_mov_b32_e32 v2, s0 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; VI-NEXT: ds_write_b64 v2, v[0:1] +; VI-NEXT: s_endpgm +; +; GFX9-LABEL: local_zextload_i16_to_i64: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-NEXT: ds_read_u16 v0, v0 +; GFX9-NEXT: v_mov_b32_e32 v2, s0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX9-NEXT: ds_write_b64 v2, v[0:1] +; GFX9-NEXT: s_endpgm +; +; EG-LABEL: local_zextload_i16_to_i64: +; EG: ; %bb.0: +; EG-NEXT: ALU 8, @35, KC0[CB0:0-32], KC1[] +; EG-NEXT: MOV * T0.W, KC0[2].Z, +; EG-NEXT: LDS_USHORT_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.X, OQAP, +; EG-NEXT: MOV * T0.W, KC0[2].Y, +; EG-NEXT: LDS_WRITE * T0.W, T0.X, +; EG-NEXT: MOV T0.W, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 
0(0.000000e+00), 4(5.605194e-45) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: RETURN %a = load i16, ptr addrspace(3) %in %ext = zext i16 %a to i64 store i64 %ext, ptr addrspace(3) %out ret void } -; FUNC-LABEL: {{^}}local_sextload_i16_to_i64: -; GFX9-NOT: m0 -; SICIVI: s_mov_b32 m0 - ; FIXME: Need to optimize this sequence to avoid an extra shift. ; t25: i32,ch = load t12, t10, undef:i32 ; t28: i64 = any_extend t25 ; t30: i64 = sign_extend_inreg t28, ValueType:ch:i16 -; SI: ds_read_i16 v[[LO:[0-9]+]], -; GFX89: ds_read_u16 v[[ULO:[0-9]+]] -; GFX89: v_bfe_i32 v[[LO:[0-9]+]], v[[ULO]], 0, 16 -; GCN-DAG: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]] - -; GCN: ds_write_b64 v{{[0-9]+}}, v[[[LO]]:[[HI]]] - -; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z -; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]] -; EG-DAG: MOV {{[* ]*}}[[TMP:T[0-9]+\.[XYZW]]], OQAP -; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y -; EG-DAG: BFE_INT {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], {{.*}}, 0.0, literal -; EG-DAG: LDS_WRITE -; EG-DAG: 16 -; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]] define amdgpu_kernel void @local_sextload_i16_to_i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 { +; SI-LABEL: local_sextload_i16_to_i64: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v0, s1 +; SI-NEXT: s_mov_b32 m0, -1 +; SI-NEXT: ds_read_i16 v0, v0 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; SI-NEXT: v_mov_b32_e32 v2, s0 +; SI-NEXT: ds_write_b64 v2, v[0:1] +; SI-NEXT: s_endpgm +; +; VI-LABEL: local_sextload_i16_to_i64: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-NEXT: s_mov_b32 m0, -1 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v0, s1 +; VI-NEXT: ds_read_u16 v0, v0 +; VI-NEXT: v_mov_b32_e32 v2, s0 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_bfe_i32 v0, v0, 0, 16 +; VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; VI-NEXT: ds_write_b64 v2, v[0:1] +; VI-NEXT: 
s_endpgm +; +; GFX9-LABEL: local_sextload_i16_to_i64: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-NEXT: ds_read_u16 v0, v0 +; GFX9-NEXT: v_mov_b32_e32 v2, s0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_bfe_i32 v0, v0, 0, 16 +; GFX9-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GFX9-NEXT: ds_write_b64 v2, v[0:1] +; GFX9-NEXT: s_endpgm +; +; EG-LABEL: local_sextload_i16_to_i64: +; EG: ; %bb.0: +; EG-NEXT: ALU 10, @36, KC0[CB0:0-32], KC1[] +; EG-NEXT: MOV * T0.W, KC0[2].Z, +; EG-NEXT: LDS_USHORT_READ_RET * OQAP, T0.W +; EG-NEXT: MOV * T0.X, OQAP, +; EG-NEXT: BFE_INT * T0.W, PV.X, 0.0, literal.x, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: ASHR T1.W, PV.W, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 31(4.344025e-44), 4(5.605194e-45) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: MOV * T1.W, KC0[2].Y, +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: RETURN %a = load i16, ptr addrspace(3) %in %ext = sext i16 %a to i64 store i64 %ext, ptr addrspace(3) %out ret void } -; FUNC-LABEL: {{^}}local_zextload_v1i16_to_v1i64: -; GFX9-NOT: m0 -; SICIVI: s_mov_b32 m0 - - -; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z -; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]] -; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP -; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y -; EG-DAG: LDS_WRITE define amdgpu_kernel void @local_zextload_v1i16_to_v1i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 { +; SI-LABEL: local_zextload_v1i16_to_v1i64: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v0, s1 +; SI-NEXT: s_mov_b32 m0, -1 +; SI-NEXT: ds_read_u16 v0, v0 +; SI-NEXT: v_mov_b32_e32 v1, 0 +; SI-NEXT: v_mov_b32_e32 v2, s0 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: ds_write_b64 v2, v[0:1] +; SI-NEXT: s_endpgm +; +; VI-LABEL: local_zextload_v1i16_to_v1i64: +; VI: ; 
%bb.0: +; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-NEXT: s_mov_b32 m0, -1 +; VI-NEXT: v_mov_b32_e32 v1, 0 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v0, s1 +; VI-NEXT: ds_read_u16 v0, v0 +; VI-NEXT: v_mov_b32_e32 v2, s0 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; VI-NEXT: ds_write_b64 v2, v[0:1] +; VI-NEXT: s_endpgm +; +; GFX9-LABEL: local_zextload_v1i16_to_v1i64: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-NEXT: ds_read_u16 v0, v0 +; GFX9-NEXT: v_mov_b32_e32 v2, s0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX9-NEXT: ds_write_b64 v2, v[0:1] +; GFX9-NEXT: s_endpgm +; +; EG-LABEL: local_zextload_v1i16_to_v1i64: +; EG: ; %bb.0: +; EG-NEXT: ALU 8, @37, KC0[CB0:0-32], KC1[] +; EG-NEXT: MOV * T0.W, KC0[2].Z, +; EG-NEXT: LDS_USHORT_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.X, OQAP, +; EG-NEXT: MOV * T0.W, KC0[2].Y, +; EG-NEXT: LDS_WRITE * T0.W, T0.X, +; EG-NEXT: MOV T0.W, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 0(0.000000e+00), 4(5.605194e-45) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: RETURN %load = load <1 x i16>, ptr addrspace(3) %in %ext = zext <1 x i16> %load to <1 x i64> store <1 x i64> %ext, ptr addrspace(3) %out ret void } -; FUNC-LABEL: {{^}}local_sextload_v1i16_to_v1i64: -; GFX9-NOT: m0 -; SICIVI: s_mov_b32 m0 - - -; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z -; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]] -; EG-DAG: MOV {{[* ]*}}[[TMP:T[0-9]+\.[XYZW]]], OQAP -; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y -; EG-DAG: BFE_INT {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], {{.*}}, 0.0, literal -; EG-DAG: LDS_WRITE -; EG-DAG: 16 -; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]] define amdgpu_kernel void @local_sextload_v1i16_to_v1i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 { +; SI-LABEL: 
local_sextload_v1i16_to_v1i64: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v0, s1 +; SI-NEXT: s_mov_b32 m0, -1 +; SI-NEXT: ds_read_i16 v0, v0 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; SI-NEXT: v_mov_b32_e32 v2, s0 +; SI-NEXT: ds_write_b64 v2, v[0:1] +; SI-NEXT: s_endpgm +; +; VI-LABEL: local_sextload_v1i16_to_v1i64: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-NEXT: s_mov_b32 m0, -1 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v0, s1 +; VI-NEXT: ds_read_u16 v0, v0 +; VI-NEXT: v_mov_b32_e32 v2, s0 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_bfe_i32 v0, v0, 0, 16 +; VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; VI-NEXT: ds_write_b64 v2, v[0:1] +; VI-NEXT: s_endpgm +; +; GFX9-LABEL: local_sextload_v1i16_to_v1i64: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-NEXT: ds_read_u16 v0, v0 +; GFX9-NEXT: v_mov_b32_e32 v2, s0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_bfe_i32 v0, v0, 0, 16 +; GFX9-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GFX9-NEXT: ds_write_b64 v2, v[0:1] +; GFX9-NEXT: s_endpgm +; +; EG-LABEL: local_sextload_v1i16_to_v1i64: +; EG: ; %bb.0: +; EG-NEXT: ALU 10, @38, KC0[CB0:0-32], KC1[] +; EG-NEXT: MOV * T0.W, KC0[2].Z, +; EG-NEXT: LDS_USHORT_READ_RET * OQAP, T0.W +; EG-NEXT: MOV * T0.X, OQAP, +; EG-NEXT: BFE_INT * T0.W, PV.X, 0.0, literal.x, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: ASHR T1.W, PV.W, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 31(4.344025e-44), 4(5.605194e-45) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: MOV * T1.W, KC0[2].Y, +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: RETURN %load = load <1 x i16>, ptr addrspace(3) %in %ext = sext <1 x i16> %load to <1 x i64> store <1 x i64> %ext, ptr addrspace(3) %out ret void } -; FUNC-LABEL: 
{{^}}local_zextload_v2i16_to_v2i64: -; GFX9-NOT: m0 -; SICIVI: s_mov_b32 m0 - - -; EG: LDS_READ_RET define amdgpu_kernel void @local_zextload_v2i16_to_v2i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 { +; SI-LABEL: local_zextload_v2i16_to_v2i64: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v0, s1 +; SI-NEXT: s_mov_b32 m0, -1 +; SI-NEXT: ds_read_b32 v2, v0 +; SI-NEXT: v_mov_b32_e32 v1, 0 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; SI-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; SI-NEXT: v_mov_b32_e32 v4, s0 +; SI-NEXT: v_mov_b32_e32 v3, v1 +; SI-NEXT: ds_write2_b64 v4, v[2:3], v[0:1] offset1:1 +; SI-NEXT: s_endpgm +; +; VI-NO-DS128-LABEL: local_zextload_v2i16_to_v2i64: +; VI-NO-DS128: ; %bb.0: +; VI-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-NO-DS128-NEXT: s_mov_b32 m0, -1 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v1, 0 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v3, v1 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-NO-DS128-NEXT: v_mov_b32_e32 v0, s1 +; VI-NO-DS128-NEXT: ds_read_b32 v0, v0 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v4, s0 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-NO-DS128-NEXT: v_and_b32_e32 v2, 0xffff, v0 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; VI-NO-DS128-NEXT: ds_write2_b64 v4, v[2:3], v[0:1] offset1:1 +; VI-NO-DS128-NEXT: s_endpgm +; +; GFX9-NO-DS128-LABEL: local_zextload_v2i16_to_v2i64: +; GFX9-NO-DS128: ; %bb.0: +; GFX9-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v3, v1 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-NO-DS128-NEXT: ds_read_b32 v0, v0 +; GFX9-NO-DS128-NEXT: s_mov_b32 s1, 0xffff +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v4, s0 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NO-DS128-NEXT: v_and_b32_sdwa v2, s1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 
+; GFX9-NO-DS128-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v4, v[0:1], v[2:3] offset1:1 +; GFX9-NO-DS128-NEXT: s_endpgm +; +; EG-LABEL: local_zextload_v2i16_to_v2i64: +; EG: ; %bb.0: +; EG-NEXT: ALU 17, @39, KC0[CB0:0-32], KC1[] +; EG-NEXT: MOV * T0.W, KC0[2].Z, +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV * T0.Y, OQAP, +; EG-NEXT: AND_INT T0.W, PV.Y, literal.x, +; EG-NEXT: MOV * T1.W, KC0[2].Y, +; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: LSHR T0.W, T0.Y, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 8(1.121039e-44) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: ADD_INT T0.W, KC0[2].Y, literal.x, +; EG-NEXT: MOV * T1.W, literal.y, +; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 12(1.681558e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: RETURN +; +; VI-DS128-LABEL: local_zextload_v2i16_to_v2i64: +; VI-DS128: ; %bb.0: +; VI-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-DS128-NEXT: s_mov_b32 m0, -1 +; VI-DS128-NEXT: v_mov_b32_e32 v1, 0 +; VI-DS128-NEXT: v_mov_b32_e32 v3, v1 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-DS128-NEXT: v_mov_b32_e32 v0, s1 +; VI-DS128-NEXT: ds_read_b32 v0, v0 +; VI-DS128-NEXT: v_mov_b32_e32 v4, s0 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; VI-DS128-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; VI-DS128-NEXT: ds_write_b128 v4, v[0:3] +; VI-DS128-NEXT: s_endpgm +; +; GFX9-DS128-LABEL: local_zextload_v2i16_to_v2i64: +; GFX9-DS128: ; %bb.0: +; GFX9-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-DS128-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-DS128-NEXT: v_mov_b32_e32 v3, v1 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DS128-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-DS128-NEXT: ds_read_b32 v2, v0 +; GFX9-DS128-NEXT: s_mov_b32 s1, 0xffff +; 
GFX9-DS128-NEXT: v_mov_b32_e32 v4, s0 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DS128-NEXT: v_and_b32_e32 v0, 0xffff, v2 +; GFX9-DS128-NEXT: v_and_b32_sdwa v2, s1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-DS128-NEXT: ds_write_b128 v4, v[0:3] +; GFX9-DS128-NEXT: s_endpgm %load = load <2 x i16>, ptr addrspace(3) %in %ext = zext <2 x i16> %load to <2 x i64> store <2 x i64> %ext, ptr addrspace(3) %out ret void } -; FUNC-LABEL: {{^}}local_sextload_v2i16_to_v2i64: -; GFX9-NOT: m0 -; SICIVI: s_mov_b32 m0 - - -; EG: LDS_READ_RET -; EG-DAG: BFE_INT -; EG-DAG: ASHR define amdgpu_kernel void @local_sextload_v2i16_to_v2i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 { +; SI-LABEL: local_sextload_v2i16_to_v2i64: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v0, s1 +; SI-NEXT: s_mov_b32 m0, -1 +; SI-NEXT: ds_read_b32 v0, v0 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; SI-NEXT: v_bfe_i32 v0, v0, 0, 16 +; SI-NEXT: v_bfe_i32 v2, v1, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v3, 31, v2 +; SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; SI-NEXT: v_mov_b32_e32 v4, s0 +; SI-NEXT: ds_write2_b64 v4, v[0:1], v[2:3] offset1:1 +; SI-NEXT: s_endpgm +; +; VI-NO-DS128-LABEL: local_sextload_v2i16_to_v2i64: +; VI-NO-DS128: ; %bb.0: +; VI-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-NO-DS128-NEXT: s_mov_b32 m0, -1 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-NO-DS128-NEXT: v_mov_b32_e32 v0, s1 +; VI-NO-DS128-NEXT: ds_read_b32 v0, v0 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v4, s0 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; VI-NO-DS128-NEXT: v_bfe_i32 v0, v0, 0, 16 +; VI-NO-DS128-NEXT: v_bfe_i32 v2, v1, 0, 16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v3, 31, v2 +; VI-NO-DS128-NEXT: ds_write2_b64 v4, v[0:1], v[2:3] offset1:1 +; VI-NO-DS128-NEXT: 
s_endpgm +; +; GFX9-NO-DS128-LABEL: local_sextload_v2i16_to_v2i64: +; GFX9-NO-DS128: ; %bb.0: +; GFX9-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-NO-DS128-NEXT: ds_read_b32 v0, v0 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v4, s0 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v0, v0, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v2, v1, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v3, 31, v2 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v4, v[0:1], v[2:3] offset1:1 +; GFX9-NO-DS128-NEXT: s_endpgm +; +; EG-LABEL: local_sextload_v2i16_to_v2i64: +; EG: ; %bb.0: +; EG-NEXT: ALU 18, @40, KC0[CB0:0-32], KC1[] +; EG-NEXT: MOV * T0.W, KC0[2].Z, +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV * T0.Y, OQAP, +; EG-NEXT: BFE_INT * T0.W, PV.Y, 0.0, literal.x, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: ASHR T1.W, PV.W, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 31(4.344025e-44), 4(5.605194e-45) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: MOV * T1.W, KC0[2].Y, +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: ASHR T0.W, T0.Y, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 8(1.121039e-44) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: ASHR T0.W, T0.Y, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 31(4.344025e-44), 12(1.681558e-44) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: RETURN +; +; VI-DS128-LABEL: local_sextload_v2i16_to_v2i64: +; VI-DS128: ; %bb.0: +; VI-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-DS128-NEXT: s_mov_b32 m0, -1 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-DS128-NEXT: v_mov_b32_e32 v0, s1 +; VI-DS128-NEXT: ds_read_b32 v1, v0 +; VI-DS128-NEXT: v_mov_b32_e32 v4, s0 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(0) +; 
VI-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; VI-DS128-NEXT: v_bfe_i32 v0, v1, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v2, v2, 0, 16 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v3, 31, v2 +; VI-DS128-NEXT: ds_write_b128 v4, v[0:3] +; VI-DS128-NEXT: s_endpgm +; +; GFX9-DS128-LABEL: local_sextload_v2i16_to_v2i64: +; GFX9-DS128: ; %bb.0: +; GFX9-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DS128-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-DS128-NEXT: ds_read_b32 v1, v0 +; GFX9-DS128-NEXT: v_mov_b32_e32 v4, s0 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX9-DS128-NEXT: v_bfe_i32 v0, v1, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v2, v2, 0, 16 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v3, 31, v2 +; GFX9-DS128-NEXT: ds_write_b128 v4, v[0:3] +; GFX9-DS128-NEXT: s_endpgm %load = load <2 x i16>, ptr addrspace(3) %in %ext = sext <2 x i16> %load to <2 x i64> store <2 x i64> %ext, ptr addrspace(3) %out ret void } -; FUNC-LABEL: {{^}}local_zextload_v4i16_to_v4i64: -; GFX9-NOT: m0 -; SICIVI: s_mov_b32 m0 - - -; EG: LDS_READ_RET -; EG: LDS_READ_RET define amdgpu_kernel void @local_zextload_v4i16_to_v4i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 { +; SI-LABEL: local_zextload_v4i16_to_v4i64: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v0, s1 +; SI-NEXT: s_mov_b32 m0, -1 +; SI-NEXT: ds_read_b64 v[0:1], v0 +; SI-NEXT: v_mov_b32_e32 v3, 0 +; SI-NEXT: v_mov_b32_e32 v5, v3 +; SI-NEXT: v_mov_b32_e32 v7, v3 +; SI-NEXT: v_mov_b32_e32 v9, v3 +; SI-NEXT: v_mov_b32_e32 v10, s0 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; SI-NEXT: v_lshrrev_b32_e32 v8, 16, v0 +; SI-NEXT: v_and_b32_e32 v6, 0xffff, v0 +; SI-NEXT: v_and_b32_e32 v4, 0xffff, v1 +; SI-NEXT: ds_write2_b64 v10, v[4:5], v[2:3] offset0:2 offset1:3 +; SI-NEXT: 
ds_write2_b64 v10, v[6:7], v[8:9] offset1:1 +; SI-NEXT: s_endpgm +; +; VI-NO-DS128-LABEL: local_zextload_v4i16_to_v4i64: +; VI-NO-DS128: ; %bb.0: +; VI-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-NO-DS128-NEXT: s_mov_b32 m0, -1 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v2, 0 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v4, v2 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v6, v2 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-NO-DS128-NEXT: v_mov_b32_e32 v0, s1 +; VI-NO-DS128-NEXT: ds_read_b64 v[0:1], v0 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v9, s0 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v8, v2 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-NO-DS128-NEXT: v_and_b32_e32 v7, 0xffff, v0 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v5, 16, v0 +; VI-NO-DS128-NEXT: v_and_b32_e32 v3, 0xffff, v1 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; VI-NO-DS128-NEXT: ds_write2_b64 v9, v[3:4], v[1:2] offset0:2 offset1:3 +; VI-NO-DS128-NEXT: ds_write2_b64 v9, v[7:8], v[5:6] offset1:1 +; VI-NO-DS128-NEXT: s_endpgm +; +; GFX9-NO-DS128-LABEL: local_zextload_v4i16_to_v4i64: +; GFX9-NO-DS128: ; %bb.0: +; GFX9-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-NO-DS128-NEXT: s_mov_b32 s2, 0xffff +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v4, v2 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v6, v2 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-NO-DS128-NEXT: ds_read_b64 v[0:1], v0 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v9, s0 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v8, v2 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NO-DS128-NEXT: v_and_b32_sdwa v7, s2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-NO-DS128-NEXT: v_and_b32_sdwa v5, s2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v3, 0xffff, v1 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v1, 0xffff, v0 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v9, v[3:4], v[5:6] offset0:2 offset1:3 +; 
GFX9-NO-DS128-NEXT: ds_write2_b64 v9, v[1:2], v[7:8] offset1:1 +; GFX9-NO-DS128-NEXT: s_endpgm +; +; EG-LABEL: local_zextload_v4i16_to_v4i64: +; EG: ; %bb.0: +; EG-NEXT: ALU 35, @41, KC0[CB0:0-32], KC1[] +; EG-NEXT: MOV * T0.W, KC0[2].Z, +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.Y, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.Z, OQAP, +; EG-NEXT: AND_INT T0.W, T0.Y, literal.x, +; EG-NEXT: MOV * T1.W, KC0[2].Y, +; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: LSHR T0.W, T0.Y, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 8(1.121039e-44) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: AND_INT T0.W, T0.Z, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: LSHR T0.W, T0.Z, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 24(3.363116e-44) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: ADD_INT T0.W, KC0[2].Y, literal.x, +; EG-NEXT: MOV * T1.W, literal.y, +; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 12(1.681558e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 20(2.802597e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 28(3.923636e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: RETURN +; +; VI-DS128-LABEL: local_zextload_v4i16_to_v4i64: +; VI-DS128: ; %bb.0: +; VI-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-DS128-NEXT: s_mov_b32 m0, -1 +; VI-DS128-NEXT: v_mov_b32_e32 v1, 0 +; VI-DS128-NEXT: v_mov_b32_e32 v3, v1 +; VI-DS128-NEXT: v_mov_b32_e32 v5, 
v1 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-DS128-NEXT: v_mov_b32_e32 v0, s1 +; VI-DS128-NEXT: ds_read_b64 v[7:8], v0 +; VI-DS128-NEXT: v_mov_b32_e32 v9, s0 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-DS128-NEXT: v_lshrrev_b32_e32 v6, 16, v7 +; VI-DS128-NEXT: v_and_b32_e32 v4, 0xffff, v7 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v8 +; VI-DS128-NEXT: v_and_b32_e32 v0, 0xffff, v8 +; VI-DS128-NEXT: v_mov_b32_e32 v7, v1 +; VI-DS128-NEXT: ds_write_b128 v9, v[0:3] offset:16 +; VI-DS128-NEXT: ds_write_b128 v9, v[4:7] +; VI-DS128-NEXT: s_endpgm +; +; GFX9-DS128-LABEL: local_zextload_v4i16_to_v4i64: +; GFX9-DS128: ; %bb.0: +; GFX9-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-DS128-NEXT: s_mov_b32 s2, 0xffff +; GFX9-DS128-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-DS128-NEXT: v_mov_b32_e32 v3, v1 +; GFX9-DS128-NEXT: v_mov_b32_e32 v5, v1 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DS128-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-DS128-NEXT: ds_read_b64 v[6:7], v0 +; GFX9-DS128-NEXT: v_mov_b32_e32 v8, s0 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DS128-NEXT: v_and_b32_e32 v4, 0xffff, v6 +; GFX9-DS128-NEXT: v_and_b32_sdwa v6, s2, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-DS128-NEXT: v_and_b32_e32 v0, 0xffff, v7 +; GFX9-DS128-NEXT: v_and_b32_sdwa v2, s2, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-DS128-NEXT: v_mov_b32_e32 v7, v1 +; GFX9-DS128-NEXT: ds_write_b128 v8, v[0:3] offset:16 +; GFX9-DS128-NEXT: ds_write_b128 v8, v[4:7] +; GFX9-DS128-NEXT: s_endpgm %load = load <4 x i16>, ptr addrspace(3) %in %ext = zext <4 x i16> %load to <4 x i64> store <4 x i64> %ext, ptr addrspace(3) %out ret void } -; FUNC-LABEL: {{^}}local_sextload_v4i16_to_v4i64: -; GFX9-NOT: m0 -; SICIVI: s_mov_b32 m0 - - -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG-DAG: BFE_INT -; EG-DAG: BFE_INT -; EG-DAG: ASHR -; EG-DAG: ASHR define amdgpu_kernel void @local_sextload_v4i16_to_v4i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 
{ +; SI-LABEL: local_sextload_v4i16_to_v4i64: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v0, s1 +; SI-NEXT: s_mov_b32 m0, -1 +; SI-NEXT: ds_read_b64 v[0:1], v0 +; SI-NEXT: v_mov_b32_e32 v9, s0 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v3, v1 +; SI-NEXT: v_lshrrev_b32_e32 v4, 16, v0 +; SI-NEXT: v_ashrrev_i32_e32 v2, 31, v1 +; SI-NEXT: v_ashrrev_i32_e32 v1, 16, v1 +; SI-NEXT: v_bfe_i32 v3, v3, 0, 16 +; SI-NEXT: v_bfe_i32 v5, v0, 0, 16 +; SI-NEXT: v_bfe_i32 v7, v4, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v4, 31, v3 +; SI-NEXT: v_ashrrev_i32_e32 v6, 31, v5 +; SI-NEXT: v_ashrrev_i32_e32 v8, 31, v7 +; SI-NEXT: ds_write2_b64 v9, v[3:4], v[1:2] offset0:2 offset1:3 +; SI-NEXT: ds_write2_b64 v9, v[5:6], v[7:8] offset1:1 +; SI-NEXT: s_endpgm +; +; VI-NO-DS128-LABEL: local_sextload_v4i16_to_v4i64: +; VI-NO-DS128: ; %bb.0: +; VI-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-NO-DS128-NEXT: s_mov_b32 m0, -1 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-NO-DS128-NEXT: v_mov_b32_e32 v0, s1 +; VI-NO-DS128-NEXT: ds_read_b64 v[0:1], v0 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v8, s0 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; VI-NO-DS128-NEXT: v_bfe_i32 v4, v3, 0, 16 +; VI-NO-DS128-NEXT: v_bfe_i32 v6, v1, 0, 16 +; VI-NO-DS128-NEXT: v_bfe_i32 v0, v0, 0, 16 +; VI-NO-DS128-NEXT: v_bfe_i32 v2, v2, 0, 16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v7, 31, v6 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v3, 31, v2 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; VI-NO-DS128-NEXT: ds_write2_b64 v8, v[6:7], v[4:5] offset0:2 offset1:3 +; VI-NO-DS128-NEXT: ds_write2_b64 v8, v[0:1], v[2:3] offset1:1 +; VI-NO-DS128-NEXT: s_endpgm +; +; GFX9-NO-DS128-LABEL: local_sextload_v4i16_to_v4i64: +; GFX9-NO-DS128: ; %bb.0: +; GFX9-NO-DS128-NEXT: s_load_dwordx2 s[0:1], 
s[4:5], 0x24 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-NO-DS128-NEXT: ds_read_b64 v[0:1], v0 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v8, s0 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v4, v3, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v6, v1, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v0, v0, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v2, v2, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v7, 31, v6 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v3, 31, v2 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v8, v[6:7], v[4:5] offset0:2 offset1:3 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v8, v[0:1], v[2:3] offset1:1 +; GFX9-NO-DS128-NEXT: s_endpgm +; +; EG-LABEL: local_sextload_v4i16_to_v4i64: +; EG: ; %bb.0: +; EG-NEXT: ALU 39, @42, KC0[CB0:0-32], KC1[] +; EG-NEXT: MOV * T0.W, KC0[2].Z, +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.Y, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.Z, OQAP, +; EG-NEXT: BFE_INT * T0.W, T0.Y, 0.0, literal.x, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: BFE_INT T1.Z, PV.Z, 0.0, literal.x, +; EG-NEXT: ASHR T1.W, PV.W, literal.y, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.z, +; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) +; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: ASHR T1.W, T1.Z, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 31(4.344025e-44), 20(2.802597e-44) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: MOV * T1.W, KC0[2].Y, +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: ASHR T0.W, T0.Y, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 
8(1.121039e-44) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: ASHR T0.W, T0.Y, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 31(4.344025e-44), 12(1.681558e-44) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.Z, +; EG-NEXT: ASHR T0.W, T0.Z, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 24(3.363116e-44) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: ASHR T0.W, T0.Z, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 31(4.344025e-44), 28(3.923636e-44) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: RETURN +; +; VI-DS128-LABEL: local_sextload_v4i16_to_v4i64: +; VI-DS128: ; %bb.0: +; VI-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-DS128-NEXT: s_mov_b32 m0, -1 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-DS128-NEXT: v_mov_b32_e32 v0, s1 +; VI-DS128-NEXT: ds_read_b64 v[0:1], v0 +; VI-DS128-NEXT: v_mov_b32_e32 v8, s0 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-DS128-NEXT: v_mov_b32_e32 v3, v1 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; VI-DS128-NEXT: v_bfe_i32 v4, v3, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v6, v1, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v0, v0, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v2, v2, 0, 16 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v7, 31, v6 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v3, 31, v2 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; VI-DS128-NEXT: ds_write_b128 v8, v[4:7] offset:16 +; VI-DS128-NEXT: ds_write_b128 v8, v[0:3] +; VI-DS128-NEXT: s_endpgm +; +; GFX9-DS128-LABEL: local_sextload_v4i16_to_v4i64: +; GFX9-DS128: ; %bb.0: +; GFX9-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DS128-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-DS128-NEXT: ds_read_b64 v[0:1], v0 +; GFX9-DS128-NEXT: v_mov_b32_e32 v8, s0 +; 
GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DS128-NEXT: v_mov_b32_e32 v3, v1 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX9-DS128-NEXT: v_bfe_i32 v4, v3, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v6, v1, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v0, v0, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v2, v2, 0, 16 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v7, 31, v6 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v3, 31, v2 +; GFX9-DS128-NEXT: ds_write_b128 v8, v[4:7] offset:16 +; GFX9-DS128-NEXT: ds_write_b128 v8, v[0:3] +; GFX9-DS128-NEXT: s_endpgm %load = load <4 x i16>, ptr addrspace(3) %in %ext = sext <4 x i16> %load to <4 x i64> store <4 x i64> %ext, ptr addrspace(3) %out ret void } -; FUNC-LABEL: {{^}}local_zextload_v8i16_to_v8i64: -; GFX9-NOT: m0 -; SICIVI: s_mov_b32 m0 - - -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET define amdgpu_kernel void @local_zextload_v8i16_to_v8i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 { +; SI-LABEL: local_zextload_v8i16_to_v8i64: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v0, s1 +; SI-NEXT: s_mov_b32 m0, -1 +; SI-NEXT: ds_read2_b64 v[0:3], v0 offset1:1 +; SI-NEXT: v_mov_b32_e32 v5, 0 +; SI-NEXT: v_mov_b32_e32 v7, v5 +; SI-NEXT: v_mov_b32_e32 v9, v5 +; SI-NEXT: v_mov_b32_e32 v11, v5 +; SI-NEXT: v_mov_b32_e32 v13, v5 +; SI-NEXT: v_mov_b32_e32 v15, v5 +; SI-NEXT: v_mov_b32_e32 v17, v5 +; SI-NEXT: v_mov_b32_e32 v19, v5 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_lshrrev_b32_e32 v4, 16, v1 +; SI-NEXT: v_lshrrev_b32_e32 v6, 16, v3 +; SI-NEXT: v_lshrrev_b32_e32 v16, 16, v2 +; SI-NEXT: v_lshrrev_b32_e32 v18, 16, v0 +; SI-NEXT: v_and_b32_e32 v14, 0xffff, v0 +; SI-NEXT: v_and_b32_e32 v12, 0xffff, v1 +; SI-NEXT: v_and_b32_e32 v10, 0xffff, v2 +; SI-NEXT: v_and_b32_e32 v8, 0xffff, v3 +; SI-NEXT: 
v_mov_b32_e32 v0, s0 +; SI-NEXT: ds_write2_b64 v0, v[8:9], v[6:7] offset0:6 offset1:7 +; SI-NEXT: ds_write2_b64 v0, v[12:13], v[4:5] offset0:2 offset1:3 +; SI-NEXT: ds_write2_b64 v0, v[10:11], v[16:17] offset0:4 offset1:5 +; SI-NEXT: ds_write2_b64 v0, v[14:15], v[18:19] offset1:1 +; SI-NEXT: s_endpgm +; +; VI-NO-DS128-LABEL: local_zextload_v8i16_to_v8i64: +; VI-NO-DS128: ; %bb.0: +; VI-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-NO-DS128-NEXT: s_mov_b32 m0, -1 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-NO-DS128-NEXT: v_mov_b32_e32 v0, s1 +; VI-NO-DS128-NEXT: ds_read2_b64 v[0:3], v0 offset1:1 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v11, s0 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-NO-DS128-NEXT: v_and_b32_e32 v6, 0xffff, v3 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v8, 16, v3 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v3, 0 +; VI-NO-DS128-NEXT: v_and_b32_e32 v9, 0xffff, v2 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v10, v3 +; VI-NO-DS128-NEXT: ds_write2_b64 v11, v[9:10], v[2:3] offset0:4 offset1:5 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v9, v3 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v7, v3 +; VI-NO-DS128-NEXT: v_and_b32_e32 v5, 0xffff, v1 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; VI-NO-DS128-NEXT: ds_write2_b64 v11, v[6:7], v[8:9] offset0:6 offset1:7 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v2, v3 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v6, v3 +; VI-NO-DS128-NEXT: v_and_b32_e32 v4, 0xffff, v0 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; VI-NO-DS128-NEXT: ds_write2_b64 v11, v[5:6], v[1:2] offset0:2 offset1:3 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v1, v3 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v5, v3 +; VI-NO-DS128-NEXT: ds_write2_b64 v11, v[4:5], v[0:1] offset1:1 +; VI-NO-DS128-NEXT: s_endpgm +; +; GFX9-NO-DS128-LABEL: local_zextload_v8i16_to_v8i64: +; GFX9-NO-DS128: ; %bb.0: +; GFX9-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v12, 0 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v8, v12 +; 
GFX9-NO-DS128-NEXT: v_mov_b32_e32 v10, v12 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-NO-DS128-NEXT: ds_read2_b64 v[0:3], v0 offset1:1 +; GFX9-NO-DS128-NEXT: s_mov_b32 s1, 0xffff +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v13, s0 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NO-DS128-NEXT: v_and_b32_sdwa v7, s1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v11, 0xffff, v3 +; GFX9-NO-DS128-NEXT: v_and_b32_sdwa v6, s1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v3, v12 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v13, v[11:12], v[7:8] offset0:6 offset1:7 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v7, v12 +; GFX9-NO-DS128-NEXT: v_and_b32_sdwa v5, s1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v9, 0xffff, v1 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v13, v[2:3], v[6:7] offset0:4 offset1:5 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v6, v12 +; GFX9-NO-DS128-NEXT: v_and_b32_sdwa v4, s1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v1, v12 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v13, v[9:10], v[5:6] offset0:2 offset1:3 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v5, v12 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v13, v[0:1], v[4:5] offset1:1 +; GFX9-NO-DS128-NEXT: s_endpgm +; +; EG-LABEL: local_zextload_v8i16_to_v8i64: +; EG: ; %bb.0: +; EG-NEXT: ALU 71, @43, KC0[CB0:0-32], KC1[] +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.Y, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.Z, OQAP, +; EG-NEXT: MOV * 
T0.W, KC0[2].Z, +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.W, OQAP, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Z, literal.x, +; EG-NEXT: 12(1.681558e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T1.W +; EG-NEXT: MOV T1.Y, OQAP, +; EG-NEXT: AND_INT T1.W, T0.W, literal.x, +; EG-NEXT: MOV * T2.W, KC0[2].Y, +; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: LSHR T0.W, T0.W, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 8(1.121039e-44) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: AND_INT T0.W, T0.Z, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: LSHR T0.W, T0.Z, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 24(3.363116e-44) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: AND_INT T0.W, T0.Y, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 32(4.484155e-44) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: LSHR T0.W, T0.Y, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 40(5.605194e-44) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: AND_INT T0.W, T1.Y, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 48(6.726233e-44) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: LSHR T0.W, T1.Y, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 56(7.847271e-44) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: ADD_INT T0.W, KC0[2].Y, literal.x, +; EG-NEXT: MOV * T1.W, literal.y, +; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 12(1.681558e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 20(2.802597e-44), 
0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 28(3.923636e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 36(5.044674e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 44(6.165713e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 52(7.286752e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 60(8.407791e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: RETURN +; +; VI-DS128-LABEL: local_zextload_v8i16_to_v8i64: +; VI-DS128: ; %bb.0: +; VI-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-DS128-NEXT: s_mov_b32 m0, -1 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-DS128-NEXT: v_mov_b32_e32 v0, s1 +; VI-DS128-NEXT: ds_read_b128 v[0:3], v0 +; VI-DS128-NEXT: v_mov_b32_e32 v14, s0 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-DS128-NEXT: v_lshrrev_b32_e32 v9, 16, v1 +; VI-DS128-NEXT: v_and_b32_e32 v7, 0xffff, v1 +; VI-DS128-NEXT: v_mov_b32_e32 v1, 0 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v12, 16, v2 +; VI-DS128-NEXT: v_and_b32_e32 v10, 0xffff, v2 +; VI-DS128-NEXT: v_mov_b32_e32 v11, v1 +; VI-DS128-NEXT: v_mov_b32_e32 v13, v1 +; VI-DS128-NEXT: ds_write_b128 v14, v[10:13] offset:32 +; VI-DS128-NEXT: v_mov_b32_e32 v8, v1 +; VI-DS128-NEXT: v_mov_b32_e32 v10, v1 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v6, 16, v0 +; VI-DS128-NEXT: v_and_b32_e32 v4, 0xffff, v0 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v3 +; VI-DS128-NEXT: v_and_b32_e32 v0, 0xffff, v3 +; VI-DS128-NEXT: v_mov_b32_e32 v3, v1 +; VI-DS128-NEXT: ds_write_b128 v14, v[7:10] offset:16 +; VI-DS128-NEXT: v_mov_b32_e32 v5, v1 +; VI-DS128-NEXT: v_mov_b32_e32 v7, v1 +; VI-DS128-NEXT: ds_write_b128 v14, v[0:3] offset:48 +; VI-DS128-NEXT: ds_write_b128 v14, v[4:7] +; 
VI-DS128-NEXT: s_endpgm +; +; GFX9-DS128-LABEL: local_zextload_v8i16_to_v8i64: +; GFX9-DS128: ; %bb.0: +; GFX9-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-DS128-NEXT: v_mov_b32_e32 v11, 0 +; GFX9-DS128-NEXT: v_mov_b32_e32 v13, v11 +; GFX9-DS128-NEXT: v_mov_b32_e32 v8, v11 +; GFX9-DS128-NEXT: v_mov_b32_e32 v5, v11 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DS128-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-DS128-NEXT: ds_read_b128 v[0:3], v0 +; GFX9-DS128-NEXT: s_mov_b32 s1, 0xffff +; GFX9-DS128-NEXT: v_mov_b32_e32 v14, s0 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DS128-NEXT: v_and_b32_e32 v10, 0xffff, v3 +; GFX9-DS128-NEXT: v_and_b32_sdwa v12, s1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-DS128-NEXT: v_and_b32_e32 v7, 0xffff, v1 +; GFX9-DS128-NEXT: v_and_b32_sdwa v9, s1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-DS128-NEXT: ds_write_b128 v14, v[10:13] offset:48 +; GFX9-DS128-NEXT: v_mov_b32_e32 v10, v11 +; GFX9-DS128-NEXT: v_and_b32_e32 v4, 0xffff, v0 +; GFX9-DS128-NEXT: v_and_b32_sdwa v6, s1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-DS128-NEXT: v_and_b32_e32 v0, 0xffff, v2 +; GFX9-DS128-NEXT: v_and_b32_sdwa v2, s1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-DS128-NEXT: v_mov_b32_e32 v1, v11 +; GFX9-DS128-NEXT: v_mov_b32_e32 v3, v11 +; GFX9-DS128-NEXT: ds_write_b128 v14, v[7:10] offset:16 +; GFX9-DS128-NEXT: v_mov_b32_e32 v7, v11 +; GFX9-DS128-NEXT: ds_write_b128 v14, v[0:3] offset:32 +; GFX9-DS128-NEXT: ds_write_b128 v14, v[4:7] +; GFX9-DS128-NEXT: s_endpgm %load = load <8 x i16>, ptr addrspace(3) %in %ext = zext <8 x i16> %load to <8 x i64> store <8 x i64> %ext, ptr addrspace(3) %out ret void } -; FUNC-LABEL: {{^}}local_sextload_v8i16_to_v8i64: -; GFX9-NOT: m0 -; SICIVI: s_mov_b32 m0 - - -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG-DAG: BFE_INT -; EG-DAG: BFE_INT -; 
EG-DAG: ASHR -; EG-DAG: ASHR -; EG-DAG: BFE_INT -; EG-DAG: BFE_INT -; EG-DAG: ASHR -; EG-DAG: ASHR define amdgpu_kernel void @local_sextload_v8i16_to_v8i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 { +; SI-LABEL: local_sextload_v8i16_to_v8i64: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v0, s1 +; SI-NEXT: s_mov_b32 m0, -1 +; SI-NEXT: ds_read2_b64 v[0:3], v0 offset1:1 +; SI-NEXT: v_mov_b32_e32 v16, s0 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v9, v3 +; SI-NEXT: v_lshrrev_b32_e32 v11, 16, v2 +; SI-NEXT: v_lshrrev_b32_e32 v12, 16, v0 +; SI-NEXT: v_ashrrev_i32_e32 v5, 31, v1 +; SI-NEXT: v_ashrrev_i32_e32 v4, 16, v1 +; SI-NEXT: v_ashrrev_i32_e32 v7, 31, v3 +; SI-NEXT: v_ashrrev_i32_e32 v6, 16, v3 +; SI-NEXT: v_bfe_i32 v0, v0, 0, 16 +; SI-NEXT: v_bfe_i32 v8, v1, 0, 16 +; SI-NEXT: v_bfe_i32 v2, v2, 0, 16 +; SI-NEXT: v_bfe_i32 v10, v9, 0, 16 +; SI-NEXT: v_bfe_i32 v12, v12, 0, 16 +; SI-NEXT: v_bfe_i32 v14, v11, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; SI-NEXT: v_ashrrev_i32_e32 v9, 31, v8 +; SI-NEXT: v_ashrrev_i32_e32 v3, 31, v2 +; SI-NEXT: v_ashrrev_i32_e32 v11, 31, v10 +; SI-NEXT: v_ashrrev_i32_e32 v13, 31, v12 +; SI-NEXT: v_ashrrev_i32_e32 v15, 31, v14 +; SI-NEXT: ds_write2_b64 v16, v[10:11], v[6:7] offset0:6 offset1:7 +; SI-NEXT: ds_write2_b64 v16, v[8:9], v[4:5] offset0:2 offset1:3 +; SI-NEXT: ds_write2_b64 v16, v[2:3], v[14:15] offset0:4 offset1:5 +; SI-NEXT: ds_write2_b64 v16, v[0:1], v[12:13] offset1:1 +; SI-NEXT: s_endpgm +; +; VI-NO-DS128-LABEL: local_sextload_v8i16_to_v8i64: +; VI-NO-DS128: ; %bb.0: +; VI-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-NO-DS128-NEXT: s_mov_b32 m0, -1 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-NO-DS128-NEXT: v_mov_b32_e32 v0, s1 +; VI-NO-DS128-NEXT: ds_read2_b64 v[0:3], v0 offset1:1 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v16, s0 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v7, 
16, v2 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v10, 16, v3 +; VI-NO-DS128-NEXT: v_bfe_i32 v14, v2, 0, 16 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v2, v3 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v4, 16, v0 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v5, 16, v1 +; VI-NO-DS128-NEXT: v_bfe_i32 v10, v10, 0, 16 +; VI-NO-DS128-NEXT: v_bfe_i32 v2, v2, 0, 16 +; VI-NO-DS128-NEXT: v_bfe_i32 v4, v4, 0, 16 +; VI-NO-DS128-NEXT: v_bfe_i32 v6, v5, 0, 16 +; VI-NO-DS128-NEXT: v_bfe_i32 v8, v7, 0, 16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v11, 31, v10 +; VI-NO-DS128-NEXT: v_bfe_i32 v12, v0, 0, 16 +; VI-NO-DS128-NEXT: v_bfe_i32 v0, v1, 0, 16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v3, 31, v2 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v7, 31, v6 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v9, 31, v8 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v13, 31, v12 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v15, 31, v14 +; VI-NO-DS128-NEXT: ds_write2_b64 v16, v[2:3], v[10:11] offset0:6 offset1:7 +; VI-NO-DS128-NEXT: ds_write2_b64 v16, v[14:15], v[8:9] offset0:4 offset1:5 +; VI-NO-DS128-NEXT: ds_write2_b64 v16, v[0:1], v[6:7] offset0:2 offset1:3 +; VI-NO-DS128-NEXT: ds_write2_b64 v16, v[12:13], v[4:5] offset1:1 +; VI-NO-DS128-NEXT: s_endpgm +; +; GFX9-NO-DS128-LABEL: local_sextload_v8i16_to_v8i64: +; GFX9-NO-DS128: ; %bb.0: +; GFX9-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-NO-DS128-NEXT: ds_read2_b64 v[0:3], v0 offset1:1 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v16, s0 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v2 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v9, 16, v3 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v14, v2, 0, 16 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v2, v3 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v4, 16, v0 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v5, 16, v1 +; GFX9-NO-DS128-NEXT: 
v_bfe_i32 v10, v9, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v2, v2, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v4, v4, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v6, v5, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v8, v7, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v11, 31, v10 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v12, v0, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v0, v1, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v3, 31, v2 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v7, 31, v6 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v9, 31, v8 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v13, 31, v12 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v15, 31, v14 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v16, v[2:3], v[10:11] offset0:6 offset1:7 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v16, v[14:15], v[8:9] offset0:4 offset1:5 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v16, v[0:1], v[6:7] offset0:2 offset1:3 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v16, v[12:13], v[4:5] offset1:1 +; GFX9-NO-DS128-NEXT: s_endpgm +; +; EG-LABEL: local_sextload_v8i16_to_v8i64: +; EG: ; %bb.0: +; EG-NEXT: ALU 80, @44, KC0[CB0:0-32], KC1[] +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.Y, OQAP, +; EG-NEXT: MOV * T0.W, KC0[2].Z, +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.Z, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV * T0.W, OQAP, +; EG-NEXT: BFE_INT T1.W, T0.Z, 0.0, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Z, literal.y, +; EG-NEXT: 16(2.242078e-44), 12(1.681558e-44) +; EG-NEXT: LDS_READ_RET * OQAP, T2.W +; EG-NEXT: MOV T1.Y, OQAP, +; EG-NEXT: BFE_INT T1.Z, T0.W, 0.0, literal.x, +; EG-NEXT: ASHR T2.W, T1.W, literal.y, BS:VEC_120/SCL_212 +; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.z, +; EG-NEXT: 16(2.242078e-44), 
31(4.344025e-44) +; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T3.W, T2.W, +; EG-NEXT: BFE_INT T2.Z, T0.Y, 0.0, literal.x, +; EG-NEXT: ASHR T2.W, T1.Z, literal.y, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.z, +; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) +; EG-NEXT: 20(2.802597e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T3.W, T2.W, +; EG-NEXT: BFE_INT T3.Z, T1.Y, 0.0, literal.x, +; EG-NEXT: ASHR T2.W, T2.Z, literal.y, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.z, +; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) +; EG-NEXT: 36(5.044674e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T3.W, T2.W, +; EG-NEXT: ASHR T2.W, T3.Z, literal.x, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.y, +; EG-NEXT: 31(4.344025e-44), 52(7.286752e-44) +; EG-NEXT: LDS_WRITE * T3.W, T2.W, +; EG-NEXT: MOV * T2.W, KC0[2].Y, +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: ASHR T1.W, T0.Z, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 8(1.121039e-44) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: ASHR T1.W, T0.Z, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 31(4.344025e-44), 12(1.681558e-44) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.x, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T1.W, T1.Z, +; EG-NEXT: ASHR T1.W, T0.W, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 24(3.363116e-44) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: ASHR T0.W, T0.W, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 31(4.344025e-44), 28(3.923636e-44) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T2.Z, +; EG-NEXT: ASHR T0.W, T0.Y, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 40(5.605194e-44) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; 
EG-NEXT: ASHR T0.W, T0.Y, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 31(4.344025e-44), 44(6.165713e-44) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T3.Z, +; EG-NEXT: ASHR T0.W, T1.Y, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 56(7.847271e-44) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: ASHR T0.W, T1.Y, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 31(4.344025e-44), 60(8.407791e-44) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: RETURN +; +; VI-DS128-LABEL: local_sextload_v8i16_to_v8i64: +; VI-DS128: ; %bb.0: +; VI-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-DS128-NEXT: s_mov_b32 m0, -1 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-DS128-NEXT: v_mov_b32_e32 v0, s1 +; VI-DS128-NEXT: ds_read_b128 v[0:3], v0 +; VI-DS128-NEXT: v_mov_b32_e32 v16, s0 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-DS128-NEXT: v_bfe_i32 v4, v0, 0, 16 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; VI-DS128-NEXT: v_bfe_i32 v6, v0, 0, 16 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; VI-DS128-NEXT: v_bfe_i32 v12, v2, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v14, v0, 0, 16 +; VI-DS128-NEXT: v_mov_b32_e32 v0, v3 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v3 +; VI-DS128-NEXT: v_bfe_i32 v8, v1, 0, 16 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; VI-DS128-NEXT: v_bfe_i32 v0, v0, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v2, v2, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v10, v1, 0, 16 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v3, 31, v2 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v9, 31, v8 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v7, 31, v6 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v11, 31, v10 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v13, 31, v12 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v15, 31, v14 +; VI-DS128-NEXT: 
ds_write_b128 v16, v[0:3] offset:48 +; VI-DS128-NEXT: ds_write_b128 v16, v[12:15] offset:32 +; VI-DS128-NEXT: ds_write_b128 v16, v[8:11] offset:16 +; VI-DS128-NEXT: ds_write_b128 v16, v[4:7] +; VI-DS128-NEXT: s_endpgm +; +; GFX9-DS128-LABEL: local_sextload_v8i16_to_v8i64: +; GFX9-DS128: ; %bb.0: +; GFX9-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DS128-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-DS128-NEXT: ds_read_b128 v[0:3], v0 +; GFX9-DS128-NEXT: v_mov_b32_e32 v16, s0 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DS128-NEXT: v_bfe_i32 v4, v0, 0, 16 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX9-DS128-NEXT: v_bfe_i32 v6, v0, 0, 16 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX9-DS128-NEXT: v_bfe_i32 v12, v2, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v14, v0, 0, 16 +; GFX9-DS128-NEXT: v_mov_b32_e32 v0, v3 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v3 +; GFX9-DS128-NEXT: v_bfe_i32 v8, v1, 0, 16 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX9-DS128-NEXT: v_bfe_i32 v0, v0, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v2, v2, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v10, v1, 0, 16 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v3, 31, v2 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v9, 31, v8 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v7, 31, v6 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v11, 31, v10 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v13, 31, v12 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v15, 31, v14 +; GFX9-DS128-NEXT: ds_write_b128 v16, v[0:3] offset:48 +; GFX9-DS128-NEXT: ds_write_b128 v16, v[12:15] offset:32 +; GFX9-DS128-NEXT: ds_write_b128 v16, v[8:11] offset:16 +; GFX9-DS128-NEXT: ds_write_b128 v16, v[4:7] +; GFX9-DS128-NEXT: s_endpgm %load = load <8 x i16>, ptr addrspace(3) %in %ext = sext <8 x i16> %load to <8 x i64> store <8 x i64> %ext, ptr addrspace(3) %out ret void } -; FUNC-LABEL: {{^}}local_zextload_v16i16_to_v16i64: -; 
GFX9-NOT: m0 -; SICIVI: s_mov_b32 m0 - - -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET define amdgpu_kernel void @local_zextload_v16i16_to_v16i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 { +; SI-LABEL: local_zextload_v16i16_to_v16i64: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v4, s1 +; SI-NEXT: s_mov_b32 m0, -1 +; SI-NEXT: ds_read2_b64 v[0:3], v4 offset0:2 offset1:3 +; SI-NEXT: v_mov_b32_e32 v9, 0 +; SI-NEXT: ds_read2_b64 v[4:7], v4 offset1:1 +; SI-NEXT: v_mov_b32_e32 v11, v9 +; SI-NEXT: v_mov_b32_e32 v13, v9 +; SI-NEXT: v_mov_b32_e32 v15, v9 +; SI-NEXT: v_mov_b32_e32 v17, v9 +; SI-NEXT: v_mov_b32_e32 v20, s0 +; SI-NEXT: s_waitcnt lgkmcnt(1) +; SI-NEXT: v_lshrrev_b32_e32 v14, 16, v1 +; SI-NEXT: v_and_b32_e32 v16, 0xffff, v1 +; SI-NEXT: ds_write2_b64 v20, v[16:17], v[14:15] offset0:10 offset1:11 +; SI-NEXT: v_mov_b32_e32 v16, v9 +; SI-NEXT: s_waitcnt lgkmcnt(1) +; SI-NEXT: v_lshrrev_b32_e32 v8, 16, v5 +; SI-NEXT: v_lshrrev_b32_e32 v10, 16, v7 +; SI-NEXT: v_lshrrev_b32_e32 v12, 16, v3 +; SI-NEXT: v_and_b32_e32 v14, 0xffff, v3 +; SI-NEXT: ds_write2_b64 v20, v[14:15], v[12:13] offset0:14 offset1:15 +; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v6 +; SI-NEXT: v_lshrrev_b32_e32 v3, 16, v4 +; SI-NEXT: v_and_b32_e32 v15, 0xffff, v7 +; SI-NEXT: ds_write2_b64 v20, v[15:16], v[10:11] offset0:6 offset1:7 +; SI-NEXT: v_and_b32_e32 v7, 0xffff, v4 +; SI-NEXT: v_and_b32_e32 v4, 0xffff, v5 +; SI-NEXT: v_and_b32_e32 v10, 0xffff, v6 +; SI-NEXT: v_lshrrev_b32_e32 v12, 16, v0 +; SI-NEXT: v_lshrrev_b32_e32 v14, 16, v2 +; SI-NEXT: v_and_b32_e32 v16, 0xffff, v2 +; SI-NEXT: v_and_b32_e32 v18, 0xffff, v0 +; SI-NEXT: v_mov_b32_e32 v5, v9 +; SI-NEXT: ds_write2_b64 v20, v[4:5], v[8:9] offset0:2 offset1:3 +; SI-NEXT: v_mov_b32_e32 v19, v9 +; SI-NEXT: v_mov_b32_e32 v8, v9 +; SI-NEXT: v_mov_b32_e32 v15, v9 
+; SI-NEXT: v_mov_b32_e32 v2, v9 +; SI-NEXT: v_mov_b32_e32 v4, v9 +; SI-NEXT: ds_write2_b64 v20, v[18:19], v[12:13] offset0:8 offset1:9 +; SI-NEXT: ds_write2_b64 v20, v[16:17], v[14:15] offset0:12 offset1:13 +; SI-NEXT: ds_write2_b64 v20, v[10:11], v[1:2] offset0:4 offset1:5 +; SI-NEXT: ds_write2_b64 v20, v[7:8], v[3:4] offset1:1 +; SI-NEXT: s_endpgm +; +; VI-NO-DS128-LABEL: local_zextload_v16i16_to_v16i64: +; VI-NO-DS128: ; %bb.0: +; VI-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-NO-DS128-NEXT: s_mov_b32 m0, -1 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v8, 0 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v10, v8 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v13, v8 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-NO-DS128-NEXT: v_mov_b32_e32 v4, s1 +; VI-NO-DS128-NEXT: ds_read2_b64 v[0:3], v4 offset1:1 +; VI-NO-DS128-NEXT: ds_read2_b64 v[4:7], v4 offset0:2 offset1:3 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v14, s0 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(1) +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v11, 16, v0 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-NO-DS128-NEXT: v_and_b32_e32 v12, 0xffff, v5 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v9, 16, v5 +; VI-NO-DS128-NEXT: ds_write2_b64 v14, v[12:13], v[9:10] offset0:10 offset1:11 +; VI-NO-DS128-NEXT: v_and_b32_e32 v5, 0xffff, v6 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v9, 16, v6 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v6, v8 +; VI-NO-DS128-NEXT: ds_write2_b64 v14, v[5:6], v[9:10] offset0:12 offset1:13 +; VI-NO-DS128-NEXT: v_and_b32_e32 v5, 0xffff, v7 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v9, 16, v7 +; VI-NO-DS128-NEXT: ds_write2_b64 v14, v[5:6], v[9:10] offset0:14 offset1:15 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v9, 16, v3 +; VI-NO-DS128-NEXT: v_and_b32_e32 v3, 0xffff, v3 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v4 +; VI-NO-DS128-NEXT: v_and_b32_e32 v12, 0xffff, v4 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v4, v8 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; VI-NO-DS128-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; VI-NO-DS128-NEXT: ds_write2_b64 
v14, v[12:13], v[7:8] offset0:8 offset1:9 +; VI-NO-DS128-NEXT: ds_write2_b64 v14, v[3:4], v[9:10] offset0:6 offset1:7 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v3, v8 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v7, v8 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v5, 16, v1 +; VI-NO-DS128-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; VI-NO-DS128-NEXT: ds_write2_b64 v14, v[2:3], v[6:7] offset0:4 offset1:5 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v2, v8 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v6, v8 +; VI-NO-DS128-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; VI-NO-DS128-NEXT: ds_write2_b64 v14, v[1:2], v[5:6] offset0:2 offset1:3 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v1, v8 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v12, v8 +; VI-NO-DS128-NEXT: ds_write2_b64 v14, v[0:1], v[11:12] offset1:1 +; VI-NO-DS128-NEXT: s_endpgm +; +; GFX9-NO-DS128-LABEL: local_zextload_v16i16_to_v16i64: +; GFX9-NO-DS128: ; %bb.0: +; GFX9-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v8, 0 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v10, v8 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v12, v8 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v14, v8 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v4, s1 +; GFX9-NO-DS128-NEXT: ds_read2_b64 v[0:3], v4 offset1:1 +; GFX9-NO-DS128-NEXT: ds_read2_b64 v[4:7], v4 offset0:2 offset1:3 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v15, s0 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(1) +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v13, 16, v0 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v11, 0xffff, v5 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v9, 16, v5 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v15, v[11:12], v[9:10] offset0:10 offset1:11 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v5, 0xffff, v6 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v9, 16, v6 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v6, v8 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v15, v[5:6], v[9:10] offset0:12 offset1:13 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v5, 0xffff, v7 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 
v9, 16, v7 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v15, v[5:6], v[9:10] offset0:14 offset1:15 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v4 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v10, 0xffff, v4 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v11, v8 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v9, 16, v3 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v3, 0xffff, v3 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v15, v[10:11], v[7:8] offset0:8 offset1:9 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v4, v8 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v10, v8 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v15, v[3:4], v[9:10] offset0:6 offset1:7 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v3, v8 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v7, v8 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v5, 16, v1 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v15, v[2:3], v[6:7] offset0:4 offset1:5 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v2, v8 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v6, v8 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v15, v[1:2], v[5:6] offset0:2 offset1:3 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v1, v8 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v15, v[0:1], v[13:14] offset1:1 +; GFX9-NO-DS128-NEXT: s_endpgm +; +; EG-LABEL: local_zextload_v16i16_to_v16i64: +; EG: ; %bb.0: +; EG-NEXT: ALU 100, @45, KC0[CB0:0-32], KC1[] +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 28(3.923636e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.Y, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.Z, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 20(2.802597e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.W, OQAP, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Z, literal.x, +; EG-NEXT: 16(2.242078e-44), 
0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T1.W +; EG-NEXT: MOV T1.Y, OQAP, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Z, literal.x, +; EG-NEXT: 12(1.681558e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T1.W +; EG-NEXT: MOV T1.Z, OQAP, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Z, literal.x, +; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T1.W +; EG-NEXT: MOV T1.W, OQAP, +; EG-NEXT: MOV * T2.W, KC0[2].Z, +; EG-NEXT: LDS_READ_RET * OQAP, T2.W +; EG-NEXT: MOV T2.Y, OQAP, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Z, literal.x, +; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T2.W +; EG-NEXT: MOV T2.Z, OQAP, +; EG-NEXT: LSHR T2.W, T2.Y, literal.x, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 8(1.121039e-44) +; EG-NEXT: LDS_WRITE * T3.W, T2.W, +; EG-NEXT: AND_INT T2.W, T2.Y, literal.x, +; EG-NEXT: MOV * T3.W, KC0[2].Y, +; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T3.W, T2.W, +; EG-NEXT: LSHR T2.W, T2.Z, literal.x, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 24(3.363116e-44) +; EG-NEXT: LDS_WRITE * T3.W, T2.W, +; EG-NEXT: AND_INT T2.W, T2.Z, literal.x, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) +; EG-NEXT: LDS_WRITE * T3.W, T2.W, +; EG-NEXT: LSHR T2.W, T1.W, literal.x, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 40(5.605194e-44) +; EG-NEXT: LDS_WRITE * T3.W, T2.W, +; EG-NEXT: AND_INT T1.W, T1.W, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 32(4.484155e-44) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: LSHR T1.W, T1.Z, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 56(7.847271e-44) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: AND_INT T1.W, T1.Z, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 
48(6.726233e-44) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: LSHR T1.W, T1.Y, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 72(1.008935e-43) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: AND_INT T1.W, T1.Y, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 64(8.968310e-44) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: LSHR T1.W, T0.W, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 88(1.233143e-43) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: AND_INT T0.W, T0.W, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 80(1.121039e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: LSHR T0.W, T0.Z, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 104(1.457350e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: AND_INT T0.W, T0.Z, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 96(1.345247e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: LSHR T0.W, T0.Y, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 120(1.681558e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: AND_INT T0.W, T0.Y, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 112(1.569454e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: ADD_INT T0.W, KC0[2].Y, literal.x, +; EG-NEXT: MOV * T1.W, literal.y, +; EG-NEXT: 12(1.681558e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) +; EG-NEXT: ALU 42, @46, KC0[CB0:0-32], KC1[] +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 28(3.923636e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 
20(2.802597e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 44(6.165713e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 36(5.044674e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 60(8.407791e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 52(7.286752e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 76(1.064987e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 68(9.528830e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 92(1.289195e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 84(1.177091e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 108(1.513402e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 100(1.401298e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 124(1.737610e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 116(1.625506e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: RETURN +; +; VI-DS128-LABEL: local_zextload_v16i16_to_v16i64: +; VI-DS128: ; %bb.0: +; VI-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-DS128-NEXT: s_mov_b32 m0, -1 +; VI-DS128-NEXT: v_mov_b32_e32 v26, 0 +; VI-DS128-NEXT: v_mov_b32_e32 v22, v26 +; VI-DS128-NEXT: v_mov_b32_e32 v24, v26 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(0) +; 
VI-DS128-NEXT: v_mov_b32_e32 v5, s1 +; VI-DS128-NEXT: ds_read_b128 v[0:3], v5 +; VI-DS128-NEXT: ds_read_b128 v[13:16], v5 offset:16 +; VI-DS128-NEXT: v_mov_b32_e32 v11, v26 +; VI-DS128-NEXT: v_mov_b32_e32 v19, v26 +; VI-DS128-NEXT: v_mov_b32_e32 v8, v26 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(1) +; VI-DS128-NEXT: v_lshrrev_b32_e32 v12, 16, v2 +; VI-DS128-NEXT: v_and_b32_e32 v10, 0xffff, v2 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-DS128-NEXT: v_lshrrev_b32_e32 v23, 16, v13 +; VI-DS128-NEXT: v_and_b32_e32 v21, 0xffff, v13 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v27, 16, v14 +; VI-DS128-NEXT: v_and_b32_e32 v25, 0xffff, v14 +; VI-DS128-NEXT: v_mov_b32_e32 v14, s0 +; VI-DS128-NEXT: v_mov_b32_e32 v13, v26 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v9, 16, v1 +; VI-DS128-NEXT: v_and_b32_e32 v7, 0xffff, v1 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v20, 16, v16 +; VI-DS128-NEXT: v_and_b32_e32 v18, 0xffff, v16 +; VI-DS128-NEXT: ds_write_b128 v14, v[21:24] offset:64 +; VI-DS128-NEXT: v_mov_b32_e32 v21, v26 +; VI-DS128-NEXT: ds_write_b128 v14, v[10:13] offset:32 +; VI-DS128-NEXT: v_mov_b32_e32 v10, v26 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v6, 16, v0 +; VI-DS128-NEXT: v_and_b32_e32 v4, 0xffff, v0 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v3 +; VI-DS128-NEXT: v_and_b32_e32 v0, 0xffff, v3 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v17, 16, v15 +; VI-DS128-NEXT: v_and_b32_e32 v15, 0xffff, v15 +; VI-DS128-NEXT: ds_write_b128 v14, v[18:21] offset:112 +; VI-DS128-NEXT: v_mov_b32_e32 v16, v26 +; VI-DS128-NEXT: v_mov_b32_e32 v18, v26 +; VI-DS128-NEXT: v_mov_b32_e32 v1, v26 +; VI-DS128-NEXT: v_mov_b32_e32 v3, v26 +; VI-DS128-NEXT: v_mov_b32_e32 v28, v26 +; VI-DS128-NEXT: ds_write_b128 v14, v[7:10] offset:16 +; VI-DS128-NEXT: v_mov_b32_e32 v5, v26 +; VI-DS128-NEXT: v_mov_b32_e32 v7, v26 +; VI-DS128-NEXT: ds_write_b128 v14, v[15:18] offset:96 +; VI-DS128-NEXT: ds_write_b128 v14, v[0:3] offset:48 +; VI-DS128-NEXT: ds_write_b128 v14, v[25:28] offset:80 +; VI-DS128-NEXT: ds_write_b128 v14, v[4:7] +; VI-DS128-NEXT: 
s_endpgm +; +; GFX9-DS128-LABEL: local_zextload_v16i16_to_v16i64: +; GFX9-DS128: ; %bb.0: +; GFX9-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-DS128-NEXT: v_mov_b32_e32 v25, 0 +; GFX9-DS128-NEXT: v_mov_b32_e32 v21, v25 +; GFX9-DS128-NEXT: v_mov_b32_e32 v23, v25 +; GFX9-DS128-NEXT: v_mov_b32_e32 v18, v25 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DS128-NEXT: v_mov_b32_e32 v4, s1 +; GFX9-DS128-NEXT: ds_read_b128 v[0:3], v4 +; GFX9-DS128-NEXT: ds_read_b128 v[4:7], v4 offset:16 +; GFX9-DS128-NEXT: v_mov_b32_e32 v28, s0 +; GFX9-DS128-NEXT: v_mov_b32_e32 v15, v25 +; GFX9-DS128-NEXT: v_mov_b32_e32 v12, v25 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(1) +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v16, 16, v2 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v22, 16, v7 +; GFX9-DS128-NEXT: v_and_b32_e32 v20, 0xffff, v7 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v19, 16, v6 +; GFX9-DS128-NEXT: v_and_b32_e32 v17, 0xffff, v6 +; GFX9-DS128-NEXT: ds_write_b128 v28, v[20:23] offset:112 +; GFX9-DS128-NEXT: v_mov_b32_e32 v20, v25 +; GFX9-DS128-NEXT: v_and_b32_e32 v14, 0xffff, v2 +; GFX9-DS128-NEXT: ds_write_b128 v28, v[17:20] offset:96 +; GFX9-DS128-NEXT: v_mov_b32_e32 v17, v25 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v13, 16, v1 +; GFX9-DS128-NEXT: v_and_b32_e32 v11, 0xffff, v1 +; GFX9-DS128-NEXT: ds_write_b128 v28, v[14:17] offset:32 +; GFX9-DS128-NEXT: v_mov_b32_e32 v14, v25 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v10, 16, v0 +; GFX9-DS128-NEXT: v_and_b32_e32 v8, 0xffff, v0 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v3 +; GFX9-DS128-NEXT: v_and_b32_e32 v0, 0xffff, v3 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v6, 16, v4 +; GFX9-DS128-NEXT: v_and_b32_e32 v4, 0xffff, v4 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v26, 16, v5 +; GFX9-DS128-NEXT: v_and_b32_e32 v24, 0xffff, v5 +; GFX9-DS128-NEXT: v_mov_b32_e32 v5, v25 +; GFX9-DS128-NEXT: v_mov_b32_e32 v7, v25 +; GFX9-DS128-NEXT: v_mov_b32_e32 v1, v25 +; GFX9-DS128-NEXT: v_mov_b32_e32 v3, v25 +; GFX9-DS128-NEXT: 
v_mov_b32_e32 v27, v25 +; GFX9-DS128-NEXT: ds_write_b128 v28, v[11:14] offset:16 +; GFX9-DS128-NEXT: v_mov_b32_e32 v9, v25 +; GFX9-DS128-NEXT: v_mov_b32_e32 v11, v25 +; GFX9-DS128-NEXT: ds_write_b128 v28, v[4:7] offset:64 +; GFX9-DS128-NEXT: ds_write_b128 v28, v[0:3] offset:48 +; GFX9-DS128-NEXT: ds_write_b128 v28, v[24:27] offset:80 +; GFX9-DS128-NEXT: ds_write_b128 v28, v[8:11] +; GFX9-DS128-NEXT: s_endpgm %load = load <16 x i16>, ptr addrspace(3) %in %ext = zext <16 x i16> %load to <16 x i64> store <16 x i64> %ext, ptr addrspace(3) %out ret void } -; FUNC-LABEL: {{^}}local_sextload_v16i16_to_v16i64: -; GFX9-NOT: m0 -; SICIVI: s_mov_b32 m0 - - -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG-DAG: BFE_INT -; EG-DAG: BFE_INT -; EG-DAG: ASHR -; EG-DAG: ASHR -; EG-DAG: BFE_INT -; EG-DAG: BFE_INT -; EG-DAG: ASHR -; EG-DAG: ASHR -; EG-DAG: BFE_INT -; EG-DAG: BFE_INT -; EG-DAG: ASHR -; EG-DAG: ASHR -; EG-DAG: BFE_INT -; EG-DAG: BFE_INT -; EG-DAG: ASHR -; EG-DAG: ASHR define amdgpu_kernel void @local_sextload_v16i16_to_v16i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 { +; SI-LABEL: local_sextload_v16i16_to_v16i64: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v4, s1 +; SI-NEXT: s_mov_b32 m0, -1 +; SI-NEXT: ds_read2_b64 v[0:3], v4 offset0:2 offset1:3 +; SI-NEXT: ds_read2_b64 v[4:7], v4 offset1:1 +; SI-NEXT: v_mov_b32_e32 v18, s0 +; SI-NEXT: s_waitcnt lgkmcnt(1) +; SI-NEXT: v_mov_b32_e32 v12, v3 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v14, v7 +; SI-NEXT: v_lshrrev_b32_e32 v15, 16, v2 +; SI-NEXT: v_lshrrev_b32_e32 v16, 16, v0 +; SI-NEXT: v_lshrrev_b32_e32 v17, 16, v6 +; SI-NEXT: v_lshrrev_b32_e32 v19, 16, v4 +; SI-NEXT: v_ashrrev_i32_e32 v9, 31, v5 +; SI-NEXT: v_ashrrev_i32_e32 v8, 16, v5 +; SI-NEXT: v_ashrrev_i32_e32 v11, 31, v3 +; SI-NEXT: v_ashrrev_i32_e32 
v10, 16, v3 +; SI-NEXT: v_bfe_i32 v12, v12, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v13, 31, v12 +; SI-NEXT: ds_write2_b64 v18, v[12:13], v[10:11] offset0:14 offset1:15 +; SI-NEXT: v_ashrrev_i32_e32 v11, 31, v1 +; SI-NEXT: v_ashrrev_i32_e32 v10, 16, v1 +; SI-NEXT: v_bfe_i32 v12, v1, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v13, 31, v12 +; SI-NEXT: ds_write2_b64 v18, v[12:13], v[10:11] offset0:10 offset1:11 +; SI-NEXT: v_ashrrev_i32_e32 v11, 31, v7 +; SI-NEXT: v_ashrrev_i32_e32 v10, 16, v7 +; SI-NEXT: v_bfe_i32 v12, v14, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v13, 31, v12 +; SI-NEXT: ds_write2_b64 v18, v[12:13], v[10:11] offset0:6 offset1:7 +; SI-NEXT: v_bfe_i32 v1, v4, 0, 16 +; SI-NEXT: v_bfe_i32 v3, v5, 0, 16 +; SI-NEXT: v_bfe_i32 v5, v6, 0, 16 +; SI-NEXT: v_bfe_i32 v7, v0, 0, 16 +; SI-NEXT: v_bfe_i32 v10, v2, 0, 16 +; SI-NEXT: v_bfe_i32 v12, v19, 0, 16 +; SI-NEXT: v_bfe_i32 v14, v17, 0, 16 +; SI-NEXT: v_bfe_i32 v16, v16, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v4, 31, v3 +; SI-NEXT: ds_write2_b64 v18, v[3:4], v[8:9] offset0:2 offset1:3 +; SI-NEXT: v_bfe_i32 v3, v15, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v2, 31, v1 +; SI-NEXT: v_ashrrev_i32_e32 v6, 31, v5 +; SI-NEXT: v_ashrrev_i32_e32 v8, 31, v7 +; SI-NEXT: v_ashrrev_i32_e32 v11, 31, v10 +; SI-NEXT: v_ashrrev_i32_e32 v13, 31, v12 +; SI-NEXT: v_ashrrev_i32_e32 v15, 31, v14 +; SI-NEXT: v_ashrrev_i32_e32 v17, 31, v16 +; SI-NEXT: v_ashrrev_i32_e32 v4, 31, v3 +; SI-NEXT: ds_write2_b64 v18, v[10:11], v[3:4] offset0:12 offset1:13 +; SI-NEXT: ds_write2_b64 v18, v[7:8], v[16:17] offset0:8 offset1:9 +; SI-NEXT: ds_write2_b64 v18, v[5:6], v[14:15] offset0:4 offset1:5 +; SI-NEXT: ds_write2_b64 v18, v[1:2], v[12:13] offset1:1 +; SI-NEXT: s_endpgm +; +; VI-NO-DS128-LABEL: local_sextload_v16i16_to_v16i64: +; VI-NO-DS128: ; %bb.0: +; VI-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-NO-DS128-NEXT: s_mov_b32 m0, -1 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-NO-DS128-NEXT: v_mov_b32_e32 v4, s1 +; VI-NO-DS128-NEXT: ds_read2_b64 v[0:3], 
v4 offset1:1 +; VI-NO-DS128-NEXT: ds_read2_b64 v[4:7], v4 offset0:2 offset1:3 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v19, s0 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(1) +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v18, 16, v3 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v14, 16, v4 +; VI-NO-DS128-NEXT: v_bfe_i32 v14, v14, 0, 16 +; VI-NO-DS128-NEXT: v_bfe_i32 v16, v4, 0, 16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v15, 31, v14 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v4, 16, v5 +; VI-NO-DS128-NEXT: ds_write2_b64 v19, v[16:17], v[14:15] offset0:8 offset1:9 +; VI-NO-DS128-NEXT: v_bfe_i32 v14, v4, 0, 16 +; VI-NO-DS128-NEXT: v_bfe_i32 v4, v5, 0, 16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v15, 31, v14 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4 +; VI-NO-DS128-NEXT: ds_write2_b64 v19, v[4:5], v[14:15] offset0:10 offset1:11 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v4, 16, v6 +; VI-NO-DS128-NEXT: v_bfe_i32 v4, v4, 0, 16 +; VI-NO-DS128-NEXT: v_bfe_i32 v14, v6, 0, 16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v15, 31, v14 +; VI-NO-DS128-NEXT: ds_write2_b64 v19, v[14:15], v[4:5] offset0:12 offset1:13 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v14, 16, v7 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v16, v7 +; VI-NO-DS128-NEXT: v_bfe_i32 v14, v14, 0, 16 +; VI-NO-DS128-NEXT: v_bfe_i32 v16, v16, 0, 16 +; VI-NO-DS128-NEXT: v_bfe_i32 v4, v18, 0, 16 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v18, v3 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v15, 31, v14 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v8, 16, v0 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v9, 16, v1 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v11, 16, v2 +; VI-NO-DS128-NEXT: ds_write2_b64 v19, v[16:17], v[14:15] offset0:14 offset1:15 +; VI-NO-DS128-NEXT: v_bfe_i32 v14, v18, 0, 16 +; VI-NO-DS128-NEXT: v_bfe_i32 v8, v8, 0, 16 +; VI-NO-DS128-NEXT: v_bfe_i32 v10, v9, 0, 16 +; VI-NO-DS128-NEXT: 
v_bfe_i32 v12, v11, 0, 16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4 +; VI-NO-DS128-NEXT: v_bfe_i32 v0, v0, 0, 16 +; VI-NO-DS128-NEXT: v_bfe_i32 v6, v1, 0, 16 +; VI-NO-DS128-NEXT: v_bfe_i32 v2, v2, 0, 16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v15, 31, v14 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v9, 31, v8 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v11, 31, v10 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v13, 31, v12 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v7, 31, v6 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v3, 31, v2 +; VI-NO-DS128-NEXT: ds_write2_b64 v19, v[14:15], v[4:5] offset0:6 offset1:7 +; VI-NO-DS128-NEXT: ds_write2_b64 v19, v[2:3], v[12:13] offset0:4 offset1:5 +; VI-NO-DS128-NEXT: ds_write2_b64 v19, v[6:7], v[10:11] offset0:2 offset1:3 +; VI-NO-DS128-NEXT: ds_write2_b64 v19, v[0:1], v[8:9] offset1:1 +; VI-NO-DS128-NEXT: s_endpgm +; +; GFX9-NO-DS128-LABEL: local_sextload_v16i16_to_v16i64: +; GFX9-NO-DS128: ; %bb.0: +; GFX9-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v4, s1 +; GFX9-NO-DS128-NEXT: ds_read2_b64 v[0:3], v4 offset1:1 +; GFX9-NO-DS128-NEXT: ds_read2_b64 v[4:7], v4 offset0:2 offset1:3 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v19, s0 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(1) +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v18, 16, v3 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v14, 16, v4 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v14, v14, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v16, v4, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v15, 31, v14 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v4, 16, v5 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v19, v[16:17], v[14:15] offset0:8 offset1:9 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v14, v4, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v4, v5, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v15, 31, v14 +; GFX9-NO-DS128-NEXT: 
v_ashrrev_i32_e32 v5, 31, v4 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v19, v[4:5], v[14:15] offset0:10 offset1:11 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v4, 16, v6 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v4, v4, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v14, v6, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v15, 31, v14 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v19, v[14:15], v[4:5] offset0:12 offset1:13 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v14, 16, v7 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v16, v7 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v14, v14, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v16, v16, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v4, v18, 0, 16 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v18, v3 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v15, 31, v14 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v8, 16, v0 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v9, 16, v1 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v11, 16, v2 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v19, v[16:17], v[14:15] offset0:14 offset1:15 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v14, v18, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v8, v8, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v10, v9, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v12, v11, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v0, v0, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v6, v1, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v2, v2, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v15, 31, v14 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v9, 31, v8 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v11, 31, v10 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v13, 31, v12 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v7, 31, v6 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v3, 31, v2 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v19, v[14:15], v[4:5] offset0:6 offset1:7 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v19, v[2:3], v[12:13] offset0:4 offset1:5 +; 
GFX9-NO-DS128-NEXT: ds_write2_b64 v19, v[6:7], v[10:11] offset0:2 offset1:3 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v19, v[0:1], v[8:9] offset1:1 +; GFX9-NO-DS128-NEXT: s_endpgm +; +; EG-LABEL: local_sextload_v16i16_to_v16i64: +; EG: ; %bb.0: +; EG-NEXT: ALU 101, @47, KC0[CB0:0-32], KC1[] +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.Y, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 12(1.681558e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.Z, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.W, OQAP, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Z, literal.x, +; EG-NEXT: 20(2.802597e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T1.W +; EG-NEXT: MOV T1.Y, OQAP, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Z, literal.x, +; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T1.W +; EG-NEXT: MOV T1.Z, OQAP, +; EG-NEXT: MOV * T1.W, KC0[2].Z, +; EG-NEXT: LDS_READ_RET * OQAP, T1.W +; EG-NEXT: MOV T1.W, OQAP, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Z, literal.x, +; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T2.W +; EG-NEXT: MOV T2.Y, OQAP, +; EG-NEXT: BFE_INT T2.W, T1.W, 0.0, literal.x, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Z, literal.y, +; EG-NEXT: 16(2.242078e-44), 28(3.923636e-44) +; EG-NEXT: LDS_READ_RET * OQAP, T3.W +; EG-NEXT: MOV * T2.Z, OQAP, +; EG-NEXT: BFE_INT T3.Z, T2.Y, 0.0, literal.x, +; EG-NEXT: ASHR T3.W, T2.W, literal.y, +; EG-NEXT: ADD_INT * T4.W, KC0[2].Y, literal.z, +; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) +; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T4.W, T3.W, +; EG-NEXT: BFE_INT T4.Z, T0.Y, 0.0, literal.x, +; EG-NEXT: ASHR T3.W, T3.Z, literal.y, +; EG-NEXT: ADD_INT * T4.W, KC0[2].Y, literal.z, +; EG-NEXT: 16(2.242078e-44), 
31(4.344025e-44) +; EG-NEXT: 20(2.802597e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T4.W, T3.W, +; EG-NEXT: BFE_INT T5.Z, T0.Z, 0.0, literal.x, +; EG-NEXT: ASHR T3.W, T4.Z, literal.y, BS:VEC_120/SCL_212 +; EG-NEXT: ADD_INT * T4.W, KC0[2].Y, literal.z, +; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) +; EG-NEXT: 36(5.044674e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T4.W, T3.W, +; EG-NEXT: BFE_INT T6.Z, T0.W, 0.0, literal.x, +; EG-NEXT: ASHR T3.W, T5.Z, literal.y, +; EG-NEXT: ADD_INT * T4.W, KC0[2].Y, literal.z, +; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) +; EG-NEXT: 52(7.286752e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T4.W, T3.W, +; EG-NEXT: BFE_INT T7.Z, T1.Y, 0.0, literal.x, +; EG-NEXT: ASHR T3.W, T6.Z, literal.y, +; EG-NEXT: ADD_INT * T4.W, KC0[2].Y, literal.z, +; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) +; EG-NEXT: 68(9.528830e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T4.W, T3.W, +; EG-NEXT: BFE_INT T8.Z, T1.Z, 0.0, literal.x, +; EG-NEXT: ASHR T3.W, T7.Z, literal.y, BS:VEC_120/SCL_212 +; EG-NEXT: ADD_INT * T4.W, KC0[2].Y, literal.z, +; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) +; EG-NEXT: 84(1.177091e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T4.W, T3.W, +; EG-NEXT: BFE_INT T9.Z, T2.Z, 0.0, literal.x, +; EG-NEXT: ASHR T3.W, T8.Z, literal.y, BS:VEC_120/SCL_212 +; EG-NEXT: ADD_INT * T4.W, KC0[2].Y, literal.z, +; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) +; EG-NEXT: 100(1.401298e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T4.W, T3.W, +; EG-NEXT: ASHR T3.W, T9.Z, literal.x, +; EG-NEXT: ADD_INT * T4.W, KC0[2].Y, literal.y, +; EG-NEXT: 31(4.344025e-44), 116(1.625506e-43) +; EG-NEXT: LDS_WRITE * T4.W, T3.W, +; EG-NEXT: ASHR T3.W, T1.W, literal.x, +; EG-NEXT: ADD_INT * T4.W, KC0[2].Y, literal.y, +; EG-NEXT: 31(4.344025e-44), 12(1.681558e-44) +; EG-NEXT: LDS_WRITE * T4.W, T3.W, +; EG-NEXT: ASHR T1.W, T1.W, literal.x, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 8(1.121039e-44) +; EG-NEXT: LDS_WRITE * T3.W, 
T1.W, +; EG-NEXT: MOV * T1.W, KC0[2].Y, +; EG-NEXT: LDS_WRITE * T1.W, T2.W, +; EG-NEXT: ASHR T1.W, T2.Y, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 31(4.344025e-44), 28(3.923636e-44) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: ASHR T1.W, T2.Y, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 24(3.363116e-44) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.x, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T1.W, T3.Z, +; EG-NEXT: ASHR T1.W, T0.Y, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 31(4.344025e-44), 44(6.165713e-44) +; EG-NEXT: ALU 62, @48, KC0[CB0:0-32], KC1[] +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: ASHR T1.W, T0.Y, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 40(5.605194e-44) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.x, +; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T1.W, T4.Z, +; EG-NEXT: ASHR T1.W, T0.Z, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 31(4.344025e-44), 60(8.407791e-44) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: ASHR T1.W, T0.Z, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 56(7.847271e-44) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.x, +; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T1.W, T5.Z, +; EG-NEXT: ASHR T1.W, T0.W, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 31(4.344025e-44), 76(1.064987e-43) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: ASHR T0.W, T0.W, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 72(1.008935e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 64(8.968310e-44), 0(0.000000e+00) +; EG-NEXT: 
LDS_WRITE * T0.W, T6.Z, +; EG-NEXT: ASHR T0.W, T1.Y, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 31(4.344025e-44), 92(1.289195e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: ASHR T0.W, T1.Y, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 88(1.233143e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 80(1.121039e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T7.Z, +; EG-NEXT: ASHR T0.W, T1.Z, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 31(4.344025e-44), 108(1.513402e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: ASHR T0.W, T1.Z, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 104(1.457350e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 96(1.345247e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T8.Z, +; EG-NEXT: ASHR T0.W, T2.Z, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 31(4.344025e-44), 124(1.737610e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: ASHR T0.W, T2.Z, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 120(1.681558e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 112(1.569454e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T9.Z, +; EG-NEXT: RETURN +; +; VI-DS128-LABEL: local_sextload_v16i16_to_v16i64: +; VI-DS128: ; %bb.0: +; VI-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-DS128-NEXT: s_mov_b32 m0, -1 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-DS128-NEXT: v_mov_b32_e32 v0, s1 +; VI-DS128-NEXT: ds_read_b128 v[3:6], v0 +; VI-DS128-NEXT: ds_read_b128 v[7:10], v0 offset:16 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(1) +; VI-DS128-NEXT: v_mov_b32_e32 v18, v6 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-DS128-NEXT: v_bfe_i32 v11, v8, 0, 16 +; VI-DS128-NEXT: 
v_lshrrev_b32_e32 v8, 16, v8 +; VI-DS128-NEXT: v_bfe_i32 v13, v8, 0, 16 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v12, 31, v11 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13 +; VI-DS128-NEXT: v_mov_b32_e32 v8, s0 +; VI-DS128-NEXT: ds_write_b128 v8, v[11:14] offset:80 +; VI-DS128-NEXT: v_bfe_i32 v11, v7, 0, 16 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v7 +; VI-DS128-NEXT: v_bfe_i32 v13, v7, 0, 16 +; VI-DS128-NEXT: v_mov_b32_e32 v15, v10 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v12, 31, v11 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v10 +; VI-DS128-NEXT: ds_write_b128 v8, v[11:14] offset:64 +; VI-DS128-NEXT: v_bfe_i32 v11, v15, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v13, v7, 0, 16 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v12, 31, v11 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v16, 16, v9 +; VI-DS128-NEXT: ds_write_b128 v8, v[11:14] offset:112 +; VI-DS128-NEXT: v_bfe_i32 v14, v9, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v16, v16, 0, 16 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v19, 16, v6 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v15, 31, v14 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v3 +; VI-DS128-NEXT: v_bfe_i32 v10, v4, 0, 16 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v4, 16, v4 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v5 +; VI-DS128-NEXT: ds_write_b128 v8, v[14:17] offset:96 +; VI-DS128-NEXT: v_bfe_i32 v14, v18, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v16, v19, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v0, v3, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v2, v2, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v12, v4, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v4, v5, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v6, v7, 0, 16 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v15, 31, v14 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v3, 31, v2 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v11, 31, v10 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v13, 31, v12 +; 
VI-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v7, 31, v6 +; VI-DS128-NEXT: ds_write_b128 v8, v[14:17] offset:48 +; VI-DS128-NEXT: ds_write_b128 v8, v[4:7] offset:32 +; VI-DS128-NEXT: ds_write_b128 v8, v[10:13] offset:16 +; VI-DS128-NEXT: ds_write_b128 v8, v[0:3] +; VI-DS128-NEXT: s_endpgm +; +; GFX9-DS128-LABEL: local_sextload_v16i16_to_v16i64: +; GFX9-DS128: ; %bb.0: +; GFX9-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DS128-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-DS128-NEXT: ds_read_b128 v[3:6], v0 +; GFX9-DS128-NEXT: ds_read_b128 v[7:10], v0 offset:16 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(1) +; GFX9-DS128-NEXT: v_bfe_i32 v0, v3, 0, 16 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v3 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v3, 16, v8 +; GFX9-DS128-NEXT: v_bfe_i32 v11, v8, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v13, v3, 0, 16 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v12, 31, v11 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13 +; GFX9-DS128-NEXT: v_mov_b32_e32 v8, s0 +; GFX9-DS128-NEXT: ds_write_b128 v8, v[11:14] offset:80 +; GFX9-DS128-NEXT: v_bfe_i32 v11, v7, 0, 16 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v7 +; GFX9-DS128-NEXT: v_bfe_i32 v13, v7, 0, 16 +; GFX9-DS128-NEXT: v_mov_b32_e32 v15, v10 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v12, 31, v11 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v10 +; GFX9-DS128-NEXT: ds_write_b128 v8, v[11:14] offset:64 +; GFX9-DS128-NEXT: v_bfe_i32 v11, v15, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v13, v7, 0, 16 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v12, 31, v11 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v16, 16, v9 +; GFX9-DS128-NEXT: ds_write_b128 v8, v[11:14] offset:112 +; GFX9-DS128-NEXT: v_bfe_i32 v14, v9, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v16, v16, 0, 16 +; GFX9-DS128-NEXT: v_mov_b32_e32 v18, v6 +; 
GFX9-DS128-NEXT: v_lshrrev_b32_e32 v19, 16, v6 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v15, 31, v14 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16 +; GFX9-DS128-NEXT: v_bfe_i32 v10, v4, 0, 16 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v4, 16, v4 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v5 +; GFX9-DS128-NEXT: ds_write_b128 v8, v[14:17] offset:96 +; GFX9-DS128-NEXT: v_bfe_i32 v14, v18, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v16, v19, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v2, v2, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v12, v4, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v4, v5, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v6, v7, 0, 16 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v15, 31, v14 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v3, 31, v2 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v11, 31, v10 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v13, 31, v12 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v7, 31, v6 +; GFX9-DS128-NEXT: ds_write_b128 v8, v[14:17] offset:48 +; GFX9-DS128-NEXT: ds_write_b128 v8, v[4:7] offset:32 +; GFX9-DS128-NEXT: ds_write_b128 v8, v[10:13] offset:16 +; GFX9-DS128-NEXT: ds_write_b128 v8, v[0:3] +; GFX9-DS128-NEXT: s_endpgm %load = load <16 x i16>, ptr addrspace(3) %in %ext = sext <16 x i16> %load to <16 x i64> store <16 x i64> %ext, ptr addrspace(3) %out ret void } -; FUNC-LABEL: {{^}}local_zextload_v32i16_to_v32i64: -; GFX9-NOT: m0 -; SICIVI: s_mov_b32 m0 - - -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET define amdgpu_kernel void @local_zextload_v32i16_to_v32i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 { +; SI-LABEL: local_zextload_v32i16_to_v32i64: +; SI: ; %bb.0: +; 
SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v0, s1 +; SI-NEXT: s_mov_b32 m0, -1 +; SI-NEXT: ds_read2_b64 v[2:5], v0 offset0:2 offset1:3 +; SI-NEXT: v_mov_b32_e32 v1, 0 +; SI-NEXT: ds_read2_b64 v[6:9], v0 offset1:1 +; SI-NEXT: v_mov_b32_e32 v19, v1 +; SI-NEXT: v_mov_b32_e32 v21, v1 +; SI-NEXT: v_mov_b32_e32 v22, s0 +; SI-NEXT: s_waitcnt lgkmcnt(1) +; SI-NEXT: v_lshrrev_b32_e32 v18, 16, v5 +; SI-NEXT: v_and_b32_e32 v20, 0xffff, v5 +; SI-NEXT: ds_read2_b64 v[10:13], v0 offset0:4 offset1:5 +; SI-NEXT: ds_read2_b64 v[14:17], v0 offset0:6 offset1:7 +; SI-NEXT: ds_write2_b64 v22, v[20:21], v[18:19] offset0:14 offset1:15 +; SI-NEXT: v_lshrrev_b32_e32 v18, 16, v3 +; SI-NEXT: v_and_b32_e32 v20, 0xffff, v3 +; SI-NEXT: ds_write2_b64 v22, v[20:21], v[18:19] offset0:10 offset1:11 +; SI-NEXT: s_waitcnt lgkmcnt(4) +; SI-NEXT: v_lshrrev_b32_e32 v18, 16, v9 +; SI-NEXT: v_and_b32_e32 v20, 0xffff, v9 +; SI-NEXT: ds_write2_b64 v22, v[20:21], v[18:19] offset0:6 offset1:7 +; SI-NEXT: v_lshrrev_b32_e32 v18, 16, v7 +; SI-NEXT: v_and_b32_e32 v20, 0xffff, v7 +; SI-NEXT: ds_write2_b64 v22, v[20:21], v[18:19] offset0:2 offset1:3 +; SI-NEXT: s_waitcnt lgkmcnt(4) +; SI-NEXT: v_lshrrev_b32_e32 v18, 16, v17 +; SI-NEXT: v_and_b32_e32 v20, 0xffff, v17 +; SI-NEXT: ds_write2_b64 v22, v[20:21], v[18:19] offset0:30 offset1:31 +; SI-NEXT: v_mov_b32_e32 v18, v1 +; SI-NEXT: v_lshrrev_b32_e32 v17, 16, v15 +; SI-NEXT: v_mov_b32_e32 v20, v1 +; SI-NEXT: v_and_b32_e32 v19, 0xffff, v15 +; SI-NEXT: ds_write2_b64 v22, v[19:20], v[17:18] offset0:26 offset1:27 +; SI-NEXT: v_lshrrev_b32_e32 v17, 16, v13 +; SI-NEXT: v_and_b32_e32 v19, 0xffff, v13 +; SI-NEXT: ds_write2_b64 v22, v[19:20], v[17:18] offset0:22 offset1:23 +; SI-NEXT: v_lshrrev_b32_e32 v17, 16, v4 +; SI-NEXT: v_mov_b32_e32 v5, v1 +; SI-NEXT: v_and_b32_e32 v4, 0xffff, v4 +; SI-NEXT: ds_write2_b64 v22, v[4:5], v[17:18] offset0:12 offset1:13 +; SI-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; SI-NEXT: 
v_and_b32_e32 v17, 0xffff, v2 +; SI-NEXT: v_mov_b32_e32 v4, v1 +; SI-NEXT: ds_write2_b64 v22, v[17:18], v[3:4] offset0:8 offset1:9 +; SI-NEXT: v_lshrrev_b32_e32 v2, 16, v8 +; SI-NEXT: v_lshrrev_b32_e32 v4, 16, v6 +; SI-NEXT: v_and_b32_e32 v6, 0xffff, v6 +; SI-NEXT: v_and_b32_e32 v8, 0xffff, v8 +; SI-NEXT: v_mov_b32_e32 v9, v1 +; SI-NEXT: v_mov_b32_e32 v7, v1 +; SI-NEXT: v_mov_b32_e32 v3, v1 +; SI-NEXT: v_lshrrev_b32_e32 v0, 16, v11 +; SI-NEXT: ds_write2_b64 v22, v[8:9], v[2:3] offset0:4 offset1:5 +; SI-NEXT: v_lshrrev_b32_e32 v2, 16, v12 +; SI-NEXT: v_lshrrev_b32_e32 v8, 16, v10 +; SI-NEXT: ds_write2_b64 v22, v[6:7], v[4:5] offset1:1 +; SI-NEXT: v_and_b32_e32 v4, 0xffff, v10 +; SI-NEXT: v_and_b32_e32 v5, 0xffff, v11 +; SI-NEXT: v_and_b32_e32 v10, 0xffff, v12 +; SI-NEXT: v_lshrrev_b32_e32 v12, 16, v16 +; SI-NEXT: v_lshrrev_b32_e32 v15, 16, v14 +; SI-NEXT: v_and_b32_e32 v17, 0xffff, v14 +; SI-NEXT: v_and_b32_e32 v19, 0xffff, v16 +; SI-NEXT: v_mov_b32_e32 v6, v1 +; SI-NEXT: ds_write2_b64 v22, v[5:6], v[0:1] offset0:18 offset1:19 +; SI-NEXT: v_mov_b32_e32 v11, v1 +; SI-NEXT: v_mov_b32_e32 v5, v1 +; SI-NEXT: v_mov_b32_e32 v13, v1 +; SI-NEXT: v_mov_b32_e32 v16, v1 +; SI-NEXT: ds_write2_b64 v22, v[19:20], v[12:13] offset0:28 offset1:29 +; SI-NEXT: ds_write2_b64 v22, v[17:18], v[15:16] offset0:24 offset1:25 +; SI-NEXT: ds_write2_b64 v22, v[10:11], v[2:3] offset0:20 offset1:21 +; SI-NEXT: ds_write2_b64 v22, v[4:5], v[8:9] offset0:16 offset1:17 +; SI-NEXT: s_endpgm +; +; VI-NO-DS128-LABEL: local_zextload_v32i16_to_v32i64: +; VI-NO-DS128: ; %bb.0: +; VI-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-NO-DS128-NEXT: s_mov_b32 m0, -1 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v5, 0 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v19, v5 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v21, v5 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-NO-DS128-NEXT: v_mov_b32_e32 v4, s1 +; VI-NO-DS128-NEXT: ds_read2_b64 v[0:3], v4 offset0:6 offset1:7 +; VI-NO-DS128-NEXT: ds_read2_b64 v[6:9], v4 offset0:4 offset1:5 
+; VI-NO-DS128-NEXT: v_mov_b32_e32 v22, s0 +; VI-NO-DS128-NEXT: ds_read2_b64 v[10:13], v4 offset0:2 offset1:3 +; VI-NO-DS128-NEXT: ds_read2_b64 v[14:17], v4 offset1:1 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(3) +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v20, 16, v2 +; VI-NO-DS128-NEXT: v_and_b32_e32 v18, 0xffff, v2 +; VI-NO-DS128-NEXT: ds_write2_b64 v22, v[18:19], v[20:21] offset0:28 offset1:29 +; VI-NO-DS128-NEXT: v_and_b32_e32 v18, 0xffff, v1 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v2, v5 +; VI-NO-DS128-NEXT: ds_write2_b64 v22, v[18:19], v[1:2] offset0:26 offset1:27 +; VI-NO-DS128-NEXT: v_and_b32_e32 v1, 0xffff, v0 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v18, 16, v0 +; VI-NO-DS128-NEXT: ds_write2_b64 v22, v[1:2], v[18:19] offset0:24 offset1:25 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(5) +; VI-NO-DS128-NEXT: v_and_b32_e32 v0, 0xffff, v9 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v1, v5 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v18, 16, v9 +; VI-NO-DS128-NEXT: ds_write2_b64 v22, v[0:1], v[18:19] offset0:22 offset1:23 +; VI-NO-DS128-NEXT: v_and_b32_e32 v0, 0xffff, v8 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v8, 16, v8 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v9, v5 +; VI-NO-DS128-NEXT: ds_write2_b64 v22, v[0:1], v[8:9] offset0:20 offset1:21 +; VI-NO-DS128-NEXT: v_and_b32_e32 v0, 0xffff, v7 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v7 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v8, v5 +; VI-NO-DS128-NEXT: ds_write2_b64 v22, v[0:1], v[7:8] offset0:18 offset1:19 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v6 +; VI-NO-DS128-NEXT: v_and_b32_e32 v6, 0xffff, v6 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v7, v5 +; VI-NO-DS128-NEXT: ds_write2_b64 v22, v[6:7], v[0:1] offset0:16 offset1:17 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(8) +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v13 +; VI-NO-DS128-NEXT: v_and_b32_e32 v6, 0xffff, v13 +; VI-NO-DS128-NEXT: ds_write2_b64 v22, v[6:7], v[0:1] offset0:14 offset1:15 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v12 +; 
VI-NO-DS128-NEXT: v_and_b32_e32 v6, 0xffff, v12 +; VI-NO-DS128-NEXT: ds_write2_b64 v22, v[6:7], v[0:1] offset0:12 offset1:13 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v11 +; VI-NO-DS128-NEXT: v_and_b32_e32 v6, 0xffff, v11 +; VI-NO-DS128-NEXT: ds_write2_b64 v22, v[6:7], v[0:1] offset0:10 offset1:11 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(10) +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v14 +; VI-NO-DS128-NEXT: v_and_b32_e32 v8, 0xffff, v14 +; VI-NO-DS128-NEXT: v_and_b32_e32 v4, 0xffff, v3 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v13, 16, v3 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v14, v5 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v10 +; VI-NO-DS128-NEXT: v_and_b32_e32 v6, 0xffff, v10 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v10, 16, v17 +; VI-NO-DS128-NEXT: v_and_b32_e32 v12, 0xffff, v17 +; VI-NO-DS128-NEXT: ds_write2_b64 v22, v[4:5], v[13:14] offset0:30 offset1:31 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v13, v5 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v11, v5 +; VI-NO-DS128-NEXT: ds_write2_b64 v22, v[6:7], v[1:2] offset0:8 offset1:9 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v16 +; VI-NO-DS128-NEXT: v_and_b32_e32 v9, 0xffff, v16 +; VI-NO-DS128-NEXT: ds_write2_b64 v22, v[12:13], v[10:11] offset0:6 offset1:7 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v10, v5 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v3, v5 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v15 +; VI-NO-DS128-NEXT: v_and_b32_e32 v6, 0xffff, v15 +; VI-NO-DS128-NEXT: ds_write2_b64 v22, v[9:10], v[2:3] offset0:4 offset1:5 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v2, v5 +; VI-NO-DS128-NEXT: ds_write2_b64 v22, v[6:7], v[1:2] offset0:2 offset1:3 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v9, v5 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v1, v5 +; VI-NO-DS128-NEXT: ds_write2_b64 v22, v[8:9], v[0:1] offset1:1 +; VI-NO-DS128-NEXT: s_endpgm +; +; GFX9-NO-DS128-LABEL: local_zextload_v32i16_to_v32i64: +; GFX9-NO-DS128: ; %bb.0: +; GFX9-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v5, 0 +; GFX9-NO-DS128-NEXT: 
v_mov_b32_e32 v19, v5 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v21, v5 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v4, s1 +; GFX9-NO-DS128-NEXT: ds_read2_b64 v[6:9], v4 offset0:4 offset1:5 +; GFX9-NO-DS128-NEXT: ds_read2_b64 v[0:3], v4 offset0:6 offset1:7 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v22, s0 +; GFX9-NO-DS128-NEXT: ds_read2_b64 v[10:13], v4 offset1:1 +; GFX9-NO-DS128-NEXT: ds_read2_b64 v[14:17], v4 offset0:2 offset1:3 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(2) +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v20, 16, v2 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v18, 0xffff, v2 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v22, v[18:19], v[20:21] offset0:28 offset1:29 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v18, 0xffff, v1 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v2, v5 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v22, v[18:19], v[1:2] offset0:26 offset1:27 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v1, 0xffff, v0 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v18, 16, v0 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v22, v[1:2], v[18:19] offset0:24 offset1:25 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v0, 0xffff, v9 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v1, v5 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v18, 16, v9 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v22, v[0:1], v[18:19] offset0:22 offset1:23 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v0, 0xffff, v8 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v8, 16, v8 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v9, v5 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v22, v[0:1], v[8:9] offset0:20 offset1:21 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v0, 0xffff, v7 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v7 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v8, v5 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v22, v[0:1], v[7:8] offset0:18 offset1:19 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v6 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v6, 0xffff, v6 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v7, v5 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v22, v[6:7], v[0:1] 
offset0:16 offset1:17 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(7) +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v17 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v6, 0xffff, v17 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v22, v[6:7], v[0:1] offset0:14 offset1:15 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v16 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v6, 0xffff, v16 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v22, v[6:7], v[0:1] offset0:12 offset1:13 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v15 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v6, 0xffff, v15 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v22, v[6:7], v[0:1] offset0:10 offset1:11 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v14 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v6, 0xffff, v14 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v10 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v8, 0xffff, v10 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v22, v[6:7], v[1:2] offset0:8 offset1:9 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v12 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v9, 0xffff, v12 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v10, 16, v13 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v12, 0xffff, v13 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v4, 0xffff, v3 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v13, 16, v3 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v14, v5 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v11 +; GFX9-NO-DS128-NEXT: v_and_b32_e32 v6, 0xffff, v11 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v22, v[4:5], v[13:14] offset0:30 offset1:31 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v13, v5 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v11, v5 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v22, v[12:13], v[10:11] offset0:6 offset1:7 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v10, v5 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v3, v5 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v22, v[9:10], v[2:3] offset0:4 offset1:5 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v2, v5 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v22, v[6:7], v[1:2] offset0:2 offset1:3 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v9, v5 +; GFX9-NO-DS128-NEXT: 
v_mov_b32_e32 v1, v5 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v22, v[8:9], v[0:1] offset1:1 +; GFX9-NO-DS128-NEXT: s_endpgm +; +; EG-LABEL: local_zextload_v32i16_to_v32i64: +; EG: ; %bb.0: +; EG-NEXT: ALU 105, @49, KC0[CB0:0-32], KC1[] +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 56(7.847271e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.Y, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 60(8.407791e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.Z, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.W, OQAP, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Z, literal.x, +; EG-NEXT: 52(7.286752e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T1.W +; EG-NEXT: MOV T1.Y, OQAP, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Z, literal.x, +; EG-NEXT: 40(5.605194e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T1.W +; EG-NEXT: MOV T1.Z, OQAP, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Z, literal.x, +; EG-NEXT: 44(6.165713e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T1.W +; EG-NEXT: MOV T1.W, OQAP, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Z, literal.x, +; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T2.W +; EG-NEXT: MOV T2.Y, OQAP, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Z, literal.x, +; EG-NEXT: 36(5.044674e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T2.W +; EG-NEXT: MOV T2.Z, OQAP, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Z, literal.x, +; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T2.W +; EG-NEXT: MOV T2.W, OQAP, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Z, literal.x, +; EG-NEXT: 28(3.923636e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T3.W +; EG-NEXT: MOV T3.Y, OQAP, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Z, literal.x, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T3.W +; EG-NEXT: MOV T3.Z, 
OQAP, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Z, literal.x, +; EG-NEXT: 20(2.802597e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T3.W +; EG-NEXT: MOV T3.W, OQAP, +; EG-NEXT: ADD_INT * T4.W, KC0[2].Z, literal.x, +; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T4.W +; EG-NEXT: MOV T4.Y, OQAP, +; EG-NEXT: ADD_INT * T4.W, KC0[2].Z, literal.x, +; EG-NEXT: 12(1.681558e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T4.W +; EG-NEXT: MOV T4.Z, OQAP, +; EG-NEXT: ADD_INT * T4.W, KC0[2].Z, literal.x, +; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T4.W +; EG-NEXT: MOV T4.W, OQAP, +; EG-NEXT: MOV * T5.W, KC0[2].Z, +; EG-NEXT: LDS_READ_RET * OQAP, T5.W +; EG-NEXT: MOV T5.Y, OQAP, +; EG-NEXT: LSHR T5.W, T4.W, literal.x, +; EG-NEXT: ADD_INT * T6.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 24(3.363116e-44) +; EG-NEXT: LDS_WRITE * T6.W, T5.W, +; EG-NEXT: AND_INT T4.W, T4.W, literal.x, +; EG-NEXT: ADD_INT * T5.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) +; EG-NEXT: LDS_WRITE * T5.W, T4.W, +; EG-NEXT: LSHR T4.W, T5.Y, literal.x, +; EG-NEXT: ADD_INT * T5.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 8(1.121039e-44) +; EG-NEXT: LDS_WRITE * T5.W, T4.W, +; EG-NEXT: AND_INT T4.W, T5.Y, literal.x, +; EG-NEXT: MOV * T5.W, KC0[2].Y, +; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T5.W, T4.W, +; EG-NEXT: LSHR T4.W, T4.Z, literal.x, +; EG-NEXT: ADD_INT * T5.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 56(7.847271e-44) +; EG-NEXT: LDS_WRITE * T5.W, T4.W, +; EG-NEXT: AND_INT T4.W, T4.Z, literal.x, +; EG-NEXT: ADD_INT * T5.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 48(6.726233e-44) +; EG-NEXT: LDS_WRITE * T5.W, T4.W, +; EG-NEXT: LSHR T4.W, T4.Y, literal.x, +; EG-NEXT: ADD_INT * T5.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 40(5.605194e-44) +; EG-NEXT: LDS_WRITE * T5.W, T4.W, +; EG-NEXT: AND_INT T4.W, T4.Y, literal.x, +; 
EG-NEXT: ADD_INT * T5.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 32(4.484155e-44) +; EG-NEXT: LDS_WRITE * T5.W, T4.W, +; EG-NEXT: LSHR T4.W, T3.W, literal.x, +; EG-NEXT: ADD_INT * T5.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 88(1.233143e-43) +; EG-NEXT: LDS_WRITE * T5.W, T4.W, +; EG-NEXT: AND_INT T3.W, T3.W, literal.x, +; EG-NEXT: ADD_INT * T4.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 80(1.121039e-43) +; EG-NEXT: LDS_WRITE * T4.W, T3.W, +; EG-NEXT: LSHR T3.W, T3.Z, literal.x, +; EG-NEXT: ADD_INT * T4.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 72(1.008935e-43) +; EG-NEXT: ALU 93, @50, KC0[CB0:0-32], KC1[] +; EG-NEXT: LDS_WRITE * T4.W, T3.W, +; EG-NEXT: AND_INT T3.W, T3.Z, literal.x, +; EG-NEXT: ADD_INT * T4.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 64(8.968310e-44) +; EG-NEXT: LDS_WRITE * T4.W, T3.W, +; EG-NEXT: LSHR T3.W, T3.Y, literal.x, +; EG-NEXT: ADD_INT * T4.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 120(1.681558e-43) +; EG-NEXT: LDS_WRITE * T4.W, T3.W, +; EG-NEXT: AND_INT T3.W, T3.Y, literal.x, +; EG-NEXT: ADD_INT * T4.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 112(1.569454e-43) +; EG-NEXT: LDS_WRITE * T4.W, T3.W, +; EG-NEXT: LSHR T3.W, T2.W, literal.x, +; EG-NEXT: ADD_INT * T4.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 104(1.457350e-43) +; EG-NEXT: LDS_WRITE * T4.W, T3.W, +; EG-NEXT: AND_INT T2.W, T2.W, literal.x, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 96(1.345247e-43) +; EG-NEXT: LDS_WRITE * T3.W, T2.W, +; EG-NEXT: LSHR T2.W, T2.Z, literal.x, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 152(2.129974e-43) +; EG-NEXT: LDS_WRITE * T3.W, T2.W, +; EG-NEXT: AND_INT T2.W, T2.Z, literal.x, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 144(2.017870e-43) +; EG-NEXT: LDS_WRITE * T3.W, T2.W, +; EG-NEXT: LSHR T2.W, T2.Y, literal.x, +; EG-NEXT: ADD_INT * 
T3.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 136(1.905766e-43) +; EG-NEXT: LDS_WRITE * T3.W, T2.W, +; EG-NEXT: AND_INT T2.W, T2.Y, literal.x, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 128(1.793662e-43) +; EG-NEXT: LDS_WRITE * T3.W, T2.W, +; EG-NEXT: LSHR T2.W, T1.W, literal.x, +; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 184(2.578389e-43) +; EG-NEXT: LDS_WRITE * T3.W, T2.W, +; EG-NEXT: AND_INT T1.W, T1.W, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 176(2.466285e-43) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: LSHR T1.W, T1.Z, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 168(2.354181e-43) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: AND_INT T1.W, T1.Z, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 160(2.242078e-43) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: LSHR T1.W, T1.Y, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 216(3.026805e-43) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: AND_INT T1.W, T1.Y, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 208(2.914701e-43) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: LSHR T1.W, T0.W, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 200(2.802597e-43) +; EG-NEXT: LDS_WRITE * T2.W, T1.W, +; EG-NEXT: AND_INT T0.W, T0.W, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 192(2.690493e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: LSHR T0.W, T0.Z, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 248(3.475220e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: AND_INT T0.W, T0.Z, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 
240(3.363116e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: LSHR T0.W, T0.Y, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 232(3.251012e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: AND_INT T0.W, T0.Y, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 65535(9.183409e-41), 224(3.138909e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: ADD_INT T0.W, KC0[2].Y, literal.x, +; EG-NEXT: MOV * T1.W, literal.y, +; EG-NEXT: 28(3.923636e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 20(2.802597e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 12(1.681558e-44), 0(0.000000e+00) +; EG-NEXT: ALU 87, @51, KC0[CB0:0-32], KC1[] +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 60(8.407791e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 52(7.286752e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 44(6.165713e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 36(5.044674e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 92(1.289195e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 84(1.177091e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 76(1.064987e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 
68(9.528830e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 124(1.737610e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 116(1.625506e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 108(1.513402e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 100(1.401298e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 156(2.186026e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 148(2.073922e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 140(1.961818e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 132(1.849714e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 188(2.634441e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 180(2.522337e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 172(2.410233e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 164(2.298129e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 220(3.082857e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 212(2.970753e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 204(2.858649e-43), 
0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 196(2.746545e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 252(3.531272e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 244(3.419168e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 236(3.307064e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 228(3.194960e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T1.W, +; EG-NEXT: RETURN +; +; VI-DS128-LABEL: local_zextload_v32i16_to_v32i64: +; VI-DS128: ; %bb.0: +; VI-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-DS128-NEXT: s_mov_b32 m0, -1 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-DS128-NEXT: v_mov_b32_e32 v1, s1 +; VI-DS128-NEXT: ds_read_b128 v[3:6], v1 +; VI-DS128-NEXT: ds_read_b128 v[7:10], v1 offset:16 +; VI-DS128-NEXT: v_mov_b32_e32 v52, s0 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(1) +; VI-DS128-NEXT: v_lshrrev_b32_e32 v16, 16, v6 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-DS128-NEXT: v_lshrrev_b32_e32 v19, 16, v8 +; VI-DS128-NEXT: v_and_b32_e32 v17, 0xffff, v8 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v22, 16, v7 +; VI-DS128-NEXT: v_and_b32_e32 v20, 0xffff, v7 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v25, 16, v10 +; VI-DS128-NEXT: v_and_b32_e32 v23, 0xffff, v10 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v28, 16, v9 +; VI-DS128-NEXT: v_and_b32_e32 v26, 0xffff, v9 +; VI-DS128-NEXT: ds_read_b128 v[7:10], v1 offset:32 +; VI-DS128-NEXT: ds_read_b128 v[29:32], v1 offset:48 +; VI-DS128-NEXT: v_and_b32_e32 v14, 0xffff, v6 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v4 +; VI-DS128-NEXT: v_and_b32_e32 v0, 0xffff, v4 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(1) +; VI-DS128-NEXT: v_lshrrev_b32_e32 v38, 16, v7 +; VI-DS128-NEXT: v_and_b32_e32 v36, 
0xffff, v7 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v44, 16, v9 +; VI-DS128-NEXT: v_and_b32_e32 v42, 0xffff, v9 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-DS128-NEXT: v_lshrrev_b32_e32 v9, 16, v30 +; VI-DS128-NEXT: v_and_b32_e32 v7, 0xffff, v30 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v50, 16, v32 +; VI-DS128-NEXT: v_and_b32_e32 v48, 0xffff, v32 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v32, 16, v31 +; VI-DS128-NEXT: v_and_b32_e32 v30, 0xffff, v31 +; VI-DS128-NEXT: v_mov_b32_e32 v31, 0 +; VI-DS128-NEXT: v_mov_b32_e32 v49, v31 +; VI-DS128-NEXT: v_mov_b32_e32 v51, v31 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v47, 16, v29 +; VI-DS128-NEXT: v_and_b32_e32 v45, 0xffff, v29 +; VI-DS128-NEXT: ds_write_b128 v52, v[48:51] offset:240 +; VI-DS128-NEXT: v_mov_b32_e32 v46, v31 +; VI-DS128-NEXT: v_mov_b32_e32 v48, v31 +; VI-DS128-NEXT: v_mov_b32_e32 v27, v31 +; VI-DS128-NEXT: v_mov_b32_e32 v29, v31 +; VI-DS128-NEXT: ds_write_b128 v52, v[45:48] offset:192 +; VI-DS128-NEXT: v_mov_b32_e32 v43, v31 +; VI-DS128-NEXT: v_mov_b32_e32 v45, v31 +; VI-DS128-NEXT: ds_write_b128 v52, v[26:29] offset:96 +; VI-DS128-NEXT: v_mov_b32_e32 v24, v31 +; VI-DS128-NEXT: v_mov_b32_e32 v26, v31 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v41, 16, v10 +; VI-DS128-NEXT: v_and_b32_e32 v39, 0xffff, v10 +; VI-DS128-NEXT: ds_write_b128 v52, v[42:45] offset:160 +; VI-DS128-NEXT: v_mov_b32_e32 v40, v31 +; VI-DS128-NEXT: v_mov_b32_e32 v42, v31 +; VI-DS128-NEXT: ds_write_b128 v52, v[23:26] offset:112 +; VI-DS128-NEXT: v_mov_b32_e32 v21, v31 +; VI-DS128-NEXT: v_mov_b32_e32 v23, v31 +; VI-DS128-NEXT: ds_write_b128 v52, v[39:42] offset:176 +; VI-DS128-NEXT: v_mov_b32_e32 v37, v31 +; VI-DS128-NEXT: v_mov_b32_e32 v39, v31 +; VI-DS128-NEXT: ds_write_b128 v52, v[20:23] offset:64 +; VI-DS128-NEXT: v_mov_b32_e32 v18, v31 +; VI-DS128-NEXT: v_mov_b32_e32 v20, v31 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v35, 16, v8 +; VI-DS128-NEXT: v_and_b32_e32 v33, 0xffff, v8 +; VI-DS128-NEXT: v_mov_b32_e32 v8, v31 +; VI-DS128-NEXT: v_mov_b32_e32 v10, v31 +; 
VI-DS128-NEXT: ds_write_b128 v52, v[36:39] offset:128 +; VI-DS128-NEXT: v_mov_b32_e32 v34, v31 +; VI-DS128-NEXT: v_mov_b32_e32 v36, v31 +; VI-DS128-NEXT: ds_write_b128 v52, v[17:20] offset:80 +; VI-DS128-NEXT: v_mov_b32_e32 v15, v31 +; VI-DS128-NEXT: v_mov_b32_e32 v17, v31 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v13, 16, v3 +; VI-DS128-NEXT: v_and_b32_e32 v11, 0xffff, v3 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v6, 16, v5 +; VI-DS128-NEXT: v_and_b32_e32 v4, 0xffff, v5 +; VI-DS128-NEXT: ds_write_b128 v52, v[7:10] offset:208 +; VI-DS128-NEXT: ds_write_b128 v52, v[33:36] offset:144 +; VI-DS128-NEXT: v_mov_b32_e32 v5, v31 +; VI-DS128-NEXT: v_mov_b32_e32 v7, v31 +; VI-DS128-NEXT: v_mov_b32_e32 v33, v31 +; VI-DS128-NEXT: ds_write_b128 v52, v[14:17] offset:48 +; VI-DS128-NEXT: v_mov_b32_e32 v12, v31 +; VI-DS128-NEXT: v_mov_b32_e32 v14, v31 +; VI-DS128-NEXT: v_mov_b32_e32 v1, v31 +; VI-DS128-NEXT: v_mov_b32_e32 v3, v31 +; VI-DS128-NEXT: ds_write_b128 v52, v[4:7] offset:32 +; VI-DS128-NEXT: ds_write_b128 v52, v[30:33] offset:224 +; VI-DS128-NEXT: ds_write_b128 v52, v[11:14] +; VI-DS128-NEXT: ds_write_b128 v52, v[0:3] offset:16 +; VI-DS128-NEXT: s_endpgm +; +; GFX9-DS128-LABEL: local_zextload_v32i16_to_v32i64: +; GFX9-DS128: ; %bb.0: +; GFX9-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DS128-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-DS128-NEXT: ds_read_b128 v[3:6], v1 +; GFX9-DS128-NEXT: ds_read_b128 v[7:10], v1 offset:16 +; GFX9-DS128-NEXT: v_mov_b32_e32 v52, s0 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(1) +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v16, 16, v6 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v19, 16, v8 +; GFX9-DS128-NEXT: v_and_b32_e32 v17, 0xffff, v8 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v22, 16, v7 +; GFX9-DS128-NEXT: v_and_b32_e32 v20, 0xffff, v7 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v25, 16, v10 +; GFX9-DS128-NEXT: v_and_b32_e32 v23, 0xffff, v10 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v28, 16, v9 
+; GFX9-DS128-NEXT: v_and_b32_e32 v26, 0xffff, v9 +; GFX9-DS128-NEXT: ds_read_b128 v[7:10], v1 offset:32 +; GFX9-DS128-NEXT: ds_read_b128 v[29:32], v1 offset:48 +; GFX9-DS128-NEXT: v_and_b32_e32 v14, 0xffff, v6 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v4 +; GFX9-DS128-NEXT: v_and_b32_e32 v0, 0xffff, v4 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(1) +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v38, 16, v7 +; GFX9-DS128-NEXT: v_and_b32_e32 v36, 0xffff, v7 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v44, 16, v9 +; GFX9-DS128-NEXT: v_and_b32_e32 v42, 0xffff, v9 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v9, 16, v30 +; GFX9-DS128-NEXT: v_and_b32_e32 v7, 0xffff, v30 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v50, 16, v32 +; GFX9-DS128-NEXT: v_and_b32_e32 v48, 0xffff, v32 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v32, 16, v31 +; GFX9-DS128-NEXT: v_and_b32_e32 v30, 0xffff, v31 +; GFX9-DS128-NEXT: v_mov_b32_e32 v31, 0 +; GFX9-DS128-NEXT: v_mov_b32_e32 v49, v31 +; GFX9-DS128-NEXT: v_mov_b32_e32 v51, v31 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v47, 16, v29 +; GFX9-DS128-NEXT: v_and_b32_e32 v45, 0xffff, v29 +; GFX9-DS128-NEXT: ds_write_b128 v52, v[48:51] offset:240 +; GFX9-DS128-NEXT: v_mov_b32_e32 v46, v31 +; GFX9-DS128-NEXT: v_mov_b32_e32 v48, v31 +; GFX9-DS128-NEXT: v_mov_b32_e32 v27, v31 +; GFX9-DS128-NEXT: v_mov_b32_e32 v29, v31 +; GFX9-DS128-NEXT: ds_write_b128 v52, v[45:48] offset:192 +; GFX9-DS128-NEXT: v_mov_b32_e32 v43, v31 +; GFX9-DS128-NEXT: v_mov_b32_e32 v45, v31 +; GFX9-DS128-NEXT: ds_write_b128 v52, v[26:29] offset:96 +; GFX9-DS128-NEXT: v_mov_b32_e32 v24, v31 +; GFX9-DS128-NEXT: v_mov_b32_e32 v26, v31 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v41, 16, v10 +; GFX9-DS128-NEXT: v_and_b32_e32 v39, 0xffff, v10 +; GFX9-DS128-NEXT: ds_write_b128 v52, v[42:45] offset:160 +; GFX9-DS128-NEXT: v_mov_b32_e32 v40, v31 +; GFX9-DS128-NEXT: v_mov_b32_e32 v42, v31 +; GFX9-DS128-NEXT: ds_write_b128 v52, v[23:26] offset:112 +; GFX9-DS128-NEXT: v_mov_b32_e32 v21, v31 +; 
GFX9-DS128-NEXT: v_mov_b32_e32 v23, v31 +; GFX9-DS128-NEXT: ds_write_b128 v52, v[39:42] offset:176 +; GFX9-DS128-NEXT: v_mov_b32_e32 v37, v31 +; GFX9-DS128-NEXT: v_mov_b32_e32 v39, v31 +; GFX9-DS128-NEXT: ds_write_b128 v52, v[20:23] offset:64 +; GFX9-DS128-NEXT: v_mov_b32_e32 v18, v31 +; GFX9-DS128-NEXT: v_mov_b32_e32 v20, v31 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v35, 16, v8 +; GFX9-DS128-NEXT: v_and_b32_e32 v33, 0xffff, v8 +; GFX9-DS128-NEXT: v_mov_b32_e32 v8, v31 +; GFX9-DS128-NEXT: v_mov_b32_e32 v10, v31 +; GFX9-DS128-NEXT: ds_write_b128 v52, v[36:39] offset:128 +; GFX9-DS128-NEXT: v_mov_b32_e32 v34, v31 +; GFX9-DS128-NEXT: v_mov_b32_e32 v36, v31 +; GFX9-DS128-NEXT: ds_write_b128 v52, v[17:20] offset:80 +; GFX9-DS128-NEXT: v_mov_b32_e32 v15, v31 +; GFX9-DS128-NEXT: v_mov_b32_e32 v17, v31 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v13, 16, v3 +; GFX9-DS128-NEXT: v_and_b32_e32 v11, 0xffff, v3 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v6, 16, v5 +; GFX9-DS128-NEXT: v_and_b32_e32 v4, 0xffff, v5 +; GFX9-DS128-NEXT: ds_write_b128 v52, v[7:10] offset:208 +; GFX9-DS128-NEXT: ds_write_b128 v52, v[33:36] offset:144 +; GFX9-DS128-NEXT: v_mov_b32_e32 v5, v31 +; GFX9-DS128-NEXT: v_mov_b32_e32 v7, v31 +; GFX9-DS128-NEXT: v_mov_b32_e32 v33, v31 +; GFX9-DS128-NEXT: ds_write_b128 v52, v[14:17] offset:48 +; GFX9-DS128-NEXT: v_mov_b32_e32 v12, v31 +; GFX9-DS128-NEXT: v_mov_b32_e32 v14, v31 +; GFX9-DS128-NEXT: v_mov_b32_e32 v1, v31 +; GFX9-DS128-NEXT: v_mov_b32_e32 v3, v31 +; GFX9-DS128-NEXT: ds_write_b128 v52, v[4:7] offset:32 +; GFX9-DS128-NEXT: ds_write_b128 v52, v[30:33] offset:224 +; GFX9-DS128-NEXT: ds_write_b128 v52, v[11:14] +; GFX9-DS128-NEXT: ds_write_b128 v52, v[0:3] offset:16 +; GFX9-DS128-NEXT: s_endpgm %load = load <32 x i16>, ptr addrspace(3) %in %ext = zext <32 x i16> %load to <32 x i64> store <32 x i64> %ext, ptr addrspace(3) %out ret void } -; FUNC-LABEL: {{^}}local_sextload_v32i16_to_v32i64: -; GFX9-NOT: m0 -; SICIVI: s_mov_b32 m0 - - -; EG: LDS_READ_RET -; EG: 
LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG-DAG: BFE_INT -; EG-DAG: BFE_INT -; EG-DAG: ASHR -; EG-DAG: ASHR -; EG-DAG: BFE_INT -; EG-DAG: BFE_INT -; EG-DAG: ASHR -; EG-DAG: ASHR -; EG-DAG: BFE_INT -; EG-DAG: BFE_INT -; EG-DAG: ASHR -; EG-DAG: ASHR -; EG-DAG: BFE_INT -; EG-DAG: BFE_INT -; EG-DAG: ASHR -; EG-DAG: ASHR -; EG-DAG: BFE_INT -; EG-DAG: BFE_INT -; EG-DAG: ASHR -; EG-DAG: ASHR -; EG-DAG: BFE_INT -; EG-DAG: BFE_INT -; EG-DAG: ASHR -; EG-DAG: ASHR -; EG-DAG: BFE_INT -; EG-DAG: BFE_INT -; EG-DAG: ASHR -; EG-DAG: ASHR -; EG-DAG: BFE_INT -; EG-DAG: BFE_INT -; EG-DAG: ASHR -; EG-DAG: ASHR define amdgpu_kernel void @local_sextload_v32i16_to_v32i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 { +; SI-LABEL: local_sextload_v32i16_to_v32i64: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v12, s1 +; SI-NEXT: s_mov_b32 m0, -1 +; SI-NEXT: ds_read2_b64 v[4:7], v12 offset0:2 offset1:3 +; SI-NEXT: ds_read2_b64 v[0:3], v12 offset1:1 +; SI-NEXT: ds_read2_b64 v[8:11], v12 offset0:6 offset1:7 +; SI-NEXT: ds_read2_b64 v[12:15], v12 offset0:4 offset1:5 +; SI-NEXT: s_waitcnt lgkmcnt(3) +; SI-NEXT: v_mov_b32_e32 v18, v7 +; SI-NEXT: v_ashrrev_i32_e32 v17, 31, v7 +; SI-NEXT: v_ashrrev_i32_e32 v16, 16, v7 +; SI-NEXT: v_bfe_i32 v18, v18, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v19, 31, v18 +; SI-NEXT: v_mov_b32_e32 v7, s0 +; SI-NEXT: ds_write2_b64 v7, v[18:19], v[16:17] offset0:14 offset1:15 +; SI-NEXT: v_ashrrev_i32_e32 v17, 31, v5 +; SI-NEXT: v_ashrrev_i32_e32 v16, 16, v5 +; SI-NEXT: v_bfe_i32 v18, v5, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v19, 31, v18 +; SI-NEXT: ds_write2_b64 v7, v[18:19], v[16:17] offset0:10 offset1:11 +; SI-NEXT: s_waitcnt lgkmcnt(4) +; 
SI-NEXT: v_mov_b32_e32 v5, v3 +; SI-NEXT: v_ashrrev_i32_e32 v17, 31, v3 +; SI-NEXT: v_ashrrev_i32_e32 v16, 16, v3 +; SI-NEXT: v_bfe_i32 v18, v5, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v19, 31, v18 +; SI-NEXT: ds_write2_b64 v7, v[18:19], v[16:17] offset0:6 offset1:7 +; SI-NEXT: v_ashrrev_i32_e32 v17, 31, v1 +; SI-NEXT: v_ashrrev_i32_e32 v16, 16, v1 +; SI-NEXT: v_bfe_i32 v18, v1, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v19, 31, v18 +; SI-NEXT: ds_write2_b64 v7, v[18:19], v[16:17] offset0:2 offset1:3 +; SI-NEXT: s_waitcnt lgkmcnt(5) +; SI-NEXT: v_mov_b32_e32 v1, v11 +; SI-NEXT: v_ashrrev_i32_e32 v17, 31, v11 +; SI-NEXT: v_ashrrev_i32_e32 v16, 16, v11 +; SI-NEXT: v_bfe_i32 v18, v1, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v19, 31, v18 +; SI-NEXT: ds_write2_b64 v7, v[18:19], v[16:17] offset0:30 offset1:31 +; SI-NEXT: v_ashrrev_i32_e32 v17, 31, v9 +; SI-NEXT: v_ashrrev_i32_e32 v16, 16, v9 +; SI-NEXT: v_bfe_i32 v18, v9, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v19, 31, v18 +; SI-NEXT: ds_write2_b64 v7, v[18:19], v[16:17] offset0:26 offset1:27 +; SI-NEXT: s_waitcnt lgkmcnt(6) +; SI-NEXT: v_mov_b32_e32 v1, v15 +; SI-NEXT: v_ashrrev_i32_e32 v16, 31, v15 +; SI-NEXT: v_ashrrev_i32_e32 v15, 16, v15 +; SI-NEXT: v_bfe_i32 v17, v1, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v18, 31, v17 +; SI-NEXT: ds_write2_b64 v7, v[17:18], v[15:16] offset0:22 offset1:23 +; SI-NEXT: v_ashrrev_i32_e32 v16, 31, v13 +; SI-NEXT: v_ashrrev_i32_e32 v15, 16, v13 +; SI-NEXT: v_bfe_i32 v17, v13, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v18, 31, v17 +; SI-NEXT: ds_write2_b64 v7, v[17:18], v[15:16] offset0:18 offset1:19 +; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v6 +; SI-NEXT: v_bfe_i32 v5, v6, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v6, 31, v5 +; SI-NEXT: v_bfe_i32 v15, v1, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v16, 31, v15 +; SI-NEXT: ds_write2_b64 v7, v[5:6], v[15:16] offset0:12 offset1:13 +; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v4 +; SI-NEXT: v_lshrrev_b32_e32 v11, 16, v2 +; SI-NEXT: v_lshrrev_b32_e32 v18, 16, v10 +; SI-NEXT: v_bfe_i32 v3, v4, 
0, 16 +; SI-NEXT: v_lshrrev_b32_e32 v15, 16, v8 +; SI-NEXT: v_bfe_i32 v5, v1, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v4, 31, v3 +; SI-NEXT: v_ashrrev_i32_e32 v6, 31, v5 +; SI-NEXT: ds_write2_b64 v7, v[3:4], v[5:6] offset0:8 offset1:9 +; SI-NEXT: v_lshrrev_b32_e32 v4, 16, v14 +; SI-NEXT: v_lshrrev_b32_e32 v6, 16, v12 +; SI-NEXT: v_bfe_i32 v1, v12, 0, 16 +; SI-NEXT: v_bfe_i32 v3, v14, 0, 16 +; SI-NEXT: v_bfe_i32 v5, v8, 0, 16 +; SI-NEXT: v_bfe_i32 v8, v10, 0, 16 +; SI-NEXT: v_lshrrev_b32_e32 v14, 16, v0 +; SI-NEXT: v_bfe_i32 v9, v0, 0, 16 +; SI-NEXT: v_bfe_i32 v10, v2, 0, 16 +; SI-NEXT: v_bfe_i32 v12, v11, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v11, 31, v10 +; SI-NEXT: v_ashrrev_i32_e32 v13, 31, v12 +; SI-NEXT: ds_write2_b64 v7, v[10:11], v[12:13] offset0:4 offset1:5 +; SI-NEXT: v_bfe_i32 v11, v6, 0, 16 +; SI-NEXT: v_bfe_i32 v13, v4, 0, 16 +; SI-NEXT: v_bfe_i32 v15, v15, 0, 16 +; SI-NEXT: v_bfe_i32 v16, v14, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v10, 31, v9 +; SI-NEXT: v_ashrrev_i32_e32 v17, 31, v16 +; SI-NEXT: ds_write2_b64 v7, v[9:10], v[16:17] offset1:1 +; SI-NEXT: v_bfe_i32 v17, v18, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v2, 31, v1 +; SI-NEXT: v_ashrrev_i32_e32 v4, 31, v3 +; SI-NEXT: v_ashrrev_i32_e32 v6, 31, v5 +; SI-NEXT: v_ashrrev_i32_e32 v9, 31, v8 +; SI-NEXT: v_ashrrev_i32_e32 v12, 31, v11 +; SI-NEXT: v_ashrrev_i32_e32 v14, 31, v13 +; SI-NEXT: v_ashrrev_i32_e32 v16, 31, v15 +; SI-NEXT: v_ashrrev_i32_e32 v18, 31, v17 +; SI-NEXT: ds_write2_b64 v7, v[8:9], v[17:18] offset0:28 offset1:29 +; SI-NEXT: ds_write2_b64 v7, v[5:6], v[15:16] offset0:24 offset1:25 +; SI-NEXT: ds_write2_b64 v7, v[3:4], v[13:14] offset0:20 offset1:21 +; SI-NEXT: ds_write2_b64 v7, v[1:2], v[11:12] offset0:16 offset1:17 +; SI-NEXT: s_endpgm +; +; VI-NO-DS128-LABEL: local_sextload_v32i16_to_v32i64: +; VI-NO-DS128: ; %bb.0: +; VI-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-NO-DS128-NEXT: s_mov_b32 m0, -1 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-NO-DS128-NEXT: v_mov_b32_e32 v7, s1 +; 
VI-NO-DS128-NEXT: ds_read2_b64 v[0:3], v7 offset0:6 offset1:7 +; VI-NO-DS128-NEXT: ds_read2_b64 v[12:15], v7 offset0:4 offset1:5 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v11, s0 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(1) +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v4, 16, v3 +; VI-NO-DS128-NEXT: v_bfe_i32 v16, v4, 0, 16 +; VI-NO-DS128-NEXT: v_bfe_i32 v18, v3, 0, 16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v19, 31, v18 +; VI-NO-DS128-NEXT: ds_read2_b64 v[3:6], v7 offset0:2 offset1:3 +; VI-NO-DS128-NEXT: ds_read2_b64 v[7:10], v7 offset1:1 +; VI-NO-DS128-NEXT: ds_write2_b64 v11, v[18:19], v[16:17] offset0:30 offset1:31 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v16, 16, v2 +; VI-NO-DS128-NEXT: v_bfe_i32 v16, v16, 0, 16 +; VI-NO-DS128-NEXT: v_bfe_i32 v18, v2, 0, 16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v19, 31, v18 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; VI-NO-DS128-NEXT: ds_write2_b64 v11, v[18:19], v[16:17] offset0:28 offset1:29 +; VI-NO-DS128-NEXT: v_bfe_i32 v16, v2, 0, 16 +; VI-NO-DS128-NEXT: v_bfe_i32 v1, v1, 0, 16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v2, 31, v1 +; VI-NO-DS128-NEXT: ds_write2_b64 v11, v[1:2], v[16:17] offset0:26 offset1:27 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v17, 16, v0 +; VI-NO-DS128-NEXT: v_bfe_i32 v16, v0, 0, 16 +; VI-NO-DS128-NEXT: v_bfe_i32 v18, v17, 0, 16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v19, 31, v18 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16 +; VI-NO-DS128-NEXT: ds_write2_b64 v11, v[16:17], v[18:19] offset0:24 offset1:25 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(6) +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v16, 16, v15 +; VI-NO-DS128-NEXT: v_bfe_i32 v16, v16, 0, 16 +; VI-NO-DS128-NEXT: v_bfe_i32 v18, v15, 0, 16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v19, 31, v18 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v15, 16, v14 +; 
VI-NO-DS128-NEXT: ds_write2_b64 v11, v[18:19], v[16:17] offset0:22 offset1:23 +; VI-NO-DS128-NEXT: v_bfe_i32 v15, v15, 0, 16 +; VI-NO-DS128-NEXT: v_bfe_i32 v17, v14, 0, 16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v16, 31, v15 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v18, 31, v17 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v14, 16, v13 +; VI-NO-DS128-NEXT: ds_write2_b64 v11, v[17:18], v[15:16] offset0:20 offset1:21 +; VI-NO-DS128-NEXT: v_bfe_i32 v14, v14, 0, 16 +; VI-NO-DS128-NEXT: v_bfe_i32 v16, v13, 0, 16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v15, 31, v14 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16 +; VI-NO-DS128-NEXT: ds_write2_b64 v11, v[16:17], v[14:15] offset0:18 offset1:19 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v16, 16, v12 +; VI-NO-DS128-NEXT: v_bfe_i32 v15, v12, 0, 16 +; VI-NO-DS128-NEXT: v_bfe_i32 v17, v16, 0, 16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v18, 31, v17 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v16, 31, v15 +; VI-NO-DS128-NEXT: ds_write2_b64 v11, v[15:16], v[17:18] offset0:16 offset1:17 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(9) +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v15, 16, v6 +; VI-NO-DS128-NEXT: v_bfe_i32 v15, v15, 0, 16 +; VI-NO-DS128-NEXT: v_bfe_i32 v17, v6, 0, 16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v16, 31, v15 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v18, 31, v17 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v6, 16, v5 +; VI-NO-DS128-NEXT: ds_write2_b64 v11, v[17:18], v[15:16] offset0:14 offset1:15 +; VI-NO-DS128-NEXT: v_bfe_i32 v15, v6, 0, 16 +; VI-NO-DS128-NEXT: v_bfe_i32 v5, v5, 0, 16 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v12, 16, v4 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v16, 31, v15 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v6, 31, v5 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(9) +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v8 +; VI-NO-DS128-NEXT: ds_write2_b64 v11, v[5:6], v[15:16] offset0:12 offset1:13 +; VI-NO-DS128-NEXT: v_bfe_i32 v5, v12, 0, 16 +; VI-NO-DS128-NEXT: v_bfe_i32 v15, v4, 0, 16 +; VI-NO-DS128-NEXT: v_bfe_i32 v13, v0, 0, 16 +; 
VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v9 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v6, 31, v5 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v16, 31, v15 +; VI-NO-DS128-NEXT: ds_write2_b64 v11, v[15:16], v[5:6] offset0:10 offset1:11 +; VI-NO-DS128-NEXT: v_bfe_i32 v15, v0, 0, 16 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v3 +; VI-NO-DS128-NEXT: v_bfe_i32 v17, v3, 0, 16 +; VI-NO-DS128-NEXT: v_bfe_i32 v21, v0, 0, 16 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v19, 16, v10 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v22, 31, v21 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v18, 31, v17 +; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v7 +; VI-NO-DS128-NEXT: v_bfe_i32 v19, v19, 0, 16 +; VI-NO-DS128-NEXT: ds_write2_b64 v11, v[17:18], v[21:22] offset0:8 offset1:9 +; VI-NO-DS128-NEXT: v_bfe_i32 v17, v10, 0, 16 +; VI-NO-DS128-NEXT: v_bfe_i32 v1, v1, 0, 16 +; VI-NO-DS128-NEXT: v_bfe_i32 v4, v7, 0, 16 +; VI-NO-DS128-NEXT: v_bfe_i32 v6, v8, 0, 16 +; VI-NO-DS128-NEXT: v_bfe_i32 v8, v9, 0, 16 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v20, 31, v19 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v18, 31, v17 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v2, 31, v1 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v16, 31, v15 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v7, 31, v6 +; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v9, 31, v8 +; VI-NO-DS128-NEXT: ds_write2_b64 v11, v[17:18], v[19:20] offset0:6 offset1:7 +; VI-NO-DS128-NEXT: ds_write2_b64 v11, v[8:9], v[15:16] offset0:4 offset1:5 +; VI-NO-DS128-NEXT: ds_write2_b64 v11, v[6:7], v[13:14] offset0:2 offset1:3 +; VI-NO-DS128-NEXT: ds_write2_b64 v11, v[4:5], v[1:2] offset1:1 +; VI-NO-DS128-NEXT: s_endpgm +; +; GFX9-NO-DS128-LABEL: local_sextload_v32i16_to_v32i64: +; GFX9-NO-DS128: ; %bb.0: +; GFX9-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v8, s1 +; GFX9-NO-DS128-NEXT: ds_read2_b64 v[4:7], v8 
offset0:6 offset1:7 +; GFX9-NO-DS128-NEXT: ds_read2_b64 v[0:3], v8 offset0:4 offset1:5 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v15, s0 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(1) +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v9, 16, v7 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v16, v9, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v18, v7, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v19, 31, v18 +; GFX9-NO-DS128-NEXT: ds_read2_b64 v[11:14], v8 offset1:1 +; GFX9-NO-DS128-NEXT: ds_read2_b64 v[7:10], v8 offset0:2 offset1:3 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v15, v[18:19], v[16:17] offset0:30 offset1:31 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v16, 16, v6 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v16, v16, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v18, v6, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v19, 31, v18 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v6, 16, v5 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v15, v[18:19], v[16:17] offset0:28 offset1:29 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v16, v6, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v5, v5, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v6, 31, v5 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v15, v[5:6], v[16:17] offset0:26 offset1:27 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v17, 16, v4 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v16, v4, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v18, v17, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v19, 31, v18 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(5) +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v4, 16, v3 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v15, v[16:17], v[18:19] offset0:24 offset1:25 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v16, v4, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v3, v3, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v4, 31, v3 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v15, 
v[3:4], v[16:17] offset0:22 offset1:23 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v3, v3, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v16, v2, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v4, 31, v3 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v15, v[16:17], v[3:4] offset0:20 offset1:21 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v2, v2, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v16, v1, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v3, 31, v2 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v4, 16, v0 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v15, v[16:17], v[2:3] offset0:18 offset1:19 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v3, v0, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v16, v4, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v4, 31, v3 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v15, v[3:4], v[16:17] offset0:16 offset1:17 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(8) +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v3, 16, v10 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v3, v3, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v16, v10, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v4, 31, v3 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v15, v[16:17], v[3:4] offset0:14 offset1:15 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v3, 16, v9 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v3, v3, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v9, v9, 0, 16 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v8 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v4, 31, v3 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v10, 31, v9 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v15, v[9:10], v[3:4] offset0:12 offset1:13 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v3, v0, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v8, v8, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v4, 31, v3 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v9, 31, 
v8 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v20, 16, v12 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v15, v[8:9], v[3:4] offset0:10 offset1:11 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v4, 16, v7 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v1, v20, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v16, v7, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v20, v4, 0, 16 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v18, 16, v13 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v19, 16, v14 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v0, v14 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v21, 31, v20 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16 +; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v5, 16, v11 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v8, v12, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v12, v18, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v18, v19, 0, 16 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v15, v[16:17], v[20:21] offset0:8 offset1:9 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v16, v0, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v5, v5, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v3, v11, 0, 16 +; GFX9-NO-DS128-NEXT: v_bfe_i32 v10, v13, 0, 16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v19, 31, v18 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v6, 31, v5 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v2, 31, v1 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v13, 31, v12 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v4, 31, v3 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v9, 31, v8 +; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v11, 31, v10 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v15, v[16:17], v[18:19] offset0:6 offset1:7 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v15, v[10:11], v[12:13] offset0:4 offset1:5 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v15, v[8:9], v[1:2] offset0:2 offset1:3 +; GFX9-NO-DS128-NEXT: ds_write2_b64 v15, v[3:4], v[5:6] offset1:1 +; GFX9-NO-DS128-NEXT: s_endpgm +; +; EG-LABEL: local_sextload_v32i16_to_v32i64: +; EG: ; %bb.0: +; EG-NEXT: ALU 107, @52, KC0[CB0:0-32], KC1[] +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 
12(1.681558e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.Y, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T1.Y, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 20(2.802597e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T1.Z, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T1.W, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 28(3.923636e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T2.Y, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T2.Z, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 36(5.044674e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T2.W, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T3.Y, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 44(6.165713e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T3.Z, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 40(5.605194e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T3.W, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 52(7.286752e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T4.Y, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T4.Z, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 60(8.407791e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, 
T0.W +; EG-NEXT: MOV T4.W, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T5.Y, OQAP, +; EG-NEXT: MOV * T0.W, KC0[2].Z, +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T5.Z, OQAP, +; EG-NEXT: BFE_INT T0.W, T5.Y, 0.0, literal.x, +; EG-NEXT: ADD_INT * T5.W, KC0[2].Z, literal.y, +; EG-NEXT: 16(2.242078e-44), 56(7.847271e-44) +; EG-NEXT: LDS_READ_RET * OQAP, T5.W +; EG-NEXT: MOV * T5.W, OQAP, +; EG-NEXT: BFE_INT T0.Z, T5.Z, 0.0, literal.x, +; EG-NEXT: ASHR T6.W, T0.W, literal.y, +; EG-NEXT: ADD_INT * T7.W, KC0[2].Y, literal.z, +; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) +; EG-NEXT: 20(2.802597e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T7.W, T6.W, +; EG-NEXT: BFE_INT T6.Z, T0.Y, 0.0, literal.x, +; EG-NEXT: ASHR T6.W, T0.Z, literal.y, +; EG-NEXT: ADD_INT * T7.W, KC0[2].Y, literal.z, +; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) +; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T7.W, T6.W, +; EG-NEXT: BFE_INT T7.Z, T1.Y, 0.0, literal.x, +; EG-NEXT: ASHR T6.W, T6.Z, literal.y, +; EG-NEXT: ADD_INT * T7.W, KC0[2].Y, literal.z, +; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) +; EG-NEXT: 52(7.286752e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T7.W, T6.W, +; EG-NEXT: BFE_INT T8.Z, T1.Z, 0.0, literal.x, +; EG-NEXT: ASHR T6.W, T7.Z, literal.y, BS:VEC_120/SCL_212 +; EG-NEXT: ADD_INT * T7.W, KC0[2].Y, literal.z, +; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) +; EG-NEXT: 36(5.044674e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T7.W, T6.W, +; EG-NEXT: BFE_INT T9.Z, T1.W, 0.0, literal.x, +; EG-NEXT: ASHR T6.W, T8.Z, literal.y, +; EG-NEXT: ADD_INT * T7.W, KC0[2].Y, literal.z, +; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) +; EG-NEXT: 84(1.177091e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T7.W, T6.W, +; EG-NEXT: BFE_INT T10.Z, T2.Y, 0.0, literal.x, +; EG-NEXT: ASHR T6.W, T9.Z, literal.y, +; EG-NEXT: ADD_INT * T7.W, KC0[2].Y, literal.z, +; 
EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) +; EG-NEXT: 68(9.528830e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T7.W, T6.W, +; EG-NEXT: BFE_INT T11.Z, T2.Z, 0.0, literal.x, +; EG-NEXT: ASHR T6.W, T10.Z, literal.y, BS:VEC_120/SCL_212 +; EG-NEXT: ADD_INT * T7.W, KC0[2].Y, literal.z, +; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) +; EG-NEXT: 116(1.625506e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T7.W, T6.W, +; EG-NEXT: BFE_INT * T12.Z, T2.W, 0.0, literal.x, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: ALU 98, @53, KC0[CB0:0-32], KC1[] +; EG-NEXT: ASHR T6.W, T11.Z, literal.x, +; EG-NEXT: ADD_INT * T7.W, KC0[2].Y, literal.y, +; EG-NEXT: 31(4.344025e-44), 100(1.401298e-43) +; EG-NEXT: LDS_WRITE * T7.W, T6.W, +; EG-NEXT: BFE_INT T13.Z, T3.Y, 0.0, literal.x, +; EG-NEXT: ASHR T6.W, T12.Z, literal.y, +; EG-NEXT: ADD_INT * T7.W, KC0[2].Y, literal.z, +; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) +; EG-NEXT: 148(2.073922e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T7.W, T6.W, +; EG-NEXT: BFE_INT T14.Z, T3.Z, 0.0, literal.x, +; EG-NEXT: ASHR T6.W, T13.Z, literal.y, BS:VEC_120/SCL_212 +; EG-NEXT: ADD_INT * T7.W, KC0[2].Y, literal.z, +; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) +; EG-NEXT: 132(1.849714e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T7.W, T6.W, +; EG-NEXT: BFE_INT T15.Z, T3.W, 0.0, literal.x, +; EG-NEXT: ASHR T6.W, T14.Z, literal.y, +; EG-NEXT: ADD_INT * T7.W, KC0[2].Y, literal.z, +; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) +; EG-NEXT: 180(2.522337e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T7.W, T6.W, +; EG-NEXT: BFE_INT T16.Z, T4.Y, 0.0, literal.x, +; EG-NEXT: ASHR T6.W, T15.Z, literal.y, +; EG-NEXT: ADD_INT * T7.W, KC0[2].Y, literal.z, +; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) +; EG-NEXT: 164(2.298129e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T7.W, T6.W, +; EG-NEXT: BFE_INT T17.Z, T4.Z, 0.0, literal.x, +; EG-NEXT: ASHR T6.W, T16.Z, literal.y, BS:VEC_120/SCL_212 +; EG-NEXT: ADD_INT * T7.W, KC0[2].Y, literal.z, +; EG-NEXT: 
16(2.242078e-44), 31(4.344025e-44) +; EG-NEXT: 212(2.970753e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T7.W, T6.W, +; EG-NEXT: BFE_INT T18.Z, T4.W, 0.0, literal.x, +; EG-NEXT: ASHR T6.W, T17.Z, literal.y, +; EG-NEXT: ADD_INT * T7.W, KC0[2].Y, literal.z, +; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) +; EG-NEXT: 196(2.746545e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T7.W, T6.W, +; EG-NEXT: BFE_INT T19.Z, T5.W, 0.0, literal.x, +; EG-NEXT: ASHR T6.W, T18.Z, literal.y, +; EG-NEXT: ADD_INT * T7.W, KC0[2].Y, literal.z, +; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) +; EG-NEXT: 244(3.419168e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T7.W, T6.W, +; EG-NEXT: ASHR T6.W, T19.Z, literal.x, +; EG-NEXT: ADD_INT * T7.W, KC0[2].Y, literal.y, +; EG-NEXT: 31(4.344025e-44), 228(3.194960e-43) +; EG-NEXT: LDS_WRITE * T7.W, T6.W, +; EG-NEXT: ASHR T6.W, T5.Y, literal.x, +; EG-NEXT: ADD_INT * T7.W, KC0[2].Y, literal.y, +; EG-NEXT: 31(4.344025e-44), 28(3.923636e-44) +; EG-NEXT: LDS_WRITE * T7.W, T6.W, +; EG-NEXT: ASHR T6.W, T5.Y, literal.x, +; EG-NEXT: ADD_INT * T7.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 24(3.363116e-44) +; EG-NEXT: LDS_WRITE * T7.W, T6.W, +; EG-NEXT: ADD_INT * T6.W, KC0[2].Y, literal.x, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T6.W, T0.W, +; EG-NEXT: ASHR T0.W, T5.Z, literal.x, +; EG-NEXT: ADD_INT * T6.W, KC0[2].Y, literal.y, +; EG-NEXT: 31(4.344025e-44), 12(1.681558e-44) +; EG-NEXT: LDS_WRITE * T6.W, T0.W, +; EG-NEXT: ASHR T0.W, T5.Z, literal.x, +; EG-NEXT: ADD_INT * T6.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 8(1.121039e-44) +; EG-NEXT: LDS_WRITE * T6.W, T0.W, +; EG-NEXT: MOV * T0.W, KC0[2].Y, +; EG-NEXT: LDS_WRITE * T0.W, T0.Z, +; EG-NEXT: ASHR T0.W, T0.Y, literal.x, +; EG-NEXT: ADD_INT * T6.W, KC0[2].Y, literal.y, +; EG-NEXT: 31(4.344025e-44), 60(8.407791e-44) +; EG-NEXT: LDS_WRITE * T6.W, T0.W, +; EG-NEXT: ASHR T0.W, T0.Y, literal.x, +; EG-NEXT: ADD_INT * T6.W, KC0[2].Y, literal.y, +; EG-NEXT: 
16(2.242078e-44), 56(7.847271e-44) +; EG-NEXT: LDS_WRITE * T6.W, T0.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T6.Z, +; EG-NEXT: ASHR T0.W, T1.Y, literal.x, +; EG-NEXT: ADD_INT * T6.W, KC0[2].Y, literal.y, +; EG-NEXT: 31(4.344025e-44), 44(6.165713e-44) +; EG-NEXT: LDS_WRITE * T6.W, T0.W, +; EG-NEXT: ASHR T0.W, T1.Y, literal.x, +; EG-NEXT: ADD_INT * T6.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 40(5.605194e-44) +; EG-NEXT: LDS_WRITE * T6.W, T0.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T7.Z, +; EG-NEXT: ASHR T0.W, T1.Z, literal.x, +; EG-NEXT: ADD_INT * T6.W, KC0[2].Y, literal.y, +; EG-NEXT: 31(4.344025e-44), 92(1.289195e-43) +; EG-NEXT: LDS_WRITE * T6.W, T0.W, +; EG-NEXT: ASHR * T0.W, T1.Z, literal.x, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: ALU 99, @54, KC0[CB0:0-32], KC1[] +; EG-NEXT: ADD_INT * T6.W, KC0[2].Y, literal.x, +; EG-NEXT: 88(1.233143e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T6.W, T0.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 80(1.121039e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T8.Z, +; EG-NEXT: ASHR T0.W, T1.W, literal.x, +; EG-NEXT: ADD_INT * T6.W, KC0[2].Y, literal.y, +; EG-NEXT: 31(4.344025e-44), 76(1.064987e-43) +; EG-NEXT: LDS_WRITE * T6.W, T0.W, +; EG-NEXT: ASHR T0.W, T1.W, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 72(1.008935e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 64(8.968310e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T9.Z, +; EG-NEXT: ASHR T0.W, T2.Y, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 31(4.344025e-44), 124(1.737610e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: ASHR T0.W, T2.Y, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 
16(2.242078e-44), 120(1.681558e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 112(1.569454e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T10.Z, +; EG-NEXT: ASHR T0.W, T2.Z, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 31(4.344025e-44), 108(1.513402e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: ASHR T0.W, T2.Z, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 104(1.457350e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 96(1.345247e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T11.Z, +; EG-NEXT: ASHR T0.W, T2.W, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 31(4.344025e-44), 156(2.186026e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: ASHR T0.W, T2.W, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 152(2.129974e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 144(2.017870e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T12.Z, +; EG-NEXT: ASHR T0.W, T3.Y, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 31(4.344025e-44), 140(1.961818e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: ASHR T0.W, T3.Y, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 136(1.905766e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 128(1.793662e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T13.Z, +; EG-NEXT: ASHR T0.W, T3.Z, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 31(4.344025e-44), 188(2.634441e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: ASHR T0.W, T3.Z, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 184(2.578389e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: 
ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 176(2.466285e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T14.Z, +; EG-NEXT: ASHR T0.W, T3.W, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 31(4.344025e-44), 172(2.410233e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: ASHR T0.W, T3.W, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 168(2.354181e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 160(2.242078e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T15.Z, +; EG-NEXT: ASHR T0.W, T4.Y, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 31(4.344025e-44), 220(3.082857e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: ASHR T0.W, T4.Y, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 216(3.026805e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 208(2.914701e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T16.Z, +; EG-NEXT: ASHR T0.W, T4.Z, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 31(4.344025e-44), 204(2.858649e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: ASHR * T0.W, T4.Z, literal.x, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: ALU 27, @55, KC0[CB0:0-32], KC1[] +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.x, +; EG-NEXT: 200(2.802597e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 192(2.690493e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T17.Z, +; EG-NEXT: ASHR T0.W, T4.W, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 31(4.344025e-44), 252(3.531272e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: ASHR T0.W, T4.W, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 248(3.475220e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; 
EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 240(3.363116e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T18.Z, +; EG-NEXT: ASHR T0.W, T5.W, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 31(4.344025e-44), 236(3.307064e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: ASHR T0.W, T5.W, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 232(3.251012e-43) +; EG-NEXT: LDS_WRITE * T1.W, T0.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 224(3.138909e-43), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T19.Z, +; EG-NEXT: RETURN +; +; VI-DS128-LABEL: local_sextload_v32i16_to_v32i64: +; VI-DS128: ; %bb.0: +; VI-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-DS128-NEXT: s_mov_b32 m0, -1 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-DS128-NEXT: v_mov_b32_e32 v4, s1 +; VI-DS128-NEXT: ds_read_b128 v[0:3], v4 offset:48 +; VI-DS128-NEXT: ds_read_b128 v[9:12], v4 offset:32 +; VI-DS128-NEXT: v_mov_b32_e32 v8, s0 +; VI-DS128-NEXT: ds_read_b128 v[17:20], v4 offset:16 +; VI-DS128-NEXT: ds_read_b128 v[4:7], v4 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(3) +; VI-DS128-NEXT: v_bfe_i32 v13, v2, 0, 16 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; VI-DS128-NEXT: v_bfe_i32 v15, v2, 0, 16 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v16, 31, v15 +; VI-DS128-NEXT: v_mov_b32_e32 v2, v3 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; VI-DS128-NEXT: ds_write_b128 v8, v[13:16] offset:224 +; VI-DS128-NEXT: v_bfe_i32 v13, v2, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v15, v3, 0, 16 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v16, 31, v15 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; VI-DS128-NEXT: ds_write_b128 v8, v[13:16] offset:240 +; VI-DS128-NEXT: v_bfe_i32 v15, v2, 0, 16 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; VI-DS128-NEXT: v_bfe_i32 v13, v0, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v0, v1, 0, 16 +; 
VI-DS128-NEXT: v_bfe_i32 v2, v2, 0, 16 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v3, 31, v2 +; VI-DS128-NEXT: ds_write_b128 v8, v[0:3] offset:208 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(5) +; VI-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v11 +; VI-DS128-NEXT: v_bfe_i32 v0, v11, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v2, v2, 0, 16 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v16, 31, v15 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v3, 31, v2 +; VI-DS128-NEXT: ds_write_b128 v8, v[13:16] offset:192 +; VI-DS128-NEXT: v_mov_b32_e32 v13, v12 +; VI-DS128-NEXT: ds_write_b128 v8, v[0:3] offset:160 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v12 +; VI-DS128-NEXT: v_bfe_i32 v0, v13, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v2, v2, 0, 16 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v3, 31, v2 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v11, 16, v10 +; VI-DS128-NEXT: ds_write_b128 v8, v[0:3] offset:176 +; VI-DS128-NEXT: v_bfe_i32 v0, v9, 0, 16 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v9 +; VI-DS128-NEXT: v_bfe_i32 v9, v10, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v11, v11, 0, 16 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v10, 31, v9 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v12, 31, v11 +; VI-DS128-NEXT: v_bfe_i32 v2, v2, 0, 16 +; VI-DS128-NEXT: ds_write_b128 v8, v[9:12] offset:144 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(8) +; VI-DS128-NEXT: v_lshrrev_b32_e32 v11, 16, v19 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v3, 31, v2 +; VI-DS128-NEXT: v_bfe_i32 v9, v19, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v11, v11, 0, 16 +; VI-DS128-NEXT: ds_write_b128 v8, v[0:3] offset:128 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(8) +; VI-DS128-NEXT: v_bfe_i32 v0, v5, 0, 16 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v5 +; VI-DS128-NEXT: v_mov_b32_e32 v5, v20 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v10, 31, v9 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v12, 31, v11 +; 
VI-DS128-NEXT: ds_write_b128 v8, v[9:12] offset:96 +; VI-DS128-NEXT: v_bfe_i32 v9, v5, 0, 16 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v5, 16, v20 +; VI-DS128-NEXT: v_bfe_i32 v11, v5, 0, 16 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v10, 31, v9 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v12, 31, v11 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v5, 16, v17 +; VI-DS128-NEXT: ds_write_b128 v8, v[9:12] offset:112 +; VI-DS128-NEXT: v_bfe_i32 v9, v17, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v11, v5, 0, 16 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v10, 31, v9 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v12, 31, v11 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v5, 16, v18 +; VI-DS128-NEXT: ds_write_b128 v8, v[9:12] offset:64 +; VI-DS128-NEXT: v_bfe_i32 v9, v4, 0, 16 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v4, 16, v4 +; VI-DS128-NEXT: v_bfe_i32 v13, v18, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v15, v5, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v11, v4, 0, 16 +; VI-DS128-NEXT: v_mov_b32_e32 v4, v7 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v16, 31, v15 +; VI-DS128-NEXT: ds_write_b128 v8, v[13:16] offset:80 +; VI-DS128-NEXT: v_bfe_i32 v13, v4, 0, 16 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v4, 16, v7 +; VI-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v6 +; VI-DS128-NEXT: v_bfe_i32 v15, v4, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v4, v6, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v6, v7, 0, 16 +; VI-DS128-NEXT: v_bfe_i32 v2, v2, 0, 16 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v7, 31, v6 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v3, 31, v2 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v10, 31, v9 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v12, 31, v11 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13 +; VI-DS128-NEXT: v_ashrrev_i32_e32 v16, 31, v15 +; VI-DS128-NEXT: ds_write_b128 v8, v[4:7] offset:32 +; VI-DS128-NEXT: ds_write_b128 v8, v[13:16] offset:48 +; VI-DS128-NEXT: ds_write_b128 v8, v[9:12] +; VI-DS128-NEXT: ds_write_b128 v8, v[0:3] offset:16 +; VI-DS128-NEXT: 
s_endpgm +; +; GFX9-DS128-LABEL: local_sextload_v32i16_to_v32i64: +; GFX9-DS128: ; %bb.0: +; GFX9-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DS128-NEXT: v_mov_b32_e32 v13, s1 +; GFX9-DS128-NEXT: ds_read_b128 v[4:7], v13 offset:48 +; GFX9-DS128-NEXT: ds_read_b128 v[0:3], v13 offset:32 +; GFX9-DS128-NEXT: v_mov_b32_e32 v12, s0 +; GFX9-DS128-NEXT: ds_read_b128 v[8:11], v13 +; GFX9-DS128-NEXT: ds_read_b128 v[18:21], v13 offset:16 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(3) +; GFX9-DS128-NEXT: v_bfe_i32 v14, v6, 0, 16 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v6, 16, v6 +; GFX9-DS128-NEXT: v_bfe_i32 v16, v6, 0, 16 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v15, 31, v14 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16 +; GFX9-DS128-NEXT: v_mov_b32_e32 v6, v7 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v7 +; GFX9-DS128-NEXT: ds_write_b128 v12, v[14:17] offset:224 +; GFX9-DS128-NEXT: v_bfe_i32 v13, v6, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v15, v7, 0, 16 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v16, 31, v15 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v6, 16, v4 +; GFX9-DS128-NEXT: ds_write_b128 v12, v[13:16] offset:240 +; GFX9-DS128-NEXT: v_bfe_i32 v15, v6, 0, 16 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v6, 16, v5 +; GFX9-DS128-NEXT: v_bfe_i32 v13, v4, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v4, v5, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v6, v6, 0, 16 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v7, 31, v6 +; GFX9-DS128-NEXT: ds_write_b128 v12, v[4:7] offset:208 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(5) +; GFX9-DS128-NEXT: v_bfe_i32 v4, v2, 0, 16 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v16, 31, v15 +; GFX9-DS128-NEXT: v_bfe_i32 v6, v2, 0, 16 +; GFX9-DS128-NEXT: ds_write_b128 v12, v[13:16] offset:192 +; GFX9-DS128-NEXT: v_mov_b32_e32 v13, v3 +; GFX9-DS128-NEXT: 
v_ashrrev_i32_e32 v5, 31, v4 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v7, 31, v6 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v3 +; GFX9-DS128-NEXT: ds_write_b128 v12, v[4:7] offset:160 +; GFX9-DS128-NEXT: v_bfe_i32 v4, v13, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v6, v2, 0, 16 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v7, 31, v6 +; GFX9-DS128-NEXT: ds_write_b128 v12, v[4:7] offset:176 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v6, 16, v1 +; GFX9-DS128-NEXT: v_bfe_i32 v2, v0, 0, 16 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX9-DS128-NEXT: v_bfe_i32 v13, v1, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v15, v6, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v4, v0, 0, 16 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v16, 31, v15 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(6) +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v20 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v3, 31, v2 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v9 +; GFX9-DS128-NEXT: ds_write_b128 v12, v[13:16] offset:144 +; GFX9-DS128-NEXT: v_bfe_i32 v13, v20, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v15, v1, 0, 16 +; GFX9-DS128-NEXT: ds_write_b128 v12, v[2:5] offset:128 +; GFX9-DS128-NEXT: v_bfe_i32 v4, v0, 0, 16 +; GFX9-DS128-NEXT: v_mov_b32_e32 v0, v21 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v16, 31, v15 +; GFX9-DS128-NEXT: ds_write_b128 v12, v[13:16] offset:96 +; GFX9-DS128-NEXT: v_bfe_i32 v13, v0, 0, 16 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v21 +; GFX9-DS128-NEXT: v_bfe_i32 v15, v0, 0, 16 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v16, 31, v15 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v18 +; GFX9-DS128-NEXT: ds_write_b128 v12, v[13:16] offset:112 +; GFX9-DS128-NEXT: v_bfe_i32 v13, v18, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v15, v0, 0, 16 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13 +; 
GFX9-DS128-NEXT: v_ashrrev_i32_e32 v16, 31, v15 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v19 +; GFX9-DS128-NEXT: ds_write_b128 v12, v[13:16] offset:64 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v8 +; GFX9-DS128-NEXT: v_bfe_i32 v13, v19, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v15, v1, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v6, v8, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v8, v0, 0, 16 +; GFX9-DS128-NEXT: v_mov_b32_e32 v0, v11 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v16, 31, v15 +; GFX9-DS128-NEXT: ds_write_b128 v12, v[13:16] offset:80 +; GFX9-DS128-NEXT: v_bfe_i32 v13, v0, 0, 16 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v11 +; GFX9-DS128-NEXT: v_bfe_i32 v15, v0, 0, 16 +; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v10 +; GFX9-DS128-NEXT: v_bfe_i32 v17, v10, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v19, v0, 0, 16 +; GFX9-DS128-NEXT: v_bfe_i32 v2, v9, 0, 16 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v18, 31, v17 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v20, 31, v19 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v3, 31, v2 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v7, 31, v6 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v9, 31, v8 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13 +; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v16, 31, v15 +; GFX9-DS128-NEXT: ds_write_b128 v12, v[17:20] offset:32 +; GFX9-DS128-NEXT: ds_write_b128 v12, v[13:16] offset:48 +; GFX9-DS128-NEXT: ds_write_b128 v12, v[6:9] +; GFX9-DS128-NEXT: ds_write_b128 v12, v[2:5] offset:16 +; GFX9-DS128-NEXT: s_endpgm %load = load <32 x i16>, ptr addrspace(3) %in %ext = sext <32 x i16> %load to <32 x i64> store <32 x i64> %ext, ptr addrspace(3) %out @@ -948,19 +8949,95 @@ define amdgpu_kernel void @local_sextload_v32i16_to_v32i64(ptr addrspace(3) %out ; } ; Tests if ds_read/write_b128 gets generated for the 16 byte aligned load. 
-; FUNC-LABEL: {{^}}local_v8i16_to_128: - -; SI-NOT: ds_read_b128 -; SI-NOT: ds_write_b128 - -; CIVI: ds_read_b128 -; CIVI: ds_write_b128 - -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET -; EG: LDS_READ_RET define amdgpu_kernel void @local_v8i16_to_128(ptr addrspace(3) %out, ptr addrspace(3) %in) { +; SI-LABEL: local_v8i16_to_128: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v0, s1 +; SI-NEXT: s_mov_b32 m0, -1 +; SI-NEXT: ds_read2_b64 v[0:3], v0 offset1:1 +; SI-NEXT: v_mov_b32_e32 v4, s0 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: ds_write2_b64 v4, v[0:1], v[2:3] offset1:1 +; SI-NEXT: s_endpgm +; +; VI-NO-DS128-LABEL: local_v8i16_to_128: +; VI-NO-DS128: ; %bb.0: +; VI-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-NO-DS128-NEXT: s_mov_b32 m0, -1 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-NO-DS128-NEXT: v_mov_b32_e32 v0, s1 +; VI-NO-DS128-NEXT: ds_read2_b64 v[0:3], v0 offset1:1 +; VI-NO-DS128-NEXT: v_mov_b32_e32 v4, s0 +; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-NO-DS128-NEXT: ds_write2_b64 v4, v[0:1], v[2:3] offset1:1 +; VI-NO-DS128-NEXT: s_endpgm +; +; GFX9-NO-DS128-LABEL: local_v8i16_to_128: +; GFX9-NO-DS128: ; %bb.0: +; GFX9-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-NO-DS128-NEXT: ds_read2_b64 v[0:3], v0 offset1:1 +; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v4, s0 +; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NO-DS128-NEXT: ds_write2_b64 v4, v[0:1], v[2:3] offset1:1 +; GFX9-NO-DS128-NEXT: s_endpgm +; +; EG-LABEL: local_v8i16_to_128: +; EG: ; %bb.0: +; EG-NEXT: ALU 25, @56, KC0[CB0:0-32], KC1[] +; EG-NEXT: MOV * T0.W, KC0[2].Z, +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.X, OQAP, +; EG-NEXT: MOV * T0.W, KC0[2].Y, +; EG-NEXT: LDS_WRITE * T0.W, T0.X, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) 
+; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.X, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T0.X, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.X, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T0.X, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 12(1.681558e-44), 0(0.000000e+00) +; EG-NEXT: LDS_READ_RET * OQAP, T0.W +; EG-NEXT: MOV T0.X, OQAP, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 12(1.681558e-44), 0(0.000000e+00) +; EG-NEXT: LDS_WRITE * T0.W, T0.X, +; EG-NEXT: RETURN +; +; VI-DS128-LABEL: local_v8i16_to_128: +; VI-DS128: ; %bb.0: +; VI-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-DS128-NEXT: s_mov_b32 m0, -1 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-DS128-NEXT: v_mov_b32_e32 v0, s1 +; VI-DS128-NEXT: ds_read_b128 v[0:3], v0 +; VI-DS128-NEXT: v_mov_b32_e32 v4, s0 +; VI-DS128-NEXT: s_waitcnt lgkmcnt(0) +; VI-DS128-NEXT: ds_write_b128 v4, v[0:3] +; VI-DS128-NEXT: s_endpgm +; +; GFX9-DS128-LABEL: local_v8i16_to_128: +; GFX9-DS128: ; %bb.0: +; GFX9-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DS128-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-DS128-NEXT: ds_read_b128 v[0:3], v0 +; GFX9-DS128-NEXT: v_mov_b32_e32 v4, s0 +; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DS128-NEXT: ds_write_b128 v4, v[0:3] +; GFX9-DS128-NEXT: s_endpgm %ld = load <8 x i16>, ptr addrspace(3) %in, align 16 store <8 x i16> %ld, ptr addrspace(3) %out, align 16 ret void diff --git a/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll b/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll index be020457ce87d..9cc42ac448067 100644 --- a/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll +++ b/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll 
@@ -9851,8 +9851,8 @@ define void @memmove_p5_p5_sz2048(ptr addrspace(5) align 1 %dst, ptr addrspace(5 ; CHECK-NEXT: s_andn2_saveexec_b32 s6, s6 ; CHECK-NEXT: s_cbranch_execz .LBB8_6 ; CHECK-NEXT: ; %bb.4: ; %memmove_bwd_loop.preheader -; CHECK-NEXT: v_add_nc_u32_e32 v1, 0x700, v1 ; CHECK-NEXT: v_add_nc_u32_e32 v0, 0x700, v0 +; CHECK-NEXT: v_add_nc_u32_e32 v1, 0x700, v1 ; CHECK-NEXT: s_movk_i32 s4, 0xf800 ; CHECK-NEXT: s_mov_b32 s5, -1 ; CHECK-NEXT: .LBB8_5: ; %memmove_bwd_loop @@ -11167,8 +11167,8 @@ define void @memmove_p5_p5_sz2048(ptr addrspace(5) align 1 %dst, ptr addrspace(5 ; ALIGNED-NEXT: s_andn2_saveexec_b32 s6, s6 ; ALIGNED-NEXT: s_cbranch_execz .LBB8_6 ; ALIGNED-NEXT: ; %bb.4: ; %memmove_bwd_loop.preheader -; ALIGNED-NEXT: v_add_nc_u32_e32 v1, 0x700, v1 ; ALIGNED-NEXT: v_add_nc_u32_e32 v0, 0x700, v0 +; ALIGNED-NEXT: v_add_nc_u32_e32 v1, 0x700, v1 ; ALIGNED-NEXT: s_movk_i32 s4, 0xf800 ; ALIGNED-NEXT: s_mov_b32 s5, -1 ; ALIGNED-NEXT: .LBB8_5: ; %memmove_bwd_loop @@ -12381,8 +12381,8 @@ define void @memmove_p5_p5_sz2048(ptr addrspace(5) align 1 %dst, ptr addrspace(5 ; UNROLL3-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:2024 ; UNROLL3-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:2020 ; UNROLL3-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:2016 -; UNROLL3-NEXT: v_add_nc_u32_e32 v1, 0x7b0, v1 ; UNROLL3-NEXT: v_add_nc_u32_e32 v2, 0x7b0, v0 +; UNROLL3-NEXT: v_add_nc_u32_e32 v1, 0x7b0, v1 ; UNROLL3-NEXT: s_waitcnt vmcnt(3) ; UNROLL3-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:2028 ; UNROLL3-NEXT: s_waitcnt vmcnt(2) diff --git a/llvm/test/CodeGen/AMDGPU/merge-stores.ll b/llvm/test/CodeGen/AMDGPU/merge-stores.ll index 79f15123f2b26..29607681634ff 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-stores.ll +++ b/llvm/test/CodeGen/AMDGPU/merge-stores.ll @@ -1,5 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefix=SI 
-check-prefix=GCN -check-prefix=GCN-AA %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=bonaire -verify-machineinstrs -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=GCN-AA %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefixes=GCN,SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=bonaire -verify-machineinstrs -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefixes=GCN,CI %s ; This test is mostly to test DAG store merging, so disable the vectorizer. ; Run with devices with different unaligned load restrictions. @@ -8,11 +9,16 @@ ; TODO: Non-zero base offset for load and store combinations ; TODO: Same base addrspacecasted - -; GCN-LABEL: {{^}}merge_global_store_2_constants_i8: -; GCN: buffer_store_short -; GCN: s_endpgm define amdgpu_kernel void @merge_global_store_2_constants_i8(ptr addrspace(1) %out) #0 { +; GCN-LABEL: merge_global_store_2_constants_i8: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: v_mov_b32_e32 v0, 0x7bc8 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: buffer_store_short v0, off, s[0:3], 0 +; GCN-NEXT: s_endpgm %out.gep.1 = getelementptr i8, ptr addrspace(1) %out, i32 1 store i8 123, ptr addrspace(1) %out.gep.1 @@ -20,11 +26,18 @@ define amdgpu_kernel void @merge_global_store_2_constants_i8(ptr addrspace(1) %o ret void } -; GCN-LABEL: {{^}}merge_global_store_2_constants_i8_natural_align: -; GCN: buffer_store_byte -; GCN: buffer_store_byte -; GCN: s_endpgm define amdgpu_kernel void @merge_global_store_2_constants_i8_natural_align(ptr addrspace(1) %out) #0 { +; GCN-LABEL: merge_global_store_2_constants_i8_natural_align: +; GCN: ; 
%bb.0: +; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: v_mov_b32_e32 v0, 0x7b +; GCN-NEXT: v_mov_b32_e32 v1, 0xc8 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: buffer_store_byte v0, off, s[0:3], 0 offset:1 +; GCN-NEXT: buffer_store_byte v1, off, s[0:3], 0 +; GCN-NEXT: s_endpgm %out.gep.1 = getelementptr i8, ptr addrspace(1) %out, i32 1 store i8 123, ptr addrspace(1) %out.gep.1 @@ -32,9 +45,16 @@ define amdgpu_kernel void @merge_global_store_2_constants_i8_natural_align(ptr a ret void } -; GCN-LABEL: {{^}}merge_global_store_2_constants_i16: -; GCN: buffer_store_dword v define amdgpu_kernel void @merge_global_store_2_constants_i16(ptr addrspace(1) %out) #0 { +; GCN-LABEL: merge_global_store_2_constants_i16: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: v_mov_b32_e32 v0, 0x7b01c8 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GCN-NEXT: s_endpgm %out.gep.1 = getelementptr i16, ptr addrspace(1) %out, i32 1 store i16 123, ptr addrspace(1) %out.gep.1 @@ -42,9 +62,16 @@ define amdgpu_kernel void @merge_global_store_2_constants_i16(ptr addrspace(1) % ret void } -; GCN-LABEL: {{^}}merge_global_store_2_constants_0_i16: -; GCN: buffer_store_dword v define amdgpu_kernel void @merge_global_store_2_constants_0_i16(ptr addrspace(1) %out) #0 { +; GCN-LABEL: merge_global_store_2_constants_0_i16: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: v_mov_b32_e32 v0, 0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GCN-NEXT: s_endpgm %out.gep.1 = getelementptr i16, ptr addrspace(1) %out, i32 1 store i16 0, ptr addrspace(1) %out.gep.1 @@ -52,11 +79,18 @@ define amdgpu_kernel void @merge_global_store_2_constants_0_i16(ptr addrspace(1) ret void } -; 
GCN-LABEL: {{^}}merge_global_store_2_constants_i16_natural_align: -; GCN: buffer_store_short -; GCN: buffer_store_short -; GCN: s_endpgm define amdgpu_kernel void @merge_global_store_2_constants_i16_natural_align(ptr addrspace(1) %out) #0 { +; GCN-LABEL: merge_global_store_2_constants_i16_natural_align: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: v_mov_b32_e32 v0, 0x7b +; GCN-NEXT: v_mov_b32_e32 v1, 0x1c8 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: buffer_store_short v0, off, s[0:3], 0 offset:2 +; GCN-NEXT: buffer_store_short v1, off, s[0:3], 0 +; GCN-NEXT: s_endpgm %out.gep.1 = getelementptr i16, ptr addrspace(1) %out, i32 1 store i16 123, ptr addrspace(1) %out.gep.1 @@ -64,11 +98,17 @@ define amdgpu_kernel void @merge_global_store_2_constants_i16_natural_align(ptr ret void } -; GCN-LABEL: {{^}}merge_global_store_2_constants_i32: -; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0x1c8 -; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7b -; GCN: buffer_store_dwordx2 v[[[LO]]:[[HI]]] define amdgpu_kernel void @merge_global_store_2_constants_i32(ptr addrspace(1) %out) #0 { +; GCN-LABEL: merge_global_store_2_constants_i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: v_mov_b32_e32 v0, 0x1c8 +; GCN-NEXT: v_mov_b32_e32 v1, 0x7b +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; GCN-NEXT: s_endpgm %out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i32 1 store i32 123, ptr addrspace(1) %out.gep.1 @@ -76,33 +116,53 @@ define amdgpu_kernel void @merge_global_store_2_constants_i32(ptr addrspace(1) % ret void } -; GCN-LABEL: {{^}}merge_global_store_2_constants_i32_f32: -; GCN: buffer_store_dwordx2 define amdgpu_kernel void @merge_global_store_2_constants_i32_f32(ptr addrspace(1) %out) #0 { +; GCN-LABEL: merge_global_store_2_constants_i32_f32: +; 
GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: v_mov_b32_e32 v0, 0x1c8 +; GCN-NEXT: v_mov_b32_e32 v1, 1.0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; GCN-NEXT: s_endpgm %out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i32 1 store float 1.0, ptr addrspace(1) %out.gep.1 store i32 456, ptr addrspace(1) %out ret void } -; GCN-LABEL: {{^}}merge_global_store_2_constants_f32_i32: -; GCN-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], 4.0 -; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], 0x7b -; GCN: buffer_store_dwordx2 v[[[VLO]]:[[VHI]]] define amdgpu_kernel void @merge_global_store_2_constants_f32_i32(ptr addrspace(1) %out) #0 { +; GCN-LABEL: merge_global_store_2_constants_f32_i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: v_mov_b32_e32 v0, 4.0 +; GCN-NEXT: v_mov_b32_e32 v1, 0x7b +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; GCN-NEXT: s_endpgm %out.gep.1 = getelementptr float, ptr addrspace(1) %out, i32 1 store i32 123, ptr addrspace(1) %out.gep.1 store float 4.0, ptr addrspace(1) %out ret void } -; GCN-LABEL: {{^}}merge_global_store_4_constants_i32: -; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x14d{{$}} -; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x1c8{{$}} -; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x7b{{$}} -; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0x4d2{{$}} -; GCN: buffer_store_dwordx4 v[[[LO]]:[[HI]]] define amdgpu_kernel void @merge_global_store_4_constants_i32(ptr addrspace(1) %out) #0 { +; GCN-LABEL: merge_global_store_4_constants_i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: v_mov_b32_e32 v0, 0x4d2 +; GCN-NEXT: v_mov_b32_e32 v1, 0x7b +; GCN-NEXT: v_mov_b32_e32 v2, 0x1c8 +; GCN-NEXT: v_mov_b32_e32 v3, 
0x14d +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; GCN-NEXT: s_endpgm %out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i32 1 %out.gep.2 = getelementptr i32, ptr addrspace(1) %out, i32 2 %out.gep.3 = getelementptr i32, ptr addrspace(1) %out, i32 3 @@ -114,9 +174,19 @@ define amdgpu_kernel void @merge_global_store_4_constants_i32(ptr addrspace(1) % ret void } -; GCN-LABEL: {{^}}merge_global_store_4_constants_f32_order: -; GCN: buffer_store_dwordx4 define amdgpu_kernel void @merge_global_store_4_constants_f32_order(ptr addrspace(1) %out) #0 { +; GCN-LABEL: merge_global_store_4_constants_f32_order: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: v_mov_b32_e32 v0, 0x41000000 +; GCN-NEXT: v_mov_b32_e32 v1, 1.0 +; GCN-NEXT: v_mov_b32_e32 v2, 2.0 +; GCN-NEXT: v_mov_b32_e32 v3, 4.0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; GCN-NEXT: s_endpgm %out.gep.1 = getelementptr float, ptr addrspace(1) %out, i32 1 %out.gep.2 = getelementptr float, ptr addrspace(1) %out, i32 2 %out.gep.3 = getelementptr float, ptr addrspace(1) %out, i32 3 @@ -129,9 +199,19 @@ define amdgpu_kernel void @merge_global_store_4_constants_f32_order(ptr addrspac } ; First store is out of order. 
-; GCN-LABEL: {{^}}merge_global_store_4_constants_f32: -; GCN: buffer_store_dwordx4 define amdgpu_kernel void @merge_global_store_4_constants_f32(ptr addrspace(1) %out) #0 { +; GCN-LABEL: merge_global_store_4_constants_f32: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: v_mov_b32_e32 v0, 0x41000000 +; GCN-NEXT: v_mov_b32_e32 v1, 1.0 +; GCN-NEXT: v_mov_b32_e32 v2, 2.0 +; GCN-NEXT: v_mov_b32_e32 v3, 4.0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; GCN-NEXT: s_endpgm %out.gep.1 = getelementptr float, ptr addrspace(1) %out, i32 1 %out.gep.2 = getelementptr float, ptr addrspace(1) %out, i32 2 %out.gep.3 = getelementptr float, ptr addrspace(1) %out, i32 3 @@ -143,10 +223,19 @@ define amdgpu_kernel void @merge_global_store_4_constants_f32(ptr addrspace(1) % ret void } -; GCN-LABEL: {{^}}merge_global_store_4_constants_mixed_i32_f32: -; GCN-AA: buffer_store_dwordx4 v -; GCN: s_endpgm define amdgpu_kernel void @merge_global_store_4_constants_mixed_i32_f32(ptr addrspace(1) %out) #0 { +; GCN-LABEL: merge_global_store_4_constants_mixed_i32_f32: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: v_mov_b32_e32 v0, 0x41000000 +; GCN-NEXT: v_mov_b32_e32 v1, 11 +; GCN-NEXT: v_mov_b32_e32 v2, 2.0 +; GCN-NEXT: v_mov_b32_e32 v3, 17 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; GCN-NEXT: s_endpgm %out.gep.1 = getelementptr float, ptr addrspace(1) %out, i32 1 %out.gep.2 = getelementptr float, ptr addrspace(1) %out, i32 2 %out.gep.3 = getelementptr float, ptr addrspace(1) %out, i32 3 @@ -159,13 +248,32 @@ define amdgpu_kernel void @merge_global_store_4_constants_mixed_i32_f32(ptr addr ret void } -; GCN-LABEL: {{^}}merge_global_store_3_constants_i32: -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dword 
v -; CI-DAG: buffer_store_dwordx3 -; GCN-NOT: buffer_store_dword -; GCN: s_endpgm define amdgpu_kernel void @merge_global_store_3_constants_i32(ptr addrspace(1) %out) #0 { +; SI-LABEL: merge_global_store_3_constants_i32: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: v_mov_b32_e32 v1, 0x1c8 +; SI-NEXT: v_mov_b32_e32 v0, 0x4d2 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:8 +; SI-NEXT: s_waitcnt expcnt(0) +; SI-NEXT: v_mov_b32_e32 v1, 0x7b +; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; SI-NEXT: s_endpgm +; +; CI-LABEL: merge_global_store_3_constants_i32: +; CI: ; %bb.0: +; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; CI-NEXT: s_mov_b32 s3, 0xf000 +; CI-NEXT: s_mov_b32 s2, -1 +; CI-NEXT: v_mov_b32_e32 v2, 0x1c8 +; CI-NEXT: v_mov_b32_e32 v0, 0x4d2 +; CI-NEXT: v_mov_b32_e32 v1, 0x7b +; CI-NEXT: s_waitcnt lgkmcnt(0) +; CI-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0 +; CI-NEXT: s_endpgm %out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i32 1 %out.gep.2 = getelementptr i32, ptr addrspace(1) %out, i32 2 @@ -175,9 +283,19 @@ define amdgpu_kernel void @merge_global_store_3_constants_i32(ptr addrspace(1) % ret void } -; GCN-LABEL: {{^}}merge_global_store_2_constants_i64: -; GCN: buffer_store_dwordx4 define amdgpu_kernel void @merge_global_store_2_constants_i64(ptr addrspace(1) %out) #0 { +; GCN-LABEL: merge_global_store_2_constants_i64: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; GCN-NEXT: v_mov_b32_e32 v1, 0 +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: v_mov_b32_e32 v0, 0x1c8 +; GCN-NEXT: v_mov_b32_e32 v2, 0x7b +; GCN-NEXT: v_mov_b32_e32 v3, v1 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; GCN-NEXT: s_endpgm %out.gep.1 = getelementptr i64, ptr addrspace(1) %out, i64 1 store i64 123, ptr addrspace(1) %out.gep.1 @@ 
-185,10 +303,40 @@ define amdgpu_kernel void @merge_global_store_2_constants_i64(ptr addrspace(1) % ret void } -; GCN-LABEL: {{^}}merge_global_store_4_constants_i64: -; GCN: buffer_store_dwordx4 -; GCN: buffer_store_dwordx4 define amdgpu_kernel void @merge_global_store_4_constants_i64(ptr addrspace(1) %out) #0 { +; SI-LABEL: merge_global_store_4_constants_i64: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: v_mov_b32_e32 v1, 0 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: v_mov_b32_e32 v0, 0x1c8 +; SI-NEXT: v_mov_b32_e32 v2, 0x14d +; SI-NEXT: v_mov_b32_e32 v3, v1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 +; SI-NEXT: s_waitcnt expcnt(0) +; SI-NEXT: v_mov_b32_e32 v0, 0x4d2 +; SI-NEXT: v_mov_b32_e32 v2, 0x7b +; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; SI-NEXT: s_endpgm +; +; CI-LABEL: merge_global_store_4_constants_i64: +; CI: ; %bb.0: +; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; CI-NEXT: v_mov_b32_e32 v1, 0 +; CI-NEXT: s_mov_b32 s3, 0xf000 +; CI-NEXT: s_mov_b32 s2, -1 +; CI-NEXT: v_mov_b32_e32 v0, 0x1c8 +; CI-NEXT: v_mov_b32_e32 v2, 0x14d +; CI-NEXT: v_mov_b32_e32 v3, v1 +; CI-NEXT: s_waitcnt lgkmcnt(0) +; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 +; CI-NEXT: s_nop 0 +; CI-NEXT: v_mov_b32_e32 v0, 0x4d2 +; CI-NEXT: v_mov_b32_e32 v2, 0x7b +; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; CI-NEXT: s_endpgm %out.gep.1 = getelementptr i64, ptr addrspace(1) %out, i64 1 %out.gep.2 = getelementptr i64, ptr addrspace(1) %out, i64 2 %out.gep.3 = getelementptr i64, ptr addrspace(1) %out, i64 3 @@ -200,10 +348,23 @@ define amdgpu_kernel void @merge_global_store_4_constants_i64(ptr addrspace(1) % ret void } -; GCN-LABEL: {{^}}merge_global_store_2_adjacent_loads_i32: -; GCN: buffer_load_dwordx2 [[LOAD:v\[[0-9]+:[0-9]+\]]] -; GCN: buffer_store_dwordx2 [[LOAD]] define amdgpu_kernel void 
@merge_global_store_2_adjacent_loads_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { +; GCN-LABEL: merge_global_store_2_adjacent_loads_i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; GCN-NEXT: s_mov_b32 s7, 0xf000 +; GCN-NEXT: s_mov_b32 s6, -1 +; GCN-NEXT: s_mov_b32 s10, s6 +; GCN-NEXT: s_mov_b32 s11, s7 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_mov_b32 s8, s2 +; GCN-NEXT: s_mov_b32 s9, s3 +; GCN-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 +; GCN-NEXT: s_mov_b32 s4, s0 +; GCN-NEXT: s_mov_b32 s5, s1 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; GCN-NEXT: s_endpgm %out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i32 1 %in.gep.1 = getelementptr i32, ptr addrspace(1) %in, i32 1 @@ -215,10 +376,21 @@ define amdgpu_kernel void @merge_global_store_2_adjacent_loads_i32(ptr addrspace ret void } -; GCN-LABEL: {{^}}merge_global_store_2_adjacent_loads_i32_nonzero_base: -; GCN: buffer_load_dwordx2 [[LOAD:v\[[0-9]+:[0-9]+\]]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8 -; GCN: buffer_store_dwordx2 [[LOAD]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8 define amdgpu_kernel void @merge_global_store_2_adjacent_loads_i32_nonzero_base(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { +; GCN-LABEL: merge_global_store_2_adjacent_loads_i32_nonzero_base: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; GCN-NEXT: s_mov_b32 s7, 0xf000 +; GCN-NEXT: s_mov_b32 s6, -1 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_mov_b32 s4, s2 +; GCN-NEXT: s_mov_b32 s5, s3 +; GCN-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0 offset:8 +; GCN-NEXT: s_mov_b32 s2, s6 +; GCN-NEXT: s_mov_b32 s3, s7 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 offset:8 +; GCN-NEXT: s_endpgm %in.gep.0 = getelementptr i32, ptr addrspace(1) %in, i32 2 %in.gep.1 = getelementptr i32, ptr addrspace(1) %in, i32 3 @@ -232,10 +404,24 @@ define amdgpu_kernel void 
@merge_global_store_2_adjacent_loads_i32_nonzero_base( ret void } -; GCN-LABEL: {{^}}merge_global_store_2_adjacent_loads_shuffle_i32: -; GCN: buffer_load_dwordx2 v -; GCN: buffer_store_dwordx2 v define amdgpu_kernel void @merge_global_store_2_adjacent_loads_shuffle_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { +; GCN-LABEL: merge_global_store_2_adjacent_loads_shuffle_i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; GCN-NEXT: s_mov_b32 s7, 0xf000 +; GCN-NEXT: s_mov_b32 s6, -1 +; GCN-NEXT: s_mov_b32 s10, s6 +; GCN-NEXT: s_mov_b32 s11, s7 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_mov_b32 s8, s2 +; GCN-NEXT: s_mov_b32 s9, s3 +; GCN-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 +; GCN-NEXT: s_mov_b32 s4, s0 +; GCN-NEXT: s_mov_b32 s5, s1 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v2, v0 +; GCN-NEXT: buffer_store_dwordx2 v[1:2], off, s[4:7], 0 +; GCN-NEXT: s_endpgm %out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i32 1 %in.gep.1 = getelementptr i32, ptr addrspace(1) %in, i32 1 @@ -247,10 +433,23 @@ define amdgpu_kernel void @merge_global_store_2_adjacent_loads_shuffle_i32(ptr a ret void } -; GCN-LABEL: {{^}}merge_global_store_4_adjacent_loads_i32: -; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]] -; GCN: buffer_store_dwordx4 [[LOAD]] define amdgpu_kernel void @merge_global_store_4_adjacent_loads_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { +; GCN-LABEL: merge_global_store_4_adjacent_loads_i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; GCN-NEXT: s_mov_b32 s7, 0xf000 +; GCN-NEXT: s_mov_b32 s6, -1 +; GCN-NEXT: s_mov_b32 s10, s6 +; GCN-NEXT: s_mov_b32 s11, s7 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_mov_b32 s8, s2 +; GCN-NEXT: s_mov_b32 s9, s3 +; GCN-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 +; GCN-NEXT: s_mov_b32 s4, s0 +; GCN-NEXT: s_mov_b32 s5, s1 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 +; GCN-NEXT: 
s_endpgm %out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i32 1 %out.gep.2 = getelementptr i32, ptr addrspace(1) %out, i32 2 %out.gep.3 = getelementptr i32, ptr addrspace(1) %out, i32 3 @@ -270,16 +469,43 @@ define amdgpu_kernel void @merge_global_store_4_adjacent_loads_i32(ptr addrspace ret void } -; GCN-LABEL: {{^}}merge_global_store_3_adjacent_loads_i32: -; SI-DAG: buffer_load_dwordx2 -; SI-DAG: buffer_load_dword -; CI-DAG: buffer_load_dwordx3 -; GCN: s_waitcnt -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dword v -; CI-DAG: buffer_store_dwordx3 -; GCN: s_endpgm define amdgpu_kernel void @merge_global_store_3_adjacent_loads_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { +; SI-LABEL: merge_global_store_3_adjacent_loads_i32: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: s_mov_b32 s10, s6 +; SI-NEXT: s_mov_b32 s11, s7 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b32 s8, s2 +; SI-NEXT: s_mov_b32 s9, s3 +; SI-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:8 +; SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 +; SI-NEXT: s_mov_b32 s4, s0 +; SI-NEXT: s_mov_b32 s5, s1 +; SI-NEXT: s_waitcnt vmcnt(1) +; SI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:8 +; SI-NEXT: s_waitcnt vmcnt(1) +; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; SI-NEXT: s_endpgm +; +; CI-LABEL: merge_global_store_3_adjacent_loads_i32: +; CI: ; %bb.0: +; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; CI-NEXT: s_mov_b32 s7, 0xf000 +; CI-NEXT: s_mov_b32 s6, -1 +; CI-NEXT: s_mov_b32 s10, s6 +; CI-NEXT: s_mov_b32 s11, s7 +; CI-NEXT: s_waitcnt lgkmcnt(0) +; CI-NEXT: s_mov_b32 s8, s2 +; CI-NEXT: s_mov_b32 s9, s3 +; CI-NEXT: buffer_load_dwordx3 v[0:2], off, s[8:11], 0 +; CI-NEXT: s_mov_b32 s4, s0 +; CI-NEXT: s_mov_b32 s5, s1 +; CI-NEXT: s_waitcnt vmcnt(0) +; CI-NEXT: buffer_store_dwordx3 v[0:2], off, s[4:7], 0 +; CI-NEXT: s_endpgm %out.gep.1 = getelementptr i32, ptr 
addrspace(1) %out, i32 1 %out.gep.2 = getelementptr i32, ptr addrspace(1) %out, i32 2 %in.gep.1 = getelementptr i32, ptr addrspace(1) %in, i32 1 @@ -295,10 +521,23 @@ define amdgpu_kernel void @merge_global_store_3_adjacent_loads_i32(ptr addrspace ret void } -; GCN-LABEL: {{^}}merge_global_store_4_adjacent_loads_f32: -; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]] -; GCN: buffer_store_dwordx4 [[LOAD]] define amdgpu_kernel void @merge_global_store_4_adjacent_loads_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { +; GCN-LABEL: merge_global_store_4_adjacent_loads_f32: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; GCN-NEXT: s_mov_b32 s7, 0xf000 +; GCN-NEXT: s_mov_b32 s6, -1 +; GCN-NEXT: s_mov_b32 s10, s6 +; GCN-NEXT: s_mov_b32 s11, s7 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_mov_b32 s8, s2 +; GCN-NEXT: s_mov_b32 s9, s3 +; GCN-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 +; GCN-NEXT: s_mov_b32 s4, s0 +; GCN-NEXT: s_mov_b32 s5, s1 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 +; GCN-NEXT: s_endpgm %out.gep.1 = getelementptr float, ptr addrspace(1) %out, i32 1 %out.gep.2 = getelementptr float, ptr addrspace(1) %out, i32 2 %out.gep.3 = getelementptr float, ptr addrspace(1) %out, i32 3 @@ -318,10 +557,21 @@ define amdgpu_kernel void @merge_global_store_4_adjacent_loads_f32(ptr addrspace ret void } -; GCN-LABEL: {{^}}merge_global_store_4_adjacent_loads_i32_nonzero_base: -; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44 -; GCN: buffer_store_dwordx4 [[LOAD]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:28 define amdgpu_kernel void @merge_global_store_4_adjacent_loads_i32_nonzero_base(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { +; GCN-LABEL: merge_global_store_4_adjacent_loads_i32_nonzero_base: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; GCN-NEXT: s_mov_b32 s7, 0xf000 +; GCN-NEXT: s_mov_b32 s6, -1 +; GCN-NEXT: 
s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_mov_b32 s4, s2 +; GCN-NEXT: s_mov_b32 s5, s3 +; GCN-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 offset:44 +; GCN-NEXT: s_mov_b32 s2, s6 +; GCN-NEXT: s_mov_b32 s3, s7 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:28 +; GCN-NEXT: s_endpgm %in.gep.0 = getelementptr i32, ptr addrspace(1) %in, i32 11 %in.gep.1 = getelementptr i32, ptr addrspace(1) %in, i32 12 %in.gep.2 = getelementptr i32, ptr addrspace(1) %in, i32 13 @@ -343,11 +593,24 @@ define amdgpu_kernel void @merge_global_store_4_adjacent_loads_i32_nonzero_base( ret void } -; GCN-LABEL: {{^}}merge_global_store_4_adjacent_loads_inverse_i32: -; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]] -; GCN: s_barrier -; GCN: buffer_store_dwordx4 [[LOAD]] define amdgpu_kernel void @merge_global_store_4_adjacent_loads_inverse_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { +; GCN-LABEL: merge_global_store_4_adjacent_loads_inverse_i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; GCN-NEXT: s_mov_b32 s7, 0xf000 +; GCN-NEXT: s_mov_b32 s6, -1 +; GCN-NEXT: s_mov_b32 s10, s6 +; GCN-NEXT: s_mov_b32 s11, s7 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_mov_b32 s8, s2 +; GCN-NEXT: s_mov_b32 s9, s3 +; GCN-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 +; GCN-NEXT: s_mov_b32 s4, s0 +; GCN-NEXT: s_mov_b32 s5, s1 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_barrier +; GCN-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 +; GCN-NEXT: s_endpgm %out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i32 1 %out.gep.2 = getelementptr i32, ptr addrspace(1) %out, i32 2 %out.gep.3 = getelementptr i32, ptr addrspace(1) %out, i32 3 @@ -373,12 +636,49 @@ define amdgpu_kernel void @merge_global_store_4_adjacent_loads_inverse_i32(ptr a ; TODO: Re-packing of loaded register required. Maybe an IR pass ; should catch this? 
- -; GCN-LABEL: {{^}}merge_global_store_4_adjacent_loads_shuffle_i32: -; GCN: buffer_load_dwordx4 v -; GCN: s_barrier -; GCN: buffer_store_dwordx4 v define amdgpu_kernel void @merge_global_store_4_adjacent_loads_shuffle_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { +; SI-LABEL: merge_global_store_4_adjacent_loads_shuffle_i32: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: s_mov_b32 s10, s6 +; SI-NEXT: s_mov_b32 s11, s7 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b32 s8, s2 +; SI-NEXT: s_mov_b32 s9, s3 +; SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 +; SI-NEXT: s_mov_b32 s4, s0 +; SI-NEXT: s_mov_b32 s5, s1 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: s_barrier +; SI-NEXT: v_mov_b32_e32 v4, v2 +; SI-NEXT: v_mov_b32_e32 v2, v0 +; SI-NEXT: v_mov_b32_e32 v6, v2 +; SI-NEXT: v_mov_b32_e32 v5, v1 +; SI-NEXT: buffer_store_dwordx4 v[3:6], off, s[4:7], 0 +; SI-NEXT: s_endpgm +; +; CI-LABEL: merge_global_store_4_adjacent_loads_shuffle_i32: +; CI: ; %bb.0: +; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; CI-NEXT: s_mov_b32 s7, 0xf000 +; CI-NEXT: s_mov_b32 s6, -1 +; CI-NEXT: s_mov_b32 s10, s6 +; CI-NEXT: s_mov_b32 s11, s7 +; CI-NEXT: s_waitcnt lgkmcnt(0) +; CI-NEXT: s_mov_b32 s8, s2 +; CI-NEXT: s_mov_b32 s9, s3 +; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 +; CI-NEXT: s_mov_b32 s4, s0 +; CI-NEXT: s_mov_b32 s5, s1 +; CI-NEXT: s_waitcnt vmcnt(0) +; CI-NEXT: s_barrier +; CI-NEXT: v_mov_b32_e32 v4, v2 +; CI-NEXT: v_mov_b32_e32 v5, v1 +; CI-NEXT: v_mov_b32_e32 v6, v0 +; CI-NEXT: buffer_store_dwordx4 v[3:6], off, s[4:7], 0 +; CI-NEXT: s_endpgm %out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i32 1 %out.gep.2 = getelementptr i32, ptr addrspace(1) %out, i32 2 %out.gep.3 = getelementptr i32, ptr addrspace(1) %out, i32 3 @@ -402,11 +702,23 @@ define amdgpu_kernel void @merge_global_store_4_adjacent_loads_shuffle_i32(ptr a ret void } -; GCN-LABEL: 
{{^}}merge_global_store_4_adjacent_loads_i8: -; GCN: buffer_load_dword [[LOAD:v[0-9]+]] -; GCN: buffer_store_dword [[LOAD]] -; GCN: s_endpgm define amdgpu_kernel void @merge_global_store_4_adjacent_loads_i8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { +; GCN-LABEL: merge_global_store_4_adjacent_loads_i8: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; GCN-NEXT: s_mov_b32 s7, 0xf000 +; GCN-NEXT: s_mov_b32 s6, -1 +; GCN-NEXT: s_mov_b32 s10, s6 +; GCN-NEXT: s_mov_b32 s11, s7 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_mov_b32 s8, s2 +; GCN-NEXT: s_mov_b32 s9, s3 +; GCN-NEXT: buffer_load_dword v0, off, s[8:11], 0 +; GCN-NEXT: s_mov_b32 s4, s0 +; GCN-NEXT: s_mov_b32 s5, s1 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GCN-NEXT: s_endpgm %out.gep.1 = getelementptr i8, ptr addrspace(1) %out, i8 1 %out.gep.2 = getelementptr i8, ptr addrspace(1) %out, i8 2 %out.gep.3 = getelementptr i8, ptr addrspace(1) %out, i8 3 @@ -426,17 +738,32 @@ define amdgpu_kernel void @merge_global_store_4_adjacent_loads_i8(ptr addrspace( ret void } -; GCN-LABEL: {{^}}merge_global_store_4_adjacent_loads_i8_natural_align: -; GCN: buffer_load_ubyte -; GCN: buffer_load_ubyte -; GCN: buffer_load_ubyte -; GCN: buffer_load_ubyte -; GCN: buffer_store_byte -; GCN: buffer_store_byte -; GCN: buffer_store_byte -; GCN: buffer_store_byte -; GCN: s_endpgm define amdgpu_kernel void @merge_global_store_4_adjacent_loads_i8_natural_align(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { +; GCN-LABEL: merge_global_store_4_adjacent_loads_i8_natural_align: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; GCN-NEXT: s_mov_b32 s7, 0xf000 +; GCN-NEXT: s_mov_b32 s6, -1 +; GCN-NEXT: s_mov_b32 s10, s6 +; GCN-NEXT: s_mov_b32 s11, s7 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_mov_b32 s8, s2 +; GCN-NEXT: s_mov_b32 s9, s3 +; GCN-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 +; GCN-NEXT: buffer_load_ubyte v1, off, s[8:11], 0 offset:1 +; 
GCN-NEXT: buffer_load_ubyte v2, off, s[8:11], 0 offset:2 +; GCN-NEXT: buffer_load_ubyte v3, off, s[8:11], 0 offset:3 +; GCN-NEXT: s_mov_b32 s4, s0 +; GCN-NEXT: s_mov_b32 s5, s1 +; GCN-NEXT: s_waitcnt vmcnt(3) +; GCN-NEXT: buffer_store_byte v0, off, s[4:7], 0 +; GCN-NEXT: s_waitcnt vmcnt(3) +; GCN-NEXT: buffer_store_byte v1, off, s[4:7], 0 offset:1 +; GCN-NEXT: s_waitcnt vmcnt(3) +; GCN-NEXT: buffer_store_byte v2, off, s[4:7], 0 offset:2 +; GCN-NEXT: s_waitcnt vmcnt(3) +; GCN-NEXT: buffer_store_byte v3, off, s[4:7], 0 offset:3 +; GCN-NEXT: s_endpgm %out.gep.1 = getelementptr i8, ptr addrspace(1) %out, i8 1 %out.gep.2 = getelementptr i8, ptr addrspace(1) %out, i8 2 %out.gep.3 = getelementptr i8, ptr addrspace(1) %out, i8 3 @@ -456,11 +783,23 @@ define amdgpu_kernel void @merge_global_store_4_adjacent_loads_i8_natural_align( ret void } -; GCN-LABEL: {{^}}merge_global_store_4_vector_elts_loads_v4i32: -; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]] -; GCN: buffer_store_dwordx4 [[LOAD]] -; GCN: s_endpgm define amdgpu_kernel void @merge_global_store_4_vector_elts_loads_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { +; GCN-LABEL: merge_global_store_4_vector_elts_loads_v4i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; GCN-NEXT: s_mov_b32 s7, 0xf000 +; GCN-NEXT: s_mov_b32 s6, -1 +; GCN-NEXT: s_mov_b32 s10, s6 +; GCN-NEXT: s_mov_b32 s11, s7 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_mov_b32 s8, s2 +; GCN-NEXT: s_mov_b32 s9, s3 +; GCN-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 +; GCN-NEXT: s_mov_b32 s4, s0 +; GCN-NEXT: s_mov_b32 s5, s1 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 +; GCN-NEXT: s_endpgm %out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i32 1 %out.gep.2 = getelementptr i32, ptr addrspace(1) %out, i32 2 %out.gep.3 = getelementptr i32, ptr addrspace(1) %out, i32 3 @@ -478,10 +817,16 @@ define amdgpu_kernel void @merge_global_store_4_vector_elts_loads_v4i32(ptr 
addr ret void } -; GCN-LABEL: {{^}}merge_local_store_2_constants_i8: -; GCN: ds_write_b16 -; GCN: s_endpgm define amdgpu_kernel void @merge_local_store_2_constants_i8(ptr addrspace(3) %out) #0 { +; GCN-LABEL: merge_local_store_2_constants_i8: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dword s0, s[4:5], 0x9 +; GCN-NEXT: v_mov_b32_e32 v0, 0x7bc8 +; GCN-NEXT: s_mov_b32 m0, -1 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v1, s0 +; GCN-NEXT: ds_write_b16 v1, v0 +; GCN-NEXT: s_endpgm %out.gep.1 = getelementptr i8, ptr addrspace(3) %out, i32 1 store i8 123, ptr addrspace(3) %out.gep.1 @@ -489,11 +834,28 @@ define amdgpu_kernel void @merge_local_store_2_constants_i8(ptr addrspace(3) %ou ret void } -; GCN-LABEL: {{^}}merge_local_store_2_constants_i32: -; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0x1c8 -; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7b -; GCN: ds_write2_b32 v{{[0-9]+}}, v[[LO]], v[[HI]] offset1:1{{$}} define amdgpu_kernel void @merge_local_store_2_constants_i32(ptr addrspace(3) %out) #0 { +; SI-LABEL: merge_local_store_2_constants_i32: +; SI: ; %bb.0: +; SI-NEXT: s_load_dword s0, s[4:5], 0x9 +; SI-NEXT: v_mov_b32_e32 v0, 0x7b +; SI-NEXT: v_mov_b32_e32 v1, 0x1c8 +; SI-NEXT: s_mov_b32 m0, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v2, s0 +; SI-NEXT: ds_write2_b32 v2, v1, v0 offset1:1 +; SI-NEXT: s_endpgm +; +; CI-LABEL: merge_local_store_2_constants_i32: +; CI: ; %bb.0: +; CI-NEXT: s_load_dword s0, s[4:5], 0x9 +; CI-NEXT: v_mov_b32_e32 v0, 0x1c8 +; CI-NEXT: v_mov_b32_e32 v1, 0x7b +; CI-NEXT: s_mov_b32 m0, -1 +; CI-NEXT: s_waitcnt lgkmcnt(0) +; CI-NEXT: v_mov_b32_e32 v2, s0 +; CI-NEXT: ds_write2_b32 v2, v0, v1 offset1:1 +; CI-NEXT: s_endpgm %out.gep.1 = getelementptr i32, ptr addrspace(3) %out, i32 1 store i32 123, ptr addrspace(3) %out.gep.1 @@ -501,17 +863,34 @@ define amdgpu_kernel void @merge_local_store_2_constants_i32(ptr addrspace(3) %o ret void } -; GCN-LABEL: {{^}}merge_local_store_4_constants_i32: -; GCN-DAG: v_mov_b32_e32 
[[K2:v[0-9]+]], 0x1c8 -; GCN-DAG: v_mov_b32_e32 [[K3:v[0-9]+]], 0x14d -; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, [[K2]], [[K3]] offset0:2 offset1:3 - -; GCN-DAG: v_mov_b32_e32 [[K0:v[0-9]+]], 0x4d2 -; GCN-DAG: v_mov_b32_e32 [[K1:v[0-9]+]], 0x7b -; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, [[K0]], [[K1]] offset1:1 - -; GCN: s_endpgm define amdgpu_kernel void @merge_local_store_4_constants_i32(ptr addrspace(3) %out) #0 { +; SI-LABEL: merge_local_store_4_constants_i32: +; SI: ; %bb.0: +; SI-NEXT: s_load_dword s0, s[4:5], 0x9 +; SI-NEXT: v_mov_b32_e32 v1, 0x1c8 +; SI-NEXT: v_mov_b32_e32 v2, 0x14d +; SI-NEXT: s_mov_b32 m0, -1 +; SI-NEXT: v_mov_b32_e32 v0, 0x7b +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v3, s0 +; SI-NEXT: ds_write2_b32 v3, v1, v2 offset0:2 offset1:3 +; SI-NEXT: v_mov_b32_e32 v1, 0x4d2 +; SI-NEXT: ds_write2_b32 v3, v1, v0 offset1:1 +; SI-NEXT: s_endpgm +; +; CI-LABEL: merge_local_store_4_constants_i32: +; CI: ; %bb.0: +; CI-NEXT: s_load_dword s0, s[4:5], 0x9 +; CI-NEXT: v_mov_b32_e32 v0, 0x1c8 +; CI-NEXT: v_mov_b32_e32 v1, 0x14d +; CI-NEXT: s_mov_b32 m0, -1 +; CI-NEXT: v_mov_b32_e32 v2, 0x4d2 +; CI-NEXT: s_waitcnt lgkmcnt(0) +; CI-NEXT: v_mov_b32_e32 v3, s0 +; CI-NEXT: ds_write2_b32 v3, v0, v1 offset0:2 offset1:3 +; CI-NEXT: v_mov_b32_e32 v0, 0x7b +; CI-NEXT: ds_write2_b32 v3, v2, v0 offset1:1 +; CI-NEXT: s_endpgm %out.gep.1 = getelementptr i32, ptr addrspace(3) %out, i32 1 %out.gep.2 = getelementptr i32, ptr addrspace(3) %out, i32 2 %out.gep.3 = getelementptr i32, ptr addrspace(3) %out, i32 3 @@ -523,13 +902,38 @@ define amdgpu_kernel void @merge_local_store_4_constants_i32(ptr addrspace(3) %o ret void } -; GCN-LABEL: {{^}}merge_global_store_5_constants_i32: -; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 9{{$}} -; GCN-DAG: v_mov_b32_e32 v[[HI4:[0-9]+]], -12{{$}} -; GCN: buffer_store_dwordx4 v[[[LO]]:[[HI4]]] -; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], 11{{$}} -; GCN: buffer_store_dword v[[HI]] define amdgpu_kernel void @merge_global_store_5_constants_i32(ptr 
addrspace(1) %out) { +; SI-LABEL: merge_global_store_5_constants_i32: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: v_mov_b32_e32 v0, 9 +; SI-NEXT: v_mov_b32_e32 v1, 12 +; SI-NEXT: v_mov_b32_e32 v2, 16 +; SI-NEXT: v_mov_b32_e32 v3, -12 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; SI-NEXT: s_waitcnt expcnt(0) +; SI-NEXT: v_mov_b32_e32 v0, 11 +; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:16 +; SI-NEXT: s_endpgm +; +; CI-LABEL: merge_global_store_5_constants_i32: +; CI: ; %bb.0: +; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; CI-NEXT: s_mov_b32 s3, 0xf000 +; CI-NEXT: s_mov_b32 s2, -1 +; CI-NEXT: v_mov_b32_e32 v0, 9 +; CI-NEXT: v_mov_b32_e32 v1, 12 +; CI-NEXT: v_mov_b32_e32 v2, 16 +; CI-NEXT: v_mov_b32_e32 v3, -12 +; CI-NEXT: s_waitcnt lgkmcnt(0) +; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; CI-NEXT: s_nop 0 +; CI-NEXT: v_mov_b32_e32 v0, 11 +; CI-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:16 +; CI-NEXT: s_endpgm store i32 9, ptr addrspace(1) %out, align 4 %idx1 = getelementptr inbounds i32, ptr addrspace(1) %out, i64 1 store i32 12, ptr addrspace(1) %idx1, align 4 @@ -542,10 +946,40 @@ define amdgpu_kernel void @merge_global_store_5_constants_i32(ptr addrspace(1) % ret void } -; GCN-LABEL: {{^}}merge_global_store_6_constants_i32: -; GCN: buffer_store_dwordx4 -; GCN: buffer_store_dwordx2 define amdgpu_kernel void @merge_global_store_6_constants_i32(ptr addrspace(1) %out) { +; SI-LABEL: merge_global_store_6_constants_i32: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: v_mov_b32_e32 v0, 13 +; SI-NEXT: v_mov_b32_e32 v1, 15 +; SI-NEXT: v_mov_b32_e32 v2, 62 +; SI-NEXT: v_mov_b32_e32 v3, 63 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; SI-NEXT: s_waitcnt expcnt(0) +; 
SI-NEXT: v_mov_b32_e32 v0, 11 +; SI-NEXT: v_mov_b32_e32 v1, 0x7b +; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 offset:16 +; SI-NEXT: s_endpgm +; +; CI-LABEL: merge_global_store_6_constants_i32: +; CI: ; %bb.0: +; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; CI-NEXT: s_mov_b32 s3, 0xf000 +; CI-NEXT: s_mov_b32 s2, -1 +; CI-NEXT: v_mov_b32_e32 v0, 13 +; CI-NEXT: v_mov_b32_e32 v1, 15 +; CI-NEXT: v_mov_b32_e32 v2, 62 +; CI-NEXT: v_mov_b32_e32 v3, 63 +; CI-NEXT: s_waitcnt lgkmcnt(0) +; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; CI-NEXT: s_nop 0 +; CI-NEXT: v_mov_b32_e32 v0, 11 +; CI-NEXT: v_mov_b32_e32 v1, 0x7b +; CI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 offset:16 +; CI-NEXT: s_endpgm store i32 13, ptr addrspace(1) %out, align 4 %idx1 = getelementptr inbounds i32, ptr addrspace(1) %out, i64 1 store i32 15, ptr addrspace(1) %idx1, align 4 @@ -560,11 +994,44 @@ define amdgpu_kernel void @merge_global_store_6_constants_i32(ptr addrspace(1) % ret void } -; GCN-LABEL: {{^}}merge_global_store_7_constants_i32: -; GCN: buffer_store_dwordx4 -; SI-DAG: buffer_store_dwordx2 -; CI: buffer_store_dwordx3 define amdgpu_kernel void @merge_global_store_7_constants_i32(ptr addrspace(1) %out) { +; SI-LABEL: merge_global_store_7_constants_i32: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: v_mov_b32_e32 v0, 34 +; SI-NEXT: v_mov_b32_e32 v1, 0x3e7 +; SI-NEXT: v_mov_b32_e32 v2, 0x41 +; SI-NEXT: v_mov_b32_e32 v3, 33 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; SI-NEXT: s_waitcnt expcnt(0) +; SI-NEXT: v_mov_b32_e32 v0, 0xd4 +; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:24 +; SI-NEXT: s_waitcnt expcnt(0) +; SI-NEXT: v_mov_b32_e32 v0, 0x62 +; SI-NEXT: v_mov_b32_e32 v1, 0x5b +; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 offset:16 +; SI-NEXT: s_endpgm +; +; CI-LABEL: merge_global_store_7_constants_i32: +; 
CI: ; %bb.0: +; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; CI-NEXT: s_mov_b32 s3, 0xf000 +; CI-NEXT: s_mov_b32 s2, -1 +; CI-NEXT: v_mov_b32_e32 v0, 34 +; CI-NEXT: v_mov_b32_e32 v1, 0x3e7 +; CI-NEXT: v_mov_b32_e32 v2, 0x41 +; CI-NEXT: v_mov_b32_e32 v3, 33 +; CI-NEXT: s_waitcnt lgkmcnt(0) +; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; CI-NEXT: s_nop 0 +; CI-NEXT: v_mov_b32_e32 v0, 0x62 +; CI-NEXT: v_mov_b32_e32 v1, 0x5b +; CI-NEXT: v_mov_b32_e32 v2, 0xd4 +; CI-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0 offset:16 +; CI-NEXT: s_endpgm store i32 34, ptr addrspace(1) %out, align 4 %idx1 = getelementptr inbounds i32, ptr addrspace(1) %out, i64 1 store i32 999, ptr addrspace(1) %idx1, align 4 @@ -581,11 +1048,43 @@ define amdgpu_kernel void @merge_global_store_7_constants_i32(ptr addrspace(1) % ret void } -; GCN-LABEL: {{^}}merge_global_store_8_constants_i32: -; GCN: buffer_store_dwordx4 -; GCN: buffer_store_dwordx4 -; GCN: s_endpgm define amdgpu_kernel void @merge_global_store_8_constants_i32(ptr addrspace(1) %out) { +; SI-LABEL: merge_global_store_8_constants_i32: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: v_mov_b32_e32 v0, 34 +; SI-NEXT: v_mov_b32_e32 v1, 0x3e7 +; SI-NEXT: v_mov_b32_e32 v2, 0x41 +; SI-NEXT: v_mov_b32_e32 v3, 33 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; SI-NEXT: s_waitcnt expcnt(0) +; SI-NEXT: v_mov_b32_e32 v2, 0x62 +; SI-NEXT: v_mov_b32_e32 v3, 0x5b +; SI-NEXT: v_mov_b32_e32 v4, 0xd4 +; SI-NEXT: v_mov_b32_e32 v5, v1 +; SI-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:16 +; SI-NEXT: s_endpgm +; +; CI-LABEL: merge_global_store_8_constants_i32: +; CI: ; %bb.0: +; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; CI-NEXT: s_mov_b32 s3, 0xf000 +; CI-NEXT: s_mov_b32 s2, -1 +; CI-NEXT: v_mov_b32_e32 v0, 34 +; CI-NEXT: v_mov_b32_e32 v1, 0x3e7 +; CI-NEXT: v_mov_b32_e32 v2, 0x41 +; CI-NEXT: 
v_mov_b32_e32 v3, 33 +; CI-NEXT: s_waitcnt lgkmcnt(0) +; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; CI-NEXT: v_mov_b32_e32 v4, 0xd4 +; CI-NEXT: v_mov_b32_e32 v2, 0x62 +; CI-NEXT: v_mov_b32_e32 v3, 0x5b +; CI-NEXT: v_mov_b32_e32 v5, v1 +; CI-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:16 +; CI-NEXT: s_endpgm store i32 34, ptr addrspace(1) %out, align 4 %idx1 = getelementptr inbounds i32, ptr addrspace(1) %out, i64 1 store i32 999, ptr addrspace(1) %idx1, align 4 @@ -607,77 +1106,154 @@ define amdgpu_kernel void @merge_global_store_8_constants_i32(ptr addrspace(1) % ; This requires handling of scalar_to_vector for v2i64 to avoid ; scratch usage. ; FIXME: Should do single load and store - -; GCN-LABEL: {{^}}copy_v3i32_align4: -; GCN-NOT: SCRATCH_RSRC_DWORD -; SI-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} -; SI-DAG: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8 -; CI-DAG: buffer_load_dwordx3 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} -; GCN-NOT: offen -; GCN: s_waitcnt vmcnt -; GCN-NOT: offen -; SI-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} -; SI-DAG: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8 -; CI-DAG: buffer_store_dwordx3 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} - -; GCN: ScratchSize: 0{{$}} define amdgpu_kernel void @copy_v3i32_align4(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 { +; SI-LABEL: copy_v3i32_align4: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: s_mov_b32 s10, s6 +; SI-NEXT: s_mov_b32 s11, s7 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b32 s8, s2 +; SI-NEXT: s_mov_b32 s9, s3 +; SI-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:8 +; SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 +; SI-NEXT: s_mov_b32 s4, s0 +; SI-NEXT: s_mov_b32 s5, 
s1 +; SI-NEXT: s_waitcnt vmcnt(1) +; SI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:8 +; SI-NEXT: s_waitcnt vmcnt(1) +; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; SI-NEXT: s_endpgm +; +; CI-LABEL: copy_v3i32_align4: +; CI: ; %bb.0: +; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; CI-NEXT: s_mov_b32 s7, 0xf000 +; CI-NEXT: s_mov_b32 s6, -1 +; CI-NEXT: s_mov_b32 s10, s6 +; CI-NEXT: s_mov_b32 s11, s7 +; CI-NEXT: s_waitcnt lgkmcnt(0) +; CI-NEXT: s_mov_b32 s8, s2 +; CI-NEXT: s_mov_b32 s9, s3 +; CI-NEXT: buffer_load_dwordx3 v[0:2], off, s[8:11], 0 +; CI-NEXT: s_mov_b32 s4, s0 +; CI-NEXT: s_mov_b32 s5, s1 +; CI-NEXT: s_waitcnt vmcnt(0) +; CI-NEXT: buffer_store_dwordx3 v[0:2], off, s[4:7], 0 +; CI-NEXT: s_endpgm %vec = load <3 x i32>, ptr addrspace(1) %in, align 4 store <3 x i32> %vec, ptr addrspace(1) %out ret void } - -; GCN-LABEL: {{^}}copy_v3i64_align4: -; GCN-NOT: SCRATCH_RSRC_DWORD -; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} -; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}} -; GCN-NOT: offen -; GCN: s_waitcnt vmcnt -; GCN-NOT: offen -; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} -; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}} ; GCN: ScratchSize: 0{{$}} + define amdgpu_kernel void @copy_v3i64_align4(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 { +; GCN-LABEL: copy_v3i64_align4: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; GCN-NEXT: s_mov_b32 s7, 0xf000 +; GCN-NEXT: s_mov_b32 s6, -1 +; GCN-NEXT: s_mov_b32 s10, s6 +; GCN-NEXT: s_mov_b32 s11, s7 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_mov_b32 s8, s2 +; GCN-NEXT: s_mov_b32 s9, s3 +; GCN-NEXT: buffer_load_dwordx2 v[4:5], off, s[8:11], 0 offset:16 +; GCN-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 +; GCN-NEXT: s_mov_b32 s4, s0 +; GCN-NEXT: s_mov_b32 
s5, s1 +; GCN-NEXT: s_waitcnt vmcnt(1) +; GCN-NEXT: buffer_store_dwordx2 v[4:5], off, s[4:7], 0 offset:16 +; GCN-NEXT: s_waitcnt vmcnt(1) +; GCN-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 +; GCN-NEXT: s_endpgm %vec = load <3 x i64>, ptr addrspace(1) %in, align 4 store <3 x i64> %vec, ptr addrspace(1) %out ret void } - -; GCN-LABEL: {{^}}copy_v3f32_align4: -; GCN-NOT: SCRATCH_RSRC_DWORD -; SI-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} -; SI-DAG: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8 -; CI-DAG: buffer_load_dwordx3 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} -; GCN-NOT: offen -; GCN: s_waitcnt vmcnt -; GCN-NOT: offen -; SI-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} -; SI-DAG: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8 -; CI-DAG: buffer_store_dwordx3 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} ; GCN: ScratchSize: 0{{$}} + define amdgpu_kernel void @copy_v3f32_align4(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 { +; SI-LABEL: copy_v3f32_align4: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: s_mov_b32 s10, s6 +; SI-NEXT: s_mov_b32 s11, s7 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b32 s8, s2 +; SI-NEXT: s_mov_b32 s9, s3 +; SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 +; SI-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:8 +; SI-NEXT: s_mov_b32 s4, s0 +; SI-NEXT: s_mov_b32 s5, s1 +; SI-NEXT: s_waitcnt vmcnt(1) +; SI-NEXT: v_add_f32_e32 v1, 2.0, v1 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_add_f32_e32 v2, 4.0, v2 +; SI-NEXT: v_add_f32_e32 v0, 1.0, v0 +; SI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:8 +; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; SI-NEXT: s_endpgm +; +; CI-LABEL: copy_v3f32_align4: +; CI: ; %bb.0: +; CI-NEXT: s_load_dwordx4 
s[0:3], s[4:5], 0x9 +; CI-NEXT: s_mov_b32 s7, 0xf000 +; CI-NEXT: s_mov_b32 s6, -1 +; CI-NEXT: s_mov_b32 s10, s6 +; CI-NEXT: s_mov_b32 s11, s7 +; CI-NEXT: s_waitcnt lgkmcnt(0) +; CI-NEXT: s_mov_b32 s8, s2 +; CI-NEXT: s_mov_b32 s9, s3 +; CI-NEXT: buffer_load_dwordx3 v[0:2], off, s[8:11], 0 +; CI-NEXT: s_mov_b32 s4, s0 +; CI-NEXT: s_mov_b32 s5, s1 +; CI-NEXT: s_waitcnt vmcnt(0) +; CI-NEXT: v_add_f32_e32 v2, 4.0, v2 +; CI-NEXT: v_add_f32_e32 v1, 2.0, v1 +; CI-NEXT: v_add_f32_e32 v0, 1.0, v0 +; CI-NEXT: buffer_store_dwordx3 v[0:2], off, s[4:7], 0 +; CI-NEXT: s_endpgm %vec = load <3 x float>, ptr addrspace(1) %in, align 4 %fadd = fadd <3 x float> %vec, store <3 x float> %fadd, ptr addrspace(1) %out ret void } - -; GCN-LABEL: {{^}}copy_v3f64_align4: -; GCN-NOT: SCRATCH_RSRC_DWORD -; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} -; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}} -; GCN-NOT: offen -; GCN: s_waitcnt vmcnt -; GCN-NOT: offen -; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} -; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}} ; GCN: ScratchSize: 0{{$}} + define amdgpu_kernel void @copy_v3f64_align4(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 { +; GCN-LABEL: copy_v3f64_align4: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; GCN-NEXT: s_mov_b32 s7, 0xf000 +; GCN-NEXT: s_mov_b32 s6, -1 +; GCN-NEXT: s_mov_b32 s10, s6 +; GCN-NEXT: s_mov_b32 s11, s7 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_mov_b32 s8, s2 +; GCN-NEXT: s_mov_b32 s9, s3 +; GCN-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 +; GCN-NEXT: buffer_load_dwordx2 v[4:5], off, s[8:11], 0 offset:16 +; GCN-NEXT: s_mov_b32 s4, s0 +; GCN-NEXT: s_mov_b32 s5, s1 +; GCN-NEXT: s_waitcnt vmcnt(1) +; GCN-NEXT: v_add_f64 v[2:3], v[2:3], 2.0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: 
v_add_f64 v[4:5], v[4:5], 4.0 +; GCN-NEXT: v_add_f64 v[0:1], v[0:1], 1.0 +; GCN-NEXT: buffer_store_dwordx2 v[4:5], off, s[4:7], 0 offset:16 +; GCN-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 +; GCN-NEXT: s_endpgm %vec = load <3 x double>, ptr addrspace(1) %in, align 4 %fadd = fadd <3 x double> %vec, store <3 x double> %fadd, ptr addrspace(1) %out ret void } +; GCN: ScratchSize: 0{{$}} declare void @llvm.amdgcn.s.barrier() #1 diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-combine-sel-src.mir b/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-combine-sel-src.mir index 14ba8fccb172d..1c20db9577695 100644 --- a/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-combine-sel-src.mir +++ b/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-combine-sel-src.mir @@ -484,7 +484,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[COPY]], 0, [[COPY]], 0, 1, 0, 5, 0, implicit $exec ; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_LSHRREV_B32_sdwa]], 16, 8, implicit $exec - ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 1, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 2, implicit $exec + ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 16, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 2, implicit $exec ; CHECK-NEXT: S_ENDPGM 0 %1:vgpr_32 = COPY $vgpr0 %2:vgpr_32 = V_LSHRREV_B32_sdwa 0, %1, 0, %1, 0, 1, 0, 5, 0, implicit $exec @@ -572,7 +572,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[COPY]], 0, [[COPY]], 0, 1, 0, 5, 0, implicit $exec ; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_LSHRREV_B32_sdwa]], 16, 8, implicit $exec - ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 1, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 
2, implicit $exec + ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 16, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 2, implicit $exec ; CHECK-NEXT: S_ENDPGM 0 %1:vgpr_32 = COPY $vgpr0 %2:vgpr_32 = V_LSHRREV_B32_sdwa 0, %1, 0, %1, 0, 1, 0, 5, 0, implicit $exec @@ -638,7 +638,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[COPY]], 0, [[COPY]], 0, 1, 0, 5, 0, implicit $exec ; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_LSHRREV_B32_sdwa]], 24, 8, implicit $exec - ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 1, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 3, implicit $exec + ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 16, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 3, implicit $exec ; CHECK-NEXT: S_ENDPGM 0 %1:vgpr_32 = COPY $vgpr0 %2:vgpr_32 = V_LSHRREV_B32_sdwa 0, %1, 0, %1, 0, 1, 0, 5, 0, implicit $exec @@ -704,7 +704,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[COPY]], 0, [[COPY]], 0, 1, 0, 5, 0, implicit $exec ; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_LSHRREV_B32_sdwa]], 24, 8, implicit $exec - ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 1, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 3, implicit $exec + ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 16, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 3, implicit $exec ; CHECK-NEXT: S_ENDPGM 0 %1:vgpr_32 = COPY $vgpr0 %2:vgpr_32 = V_LSHRREV_B32_sdwa 0, %1, 0, %1, 0, 1, 0, 5, 0, implicit $exec @@ -792,7 +792,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa:%[0-9]+]]:vgpr_32 = 
V_LSHRREV_B32_sdwa 0, [[COPY]], 0, [[COPY]], 0, 1, 0, 5, 0, implicit $exec ; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_LSHRREV_B32_sdwa]], 16, 16, implicit $exec - ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 1, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 5, implicit $exec + ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 16, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 5, implicit $exec ; CHECK-NEXT: S_ENDPGM 0 %1:vgpr_32 = COPY $vgpr0 %2:vgpr_32 = V_LSHRREV_B32_sdwa 0, %1, 0, %1, 0, 1, 0, 5, 0, implicit $exec @@ -814,7 +814,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[COPY]], 0, [[COPY]], 0, 1, 0, 5, 0, implicit $exec ; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_LSHRREV_B32_sdwa]], 16, 16, implicit $exec - ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 1, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 5, implicit $exec + ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 16, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 5, implicit $exec ; CHECK-NEXT: S_ENDPGM 0 %1:vgpr_32 = COPY $vgpr0 %2:vgpr_32 = V_LSHRREV_B32_sdwa 0, %1, 0, %1, 0, 1, 0, 5, 0, implicit $exec @@ -836,7 +836,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[COPY]], 0, [[COPY]], 0, 1, 0, 5, 0, implicit $exec ; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_LSHRREV_B32_sdwa]], 16, 16, implicit $exec - ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 1, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 5, implicit $exec + ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 16, 
[[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 5, implicit $exec ; CHECK-NEXT: S_ENDPGM 0 %1:vgpr_32 = COPY $vgpr0 %2:vgpr_32 = V_LSHRREV_B32_sdwa 0, %1, 0, %1, 0, 1, 0, 5, 0, implicit $exec @@ -902,7 +902,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[COPY]], 0, [[COPY]], 0, 1, 0, 5, 0, implicit $exec ; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_LSHRREV_B32_sdwa]], 16, 16, implicit $exec - ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 1, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 3, implicit $exec + ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 16, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 3, implicit $exec ; CHECK-NEXT: S_ENDPGM 0 %1:vgpr_32 = COPY $vgpr0 %2:vgpr_32 = V_LSHRREV_B32_sdwa 0, %1, 0, %1, 0, 1, 0, 5, 0, implicit $exec @@ -924,7 +924,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[COPY]], 0, [[COPY]], 0, 1, 0, 5, 0, implicit $exec ; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_LSHRREV_B32_sdwa]], 16, 16, implicit $exec - ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 1, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 2, implicit $exec + ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 16, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 2, implicit $exec ; CHECK-NEXT: S_ENDPGM 0 %1:vgpr_32 = COPY $vgpr0 %2:vgpr_32 = V_LSHRREV_B32_sdwa 0, %1, 0, %1, 0, 1, 0, 5, 0, implicit $exec @@ -946,7 +946,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[COPY]], 0, [[COPY]], 0, 1, 0, 5, 0, implicit $exec ; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 
[[V_LSHRREV_B32_sdwa]], 0, 32, implicit $exec - ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 1, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 6, implicit $exec + ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 16, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 6, implicit $exec ; CHECK-NEXT: S_ENDPGM 0 %1:vgpr_32 = COPY $vgpr0 %2:vgpr_32 = V_LSHRREV_B32_sdwa 0, %1, 0, %1, 0, 1, 0, 5, 0, implicit $exec @@ -968,7 +968,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[COPY]], 0, [[COPY]], 0, 1, 0, 5, 0, implicit $exec ; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_LSHRREV_B32_sdwa]], 0, 32, implicit $exec - ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 1, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 5, implicit $exec + ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 16, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 5, implicit $exec ; CHECK-NEXT: S_ENDPGM 0 %1:vgpr_32 = COPY $vgpr0 %2:vgpr_32 = V_LSHRREV_B32_sdwa 0, %1, 0, %1, 0, 1, 0, 5, 0, implicit $exec @@ -990,7 +990,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[COPY]], 0, [[COPY]], 0, 1, 0, 5, 0, implicit $exec ; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_LSHRREV_B32_sdwa]], 0, 32, implicit $exec - ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 1, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 4, implicit $exec + ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 16, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 4, implicit $exec ; CHECK-NEXT: S_ENDPGM 0 %1:vgpr_32 = COPY $vgpr0 %2:vgpr_32 = V_LSHRREV_B32_sdwa 0, %1, 
0, %1, 0, 1, 0, 5, 0, implicit $exec @@ -1012,7 +1012,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[COPY]], 0, [[COPY]], 0, 1, 0, 5, 0, implicit $exec ; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_LSHRREV_B32_sdwa]], 0, 32, implicit $exec - ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 1, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 3, implicit $exec + ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 16, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 3, implicit $exec ; CHECK-NEXT: S_ENDPGM 0 %1:vgpr_32 = COPY $vgpr0 %2:vgpr_32 = V_LSHRREV_B32_sdwa 0, %1, 0, %1, 0, 1, 0, 5, 0, implicit $exec @@ -1034,7 +1034,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[COPY]], 0, [[COPY]], 0, 1, 0, 5, 0, implicit $exec ; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_LSHRREV_B32_sdwa]], 0, 32, implicit $exec - ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 1, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 2, implicit $exec + ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 16, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 2, implicit $exec ; CHECK-NEXT: S_ENDPGM 0 %1:vgpr_32 = COPY $vgpr0 %2:vgpr_32 = V_LSHRREV_B32_sdwa 0, %1, 0, %1, 0, 1, 0, 5, 0, implicit $exec @@ -1056,7 +1056,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[COPY]], 0, [[COPY]], 0, 1, 0, 5, 0, implicit $exec ; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_LSHRREV_B32_sdwa]], 0, 32, implicit $exec - ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, 
[[V_LSHRREV_B32_sdwa]], 1, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 1, implicit $exec + ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 16, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 1, implicit $exec ; CHECK-NEXT: S_ENDPGM 0 %1:vgpr_32 = COPY $vgpr0 %2:vgpr_32 = V_LSHRREV_B32_sdwa 0, %1, 0, %1, 0, 1, 0, 5, 0, implicit $exec @@ -1078,7 +1078,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[COPY]], 0, [[COPY]], 0, 1, 0, 5, 0, implicit $exec ; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_LSHRREV_B32_sdwa]], 0, 32, implicit $exec - ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 1, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 0, implicit $exec + ; CHECK-NEXT: [[V_LSHRREV_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_sdwa 0, [[V_LSHRREV_B32_sdwa]], 16, [[V_LSHRREV_B32_sdwa]], 0, 1, 0, 6, 0, implicit $exec ; CHECK-NEXT: S_ENDPGM 0 %1:vgpr_32 = COPY $vgpr0 %2:vgpr_32 = V_LSHRREV_B32_sdwa 0, %1, 0, %1, 0, 1, 0, 5, 0, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/structurize-hoist.ll b/llvm/test/CodeGen/AMDGPU/structurize-hoist.ll deleted file mode 100644 index 42436a1b4c279..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/structurize-hoist.ll +++ /dev/null @@ -1,180 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX900 %s - - -%pair = type { i32, i32 } - -define void @test_extractvalue_then_else(ptr %ptr, i1 %cond) { -; GFX900-LABEL: test_extractvalue_then_else: -; GFX900: ; %bb.0: ; %if -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: flat_load_dword v3, v[0:1] -; GFX900-NEXT: v_and_b32_e32 v2, 1, v2 -; GFX900-NEXT: v_cmp_ne_u32_e32 vcc, 1, v2 -; GFX900-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GFX900-NEXT: 
s_xor_b64 s[4:5], exec, s[4:5] -; GFX900-NEXT: s_cbranch_execz .LBB0_2 -; GFX900-NEXT: ; %bb.1: ; %else -; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_add_u32_e32 v3, 1, v3 -; GFX900-NEXT: .LBB0_2: ; %Flow -; GFX900-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] -; GFX900-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX900-NEXT: flat_store_dword v[0:1], v3 -; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX900-NEXT: s_setpc_b64 s[30:31] -if: - %load_then = load %pair, ptr %ptr - br i1 %cond, label %then, label %else - -then: - %a_then = extractvalue %pair %load_then, 0 - br label %merge - -else: - %a_else = extractvalue %pair %load_then, 0 - %sum_else = add i32 %a_else, 1 - br label %merge - -merge: - %phi = phi i32 [ %a_then, %then ], [ %sum_else, %else ] - store i32 %phi, ptr %ptr - ret void -} - -define void @test_extractvalue_else_then(ptr %ptr, i1 %cond) { -; GFX900-LABEL: test_extractvalue_else_then: -; GFX900: ; %bb.0: ; %if -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: flat_load_dword v3, v[0:1] -; GFX900-NEXT: v_and_b32_e32 v2, 1, v2 -; GFX900-NEXT: v_cmp_ne_u32_e32 vcc, 1, v2 -; GFX900-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GFX900-NEXT: s_xor_b64 s[4:5], exec, s[4:5] -; GFX900-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] -; GFX900-NEXT: s_cbranch_execz .LBB1_2 -; GFX900-NEXT: ; %bb.1: ; %else -; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_add_u32_e32 v3, 1, v3 -; GFX900-NEXT: .LBB1_2: ; %merge -; GFX900-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX900-NEXT: flat_store_dword v[0:1], v3 -; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX900-NEXT: s_setpc_b64 s[30:31] -if: - %load_then = load %pair, ptr %ptr - br i1 %cond, label %else, label %then - -else: - %a_else = extractvalue %pair %load_then, 0 - %sum_else = add i32 %a_else, 1 - br label %merge - -then: - %a_then = extractvalue %pair %load_then, 0 - br label %merge - 
-merge: - %phi = phi i32 [ %a_then, %then ], [ %sum_else, %else ] - store i32 %phi, ptr %ptr - ret void -} - -define amdgpu_kernel void @test_loop_with_if( ptr %ptr, i1 %cond) #0 { -; GFX900-LABEL: test_loop_with_if: -; GFX900: ; %bb.0: ; %entry -; GFX900-NEXT: s_load_dword s2, s[4:5], 0x2c -; GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX900-NEXT: v_mov_b32_e32 v5, 0 -; GFX900-NEXT: s_mov_b64 s[4:5], 0 -; GFX900-NEXT: s_movk_i32 s10, 0xfe -; GFX900-NEXT: s_waitcnt lgkmcnt(0) -; GFX900-NEXT: s_bitcmp1_b32 s2, 0 -; GFX900-NEXT: s_cselect_b64 s[2:3], -1, 0 -; GFX900-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[2:3] -; GFX900-NEXT: v_mov_b32_e32 v2, s1 -; GFX900-NEXT: s_xor_b64 s[2:3], s[2:3], -1 -; GFX900-NEXT: v_mov_b32_e32 v1, s0 -; GFX900-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v3 -; GFX900-NEXT: s_branch .LBB2_2 -; GFX900-NEXT: .LBB2_1: ; %latch -; GFX900-NEXT: ; in Loop: Header=BB2_2 Depth=1 -; GFX900-NEXT: s_or_b64 exec, exec, s[8:9] -; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_add_u32_e32 v5, 20, v3 -; GFX900-NEXT: v_cmp_lt_i32_e32 vcc, s10, v5 -; GFX900-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GFX900-NEXT: flat_store_dword v[1:2], v3 -; GFX900-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GFX900-NEXT: s_cbranch_execz .LBB2_8 -; GFX900-NEXT: .LBB2_2: ; %loop -; GFX900-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX900-NEXT: flat_load_dwordx2 v[3:4], v[1:2] -; GFX900-NEXT: s_and_b64 vcc, exec, s[0:1] -; GFX900-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX900-NEXT: s_mov_b64 s[6:7], 0 -; GFX900-NEXT: s_cbranch_vccnz .LBB2_4 -; GFX900-NEXT: ; %bb.3: ; %if -; GFX900-NEXT: ; in Loop: Header=BB2_2 Depth=1 -; GFX900-NEXT: v_cmp_gt_i32_e32 vcc, 11, v5 -; GFX900-NEXT: s_andn2_b64 s[8:9], s[2:3], exec -; GFX900-NEXT: s_and_b64 s[12:13], vcc, exec -; GFX900-NEXT: s_mov_b64 s[6:7], -1 -; GFX900-NEXT: s_or_b64 s[8:9], s[8:9], s[12:13] -; GFX900-NEXT: .LBB2_4: ; %Flow -; GFX900-NEXT: ; in Loop: Header=BB2_2 Depth=1 -; GFX900-NEXT: s_and_saveexec_b64 s[12:13], s[8:9] -; 
GFX900-NEXT: s_xor_b64 s[8:9], exec, s[12:13] -; GFX900-NEXT: s_cbranch_execz .LBB2_6 -; GFX900-NEXT: ; %bb.5: ; %else -; GFX900-NEXT: ; in Loop: Header=BB2_2 Depth=1 -; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_add_u32_e32 v3, v3, v4 -; GFX900-NEXT: s_andn2_b64 s[6:7], s[6:7], exec -; GFX900-NEXT: .LBB2_6: ; %Flow1 -; GFX900-NEXT: ; in Loop: Header=BB2_2 Depth=1 -; GFX900-NEXT: s_or_b64 exec, exec, s[8:9] -; GFX900-NEXT: s_and_saveexec_b64 s[8:9], s[6:7] -; GFX900-NEXT: s_cbranch_execz .LBB2_1 -; GFX900-NEXT: ; %bb.7: ; %then -; GFX900-NEXT: ; in Loop: Header=BB2_2 Depth=1 -; GFX900-NEXT: flat_store_dword v[1:2], v0 -; GFX900-NEXT: s_branch .LBB2_1 -; GFX900-NEXT: .LBB2_8: ; %end -; GFX900-NEXT: s_endpgm -entry: - %a = tail call i32 @llvm.amdgcn.workitem.id.x() - br label %loop - -loop: - %entry_phi = phi i32 [ 0, %entry ], [ %a15, %latch ] - %load = load %pair, ptr %ptr - br i1 %cond, label %if, label %else - -if: - %cmp = icmp sgt i32 %entry_phi, 10 - br i1 %cmp, label %then, label %else - -then: - %a_then = extractvalue %pair %load, 0 - store i32 %a, ptr %ptr, align 4 - br label %latch - -else: - %a2 = extractvalue %pair %load, 1 - %y = extractvalue %pair %load, 0 - %a_else = add i32 %y, %a2 - br label %latch - -latch: - %a_test = phi i32 [ %a_then, %then ], [ %a_else, %else ] - store i32 %a_test, ptr %ptr - %a15 = add nsw i32 %a_test, 20 - %a16 = icmp slt i32 %a15, 255 - br i1 %a16, label %loop, label %end - -end: - ret void -} diff --git a/llvm/test/CodeGen/AMDGPU/vopd-combine-gfx1250.mir b/llvm/test/CodeGen/AMDGPU/vopd-combine-gfx1250.mir new file mode 100644 index 0000000000000..586ddf627bd9e --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/vopd-combine-gfx1250.mir @@ -0,0 +1,3243 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2 +# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass=postmisched %s -o - | FileCheck -check-prefix=SCHED %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 
-run-pass=postmisched,gcn-create-vopd %s -o - | FileCheck -check-prefix=PAIR %s + +--- +name: vopd_combine_low_vgprs +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_low_vgprs + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = V_SUB_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr6 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_low_vgprs + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3, $vgpr6 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx1250 $vgpr1, $vgpr1, $vgpr0, $vgpr0, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr3 = V_SUB_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec + $vgpr4 = V_BFM_B32_e32 $vgpr0, $vgpr1, implicit $exec + $vgpr6 = V_MUL_F32_e32 killed $vgpr0, $vgpr0, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_mov_max_i32 +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_mov_max_i32 + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec + ; SCHED-NEXT: $vgpr3 = V_MAX_I32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_mov_max_i32 + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2, $vgpr3 = V_DUAL_MOV_B32_e32_X_MAX_I32_e32_gfx1250 $vgpr0, $vgpr1, $vgpr1, implicit $exec, implicit $exec, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec + $vgpr4 = V_BFM_B32_e32 $vgpr0, $vgpr1, implicit $exec + $vgpr3 = V_MAX_I32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_mov_min_i32 +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_mov_min_i32 + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec + ; SCHED-NEXT: $vgpr3 = V_MIN_I32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_mov_min_i32 + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2, $vgpr3 = V_DUAL_MOV_B32_e32_X_MIN_I32_e32_gfx1250 $vgpr0, $vgpr1, $vgpr1, implicit $exec, implicit $exec, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec + $vgpr4 = V_BFM_B32_e32 $vgpr0, $vgpr1, implicit $exec + $vgpr3 = V_MIN_I32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec +... + +--- +name: vopd_no_combine_max_i32_max_i32 +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_no_combine_max_i32_max_i32 + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = V_MAX_I32_e32 killed $vgpr0, $vgpr0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr3 = V_MAX_I32_e32 killed $vgpr1, $vgpr1, implicit $mode, implicit $exec + ; + ; PAIR-LABEL: name: vopd_no_combine_max_i32_max_i32 + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = V_MAX_I32_e32 killed $vgpr0, $vgpr0, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr3 = V_MAX_I32_e32 killed $vgpr1, $vgpr1, implicit $mode, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = V_MAX_I32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec + $vgpr3 = V_MAX_I32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_no_combine_min_i32_min_i32 +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_no_combine_min_i32_min_i32 + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = V_MIN_I32_e32 killed $vgpr0, $vgpr0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr3 = V_MIN_I32_e32 killed $vgpr1, $vgpr1, implicit $mode, implicit $exec + ; + ; PAIR-LABEL: name: vopd_no_combine_min_i32_min_i32 + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = V_MIN_I32_e32 killed $vgpr0, $vgpr0, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr3 = V_MIN_I32_e32 killed $vgpr1, $vgpr1, implicit $mode, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = V_MIN_I32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec + $vgpr3 = V_MIN_I32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec +... + +--- +name: vopd_mov_sub_nc_i32 +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_mov_sub_nc_i32 + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec + ; SCHED-NEXT: $vgpr3 = V_SUB_U32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_mov_sub_nc_i32 + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2, $vgpr3 = V_DUAL_MOV_B32_e32_X_SUB_U32_e32_gfx1250 $vgpr0, $vgpr1, $vgpr1, implicit $exec, implicit $exec, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec + $vgpr4 = V_BFM_B32_e32 $vgpr0, $vgpr1, implicit $exec + $vgpr3 = V_SUB_U32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_mov_lshrrev_b32 +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_mov_lshrrev_b32 + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec + ; SCHED-NEXT: $vgpr3 = V_LSHRREV_B32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_mov_lshrrev_b32 + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2, $vgpr3 = V_DUAL_MOV_B32_e32_X_LSHRREV_B32_e32_gfx1250 $vgpr0, $vgpr1, $vgpr1, implicit $exec, implicit $exec, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec + $vgpr4 = V_BFM_B32_e32 $vgpr0, $vgpr1, implicit $exec + $vgpr3 = V_LSHRREV_B32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_mov_ashrrev_i32 +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_mov_ashrrev_i32 + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec + ; SCHED-NEXT: $vgpr3 = V_ASHRREV_I32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_mov_ashrrev_i32 + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2, $vgpr3 = V_DUAL_MOV_B32_e32_X_ASHRREV_I32_e32_gfx1250 $vgpr0, $vgpr1, $vgpr1, implicit $exec, implicit $exec, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec + $vgpr4 = V_BFM_B32_e32 $vgpr0, $vgpr1, implicit $exec + $vgpr3 = V_ASHRREV_I32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_no_combine_same_vgprs_banks +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_no_combine_same_vgprs_banks + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr5 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = V_SUB_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e32 $vgpr0, killed $vgpr1, implicit $exec + ; SCHED-NEXT: $vgpr6 = V_MUL_F32_e32 killed $vgpr0, killed $vgpr5, implicit $mode, implicit $exec + ; + ; PAIR-LABEL: name: vopd_no_combine_same_vgprs_banks + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr5 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = V_SUB_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e32 $vgpr0, killed $vgpr1, implicit $exec + ; PAIR-NEXT: $vgpr6 = V_MUL_F32_e32 killed $vgpr0, killed $vgpr5, implicit $mode, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr5 = IMPLICIT_DEF + $vgpr3 = V_SUB_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec + $vgpr4 = V_BFM_B32_e32 $vgpr0, $vgpr1, implicit $exec + $vgpr6 = V_MUL_F32_e32 killed $vgpr0, $vgpr5, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_combine_same_vgprs +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_same_vgprs + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = V_SUB_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr6 = V_MUL_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_same_vgprs + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3, $vgpr6 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx1250 $vgpr0, $vgpr1, $vgpr0, $vgpr1, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr3 = V_SUB_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec + $vgpr4 = V_BFM_B32_e32 $vgpr0, $vgpr1, implicit $exec + $vgpr6 = V_MUL_F32_e32 killed $vgpr0, $vgpr1, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_combine_same_dst_parity +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_same_dst_parity + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = V_SUB_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr5 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_same_dst_parity + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3, $vgpr5 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_e96_gfx1250 0, $vgpr1, 0, $vgpr1, 0, $vgpr0, 0, $vgpr0, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr3 = V_SUB_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec + $vgpr4 = V_BFM_B32_e32 $vgpr0, $vgpr1, implicit $exec + $vgpr5 = V_MUL_F32_e32 killed $vgpr0, $vgpr0, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_combine_x_fmaak +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_x_fmaak + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $sgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = V_FMAAK_F32 killed $sgpr0, $vgpr0, 981467136, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec + ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, $vgpr0, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_x_fmaak + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $sgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1, $vgpr2 = V_DUAL_FMAAK_F32_X_MOV_B32_e32_gfx1250 killed $sgpr0, $vgpr0, 981467136, $vgpr0, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $exec + ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, $vgpr0, implicit $exec + $vgpr0 = IMPLICIT_DEF + $sgpr0 = IMPLICIT_DEF + $vgpr1 = V_FMAAK_F32 $sgpr0, $vgpr0, 981467136, implicit $mode, implicit $exec + $vgpr4 = V_BFM_B32_e32 $vgpr0, $vgpr0, implicit $exec + $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec +... 
+ +--- +name: vopd_combine_y_fmaak +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_y_fmaak + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $sgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec + ; SCHED-NEXT: $vgpr2 = V_FMAAK_F32 killed $sgpr0, $vgpr0, 981467136, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, $vgpr0, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_y_fmaak + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $sgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1, $vgpr2 = V_DUAL_MOV_B32_e32_X_FMAAK_F32_gfx1250 $vgpr0, killed $sgpr0, $vgpr0, 981467136, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, $vgpr0, implicit $exec + $vgpr0 = IMPLICIT_DEF + $sgpr0 = IMPLICIT_DEF + $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec + $vgpr4 = V_BFM_B32_e32 $vgpr0, $vgpr0, implicit $exec + $vgpr2 = V_FMAAK_F32 $sgpr0, $vgpr0, 981467136, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_no_combine_x_fmaak_same_dst_parity +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_no_combine_x_fmaak_same_dst_parity + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $sgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = V_FMAAK_F32 killed $sgpr0, $vgpr0, 981467136, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e32 $vgpr0, $vgpr0, implicit $exec + ; SCHED-NEXT: $vgpr3 = V_MOV_B32_e32 killed $vgpr0, implicit $exec + ; + ; PAIR-LABEL: name: vopd_no_combine_x_fmaak_same_dst_parity + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $sgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = V_FMAAK_F32 killed $sgpr0, $vgpr0, 981467136, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e32 $vgpr0, $vgpr0, implicit $exec + ; PAIR-NEXT: $vgpr3 = V_MOV_B32_e32 killed $vgpr0, implicit $exec + $vgpr0 = IMPLICIT_DEF + $sgpr0 = IMPLICIT_DEF + $vgpr1 = V_FMAAK_F32 $sgpr0, $vgpr0, 981467136, implicit $mode, implicit $exec + $vgpr4 = V_BFM_B32_e32 $vgpr0, $vgpr0, implicit $exec + $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec +... 
+ +--- +name: vopd_no_combine_y_fmaak_same_dst_parity +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_no_combine_y_fmaak_same_dst_parity + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $sgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec + ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e32 $vgpr0, $vgpr0, implicit $exec + ; SCHED-NEXT: $vgpr3 = V_FMAAK_F32 killed $sgpr0, killed $vgpr0, 981467136, implicit $mode, implicit $exec + ; + ; PAIR-LABEL: name: vopd_no_combine_y_fmaak_same_dst_parity + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $sgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec + ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e32 $vgpr0, $vgpr0, implicit $exec + ; PAIR-NEXT: $vgpr3 = V_FMAAK_F32 killed $sgpr0, killed $vgpr0, 981467136, implicit $mode, implicit $exec + $vgpr0 = IMPLICIT_DEF + $sgpr0 = IMPLICIT_DEF + $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec + $vgpr4 = V_BFM_B32_e32 $vgpr0, $vgpr0, implicit $exec + $vgpr3 = V_FMAAK_F32 $sgpr0, $vgpr0, 981467136, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_combine_literal_x +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_literal_x + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = V_SUB_F32_e32 12345, $vgpr1, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr6 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_literal_x + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3, $vgpr6 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx1250 12345, $vgpr1, $vgpr0, $vgpr0, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr3 = V_SUB_F32_e32 12345, $vgpr1, implicit $mode, implicit $exec + $vgpr4 = V_BFM_B32_e32 $vgpr0, $vgpr1, implicit $exec + $vgpr6 = V_MUL_F32_e32 killed $vgpr0, $vgpr0, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_combine_literal_y +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_literal_y + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr6 = V_SUB_F32_e32 12345, $vgpr1, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_literal_y + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3, $vgpr6 = V_DUAL_MUL_F32_e32_X_SUB_F32_e32_gfx1250 $vgpr0, $vgpr0, 12345, $vgpr1, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr3 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec + $vgpr4 = V_BFM_B32_e32 $vgpr0, $vgpr1, implicit $exec + $vgpr6 = V_SUB_F32_e32 12345, $vgpr1, implicit $mode, implicit $exec +... + +# Below 2 tests cannot use VOPD because of the vdst parity and cannot use +# VOPD3 because of the literal use. 
+--- +name: vopd_no_combine_literal_x +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_no_combine_literal_x + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = V_SUB_F32_e32 12345, $vgpr1, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e32 $vgpr0, killed $vgpr1, implicit $exec + ; SCHED-NEXT: $vgpr5 = V_MUL_F32_e32 killed $vgpr0, $vgpr0, implicit $mode, implicit $exec + ; + ; PAIR-LABEL: name: vopd_no_combine_literal_x + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = V_SUB_F32_e32 12345, $vgpr1, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e32 $vgpr0, killed $vgpr1, implicit $exec + ; PAIR-NEXT: $vgpr5 = V_MUL_F32_e32 killed $vgpr0, $vgpr0, implicit $mode, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr3 = V_SUB_F32_e32 12345, $vgpr1, implicit $mode, implicit $exec + $vgpr4 = V_BFM_B32_e32 $vgpr0, $vgpr1, implicit $exec + $vgpr5 = V_MUL_F32_e32 killed $vgpr0, $vgpr0, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_no_combine_literal_y +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_no_combine_literal_y + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, $vgpr1, implicit $exec + ; SCHED-NEXT: $vgpr5 = V_SUB_F32_e32 12345, killed $vgpr1, implicit $mode, implicit $exec + ; + ; PAIR-LABEL: name: vopd_no_combine_literal_y + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, $vgpr1, implicit $exec + ; PAIR-NEXT: $vgpr5 = V_SUB_F32_e32 12345, killed $vgpr1, implicit $mode, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr3 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec + $vgpr4 = V_BFM_B32_e32 $vgpr0, $vgpr1, implicit $exec + $vgpr5 = V_SUB_F32_e32 12345, $vgpr1, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_combine_add_u32_add_f32 +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_add_u32_add_f32 + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec + ; SCHED-NEXT: $vgpr6 = V_ADD_F32_e32 killed $vgpr2, killed $vgpr3, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr7 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_add_u32_add_f32 + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4, $vgpr6 = V_DUAL_ADD_U32_e32_X_ADD_F32_e32_e96_gfx1250 $vgpr0, $vgpr1, 0, killed $vgpr2, 0, killed $vgpr3, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr7 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr4 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec + $vgpr7 = V_BFM_B32_e32 $vgpr0, $vgpr1, implicit $exec + $vgpr6 = V_ADD_F32_e32 $vgpr2, $vgpr3, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_combine_add_f32_add_u32 +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_add_f32_add_u32 + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = V_ADD_F32_e32 killed $vgpr2, killed $vgpr3, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr4 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec + ; SCHED-NEXT: $vgpr7 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_add_f32_add_u32 + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6, $vgpr4 = V_DUAL_ADD_F32_e32_X_ADD_U32_e32_e96_gfx1250 0, killed $vgpr2, 0, killed $vgpr3, $vgpr0, $vgpr1, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $exec + ; PAIR-NEXT: $vgpr7 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr6 = V_ADD_F32_e32 $vgpr2, $vgpr3, implicit $mode, implicit $exec + $vgpr7 = V_BFM_B32_e32 $vgpr0, $vgpr1, implicit $exec + $vgpr4 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec +... 
+ +--- +name: vopd_combine_add_u32_add_f32_same_dst_parity +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_add_u32_add_f32_same_dst_parity + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = V_ADD_U32_e32 killed $vgpr0, killed $vgpr1, implicit $exec + ; SCHED-NEXT: $vgpr5 = V_ADD_F32_e32 killed $vgpr2, killed $vgpr3, implicit $mode, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_add_u32_add_f32_same_dst_parity + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr5, $vgpr4 = V_DUAL_ADD_F32_e32_X_ADD_U32_e32_gfx1250 killed $vgpr2, killed $vgpr3, killed $vgpr0, killed $vgpr1, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr4 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec + $vgpr5 = V_ADD_F32_e32 $vgpr2, $vgpr3, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_combine_add_f32_add_u32_same_dst_parity +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_add_f32_add_u32_same_dst_parity + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr5 = V_ADD_F32_e32 killed $vgpr2, killed $vgpr3, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr4 = V_ADD_U32_e32 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_add_f32_add_u32_same_dst_parity + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr5, $vgpr4 = V_DUAL_ADD_F32_e32_X_ADD_U32_e32_gfx1250 killed $vgpr2, killed $vgpr3, killed $vgpr0, killed $vgpr1, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr5 = V_ADD_F32_e32 $vgpr2, $vgpr3, implicit $mode, implicit $exec + $vgpr4 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec +... 
+ +--- +name: vopd_combine_lshl_lshl +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_lshl_lshl + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = V_LSHLREV_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec + ; SCHED-NEXT: $vgpr6 = V_LSHLREV_B32_e32 killed $vgpr2, killed $vgpr3, implicit $mode, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_lshl_lshl + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4, $vgpr6 = V_DUAL_LSHLREV_B32_e32_X_LSHLREV_B32_e32_e96_gfx1250 killed $vgpr0, killed $vgpr1, killed $vgpr2, killed $vgpr3, implicit $exec, implicit $exec, implicit $mode, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr4 = V_LSHLREV_B32_e32 $vgpr0, $vgpr1, implicit $exec + $vgpr6 = V_LSHLREV_B32_e32 $vgpr2, $vgpr3, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_combine_ashr_ashr +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_ashr_ashr + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = V_ASHRREV_I32_e32 killed $vgpr0, killed $vgpr1, implicit $exec + ; SCHED-NEXT: $vgpr5 = V_ASHRREV_I32_e32 killed $vgpr2, killed $vgpr3, implicit $mode, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_ashr_ashr + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4, $vgpr5 = V_DUAL_ASHRREV_I32_e32_X_ASHRREV_I32_e32_e96_gfx1250 killed $vgpr0, killed $vgpr1, killed $vgpr2, killed $vgpr3, implicit $exec, implicit $exec, implicit $mode, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr4 = V_ASHRREV_I32_e32 $vgpr0, $vgpr1, implicit $exec + $vgpr5 = V_ASHRREV_I32_e32 $vgpr2, $vgpr3, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_combine_sub_u32_sub_u32 +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_sub_u32_sub_u32 + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = V_SUB_U32_e32 killed $vgpr0, killed $vgpr1, implicit $exec + ; SCHED-NEXT: $vgpr5 = V_SUB_U32_e32 killed $vgpr2, killed $vgpr3, implicit $mode, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_sub_u32_sub_u32 + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4, $vgpr5 = V_DUAL_SUB_U32_e32_X_SUB_U32_e32_e96_gfx1250 killed $vgpr0, killed $vgpr1, killed $vgpr2, killed $vgpr3, implicit $exec, implicit $exec, implicit $mode, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr4 = V_SUB_U32_e32 $vgpr0, $vgpr1, implicit $exec + $vgpr5 = V_SUB_U32_e32 $vgpr2, $vgpr3, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_no_combine_sub_u32_sub_u32_lit +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_no_combine_sub_u32_sub_u32_lit + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = V_SUB_U32_e32 killed $vgpr0, killed $vgpr1, implicit $exec + ; SCHED-NEXT: $vgpr5 = V_SUB_U32_e32 300, killed $vgpr2, implicit $mode, implicit $exec + ; + ; PAIR-LABEL: name: vopd_no_combine_sub_u32_sub_u32_lit + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4 = V_SUB_U32_e32 killed $vgpr0, killed $vgpr1, implicit $exec + ; PAIR-NEXT: $vgpr5 = V_SUB_U32_e32 300, killed $vgpr2, implicit $mode, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr4 = V_SUB_U32_e32 $vgpr0, $vgpr1, implicit $exec + $vgpr5 = V_SUB_U32_e32 300, $vgpr2, implicit $mode, implicit $exec +... + +--- +name: vopd_combine_fmac_fmac +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_fmac_fmac + ; SCHED: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = V_FMAC_F32_e32 $vgpr1, $vgpr1, killed $vgpr2, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr3 = V_FMAC_F32_e32 killed $vgpr1, $vgpr1, killed $vgpr3, implicit $mode, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_fmac_fmac + ; PAIR: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2, $vgpr3 = V_DUAL_FMAC_F32_e32_X_FMAC_F32_e32_gfx1250 $vgpr1, $vgpr1, killed $vgpr2, killed $vgpr1, $vgpr1, killed $vgpr3, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr2 
= V_FMAC_F32_e32 $vgpr1, $vgpr1, $vgpr2, implicit $mode, implicit $exec + $vgpr3 = V_FMAC_F32_e32 $vgpr1, $vgpr1, $vgpr3, implicit $mode, implicit $exec +... + +--- +name: vopd_combine_fmac_fmac_same_dst_parity +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_fmac_fmac_same_dst_parity + ; SCHED: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = V_FMAC_F32_e32 $vgpr1, $vgpr1, killed $vgpr2, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr4 = V_FMAC_F32_e32 killed $vgpr1, $vgpr1, killed $vgpr4, implicit $mode, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_fmac_fmac_same_dst_parity + ; PAIR: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2, $vgpr4 = V_DUAL_FMAC_F32_e32_X_FMAC_F32_e32_e96_gfx1250 0, $vgpr1, 0, $vgpr1, killed $vgpr2, 0, killed $vgpr1, 0, $vgpr1, killed $vgpr4, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $vgpr2 = V_FMAC_F32_e32 $vgpr1, $vgpr1, $vgpr2, implicit $mode, implicit $exec + $vgpr4 = V_FMAC_F32_e32 $vgpr1, $vgpr1, $vgpr4, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_no_combine_fmac_fmac_same_dst +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_no_combine_fmac_fmac_same_dst + ; SCHED: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = V_FMAC_F32_e32 $vgpr1, $vgpr1, killed $vgpr2, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr2 = V_FMAC_F32_e32 killed $vgpr1, $vgpr1, killed $vgpr2, implicit $mode, implicit $exec + ; + ; PAIR-LABEL: name: vopd_no_combine_fmac_fmac_same_dst + ; PAIR: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = V_FMAC_F32_e32 $vgpr1, $vgpr1, killed $vgpr2, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr2 = V_FMAC_F32_e32 killed $vgpr1, $vgpr1, killed $vgpr2, implicit $mode, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr2 = V_FMAC_F32_e32 $vgpr1, $vgpr1, $vgpr2, implicit $mode, implicit $exec + $vgpr2 = V_FMAC_F32_e32 $vgpr1, $vgpr1, $vgpr2, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_no_combine_add_f32_fadd_f32_same_dst +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_no_combine_add_f32_fadd_f32_same_dst + ; SCHED: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = V_ADD_F32_e32 killed $vgpr1, $vgpr1, implicit $mode, implicit $exec + ; + ; PAIR-LABEL: name: vopd_no_combine_add_f32_fadd_f32_same_dst + ; PAIR: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = V_ADD_F32_e32 killed $vgpr1, $vgpr1, implicit $mode, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec + $vgpr2 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec +... + +--- +name: vopd_combine_add_f64_add_f32 +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_add_f64_add_f32 + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr8_vgpr9 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4_vgpr5 = V_ADD_F64_pseudo_e32 $vgpr0_vgpr1, killed $vgpr8_vgpr9, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr6 = V_ADD_F32_e32 killed $vgpr2, killed $vgpr3, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr7 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_add_f64_add_f32 + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr8_vgpr9 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4_vgpr5, $vgpr6 = V_DUAL_ADD_F64_pseudo_e32_X_ADD_F32_e32_e96_gfx1250 0, $vgpr0_vgpr1, 0, killed $vgpr8_vgpr9, 0, killed $vgpr2, 0, killed $vgpr3, implicit $mode, implicit $exec, 
implicit $mode, implicit $exec, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr7 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr8_vgpr9 = IMPLICIT_DEF + $vgpr4_vgpr5 = V_ADD_F64_pseudo_e32 $vgpr0_vgpr1, $vgpr8_vgpr9, implicit $mode, implicit $exec + $vgpr7 = V_BFM_B32_e32 $vgpr0, $vgpr1, implicit $exec + $vgpr6 = V_ADD_F32_e32 $vgpr2, $vgpr3, implicit $mode, implicit $exec +... + +--- +name: vopd_combine_add_f32_add_f64 +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_add_f32_add_f64 + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr8_vgpr9 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = V_ADD_F32_e32 killed $vgpr2, killed $vgpr3, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr4_vgpr5 = V_ADD_F64_pseudo_e32 $vgpr0_vgpr1, killed $vgpr8_vgpr9, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr7 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_add_f32_add_f64 + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr8_vgpr9 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4_vgpr5, $vgpr6 = V_DUAL_ADD_F64_pseudo_e32_X_ADD_F32_e32_e96_gfx1250 0, $vgpr0_vgpr1, 0, killed $vgpr8_vgpr9, 0, killed $vgpr2, 0, killed $vgpr3, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr7 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr8_vgpr9 = IMPLICIT_DEF + $vgpr6 = V_ADD_F32_e32 $vgpr2, $vgpr3, implicit $mode, implicit $exec + $vgpr7 = V_BFM_B32_e32 $vgpr0, $vgpr1, implicit $exec + $vgpr4_vgpr5 = 
V_ADD_F64_pseudo_e32 $vgpr0_vgpr1, $vgpr8_vgpr9, implicit $mode, implicit $exec +... + +--- +name: vopd_no_combine_add_f64_add_f64 +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_no_combine_add_f64_add_f64 + ; SCHED: $vgpr8_vgpr9 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr10_vgpr11 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4_vgpr5 = V_ADD_F64_pseudo_e32 $vgpr0_vgpr1, killed $vgpr8_vgpr9, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e32 $vgpr0, $vgpr1, implicit $exec + ; SCHED-NEXT: $vgpr6_vgpr7 = V_ADD_F64_pseudo_e32 killed $vgpr0_vgpr1, killed $vgpr10_vgpr11, implicit $mode, implicit $exec + ; + ; PAIR-LABEL: name: vopd_no_combine_add_f64_add_f64 + ; PAIR: $vgpr8_vgpr9 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr10_vgpr11 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4_vgpr5 = V_ADD_F64_pseudo_e32 $vgpr0_vgpr1, killed $vgpr8_vgpr9, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e32 $vgpr0, $vgpr1, implicit $exec + ; PAIR-NEXT: $vgpr6_vgpr7 = V_ADD_F64_pseudo_e32 killed $vgpr0_vgpr1, killed $vgpr10_vgpr11, implicit $mode, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr8_vgpr9 = IMPLICIT_DEF + $vgpr10_vgpr11 = IMPLICIT_DEF + $vgpr4_vgpr5 = V_ADD_F64_pseudo_e32 $vgpr0_vgpr1, $vgpr8_vgpr9, implicit $mode, implicit $exec + $vgpr8 = V_BFM_B32_e32 $vgpr0, $vgpr1, implicit $exec + $vgpr6_vgpr7 = V_ADD_F64_pseudo_e32 $vgpr0_vgpr1, $vgpr10_vgpr11, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_no_combine_add_f64_add_f32_overlapping_dst +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_no_combine_add_f64_add_f32_overlapping_dst + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr8_vgpr9 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4_vgpr5 = V_ADD_F64_pseudo_e32 $vgpr0_vgpr1, killed $vgpr8_vgpr9, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr7 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec + ; SCHED-NEXT: $vgpr5 = V_ADD_F32_e32 killed $vgpr2, killed $vgpr3, implicit $mode, implicit $exec + ; + ; PAIR-LABEL: name: vopd_no_combine_add_f64_add_f32_overlapping_dst + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr8_vgpr9 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4_vgpr5 = V_ADD_F64_pseudo_e32 $vgpr0_vgpr1, killed $vgpr8_vgpr9, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr7 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec + ; PAIR-NEXT: $vgpr5 = V_ADD_F32_e32 killed $vgpr2, killed $vgpr3, implicit $mode, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr8_vgpr9 = IMPLICIT_DEF + $vgpr4_vgpr5 = V_ADD_F64_pseudo_e32 $vgpr0_vgpr1, $vgpr8_vgpr9, implicit $mode, implicit $exec + $vgpr7 = V_BFM_B32_e32 $vgpr0, $vgpr1, implicit $exec + $vgpr5 = V_ADD_F32_e32 $vgpr2, $vgpr3, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_no_combine_add_f64_add_f32_overlapping_src_sub1 +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_no_combine_add_f64_add_f32_overlapping_src_sub1 + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr10_vgpr11 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4_vgpr5 = V_ADD_F64_pseudo_e32 killed $vgpr0_vgpr1, killed $vgpr10_vgpr11, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = V_ADD_F32_e32 killed $vgpr2, killed $vgpr5, implicit $mode, implicit $exec + ; + ; PAIR-LABEL: name: vopd_no_combine_add_f64_add_f32_overlapping_src_sub1 + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr10_vgpr11 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4_vgpr5 = V_ADD_F64_pseudo_e32 killed $vgpr0_vgpr1, killed $vgpr10_vgpr11, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6 = V_ADD_F32_e32 killed $vgpr2, killed $vgpr5, implicit $mode, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr10_vgpr11 = IMPLICIT_DEF + $vgpr4_vgpr5 = V_ADD_F64_pseudo_e32 $vgpr0_vgpr1, $vgpr10_vgpr11, implicit $mode, implicit $exec + $vgpr6 = V_ADD_F32_e32 $vgpr2, $vgpr5, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_no_combine_add_f64_add_f32_overlapping_src_sub0 +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_no_combine_add_f64_add_f32_overlapping_src_sub0 + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr10_vgpr11 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4_vgpr5 = V_ADD_F64_pseudo_e32 killed $vgpr0_vgpr1, killed $vgpr10_vgpr11, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = V_ADD_F32_e32 killed $vgpr2, killed $vgpr4, implicit $mode, implicit $exec + ; + ; PAIR-LABEL: name: vopd_no_combine_add_f64_add_f32_overlapping_src_sub0 + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr10_vgpr11 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4_vgpr5 = V_ADD_F64_pseudo_e32 killed $vgpr0_vgpr1, killed $vgpr10_vgpr11, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6 = V_ADD_F32_e32 killed $vgpr2, killed $vgpr4, implicit $mode, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr10_vgpr11 = IMPLICIT_DEF + $vgpr4_vgpr5 = V_ADD_F64_pseudo_e32 $vgpr0_vgpr1, $vgpr10_vgpr11, implicit $mode, implicit $exec + $vgpr6 = V_ADD_F32_e32 $vgpr2, $vgpr4, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_combine_fma_fma +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_fma_fma + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr5 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr4, 0, killed $vgpr5, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_fma_fma + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr5 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6, $vgpr7 = V_DUAL_FMA_F32_e64_X_FMA_F32_e64_e96_gfx1250 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, killed $vgpr3, 0, killed $vgpr4, 0, killed $vgpr5, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $vgpr5 = IMPLICIT_DEF + $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, $vgpr2, 0, 0, implicit $mode, implicit $exec + $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr7 = V_FMA_F32_e64 0, $vgpr3, 0, $vgpr4, 0, $vgpr5, 0, 0, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_no_combine_fma_fma_bank_conflict_src2 +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_no_combine_fma_fma_bank_conflict_src2 + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr10 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr5 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; SCHED-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr4, 0, killed $vgpr10, 0, 0, implicit $mode, implicit $exec + ; + ; PAIR-LABEL: name: vopd_no_combine_fma_fma_bank_conflict_src2 + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr10 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr5 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; PAIR-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr4, 0, killed $vgpr10, 0, 0, implicit $mode, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $vgpr10 = IMPLICIT_DEF + $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, $vgpr2, 0, 0, implicit $mode, implicit $exec + $vgpr5 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr7 = V_FMA_F32_e64 0, $vgpr3, 0, $vgpr4, 0, $vgpr10, 0, 0, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_combine_fma_add_f32 +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_fma_add_f32 + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr7 = V_ADD_F32_e32 killed $vgpr3, killed $vgpr4, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr5 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_fma_add_f32 + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6, $vgpr7 = V_DUAL_FMA_F32_e64_X_ADD_F32_e32_e96_gfx1250 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, killed $vgpr3, 0, killed $vgpr4, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr5 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, $vgpr2, 0, 0, implicit $mode, implicit $exec + $vgpr5 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr7 = V_ADD_F32_e32 $vgpr3, $vgpr4, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_combine_add_f32_fma +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_add_f32_fma + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr7 = V_ADD_F32_e32 killed $vgpr3, killed $vgpr4, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr5 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_add_f32_fma + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr7, $vgpr6 = V_DUAL_ADD_F32_e32_X_FMA_F32_e64_e96_gfx1250 0, killed $vgpr3, 0, killed $vgpr4, 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr5 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $vgpr7 = V_ADD_F32_e32 $vgpr3, $vgpr4, implicit $mode, implicit $exec + $vgpr5 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, $vgpr2, 0, 0, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_combine_fma_add_f64 +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_fma_add_f64 + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr10_vgpr11 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, $vgpr2, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr8_vgpr9 = V_ADD_F64_pseudo_e32 killed $vgpr2_vgpr3, killed $vgpr10_vgpr11, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr5 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_fma_add_f64 + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr10_vgpr11 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr8_vgpr9, $vgpr6 = V_DUAL_ADD_F64_pseudo_e32_X_FMA_F32_e64_e96_gfx1250 0, killed $vgpr2_vgpr3, 0, killed $vgpr10_vgpr11, 0, $vgpr0, 0, $vgpr1, 0, $vgpr2, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr5 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr10_vgpr11 = IMPLICIT_DEF + $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, $vgpr2, 0, 0, implicit $mode, implicit $exec + $vgpr5 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr8_vgpr9 = V_ADD_F64_pseudo_e32 $vgpr2_vgpr3, $vgpr10_vgpr11, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_no_combine_fma_src0_mod_fma +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_no_combine_fma_src0_mod_fma + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr5 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = V_FMA_F32_e64 3, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; SCHED-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr4, 0, killed $vgpr5, 0, 0, implicit $mode, implicit $exec + ; + ; PAIR-LABEL: name: vopd_no_combine_fma_src0_mod_fma + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr5 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6 = V_FMA_F32_e64 3, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; PAIR-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr4, 0, killed $vgpr5, 0, 0, implicit $mode, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $vgpr5 = IMPLICIT_DEF + $vgpr6 = V_FMA_F32_e64 3, $vgpr0, 0, $vgpr1, 0, $vgpr2, 0, 0, implicit $mode, implicit $exec + $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr7 = V_FMA_F32_e64 0, $vgpr3, 0, $vgpr4, 0, $vgpr5, 0, 0, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_no_combine_fma_fma_src1_mod +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_no_combine_fma_fma_src1_mod + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr5 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; SCHED-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 2, killed $vgpr4, 0, killed $vgpr5, 0, 0, implicit $mode, implicit $exec + ; + ; PAIR-LABEL: name: vopd_no_combine_fma_fma_src1_mod + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr5 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; PAIR-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 2, killed $vgpr4, 0, killed $vgpr5, 0, 0, implicit $mode, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $vgpr5 = IMPLICIT_DEF + $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, $vgpr2, 0, 0, implicit $mode, implicit $exec + $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr7 = V_FMA_F32_e64 0, $vgpr3, 2, $vgpr4, 0, $vgpr5, 0, 0, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_no_combine_fma_fma_src2_mod +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_no_combine_fma_fma_src2_mod + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr5 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; SCHED-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr4, 3, killed $vgpr5, 0, 0, implicit $mode, implicit $exec + ; + ; PAIR-LABEL: name: vopd_no_combine_fma_fma_src2_mod + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr5 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; PAIR-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr4, 3, killed $vgpr5, 0, 0, implicit $mode, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $vgpr5 = IMPLICIT_DEF + $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, $vgpr2, 0, 0, implicit $mode, implicit $exec + $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr7 = V_FMA_F32_e64 0, $vgpr3, 0, $vgpr4, 3, $vgpr5, 0, 0, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_no_combine_fma_clamp_fma +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_no_combine_fma_clamp_fma + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr5 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, 1, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; SCHED-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr4, 0, killed $vgpr5, 0, 0, implicit $mode, implicit $exec + ; + ; PAIR-LABEL: name: vopd_no_combine_fma_clamp_fma + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr5 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, 1, 0, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; PAIR-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr4, 0, killed $vgpr5, 0, 0, implicit $mode, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $vgpr5 = IMPLICIT_DEF + $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, $vgpr2, 1, 0, implicit $mode, implicit $exec + $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr7 = V_FMA_F32_e64 0, $vgpr3, 0, $vgpr4, 0, $vgpr5, 0, 0, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_no_combine_fma_fma_omod +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_no_combine_fma_fma_omod + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr5 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; SCHED-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr4, 0, killed $vgpr5, 0, 1, implicit $mode, implicit $exec + ; + ; PAIR-LABEL: name: vopd_no_combine_fma_fma_omod + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr5 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; PAIR-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr4, 0, killed $vgpr5, 0, 1, implicit $mode, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $vgpr5 = IMPLICIT_DEF + $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, $vgpr2, 0, 0, implicit $mode, implicit $exec + $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr7 = V_FMA_F32_e64 0, $vgpr3, 0, $vgpr4, 0, $vgpr5, 0, 1, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_combine_fma_fma_neg +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_fma_fma_neg + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr5 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = V_FMA_F32_e64 1, $vgpr0, 1, $vgpr1, 1, killed $vgpr2, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr7 = V_FMA_F32_e64 1, killed $vgpr3, 1, killed $vgpr4, 1, killed $vgpr5, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_fma_fma_neg + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr5 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6, $vgpr7 = V_DUAL_FMA_F32_e64_X_FMA_F32_e64_e96_gfx1250 1, $vgpr0, 1, $vgpr1, 1, killed $vgpr2, 1, killed $vgpr3, 1, killed $vgpr4, 1, killed $vgpr5, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $vgpr5 = IMPLICIT_DEF + $vgpr6 = V_FMA_F32_e64 1, $vgpr0, 1, $vgpr1, 1, $vgpr2, 0, 0, implicit $mode, implicit $exec + $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr7 = V_FMA_F32_e64 1, $vgpr3, 1, $vgpr4, 1, $vgpr5, 0, 0, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_combine_fma_fma_src0_neg +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_fma_fma_src0_neg + ; SCHED: $sgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr5 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = V_FMA_F32_e64 1, $sgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr4, 0, killed $vgpr5, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $sgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_fma_fma_src0_neg + ; PAIR: $sgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr5 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6, $vgpr7 = V_DUAL_FMA_F32_e64_X_FMA_F32_e64_e96_gfx1250 1, $sgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, killed $vgpr3, 0, killed $vgpr4, 0, killed $vgpr5, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $sgpr0, killed $vgpr1, implicit $exec + $sgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $vgpr5 = IMPLICIT_DEF + $vgpr6 = V_FMA_F32_e64 1, $sgpr0, 0, $vgpr1, 0, $vgpr2, 0, 0, implicit $mode, implicit $exec + $vgpr8 = V_BFM_B32_e64 $sgpr0, $vgpr1, implicit $exec + $vgpr7 = V_FMA_F32_e64 0, $vgpr3, 0, $vgpr4, 0, $vgpr5, 0, 0, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_combine_fma_fma_src1_neg +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_fma_fma_src1_neg + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr5 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 1, killed $vgpr4, 0, killed $vgpr5, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_fma_fma_src1_neg + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr5 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6, $vgpr7 = V_DUAL_FMA_F32_e64_X_FMA_F32_e64_e96_gfx1250 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, killed $vgpr3, 1, killed $vgpr4, 0, killed $vgpr5, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $vgpr5 = IMPLICIT_DEF + $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, $vgpr2, 0, 0, implicit $mode, implicit $exec + $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr7 = V_FMA_F32_e64 0, $vgpr3, 1, $vgpr4, 0, $vgpr5, 0, 0, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_combine_fma_fma_src2_neg +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_fma_fma_src2_neg + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr5 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 1, killed $vgpr2, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr4, 0, killed $vgpr5, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_fma_fma_src2_neg + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr5 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6, $vgpr7 = V_DUAL_FMA_F32_e64_X_FMA_F32_e64_e96_gfx1250 0, $vgpr0, 0, $vgpr1, 1, killed $vgpr2, 0, killed $vgpr3, 0, killed $vgpr4, 0, killed $vgpr5, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $vgpr5 = IMPLICIT_DEF + $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 1, $vgpr2, 0, 0, implicit $mode, implicit $exec + $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr7 = V_FMA_F32_e64 0, $vgpr3, 0, $vgpr4, 0, $vgpr5, 0, 0, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_combine_fma_f64_fma_f32_neg +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_fma_f64_fma_f32_neg + ; SCHED: $vgpr0_vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4_vgpr5 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr7 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr8 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr10_vgpr11 = V_FMA_F64_e64 1, $vgpr0_vgpr1, 1, killed $vgpr2_vgpr3, 1, killed $vgpr4_vgpr5, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr9 = V_FMA_F32_e64 0, killed $vgpr6, 0, killed $vgpr8, 0, killed $vgpr7, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr12 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_fma_f64_fma_f32_neg + ; PAIR: $vgpr0_vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4_vgpr5 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr7 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr8 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr10_vgpr11, $vgpr9 = V_DUAL_FMA_F64_e64_X_FMA_F32_e64_e96_gfx1250 1, $vgpr0_vgpr1, 1, killed $vgpr2_vgpr3, 1, killed $vgpr4_vgpr5, 0, killed $vgpr6, 0, killed $vgpr8, 0, killed $vgpr7, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr12 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0_vgpr1 = IMPLICIT_DEF + $vgpr2_vgpr3 = IMPLICIT_DEF + $vgpr4_vgpr5 = IMPLICIT_DEF + $vgpr6 = IMPLICIT_DEF + $vgpr7 = IMPLICIT_DEF + $vgpr8 = IMPLICIT_DEF + $vgpr10_vgpr11 = V_FMA_F64_e64 1, $vgpr0_vgpr1, 1, $vgpr2_vgpr3, 1, $vgpr4_vgpr5, 0, 0, implicit $mode, implicit $exec + $vgpr12 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr9 = V_FMA_F32_e64 0, $vgpr6, 0, $vgpr8, 0, $vgpr7, 0, 0, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_combine_lshl_add_u64_fma +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_lshl_add_u64_fma + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr5 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6_vgpr7 = V_LSHL_ADD_U64_e64 $vgpr0_vgpr1, $vgpr1, $vgpr2_vgpr3, implicit $exec + ; SCHED-NEXT: $vgpr9 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; SCHED-NEXT: $vgpr8 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr2, 0, killed $vgpr4, 0, 0, implicit $mode, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_lshl_add_u64_fma + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr5 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6_vgpr7 = V_LSHL_ADD_U64_e64 $vgpr0_vgpr1, $vgpr1, $vgpr2_vgpr3, implicit $exec + ; PAIR-NEXT: $vgpr9 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; PAIR-NEXT: $vgpr8 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr2, 0, killed $vgpr4, 0, 0, implicit $mode, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $vgpr5 = IMPLICIT_DEF + $vgpr6_vgpr7 = V_LSHL_ADD_U64_e64 $vgpr0_vgpr1, $vgpr1, $vgpr2_vgpr3, implicit $exec + $vgpr9 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr8 = V_FMA_F32_e64 0, $vgpr3, 0, $vgpr2, 0, $vgpr4, 0, 0, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_combine_fma_lshl_add_u64 +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_fma_lshl_add_u64 + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr5 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr8 = V_FMA_F32_e64 0, $vgpr3, 0, $vgpr2, 0, killed $vgpr5, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr9 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + ; SCHED-NEXT: $vgpr6_vgpr7 = V_LSHL_ADD_U64_e64 killed $vgpr0_vgpr1, $vgpr1, killed $vgpr2_vgpr3, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_fma_lshl_add_u64 + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr5 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr8 = V_FMA_F32_e64 0, $vgpr3, 0, $vgpr2, 0, killed $vgpr5, 0, 0, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr9 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + ; PAIR-NEXT: $vgpr6_vgpr7 = V_LSHL_ADD_U64_e64 killed $vgpr0_vgpr1, $vgpr1, killed $vgpr2_vgpr3, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $vgpr5 = IMPLICIT_DEF + $vgpr8 = V_FMA_F32_e64 0, $vgpr3, 0, $vgpr2, 0, $vgpr5, 0, 0, implicit $mode, implicit $exec + $vgpr9 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr6_vgpr7 = V_LSHL_ADD_U64_e64 $vgpr0_vgpr1, $vgpr1, $vgpr2_vgpr3, implicit $exec +... 
+ +--- +name: vopd_no_combine_lshl_add_u64_fma_overlapping_src2 +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_no_combine_lshl_add_u64_fma_overlapping_src2 + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6_vgpr7 = V_LSHL_ADD_U64_e64 $vgpr0_vgpr1, $vgpr1, $vgpr2_vgpr3, implicit $exec + ; SCHED-NEXT: $vgpr9 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; SCHED-NEXT: $vgpr8 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr4, 0, $vgpr3, 0, 0, implicit $mode, implicit $exec + ; + ; PAIR-LABEL: name: vopd_no_combine_lshl_add_u64_fma_overlapping_src2 + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6_vgpr7 = V_LSHL_ADD_U64_e64 $vgpr0_vgpr1, $vgpr1, $vgpr2_vgpr3, implicit $exec + ; PAIR-NEXT: $vgpr9 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; PAIR-NEXT: $vgpr8 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr4, 0, $vgpr3, 0, 0, implicit $mode, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $vgpr6_vgpr7 = V_LSHL_ADD_U64_e64 $vgpr0_vgpr1, $vgpr1, $vgpr2_vgpr3, implicit $exec + $vgpr9 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr8 = V_FMA_F32_e64 0, $vgpr3, 0, $vgpr4, 0, $vgpr3, 0, 0, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_no_combine_lshl_add_u64_fma_src0_conflict +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_no_combine_lshl_add_u64_fma_src0_conflict + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr5 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6_vgpr7 = V_LSHL_ADD_U64_e64 $vgpr0_vgpr1, killed $vgpr5, $vgpr2_vgpr3, implicit $exec + ; SCHED-NEXT: $vgpr9 = V_BFM_B32_e64 killed $vgpr0, $vgpr1, implicit $exec + ; SCHED-NEXT: $vgpr8 = V_FMA_F32_e64 0, killed $vgpr1, 0, killed $vgpr3, 0, killed $vgpr4, 0, 0, implicit $mode, implicit $exec + ; + ; PAIR-LABEL: name: vopd_no_combine_lshl_add_u64_fma_src0_conflict + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr5 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6_vgpr7 = V_LSHL_ADD_U64_e64 $vgpr0_vgpr1, killed $vgpr5, $vgpr2_vgpr3, implicit $exec + ; PAIR-NEXT: $vgpr9 = V_BFM_B32_e64 killed $vgpr0, $vgpr1, implicit $exec + ; PAIR-NEXT: $vgpr8 = V_FMA_F32_e64 0, killed $vgpr1, 0, killed $vgpr3, 0, killed $vgpr4, 0, 0, implicit $mode, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $vgpr5 = IMPLICIT_DEF + $vgpr6_vgpr7 = V_LSHL_ADD_U64_e64 $vgpr0_vgpr1, $vgpr5, $vgpr2_vgpr3, implicit $exec + $vgpr9 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr8 = V_FMA_F32_e64 0, $vgpr1, 0, $vgpr3, 0, $vgpr4, 0, 0, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_no_combine_lshl_add_u64_fma_src1_conflict +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_no_combine_lshl_add_u64_fma_src1_conflict + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr5 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6_vgpr7 = V_LSHL_ADD_U64_e64 $vgpr0_vgpr1, $vgpr5, $vgpr2_vgpr3, implicit $exec + ; SCHED-NEXT: $vgpr9 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; SCHED-NEXT: $vgpr8 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr5, 0, killed $vgpr4, 0, 0, implicit $mode, implicit $exec + ; + ; PAIR-LABEL: name: vopd_no_combine_lshl_add_u64_fma_src1_conflict + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr5 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6_vgpr7 = V_LSHL_ADD_U64_e64 $vgpr0_vgpr1, $vgpr5, $vgpr2_vgpr3, implicit $exec + ; PAIR-NEXT: $vgpr9 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; PAIR-NEXT: $vgpr8 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr5, 0, killed $vgpr4, 0, 0, implicit $mode, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $vgpr5 = IMPLICIT_DEF + $vgpr6_vgpr7 = V_LSHL_ADD_U64_e64 $vgpr0_vgpr1, $vgpr5, $vgpr2_vgpr3, implicit $exec + $vgpr9 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr8 = V_FMA_F32_e64 0, $vgpr3, 0, $vgpr5, 0, $vgpr4, 0, 0, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_combine_fma_f64_fma_f32 +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_fma_f64_fma_f32 + ; SCHED: $vgpr0_vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4_vgpr5 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr7 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr8 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr10_vgpr11 = V_FMA_F64_e64 0, $vgpr0_vgpr1, 0, killed $vgpr2_vgpr3, 0, killed $vgpr4_vgpr5, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr9 = V_FMA_F32_e64 0, killed $vgpr6, 0, killed $vgpr8, 0, killed $vgpr7, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr12 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_fma_f64_fma_f32 + ; PAIR: $vgpr0_vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4_vgpr5 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr7 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr8 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr10_vgpr11, $vgpr9 = V_DUAL_FMA_F64_e64_X_FMA_F32_e64_e96_gfx1250 0, $vgpr0_vgpr1, 0, killed $vgpr2_vgpr3, 0, killed $vgpr4_vgpr5, 0, killed $vgpr6, 0, killed $vgpr8, 0, killed $vgpr7, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr12 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0_vgpr1 = IMPLICIT_DEF + $vgpr2_vgpr3 = IMPLICIT_DEF + $vgpr4_vgpr5 = IMPLICIT_DEF + $vgpr6 = IMPLICIT_DEF + $vgpr7 = IMPLICIT_DEF + $vgpr8 = IMPLICIT_DEF + $vgpr10_vgpr11 = V_FMA_F64_e64 0, $vgpr0_vgpr1, 0, $vgpr2_vgpr3, 0, $vgpr4_vgpr5, 0, 0, implicit $mode, implicit $exec + $vgpr12 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr9 = V_FMA_F32_e64 0, $vgpr6, 0, $vgpr8, 0, $vgpr7, 0, 0, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_no_combine_fma_f64_fma_f32_overlapping_src1 +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_no_combine_fma_f64_fma_f32_overlapping_src1 + ; SCHED: $vgpr0_vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4_vgpr5 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr7 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr10_vgpr11 = V_FMA_F64_e64 0, $vgpr0_vgpr1, 0, $vgpr2_vgpr3, 0, killed $vgpr4_vgpr5, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr12 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; SCHED-NEXT: $vgpr9 = V_FMA_F32_e64 0, killed $vgpr6, 0, killed $vgpr3, 0, killed $vgpr7, 0, 0, implicit $mode, implicit $exec + ; + ; PAIR-LABEL: name: vopd_no_combine_fma_f64_fma_f32_overlapping_src1 + ; PAIR: $vgpr0_vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4_vgpr5 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr7 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr10_vgpr11 = V_FMA_F64_e64 0, $vgpr0_vgpr1, 0, $vgpr2_vgpr3, 0, killed $vgpr4_vgpr5, 0, 0, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr12 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; PAIR-NEXT: $vgpr9 = V_FMA_F32_e64 0, killed $vgpr6, 0, killed $vgpr3, 0, killed $vgpr7, 0, 0, implicit $mode, implicit $exec + $vgpr0_vgpr1 = IMPLICIT_DEF + $vgpr2_vgpr3 = IMPLICIT_DEF + $vgpr4_vgpr5 = IMPLICIT_DEF + $vgpr6 = IMPLICIT_DEF + $vgpr7 = IMPLICIT_DEF + $vgpr10_vgpr11 = V_FMA_F64_e64 0, $vgpr0_vgpr1, 0, $vgpr2_vgpr3, 0, $vgpr4_vgpr5, 0, 0, implicit $mode, implicit $exec + $vgpr12 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr9 = V_FMA_F32_e64 0, $vgpr6, 0, $vgpr3, 0, $vgpr7, 0, 0, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_combine_fma_f32_add_f64_e32 +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_fma_f32_add_f64_e32 + ; SCHED: $vgpr0_vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr5 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr6, 0, killed $vgpr4, 0, killed $vgpr5, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr8_vgpr9 = V_ADD_F64_pseudo_e32 $vgpr0_vgpr1, killed $vgpr2_vgpr3, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr10 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_fma_f32_add_f64_e32 + ; PAIR: $vgpr0_vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr5 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr8_vgpr9, $vgpr7 = V_DUAL_ADD_F64_pseudo_e32_X_FMA_F32_e64_e96_gfx1250 0, $vgpr0_vgpr1, 0, killed $vgpr2_vgpr3, 0, killed $vgpr6, 0, killed $vgpr4, 0, killed $vgpr5, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr10 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0_vgpr1 = IMPLICIT_DEF + $vgpr2_vgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $vgpr5 = IMPLICIT_DEF + $vgpr6 = IMPLICIT_DEF + $vgpr7 = V_FMA_F32_e64 0, $vgpr6, 0, $vgpr4, 0, $vgpr5, 0, 0, implicit $mode, implicit $exec + $vgpr10 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr8_vgpr9 = V_ADD_F64_pseudo_e32 $vgpr0_vgpr1, $vgpr2_vgpr3, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_combine_fma_f32_add_f64_e64 +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_fma_f32_add_f64_e64 + ; SCHED: $vgpr0_vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr5 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr6, 0, killed $vgpr4, 0, killed $vgpr5, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr8_vgpr9 = V_ADD_F64_pseudo_e64 0, $vgpr0_vgpr1, 0, killed $vgpr2_vgpr3, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr10 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_fma_f32_add_f64_e64 + ; PAIR: $vgpr0_vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr5 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr8_vgpr9, $vgpr7 = V_DUAL_ADD_F64_pseudo_e32_X_FMA_F32_e64_e96_gfx1250 0, $vgpr0_vgpr1, 0, killed $vgpr2_vgpr3, 0, killed $vgpr6, 0, killed $vgpr4, 0, killed $vgpr5, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr10 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0_vgpr1 = IMPLICIT_DEF + $vgpr2_vgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $vgpr5 = IMPLICIT_DEF + $vgpr6 = IMPLICIT_DEF + $vgpr7 = V_FMA_F32_e64 0, $vgpr6, 0, $vgpr4, 0, $vgpr5, 0, 0, implicit $mode, implicit $exec + $vgpr10 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr8_vgpr9 = V_ADD_F64_pseudo_e64 0, $vgpr0_vgpr1, 0, $vgpr2_vgpr3, 0, 0, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_combine_fma_f32_add_f64_e64_neg +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_fma_f32_add_f64_e64_neg + ; SCHED: $vgpr0_vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr5 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr6, 0, killed $vgpr4, 0, killed $vgpr5, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr8_vgpr9 = V_ADD_F64_pseudo_e64 1, $vgpr0_vgpr1, 1, killed $vgpr2_vgpr3, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr10 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_fma_f32_add_f64_e64_neg + ; PAIR: $vgpr0_vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr5 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr8_vgpr9, $vgpr7 = V_DUAL_ADD_F64_pseudo_e32_X_FMA_F32_e64_e96_gfx1250 1, $vgpr0_vgpr1, 1, killed $vgpr2_vgpr3, 0, killed $vgpr6, 0, killed $vgpr4, 0, killed $vgpr5, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr10 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0_vgpr1 = IMPLICIT_DEF + $vgpr2_vgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $vgpr5 = IMPLICIT_DEF + $vgpr6 = IMPLICIT_DEF + $vgpr7 = V_FMA_F32_e64 0, $vgpr6, 0, $vgpr4, 0, $vgpr5, 0, 0, implicit $mode, implicit $exec + $vgpr10 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr8_vgpr9 = V_ADD_F64_pseudo_e64 1, $vgpr0_vgpr1, 1, $vgpr2_vgpr3, 0, 0, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_combine_fma_bitop +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_fma_bitop + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr5 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr6 = V_BITOP3_B32_e64 killed $vgpr3, killed $vgpr4, 0, 123, implicit $exec + ; SCHED-NEXT: $vgpr7 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_fma_bitop + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr5, $vgpr6 = V_DUAL_FMA_F32_e64_X_BITOP2_B32_e64_e96_gfx1250 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, killed $vgpr3, killed $vgpr4, 123, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $exec + ; PAIR-NEXT: $vgpr7 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $vgpr5 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, $vgpr2, 0, 0, implicit $mode, implicit $exec + $vgpr7 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr6 = V_BITOP3_B32_e64 $vgpr3, $vgpr4, 0, 123, implicit $exec +... + +# Make sure bitop3 modifier does not count against constant bus limit. 
+--- +name: vopd_combine_fma_bitop_2_scalar_src +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_fma_bitop_2_scalar_src + ; SCHED: $sgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $sgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr5 = V_FMA_F32_e64 0, $sgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr6 = V_BITOP3_B32_e64 killed $sgpr3, killed $vgpr4, 0, 123, implicit $exec + ; SCHED-NEXT: $vgpr7 = V_BFM_B32_e64 killed $sgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_fma_bitop_2_scalar_src + ; PAIR: $sgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $sgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr5, $vgpr6 = V_DUAL_FMA_F32_e64_X_BITOP2_B32_e64_e96_gfx1250 0, $sgpr0, 0, $vgpr1, 0, killed $vgpr2, killed $sgpr3, killed $vgpr4, 123, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $exec + ; PAIR-NEXT: $vgpr7 = V_BFM_B32_e64 killed $sgpr0, killed $vgpr1, implicit $exec + $sgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $sgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $vgpr5 = V_FMA_F32_e64 0, $sgpr0, 0, $vgpr1, 0, $vgpr2, 0, 0, implicit $mode, implicit $exec + $vgpr7 = V_BFM_B32_e64 $sgpr0, $vgpr1, implicit $exec + $vgpr6 = V_BITOP3_B32_e64 $sgpr3, $vgpr4, 0, 123, implicit $exec +... 
+ +--- +name: vopd_combine_bitop_mov_b32 +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_bitop_mov_b32 + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = V_BITOP3_B32_e64 $vgpr0, $vgpr1, 0, 20, implicit $exec + ; SCHED-NEXT: $vgpr5 = V_MOV_B32_e32 killed $vgpr2, implicit $exec + ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_bitop_mov_b32 + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr5, $vgpr3 = V_DUAL_MOV_B32_e32_X_BITOP2_B32_e64_e96_gfx1250 killed $vgpr2, $vgpr0, $vgpr1, 20, implicit $exec, implicit $exec, implicit $exec + ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr3 = V_BITOP3_B32_e64 $vgpr0, $vgpr1, 0, 20, implicit $exec + $vgpr4 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr5 = V_MOV_B32_e32 $vgpr2, implicit $exec +... 
+ +--- +name: vopd_no_combine_mov_b32_bitop_non_imm_src2 +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_no_combine_mov_b32_bitop_non_imm_src2 + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr5 = V_MOV_B32_e32 $vgpr2, implicit $exec + ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + ; SCHED-NEXT: $vgpr3 = V_BITOP3_B32_e64 killed $vgpr0, killed $vgpr1, killed $vgpr2, 20, implicit $exec + ; + ; PAIR-LABEL: name: vopd_no_combine_mov_b32_bitop_non_imm_src2 + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr5 = V_MOV_B32_e32 $vgpr2, implicit $exec + ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + ; PAIR-NEXT: $vgpr3 = V_BITOP3_B32_e64 killed $vgpr0, killed $vgpr1, killed $vgpr2, 20, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr5 = V_MOV_B32_e32 $vgpr2, implicit $exec + $vgpr4 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr3 = V_BITOP3_B32_e64 $vgpr0, $vgpr1, $vgpr2, 20, implicit $exec +... 
+ +--- +name: vopd_no_combine_mov_b32_bitop_non_zero_src2 +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_no_combine_mov_b32_bitop_non_zero_src2 + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr5 = V_MOV_B32_e32 killed $vgpr2, implicit $exec + ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + ; SCHED-NEXT: $vgpr3 = V_BITOP3_B32_e64 killed $vgpr0, killed $vgpr1, 1, 20, implicit $exec + ; + ; PAIR-LABEL: name: vopd_no_combine_mov_b32_bitop_non_zero_src2 + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr5 = V_MOV_B32_e32 killed $vgpr2, implicit $exec + ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + ; PAIR-NEXT: $vgpr3 = V_BITOP3_B32_e64 killed $vgpr0, killed $vgpr1, 1, 20, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr5 = V_MOV_B32_e32 $vgpr2, implicit $exec + $vgpr4 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr3 = V_BITOP3_B32_e64 $vgpr0, $vgpr1, 1, 20, implicit $exec +... 
+ +--- +name: vopd_no_combine_bitop3_mov_dpp_vgpr_src2 +tracksRegLiveness: true +body: | + bb.0: + ; SCHED-LABEL: name: vopd_no_combine_bitop3_mov_dpp_vgpr_src2 + ; SCHED: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF + ; SCHED-NEXT: renamable $vgpr1 = V_MOV_B32_dpp killed $vgpr1, $vgpr3, 258, 15, 15, 0, implicit $exec + ; SCHED-NEXT: renamable $vgpr1 = V_BITOP3_B32_e64 killed $vgpr3, killed $vgpr4, killed $vgpr1, 128, implicit $exec + ; SCHED-NEXT: renamable $vgpr3 = V_MOV_B32_e32 -1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_no_combine_bitop3_mov_dpp_vgpr_src2 + ; PAIR: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF + ; PAIR-NEXT: renamable $vgpr1 = V_MOV_B32_dpp killed $vgpr1, $vgpr3, 258, 15, 15, 0, implicit $exec + ; PAIR-NEXT: renamable $vgpr1 = V_BITOP3_B32_e64 killed $vgpr3, killed $vgpr4, killed $vgpr1, 128, implicit $exec + ; PAIR-NEXT: renamable $vgpr3 = V_MOV_B32_e32 -1, implicit $exec + $vgpr1 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + + renamable $vgpr1 = V_MOV_B32_dpp killed $vgpr1, $vgpr3, 258, 15, 15, 0, implicit $exec + renamable $vgpr1 = V_BITOP3_B32_e64 killed $vgpr3, $vgpr4, killed $vgpr1, 128, implicit $exec + renamable $vgpr3 = V_MOV_B32_e32 -1, implicit $exec +... 
+ +--- +name: vopd_combine_mov_or +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_mov_or + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec + ; SCHED-NEXT: $vgpr5 = V_OR_B32_e32 $vgpr1, killed $vgpr2, implicit $exec + ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_mov_or + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3, $vgpr5 = V_DUAL_MOV_B32_e32_X_BITOP2_B32_e64_e96_gfx1250 $vgpr0, $vgpr1, killed $vgpr2, 84, implicit $exec, implicit $exec, implicit $exec + ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec + $vgpr4 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr5 = V_OR_B32_e32 $vgpr1, $vgpr2, implicit $exec +... 
+ +--- +name: vopd_combine_mov_and +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_mov_and + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec + ; SCHED-NEXT: $vgpr5 = V_AND_B32_e32 $vgpr1, killed $vgpr2, implicit $exec + ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_mov_and + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3, $vgpr5 = V_DUAL_MOV_B32_e32_X_BITOP2_B32_e64_e96_gfx1250 $vgpr0, $vgpr1, killed $vgpr2, 64, implicit $exec, implicit $exec, implicit $exec + ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec + $vgpr4 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr5 = V_AND_B32_e32 $vgpr1, $vgpr2, implicit $exec +... 
+ +--- +name: vopd_combine_mov_xor +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_mov_xor + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec + ; SCHED-NEXT: $vgpr5 = V_XOR_B32_e32 $vgpr1, killed $vgpr2, implicit $exec + ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_mov_xor + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3, $vgpr5 = V_DUAL_MOV_B32_e32_X_BITOP2_B32_e64_e96_gfx1250 $vgpr0, $vgpr1, killed $vgpr2, 20, implicit $exec, implicit $exec, implicit $exec + ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec + $vgpr4 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr5 = V_XOR_B32_e32 $vgpr1, $vgpr2, implicit $exec +... 
+ +--- +name: vopd_combine_mov_xnor +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_mov_xnor + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec + ; SCHED-NEXT: $vgpr5 = V_XNOR_B32_e32 $vgpr1, killed $vgpr2, implicit $exec + ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_mov_xnor + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3, $vgpr5 = V_DUAL_MOV_B32_e32_X_BITOP2_B32_e64_e96_gfx1250 $vgpr0, $vgpr1, killed $vgpr2, 65, implicit $exec, implicit $exec, implicit $exec + ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec + $vgpr4 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr5 = V_XNOR_B32_e32 $vgpr1, $vgpr2, implicit $exec +... + +# V_NOT_B32 can also be combined to BITOP2, but we need to come up with a fake src1 +# which would satisfy all register constraints and does not break liveness. +# This is not trivial at the very least. 
+--- +name: vopd_combine_mov_not +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_mov_not + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec + ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e64 killed $vgpr0, $vgpr1, implicit $exec + ; SCHED-NEXT: $vgpr5 = V_NOT_B32_e32 killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_mov_not + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec + ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e64 killed $vgpr0, $vgpr1, implicit $exec + ; PAIR-NEXT: $vgpr5 = V_NOT_B32_e32 killed $vgpr1, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec + $vgpr4 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr5 = V_NOT_B32_e32 $vgpr1, implicit $exec +... + +--- +name: vopd_combine_fadd_not +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_fadd_not + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; SCHED-NEXT: $vgpr5 = V_NOT_B32_e32 killed $vgpr2, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_fadd_not + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; PAIR-NEXT: $vgpr5 = V_NOT_B32_e32 killed $vgpr2, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr3 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec + $vgpr4 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr5 = 
V_NOT_B32_e32 $vgpr2, implicit $exec +... + +--- +name: vopd_combine_fadd_f64_not +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_fadd_f64_not + ; SCHED: $vgpr0_vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr8_vgpr9 = V_ADD_F64_pseudo_e32 $vgpr0_vgpr1, killed $vgpr2_vgpr3, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr10 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; SCHED-NEXT: $vgpr11 = V_NOT_B32_e32 killed $vgpr6, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_fadd_f64_not + ; PAIR: $vgpr0_vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr8_vgpr9 = V_ADD_F64_pseudo_e32 $vgpr0_vgpr1, killed $vgpr2_vgpr3, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr10 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; PAIR-NEXT: $vgpr11 = V_NOT_B32_e32 killed $vgpr6, implicit $exec + $vgpr0_vgpr1 = IMPLICIT_DEF + $vgpr2_vgpr3 = IMPLICIT_DEF + $vgpr6 = IMPLICIT_DEF + $vgpr8_vgpr9 = V_ADD_F64_pseudo_e32 $vgpr0_vgpr1, $vgpr2_vgpr3, implicit $mode, implicit $exec + $vgpr10 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr11 = V_NOT_B32_e32 $vgpr6, implicit $exec +... 
+ +--- +name: vopd_no_combine_src1_imm +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_no_combine_src1_imm + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr5 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, 1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; SCHED-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr4, 0, killed $vgpr5, 0, 0, implicit $mode, implicit $exec + ; + ; PAIR-LABEL: name: vopd_no_combine_src1_imm + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr5 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, 1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; PAIR-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr4, 0, killed $vgpr5, 0, 0, implicit $mode, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $vgpr5 = IMPLICIT_DEF + $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, 1, 0, $vgpr2, 0, 0, implicit $mode, implicit $exec + $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr7 = V_FMA_F32_e64 0, $vgpr3, 0, $vgpr4, 0, $vgpr5, 0, 0, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_no_combine_src2_imm +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_no_combine_src2_imm + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr5 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; SCHED-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr4, 0, 1, 0, 0, implicit $mode, implicit $exec + ; + ; PAIR-LABEL: name: vopd_no_combine_src2_imm + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr5 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; PAIR-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr4, 0, 1, 0, 0, implicit $mode, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $vgpr5 = IMPLICIT_DEF + $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, $vgpr2, 0, 0, implicit $mode, implicit $exec + $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr7 = V_FMA_F32_e64 0, $vgpr3, 0, $vgpr4, 0, 1, 0, 0, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_no_combine_src1_sgpr +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_no_combine_src1_sgpr + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $sgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr5 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, killed $sgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; SCHED-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr4, 0, killed $vgpr5, 0, 0, implicit $mode, implicit $exec + ; + ; PAIR-LABEL: name: vopd_no_combine_src1_sgpr + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $sgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr5 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, killed $sgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; PAIR-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr4, 0, killed $vgpr5, 0, 0, implicit $mode, implicit $exec + $vgpr0 = IMPLICIT_DEF + $sgpr1 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $vgpr5 = IMPLICIT_DEF + $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $sgpr1, 0, $vgpr2, 0, 0, implicit $mode, implicit $exec + $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr7 = V_FMA_F32_e64 0, $vgpr3, 0, $vgpr4, 0, $vgpr5, 0, 0, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_no_combine_src2_sgpr +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_no_combine_src2_sgpr + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $sgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr5 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; SCHED-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr4, 0, killed $sgpr1, 0, 0, implicit $mode, implicit $exec + ; + ; PAIR-LABEL: name: vopd_no_combine_src2_sgpr + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $sgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr5 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; PAIR-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr4, 0, killed $sgpr1, 0, 0, implicit $mode, implicit $exec + $vgpr0 = IMPLICIT_DEF + $sgpr1 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $vgpr5 = IMPLICIT_DEF + $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, $vgpr2, 0, 0, implicit $mode, implicit $exec + $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr7 = V_FMA_F32_e64 0, $vgpr3, 0, $vgpr4, 0, $sgpr1, 0, 0, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_combine_cndmask_fadd +tracksRegLiveness: true +body: | + bb.0: + liveins: $vcc_lo + + ; SCHED-LABEL: name: vopd_combine_cndmask_fadd + ; SCHED: liveins: $vcc_lo + ; SCHED-NEXT: {{ $}} + ; SCHED-NEXT: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = V_CNDMASK_B32_e32 $vgpr0, $vgpr1, implicit $exec, implicit killed $vcc_lo + ; SCHED-NEXT: $vgpr7 = V_ADD_F32_e32 killed $vgpr3, killed $vgpr4, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_cndmask_fadd + ; PAIR: liveins: $vcc_lo + ; PAIR-NEXT: {{ $}} + ; PAIR-NEXT: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6, $vgpr7 = V_DUAL_CNDMASK_B32_e32_X_ADD_F32_e32_gfx1250 $vgpr0, $vgpr1, killed $vgpr3, killed $vgpr4, implicit $vcc_lo, implicit $exec, implicit $mode, implicit $exec, implicit killed $vcc_lo, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $vgpr6 = V_CNDMASK_B32_e32 $vgpr0, $vgpr1, implicit $exec, implicit $vcc + $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr7 = V_ADD_F32_e32 $vgpr3, $vgpr4, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_combine_cndmask_fma +tracksRegLiveness: true +body: | + bb.0: + liveins: $vcc_lo + + ; SCHED-LABEL: name: vopd_combine_cndmask_fma + ; SCHED: liveins: $vcc_lo + ; SCHED-NEXT: {{ $}} + ; SCHED-NEXT: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr5 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = V_CNDMASK_B32_e32 $vgpr0, $vgpr1, implicit $exec, implicit killed $vcc_lo + ; SCHED-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr4, 0, killed $vgpr5, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_cndmask_fma + ; PAIR: liveins: $vcc_lo + ; PAIR-NEXT: {{ $}} + ; PAIR-NEXT: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr5 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6, $vgpr7 = V_DUAL_CNDMASK_B32_e32_X_FMA_F32_e64_e96_gfx1250 0, $vgpr0, 0, $vgpr1, $vcc_lo, 0, killed $vgpr3, 0, killed $vgpr4, 0, killed $vgpr5, implicit $exec, implicit $mode, implicit $exec, implicit killed $vcc_lo, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $vgpr5 = IMPLICIT_DEF + $vgpr6 = V_CNDMASK_B32_e32 $vgpr0, $vgpr1, implicit $exec, implicit $vcc + $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr7 = V_FMA_F32_e64 0, $vgpr3, 0, $vgpr4, 0, $vgpr5, 0, 0, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_combine_cndmask_e64_vcc_fadd +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_cndmask_e64_vcc_fadd + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF + ; SCHED-NEXT: $vcc = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = V_CNDMASK_B32_e64 0, $vgpr0, 0, $vgpr1, killed $vcc_lo, implicit $exec + ; SCHED-NEXT: $vgpr7 = V_ADD_F32_e32 killed $vgpr3, killed $vgpr4, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_cndmask_e64_vcc_fadd + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF + ; PAIR-NEXT: $vcc = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6, $vgpr7 = V_DUAL_CNDMASK_B32_e32_X_ADD_F32_e32_e96_gfx1250 0, $vgpr0, 0, $vgpr1, killed $vcc_lo, 0, killed $vgpr3, 0, killed $vgpr4, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $vcc = IMPLICIT_DEF + $vgpr6 = V_CNDMASK_B32_e64 0, $vgpr0, 0, $vgpr1, $vcc_lo, implicit $exec + $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr7 = V_ADD_F32_e32 $vgpr3, $vgpr4, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_combine_cndmask_e64_sgpr_fadd +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_cndmask_e64_sgpr_fadd + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF + ; SCHED-NEXT: $sgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = V_CNDMASK_B32_e64 0, $vgpr0, 0, $vgpr1, killed $sgpr0, implicit $exec + ; SCHED-NEXT: $vgpr7 = V_ADD_F32_e32 killed $vgpr3, killed $vgpr4, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_cndmask_e64_sgpr_fadd + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF + ; PAIR-NEXT: $sgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6, $vgpr7 = V_DUAL_CNDMASK_B32_e32_X_ADD_F32_e32_e96_gfx1250 0, $vgpr0, 0, $vgpr1, killed $sgpr0, 0, killed $vgpr3, 0, killed $vgpr4, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $sgpr0 = IMPLICIT_DEF + $vgpr6 = V_CNDMASK_B32_e64 0, $vgpr0, 0, $vgpr1, $sgpr0, implicit $exec + $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr7 = V_ADD_F32_e32 $vgpr3, $vgpr4, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_combine_cndmask_e64_neg_vcc_fadd +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_cndmask_e64_neg_vcc_fadd + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF + ; SCHED-NEXT: $vcc = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = V_CNDMASK_B32_e64 0, $vgpr0, 1, $vgpr1, killed $vcc_lo, implicit $exec + ; SCHED-NEXT: $vgpr7 = V_ADD_F32_e32 killed $vgpr3, killed $vgpr4, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_cndmask_e64_neg_vcc_fadd + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF + ; PAIR-NEXT: $vcc = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6, $vgpr7 = V_DUAL_CNDMASK_B32_e32_X_ADD_F32_e32_e96_gfx1250 0, $vgpr0, 1, $vgpr1, killed $vcc_lo, 0, killed $vgpr3, 0, killed $vgpr4, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $vcc = IMPLICIT_DEF + $vgpr6 = V_CNDMASK_B32_e64 0, $vgpr0, 1, $vgpr1, $vcc_lo, implicit $exec + $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr7 = V_ADD_F32_e32 $vgpr3, $vgpr4, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_no_combine_cndmask_e64_vcc_fadd_constant_bus_limit +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_no_combine_cndmask_e64_vcc_fadd_constant_bus_limit + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $sgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF + ; SCHED-NEXT: $sgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vcc = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = V_CNDMASK_B32_e64 0, killed $sgpr0, 0, $vgpr1, killed $vcc_lo, implicit $exec + ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; SCHED-NEXT: $vgpr7 = V_ADD_F32_e32 killed $sgpr3, killed $vgpr4, implicit $mode, implicit $exec + ; + ; PAIR-LABEL: name: vopd_no_combine_cndmask_e64_vcc_fadd_constant_bus_limit + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $sgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF + ; PAIR-NEXT: $sgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vcc = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6 = V_CNDMASK_B32_e64 0, killed $sgpr0, 0, $vgpr1, killed $vcc_lo, implicit $exec + ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; PAIR-NEXT: $vgpr7 = V_ADD_F32_e32 killed $sgpr3, killed $vgpr4, implicit $mode, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $sgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $sgpr0 = IMPLICIT_DEF + $vcc = IMPLICIT_DEF + $vgpr6 = V_CNDMASK_B32_e64 0, $sgpr0, 0, $vgpr1, $vcc_lo, implicit $exec + $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr7 = V_ADD_F32_e32 $sgpr3, $vgpr4, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_no_combine_cndmask_e64_vcc_fadd_sgpr_src1 +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_no_combine_cndmask_e64_vcc_fadd_sgpr_src1 + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $sgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF + ; SCHED-NEXT: $sgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vcc = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = V_CNDMASK_B32_e64 0, $vgpr0, 0, killed $sgpr0, killed $vcc_lo, implicit $exec + ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; SCHED-NEXT: $vgpr7 = V_ADD_F32_e32 killed $sgpr3, killed $vgpr4, implicit $mode, implicit $exec + ; + ; PAIR-LABEL: name: vopd_no_combine_cndmask_e64_vcc_fadd_sgpr_src1 + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $sgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF + ; PAIR-NEXT: $sgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vcc = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6 = V_CNDMASK_B32_e64 0, $vgpr0, 0, killed $sgpr0, killed $vcc_lo, implicit $exec + ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; PAIR-NEXT: $vgpr7 = V_ADD_F32_e32 killed $sgpr3, killed $vgpr4, implicit $mode, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $sgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $sgpr0 = IMPLICIT_DEF + $vcc = IMPLICIT_DEF + $vgpr6 = V_CNDMASK_B32_e64 0, $vgpr0, 0, $sgpr0, $vcc_lo, implicit $exec + $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr7 = V_ADD_F32_e32 $sgpr3, $vgpr4, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_combine_cndmask_e64_cndmask_e32 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vcc_lo + + ; SCHED-LABEL: name: vopd_combine_cndmask_e64_cndmask_e32 + ; SCHED: liveins: $vcc_lo + ; SCHED-NEXT: {{ $}} + ; SCHED-NEXT: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF + ; SCHED-NEXT: $sgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = V_CNDMASK_B32_e64 0, $vgpr0, 0, $vgpr1, killed $sgpr0, implicit $exec + ; SCHED-NEXT: $vgpr7 = V_CNDMASK_B32_e32 killed $vgpr3, killed $vgpr4, implicit killed $vcc_lo, implicit $exec + ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_cndmask_e64_cndmask_e32 + ; PAIR: liveins: $vcc_lo + ; PAIR-NEXT: {{ $}} + ; PAIR-NEXT: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF + ; PAIR-NEXT: $sgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6, $vgpr7 = V_DUAL_CNDMASK_B32_e32_X_CNDMASK_B32_e32_e96_gfx1250 0, $vgpr0, 0, $vgpr1, killed $sgpr0, 0, killed $vgpr3, 0, killed $vgpr4, $vcc_lo, implicit $exec, implicit $exec, implicit killed $vcc_lo, implicit $exec + ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $sgpr0 = IMPLICIT_DEF + $vgpr6 = V_CNDMASK_B32_e64 0, $vgpr0, 0, $vgpr1, $sgpr0, implicit $exec + $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr7 = V_CNDMASK_B32_e32 $vgpr3, $vgpr4, implicit $vcc, implicit $exec +... 
+ +--- +name: vopd_combine_cndmask_e32_cndmask_e64 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vcc_lo + + ; SCHED-LABEL: name: vopd_combine_cndmask_e32_cndmask_e64 + ; SCHED: liveins: $vcc_lo + ; SCHED-NEXT: {{ $}} + ; SCHED-NEXT: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF + ; SCHED-NEXT: $sgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = V_CNDMASK_B32_e32 $vgpr0, $vgpr1, implicit killed $vcc_lo, implicit $exec + ; SCHED-NEXT: $vgpr7 = V_CNDMASK_B32_e64 0, killed $vgpr3, 0, killed $vgpr4, killed $sgpr0, implicit $exec + ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_cndmask_e32_cndmask_e64 + ; PAIR: liveins: $vcc_lo + ; PAIR-NEXT: {{ $}} + ; PAIR-NEXT: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF + ; PAIR-NEXT: $sgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6, $vgpr7 = V_DUAL_CNDMASK_B32_e32_X_CNDMASK_B32_e32_e96_gfx1250 0, $vgpr0, 0, $vgpr1, $vcc_lo, 0, killed $vgpr3, 0, killed $vgpr4, killed $sgpr0, implicit $exec, implicit killed $vcc_lo, implicit $exec, implicit $exec + ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $sgpr0 = IMPLICIT_DEF + $vgpr6 = V_CNDMASK_B32_e32 $vgpr0, $vgpr1, implicit $vcc, implicit $exec + $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr7 = V_CNDMASK_B32_e64 0, $vgpr3, 0, $vgpr4, $sgpr0, implicit $exec +... 
+ +--- +name: vopd_combine_cndmask_e32_cndmask_e32 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vcc_lo + + ; SCHED-LABEL: name: vopd_combine_cndmask_e32_cndmask_e32 + ; SCHED: liveins: $vcc_lo + ; SCHED-NEXT: {{ $}} + ; SCHED-NEXT: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF + ; SCHED-NEXT: $sgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = V_CNDMASK_B32_e32 $vgpr0, $vgpr1, implicit $vcc_lo, implicit $exec + ; SCHED-NEXT: $vgpr7 = V_CNDMASK_B32_e32 killed $vgpr3, killed $vgpr4, implicit killed $vcc_lo, implicit $exec + ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_cndmask_e32_cndmask_e32 + ; PAIR: liveins: $vcc_lo + ; PAIR-NEXT: {{ $}} + ; PAIR-NEXT: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF + ; PAIR-NEXT: $sgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6, $vgpr7 = V_DUAL_CNDMASK_B32_e32_X_CNDMASK_B32_e32_gfx1250 $vgpr0, $vgpr1, killed $vgpr3, killed $vgpr4, implicit $vcc_lo, implicit $exec, implicit $vcc_lo, implicit $exec, implicit killed $vcc_lo, implicit $exec + ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $sgpr0 = IMPLICIT_DEF + $vgpr6 = V_CNDMASK_B32_e32 $vgpr0, $vgpr1, implicit $vcc, implicit $exec + $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr7 = V_CNDMASK_B32_e32 $vgpr3, $vgpr4, implicit $vcc, implicit $exec +... 
+ +--- +name: vopd_combine_cndmask_e64_cndmask_e64 +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_cndmask_e64_cndmask_e64 + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF + ; SCHED-NEXT: $sgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $sgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = V_CNDMASK_B32_e64 0, $vgpr0, 0, $vgpr1, killed $sgpr0, implicit $exec + ; SCHED-NEXT: $vgpr7 = V_CNDMASK_B32_e64 0, killed $vgpr3, 0, killed $vgpr4, killed $sgpr1, implicit $exec + ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_cndmask_e64_cndmask_e64 + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF + ; PAIR-NEXT: $sgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $sgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6, $vgpr7 = V_DUAL_CNDMASK_B32_e32_X_CNDMASK_B32_e32_e96_gfx1250 0, $vgpr0, 0, $vgpr1, killed $sgpr0, 0, killed $vgpr3, 0, killed $vgpr4, killed $sgpr1, implicit $exec, implicit $exec, implicit $exec + ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $sgpr0 = IMPLICIT_DEF + $sgpr1 = IMPLICIT_DEF + $vgpr6 = V_CNDMASK_B32_e64 0, $vgpr0, 0, $vgpr1, $sgpr0, implicit $exec + $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr7 = V_CNDMASK_B32_e64 0, $vgpr3, 0, $vgpr4, $sgpr1, implicit $exec +... 
+ +--- +name: vopd_combine_fadd_e64_fadd_e64 +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_fadd_e64_fadd_e64 + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr7 = V_ADD_F32_e64 0, killed $vgpr3, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_fadd_e64_fadd_e64 + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6, $vgpr7 = V_DUAL_ADD_F32_e32_X_ADD_F32_e32_e96_gfx1250 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr3, 0, killed $vgpr2, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr6 = V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec + $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr7 = V_ADD_F32_e64 0, $vgpr3, 0, $vgpr2, 0, 0, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_combine_fadd_e64_neg_fadd_e32 +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_fadd_e64_neg_fadd_e32 + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = V_ADD_F32_e64 0, $vgpr0, 1, $vgpr1, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr7 = V_ADD_F32_e32 killed $vgpr3, killed $vgpr2, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_fadd_e64_neg_fadd_e32 + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6, $vgpr7 = V_DUAL_ADD_F32_e32_X_ADD_F32_e32_e96_gfx1250 0, $vgpr0, 1, $vgpr1, 0, killed $vgpr3, 0, killed $vgpr2, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr6 = V_ADD_F32_e64 0, $vgpr0, 1, $vgpr1, 0, 0, implicit $mode, implicit $exec + $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr7 = V_ADD_F32_e32 $vgpr3, $vgpr2, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_no_combine_fadd_e64_abs_neg_fadd_e32 +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_no_combine_fadd_e64_abs_neg_fadd_e32 + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = V_ADD_F32_e64 0, $vgpr0, 3, $vgpr1, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; SCHED-NEXT: $vgpr7 = V_ADD_F32_e32 killed $vgpr3, killed $vgpr2, implicit $mode, implicit $exec + ; + ; PAIR-LABEL: name: vopd_no_combine_fadd_e64_abs_neg_fadd_e32 + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6 = V_ADD_F32_e64 0, $vgpr0, 3, $vgpr1, 0, 0, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; PAIR-NEXT: $vgpr7 = V_ADD_F32_e32 killed $vgpr3, killed $vgpr2, implicit $mode, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr6 = V_ADD_F32_e64 0, $vgpr0, 3, $vgpr1, 0, 0, implicit $mode, implicit $exec + $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr7 = V_ADD_F32_e32 $vgpr3, $vgpr2, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_combine_mul_f64_e64_sub_f32_neg +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_mul_f64_e64_sub_f32_neg + ; SCHED: $vgpr0_vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr8_vgpr9 = V_MUL_F64_pseudo_e64 1, $vgpr0_vgpr1, 0, killed $vgpr2_vgpr3, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr7 = V_SUB_F32_e64 0, killed $vgpr6, 1, killed $vgpr4, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr5 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_mul_f64_e64_sub_f32_neg + ; PAIR: $vgpr0_vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr8_vgpr9, $vgpr7 = V_DUAL_MUL_F64_pseudo_e32_X_SUB_F32_e32_e96_gfx1250 1, $vgpr0_vgpr1, 0, killed $vgpr2_vgpr3, 0, killed $vgpr6, 1, killed $vgpr4, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr5 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0_vgpr1 = IMPLICIT_DEF + $vgpr2_vgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $vgpr6 = IMPLICIT_DEF + $vgpr8_vgpr9 = V_MUL_F64_pseudo_e64 1, $vgpr0_vgpr1, 0, $vgpr2_vgpr3, 0, 0, implicit $mode, implicit $exec + $vgpr5 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr7 = V_SUB_F32_e64 0, $vgpr6, 1, $vgpr4, 0, 0, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_combine_mul_f64_e32_subrev_f32_neg +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_mul_f64_e32_subrev_f32_neg + ; SCHED: $vgpr0_vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr8_vgpr9 = V_MUL_F64_pseudo_e32 $vgpr0_vgpr1, killed $vgpr2_vgpr3, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr7 = V_SUBREV_F32_e64 1, killed $vgpr6, 0, killed $vgpr4, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr5 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_mul_f64_e32_subrev_f32_neg + ; PAIR: $vgpr0_vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr8_vgpr9, $vgpr7 = V_DUAL_MUL_F64_pseudo_e32_X_SUBREV_F32_e32_e96_gfx1250 0, $vgpr0_vgpr1, 0, killed $vgpr2_vgpr3, 1, killed $vgpr6, 0, killed $vgpr4, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr5 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0_vgpr1 = IMPLICIT_DEF + $vgpr2_vgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $vgpr6 = IMPLICIT_DEF + $vgpr8_vgpr9 = V_MUL_F64_pseudo_e32 $vgpr0_vgpr1, $vgpr2_vgpr3, implicit $mode, implicit $exec + $vgpr5 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr7 = V_SUBREV_F32_e64 1, $vgpr6, 0, $vgpr4, 0, 0, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_combine_min_num_f64_e64_mul_f32_neg +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_min_num_f64_e64_mul_f32_neg + ; SCHED: $vgpr0_vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr8_vgpr9 = V_MIN_NUM_F64_e64 1, $vgpr0_vgpr1, 0, killed $vgpr2_vgpr3, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr7 = V_MUL_F32_e64 0, killed $vgpr6, 1, killed $vgpr4, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr5 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_min_num_f64_e64_mul_f32_neg + ; PAIR: $vgpr0_vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr8_vgpr9, $vgpr7 = V_DUAL_MIN_NUM_F64_e32_X_MUL_F32_e32_e96_gfx1250 1, $vgpr0_vgpr1, 0, killed $vgpr2_vgpr3, 0, killed $vgpr6, 1, killed $vgpr4, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr5 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0_vgpr1 = IMPLICIT_DEF + $vgpr2_vgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $vgpr6 = IMPLICIT_DEF + $vgpr8_vgpr9 = V_MIN_NUM_F64_e64 1, $vgpr0_vgpr1, 0, $vgpr2_vgpr3, 0, 0, implicit $mode, implicit $exec + $vgpr5 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr7 = V_MUL_F32_e64 0, $vgpr6, 1, $vgpr4, 0, 0, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_combine_min_num_f64_e32_mul_legacy_f32_neg +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_min_num_f64_e32_mul_legacy_f32_neg + ; SCHED: $vgpr0_vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr8_vgpr9 = V_MIN_NUM_F64_e32 $vgpr0_vgpr1, killed $vgpr2_vgpr3, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr7 = V_MUL_LEGACY_F32_e64 1, killed $vgpr6, 0, killed $vgpr4, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr5 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_min_num_f64_e32_mul_legacy_f32_neg + ; PAIR: $vgpr0_vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr8_vgpr9, $vgpr7 = V_DUAL_MIN_NUM_F64_e32_X_MUL_LEGACY_F32_e32_e96_gfx1250 0, $vgpr0_vgpr1, 0, killed $vgpr2_vgpr3, 1, killed $vgpr6, 0, killed $vgpr4, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr5 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0_vgpr1 = IMPLICIT_DEF + $vgpr2_vgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $vgpr6 = IMPLICIT_DEF + $vgpr8_vgpr9 = V_MIN_NUM_F64_e32 $vgpr0_vgpr1, $vgpr2_vgpr3, implicit $mode, implicit $exec + $vgpr5 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr7 = V_MUL_LEGACY_F32_e64 1, $vgpr6, 0, $vgpr4, 0, 0, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_combine_max_num_f64_e64_min_f32_neg +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_max_num_f64_e64_min_f32_neg + ; SCHED: $vgpr0_vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr8_vgpr9 = V_MAX_NUM_F64_e64 0, $vgpr0_vgpr1, 1, killed $vgpr2_vgpr3, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr7 = V_MIN_F32_e64 1, killed $vgpr6, 0, killed $vgpr4, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr5 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_max_num_f64_e64_min_f32_neg + ; PAIR: $vgpr0_vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr8_vgpr9, $vgpr7 = V_DUAL_MAX_NUM_F64_e32_X_MIN_F32_e32_e96_gfx1250 0, $vgpr0_vgpr1, 1, killed $vgpr2_vgpr3, 1, killed $vgpr6, 0, killed $vgpr4, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr5 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0_vgpr1 = IMPLICIT_DEF + $vgpr2_vgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $vgpr6 = IMPLICIT_DEF + $vgpr8_vgpr9 = V_MAX_NUM_F64_e64 0, $vgpr0_vgpr1, 1, $vgpr2_vgpr3, 0, 0, implicit $mode, implicit $exec + $vgpr5 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr7 = V_MIN_F32_e64 1, $vgpr6, 0, $vgpr4, 0, 0, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_combine_max_num_f64_e32_max_f32_neg +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_max_num_f64_e32_max_f32_neg + ; SCHED: $vgpr0_vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr8_vgpr9 = V_MAX_NUM_F64_e32 $vgpr0_vgpr1, killed $vgpr2_vgpr3, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr7 = V_MAX_F32_e64 0, killed $vgpr6, 1, killed $vgpr4, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr5 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_max_num_f64_e32_max_f32_neg + ; PAIR: $vgpr0_vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr8_vgpr9, $vgpr7 = V_DUAL_MAX_NUM_F64_e32_X_MAX_F32_e32_e96_gfx1250 0, $vgpr0_vgpr1, 0, killed $vgpr2_vgpr3, 0, killed $vgpr6, 1, killed $vgpr4, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr5 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0_vgpr1 = IMPLICIT_DEF + $vgpr2_vgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $vgpr6 = IMPLICIT_DEF + $vgpr8_vgpr9 = V_MAX_NUM_F64_e32 $vgpr0_vgpr1, $vgpr2_vgpr3, implicit $mode, implicit $exec + $vgpr5 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr7 = V_MAX_F32_e64 0, $vgpr6, 1, $vgpr4, 0, 0, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_combine_add_f64_fmac_f32_e64_neg +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_add_f64_fmac_f32_e64_neg + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr10_vgpr11 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr2_vgpr3 = V_ADD_F64_pseudo_e32 10, killed $vgpr10_vgpr11, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr6 = V_FMAC_F32_e64 0, $vgpr0, 1, $vgpr1, 0, killed $vgpr6, 0, 0, implicit $mode, implicit $exec + ; SCHED-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 10, implicit $exec + ; SCHED-NEXT: $vgpr5 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_add_f64_fmac_f32_e64_neg + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr10_vgpr11 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr2_vgpr3, $vgpr6 = V_DUAL_ADD_F64_pseudo_e32_X_FMAC_F32_e32_e96_gfx1250 0, 10, 0, killed $vgpr10_vgpr11, 0, $vgpr0, 1, $vgpr1, killed $vgpr6, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec + ; PAIR-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 10, implicit $exec + ; PAIR-NEXT: $vgpr5 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr6 = IMPLICIT_DEF + $vgpr10_vgpr11 = IMPLICIT_DEF + $vgpr2_vgpr3 = V_ADD_F64_pseudo_e32 10, $vgpr10_vgpr11, implicit $mode, implicit $exec + $vgpr2_vgpr3 = V_MOV_B64_e32 10, implicit $exec + $vgpr5 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec + $vgpr6 = V_FMAC_F32_e64 0, $vgpr0, 1, $vgpr1, 0, $vgpr6, 0, 0, implicit $mode, implicit $exec +... 
+ +--- +name: vopd_combine_cndmask_e64_neg_cndmask_e64_neg +tracksRegLiveness: true +body: | + bb.0: + + ; SCHED-LABEL: name: vopd_combine_cndmask_e64_neg_cndmask_e64_neg + ; SCHED: $vgpr0 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF + ; SCHED-NEXT: $vcc = IMPLICIT_DEF + ; SCHED-NEXT: $vgpr6 = V_CNDMASK_B32_e64 1, killed $vgpr0, 0, killed $vgpr1, $vcc_lo, implicit $exec + ; SCHED-NEXT: $vgpr7 = V_CNDMASK_B32_e64 1, killed $vgpr3, 0, killed $vgpr4, killed $vcc_lo, implicit $exec + ; + ; PAIR-LABEL: name: vopd_combine_cndmask_e64_neg_cndmask_e64_neg + ; PAIR: $vgpr0 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF + ; PAIR-NEXT: $vcc = IMPLICIT_DEF + ; PAIR-NEXT: $vgpr6, $vgpr7 = V_DUAL_CNDMASK_B32_e32_X_CNDMASK_B32_e32_e96_gfx1250 1, killed $vgpr0, 0, killed $vgpr1, $vcc_lo, 1, killed $vgpr3, 0, killed $vgpr4, killed $vcc_lo, implicit $exec, implicit $exec, implicit $exec + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $vcc = IMPLICIT_DEF + $vgpr6 = V_CNDMASK_B32_e64 1, $vgpr0, 0, $vgpr1, $vcc_lo, implicit $exec + $vgpr7 = V_CNDMASK_B32_e64 1, $vgpr3, 0, $vgpr4, $vcc_lo, implicit $exec +... 
+ +--- +name: vopd_no_combine_dpp +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + ; SCHED-LABEL: name: vopd_no_combine_dpp + ; SCHED: liveins: $vgpr0, $vgpr1, $vgpr2 + ; SCHED-NEXT: {{ $}} + ; SCHED-NEXT: $vgpr3 = V_MOV_B32_e32 0, implicit $exec + ; SCHED-NEXT: $vgpr0 = V_ADD_F32_e64_dpp killed $vgpr0, 0, killed $vgpr2, 0, killed $vgpr1, 0, 1, 1, 15, 15, 1, implicit $mode, implicit $exec + ; + ; PAIR-LABEL: name: vopd_no_combine_dpp + ; PAIR: liveins: $vgpr0, $vgpr1, $vgpr2 + ; PAIR-NEXT: {{ $}} + ; PAIR-NEXT: $vgpr3 = V_MOV_B32_e32 0, implicit $exec + ; PAIR-NEXT: $vgpr0 = V_ADD_F32_e64_dpp killed $vgpr0, 0, killed $vgpr2, 0, killed $vgpr1, 0, 1, 1, 15, 15, 1, implicit $mode, implicit $exec + $vgpr3 = V_MOV_B32_e32 0, implicit $exec + $vgpr0 = V_ADD_F32_e64_dpp $vgpr0, 0, $vgpr2, 0, $vgpr1, 0, 1, 1, 15, 15, 1, implicit $mode, implicit $exec +... diff --git a/llvm/test/CodeGen/ARM/min-max-combine.ll b/llvm/test/CodeGen/ARM/min-max-combine.ll new file mode 100644 index 0000000000000..8cb0d79f5e339 --- /dev/null +++ b/llvm/test/CodeGen/ARM/min-max-combine.ll @@ -0,0 +1,185 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=armv7a < %s | FileCheck %s --check-prefix=ARM +; RUN: llc -mtriple=armv6m < %s | FileCheck %s --check-prefix=THUMB +; RUN: llc -mtriple=armv7m < %s | FileCheck %s --check-prefix=THUMB2 +; RUN: llc -mtriple=thumbv8.1m.main < %s | FileCheck %s --check-prefix=THUMBV8 + +declare i8 @llvm.smax.i8(i8 %a, i8 %b) readnone + +define i8 @smaxi8_zero(i8 %a) { +; ARM-LABEL: smaxi8_zero: +; ARM: @ %bb.0: +; ARM-NEXT: sxtb r0, r0 +; ARM-NEXT: bic r0, r0, r0, asr #31 +; ARM-NEXT: bx lr +; +; THUMB-LABEL: smaxi8_zero: +; THUMB: @ %bb.0: +; THUMB-NEXT: sxtb r0, r0 +; THUMB-NEXT: asrs r1, r0, #31 +; THUMB-NEXT: bics r0, r1 +; THUMB-NEXT: bx lr +; +; THUMB2-LABEL: smaxi8_zero: +; THUMB2: @ %bb.0: +; THUMB2-NEXT: sxtb r0, r0 +; THUMB2-NEXT: bic.w r0, r0, r0, asr #31 +; THUMB2-NEXT: bx lr 
+; +; THUMBV8-LABEL: smaxi8_zero: +; THUMBV8: @ %bb.0: +; THUMBV8-NEXT: sxtb r0, r0 +; THUMBV8-NEXT: bic.w r0, r0, r0, asr #31 +; THUMBV8-NEXT: bx lr + %c = call i8 @llvm.smax.i8(i8 %a, i8 0) + ret i8 %c +} + +declare i16 @llvm.smax.i16(i16 %a, i16 %b) readnone + +define i16 @smaxi16_zero(i16 %a) { +; ARM-LABEL: smaxi16_zero: +; ARM: @ %bb.0: +; ARM-NEXT: sxth r0, r0 +; ARM-NEXT: bic r0, r0, r0, asr #31 +; ARM-NEXT: bx lr +; +; THUMB-LABEL: smaxi16_zero: +; THUMB: @ %bb.0: +; THUMB-NEXT: sxth r0, r0 +; THUMB-NEXT: asrs r1, r0, #31 +; THUMB-NEXT: bics r0, r1 +; THUMB-NEXT: bx lr +; +; THUMB2-LABEL: smaxi16_zero: +; THUMB2: @ %bb.0: +; THUMB2-NEXT: sxth r0, r0 +; THUMB2-NEXT: bic.w r0, r0, r0, asr #31 +; THUMB2-NEXT: bx lr +; +; THUMBV8-LABEL: smaxi16_zero: +; THUMBV8: @ %bb.0: +; THUMBV8-NEXT: sxth r0, r0 +; THUMBV8-NEXT: bic.w r0, r0, r0, asr #31 +; THUMBV8-NEXT: bx lr + %c = call i16 @llvm.smax.i16(i16 %a, i16 0) + ret i16 %c +} + +declare i32 @llvm.smax.i32(i32 %a, i32 %b) readnone + +define i32 @smaxi32_zero(i32 %a) { +; ARM-LABEL: smaxi32_zero: +; ARM: @ %bb.0: +; ARM-NEXT: bic r0, r0, r0, asr #31 +; ARM-NEXT: bx lr +; +; THUMB-LABEL: smaxi32_zero: +; THUMB: @ %bb.0: +; THUMB-NEXT: asrs r1, r0, #31 +; THUMB-NEXT: bics r0, r1 +; THUMB-NEXT: bx lr +; +; THUMB2-LABEL: smaxi32_zero: +; THUMB2: @ %bb.0: +; THUMB2-NEXT: bic.w r0, r0, r0, asr #31 +; THUMB2-NEXT: bx lr +; +; THUMBV8-LABEL: smaxi32_zero: +; THUMBV8: @ %bb.0: +; THUMBV8-NEXT: bic.w r0, r0, r0, asr #31 +; THUMBV8-NEXT: bx lr + %c = call i32 @llvm.smax.i32(i32 %a, i32 0) + ret i32 %c +} + +; SMIN + +declare i8 @llvm.smin.i8(i8 %a, i8 %b) readnone + +define i8 @smini8_zero(i8 %a) { +; ARM-LABEL: smini8_zero: +; ARM: @ %bb.0: +; ARM-NEXT: sxtb r0, r0 +; ARM-NEXT: and r0, r0, r0, asr #31 +; ARM-NEXT: bx lr +; +; THUMB-LABEL: smini8_zero: +; THUMB: @ %bb.0: +; THUMB-NEXT: sxtb r1, r0 +; THUMB-NEXT: asrs r0, r1, #31 +; THUMB-NEXT: ands r0, r1 +; THUMB-NEXT: bx lr +; +; THUMB2-LABEL: smini8_zero: +; THUMB2: @ 
%bb.0: +; THUMB2-NEXT: sxtb r0, r0 +; THUMB2-NEXT: and.w r0, r0, r0, asr #31 +; THUMB2-NEXT: bx lr +; +; THUMBV8-LABEL: smini8_zero: +; THUMBV8: @ %bb.0: +; THUMBV8-NEXT: sxtb r0, r0 +; THUMBV8-NEXT: and.w r0, r0, r0, asr #31 +; THUMBV8-NEXT: bx lr + %c = call i8 @llvm.smin.i8(i8 %a, i8 0) + ret i8 %c +} + +declare i16 @llvm.smin.i16(i16 %a, i16 %b) readnone + +define i16 @smini16_zero(i16 %a) { +; ARM-LABEL: smini16_zero: +; ARM: @ %bb.0: +; ARM-NEXT: sxth r0, r0 +; ARM-NEXT: and r0, r0, r0, asr #31 +; ARM-NEXT: bx lr +; +; THUMB-LABEL: smini16_zero: +; THUMB: @ %bb.0: +; THUMB-NEXT: sxth r1, r0 +; THUMB-NEXT: asrs r0, r1, #31 +; THUMB-NEXT: ands r0, r1 +; THUMB-NEXT: bx lr +; +; THUMB2-LABEL: smini16_zero: +; THUMB2: @ %bb.0: +; THUMB2-NEXT: sxth r0, r0 +; THUMB2-NEXT: and.w r0, r0, r0, asr #31 +; THUMB2-NEXT: bx lr +; +; THUMBV8-LABEL: smini16_zero: +; THUMBV8: @ %bb.0: +; THUMBV8-NEXT: sxth r0, r0 +; THUMBV8-NEXT: and.w r0, r0, r0, asr #31 +; THUMBV8-NEXT: bx lr + %c = call i16 @llvm.smin.i16(i16 %a, i16 0) + ret i16 %c +} + +declare i32 @llvm.smin.i32(i32 %a, i32 %b) readnone + +define i32 @smini32_zero(i32 %a) { +; ARM-LABEL: smini32_zero: +; ARM: @ %bb.0: +; ARM-NEXT: and r0, r0, r0, asr #31 +; ARM-NEXT: bx lr +; +; THUMB-LABEL: smini32_zero: +; THUMB: @ %bb.0: +; THUMB-NEXT: asrs r1, r0, #31 +; THUMB-NEXT: ands r0, r1 +; THUMB-NEXT: bx lr +; +; THUMB2-LABEL: smini32_zero: +; THUMB2: @ %bb.0: +; THUMB2-NEXT: and.w r0, r0, r0, asr #31 +; THUMB2-NEXT: bx lr +; +; THUMBV8-LABEL: smini32_zero: +; THUMBV8: @ %bb.0: +; THUMBV8-NEXT: and.w r0, r0, r0, asr #31 +; THUMBV8-NEXT: bx lr + %c = call i32 @llvm.smin.i32(i32 %a, i32 0) + ret i32 %c +} diff --git a/llvm/test/CodeGen/BPF/remove_truncate_9.ll b/llvm/test/CodeGen/BPF/remove_truncate_9.ll index dd3114926bcf1..5ea55ef81d650 100644 --- a/llvm/test/CodeGen/BPF/remove_truncate_9.ll +++ b/llvm/test/CodeGen/BPF/remove_truncate_9.ll @@ -1,5 +1,6 @@ -; RUN: llc -mcpu=v2 -mtriple=bpf < %s | FileCheck %s -; RUN: llc 
-mcpu=v4 -mtriple=bpf < %s | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mcpu=v2 -mtriple=bpf < %s | FileCheck %s --check-prefixes=CHECK-V2 +; RUN: llc -mcpu=v4 -mtriple=bpf < %s | FileCheck %s --check-prefixes=CHECK-V4 ; Zero extension instructions should be eliminated at instruction ; selection phase for all test cases below. @@ -9,10 +10,36 @@ ; generated code (<<= remains because %c is used by both call and ; lshr in a few test cases). -; CHECK-NOT: &= -; CHECK-NOT: >>= - define void @shl_lshr_same_bb(ptr %p) { +; CHECK-V2-LABEL: shl_lshr_same_bb: +; CHECK-V2: # %bb.0: # %entry +; CHECK-V2-NEXT: r1 = *(u8 *)(r1 + 0) +; CHECK-V2-NEXT: r5 = 1 +; CHECK-V2-NEXT: if r1 == 0 goto LBB0_2 +; CHECK-V2-NEXT: # %bb.1: # %entry +; CHECK-V2-NEXT: r5 = 0 +; CHECK-V2-NEXT: LBB0_2: # %entry +; CHECK-V2-NEXT: r3 = r1 +; CHECK-V2-NEXT: r3 <<= 56 +; CHECK-V2-NEXT: r2 = r1 +; CHECK-V2-NEXT: r4 = r1 +; CHECK-V2-NEXT: call sink1 +; CHECK-V2-NEXT: exit +; +; CHECK-V4-LABEL: shl_lshr_same_bb: +; CHECK-V4: # %bb.0: # %entry +; CHECK-V4-NEXT: w1 = *(u8 *)(r1 + 0) +; CHECK-V4-NEXT: w5 = 1 +; CHECK-V4-NEXT: if w1 == 0 goto LBB0_2 +; CHECK-V4-NEXT: # %bb.1: # %entry +; CHECK-V4-NEXT: w5 = 0 +; CHECK-V4-NEXT: LBB0_2: # %entry +; CHECK-V4-NEXT: r3 = r1 +; CHECK-V4-NEXT: r3 <<= 56 +; CHECK-V4-NEXT: r2 = r1 +; CHECK-V4-NEXT: r4 = r1 +; CHECK-V4-NEXT: call sink1 +; CHECK-V4-NEXT: exit entry: %a = load i8, ptr %p, align 1 %b = zext i8 %a to i64 @@ -26,6 +53,35 @@ entry: } define void @shl_lshr_diff_bb(ptr %p) { +; CHECK-V2-LABEL: shl_lshr_diff_bb: +; CHECK-V2: # %bb.0: # %entry +; CHECK-V2-NEXT: r1 = *(u16 *)(r1 + 0) +; CHECK-V2-NEXT: r5 = 1 +; CHECK-V2-NEXT: if r1 == 0 goto LBB1_2 +; CHECK-V2-NEXT: # %bb.1: # %entry +; CHECK-V2-NEXT: r5 = 0 +; CHECK-V2-NEXT: LBB1_2: # %entry +; CHECK-V2-NEXT: r3 = r1 +; CHECK-V2-NEXT: r3 <<= 48 +; CHECK-V2-NEXT: r2 = r1 +; CHECK-V2-NEXT: r4 = r1 +; CHECK-V2-NEXT: call sink2 +; 
CHECK-V2-NEXT: exit +; +; CHECK-V4-LABEL: shl_lshr_diff_bb: +; CHECK-V4: # %bb.0: # %entry +; CHECK-V4-NEXT: w1 = *(u16 *)(r1 + 0) +; CHECK-V4-NEXT: w5 = 1 +; CHECK-V4-NEXT: if w1 == 0 goto LBB1_2 +; CHECK-V4-NEXT: # %bb.1: # %entry +; CHECK-V4-NEXT: w5 = 0 +; CHECK-V4-NEXT: LBB1_2: # %entry +; CHECK-V4-NEXT: r3 = r1 +; CHECK-V4-NEXT: r3 <<= 48 +; CHECK-V4-NEXT: r2 = r1 +; CHECK-V4-NEXT: r4 = r1 +; CHECK-V4-NEXT: call sink2 +; CHECK-V4-NEXT: exit entry: %a = load i16, ptr %p, align 2 %b = zext i16 %a to i64 @@ -45,6 +101,27 @@ next: } define void @load_zext_same_bb(ptr %p) { +; CHECK-V2-LABEL: load_zext_same_bb: +; CHECK-V2: # %bb.0: # %entry +; CHECK-V2-NEXT: r1 = *(u8 *)(r1 + 0) +; CHECK-V2-NEXT: r2 = 1 +; CHECK-V2-NEXT: if r1 == 0 goto LBB2_2 +; CHECK-V2-NEXT: # %bb.1: # %entry +; CHECK-V2-NEXT: r2 = 0 +; CHECK-V2-NEXT: LBB2_2: # %entry +; CHECK-V2-NEXT: call sink3 +; CHECK-V2-NEXT: exit +; +; CHECK-V4-LABEL: load_zext_same_bb: +; CHECK-V4: # %bb.0: # %entry +; CHECK-V4-NEXT: w1 = *(u8 *)(r1 + 0) +; CHECK-V4-NEXT: w2 = 1 +; CHECK-V4-NEXT: if w1 == 0 goto LBB2_2 +; CHECK-V4-NEXT: # %bb.1: # %entry +; CHECK-V4-NEXT: w2 = 0 +; CHECK-V4-NEXT: LBB2_2: # %entry +; CHECK-V4-NEXT: call sink3 +; CHECK-V4-NEXT: exit entry: %a = load i8, ptr %p, align 1 ; zext is implicit in this context @@ -54,6 +131,27 @@ entry: } define void @load_zext_diff_bb(ptr %p) { +; CHECK-V2-LABEL: load_zext_diff_bb: +; CHECK-V2: # %bb.0: # %entry +; CHECK-V2-NEXT: r1 = *(u8 *)(r1 + 0) +; CHECK-V2-NEXT: r2 = 1 +; CHECK-V2-NEXT: if r1 == 0 goto LBB3_2 +; CHECK-V2-NEXT: # %bb.1: # %next +; CHECK-V2-NEXT: r2 = 0 +; CHECK-V2-NEXT: LBB3_2: # %next +; CHECK-V2-NEXT: call sink3 +; CHECK-V2-NEXT: exit +; +; CHECK-V4-LABEL: load_zext_diff_bb: +; CHECK-V4: # %bb.0: # %entry +; CHECK-V4-NEXT: w1 = *(u8 *)(r1 + 0) +; CHECK-V4-NEXT: w2 = 1 +; CHECK-V4-NEXT: if w1 == 0 goto LBB3_2 +; CHECK-V4-NEXT: # %bb.1: # %next +; CHECK-V4-NEXT: w2 = 0 +; CHECK-V4-NEXT: LBB3_2: # %next +; CHECK-V4-NEXT: call sink3 +; 
CHECK-V4-NEXT: exit entry: %a = load i8, ptr %p, align 1 br label %next @@ -65,6 +163,27 @@ next: } define void @load_zext_diff_bb_2(ptr %p) { +; CHECK-V2-LABEL: load_zext_diff_bb_2: +; CHECK-V2: # %bb.0: # %entry +; CHECK-V2-NEXT: r1 = *(u32 *)(r1 + 0) +; CHECK-V2-NEXT: r2 = 1 +; CHECK-V2-NEXT: if r1 == 0 goto LBB4_2 +; CHECK-V2-NEXT: # %bb.1: # %next +; CHECK-V2-NEXT: r2 = 0 +; CHECK-V2-NEXT: LBB4_2: # %next +; CHECK-V2-NEXT: call sink4 +; CHECK-V2-NEXT: exit +; +; CHECK-V4-LABEL: load_zext_diff_bb_2: +; CHECK-V4: # %bb.0: # %entry +; CHECK-V4-NEXT: w1 = *(u32 *)(r1 + 0) +; CHECK-V4-NEXT: w2 = 1 +; CHECK-V4-NEXT: if w1 == 0 goto LBB4_2 +; CHECK-V4-NEXT: # %bb.1: # %next +; CHECK-V4-NEXT: w2 = 0 +; CHECK-V4-NEXT: LBB4_2: # %next +; CHECK-V4-NEXT: call sink4 +; CHECK-V4-NEXT: exit entry: %a = load i32, ptr %p, align 4 br label %next diff --git a/llvm/test/CodeGen/DirectX/ShaderFlags/lifetimes-noint64op.ll b/llvm/test/CodeGen/DirectX/ShaderFlags/lifetimes-noint64op.ll new file mode 100644 index 0000000000000..736c86ebb1299 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/ShaderFlags/lifetimes-noint64op.ll @@ -0,0 +1,36 @@ +; RUN: opt -S --passes="print-dx-shader-flags" 2>&1 %s | FileCheck %s +; RUN: llc %s --filetype=obj -o - | obj2yaml | FileCheck %s --check-prefix=DXC + +target triple = "dxil-pc-shadermodel6.7-library" + +; CHECK: ; Combined Shader Flags for Module +; CHECK-NEXT: ; Shader Flags Value: 0x00000000 +; CHECK-NEXT: ; +; CHECK-NOT: ; Note: shader requires additional functionality: +; CHECK-NOT: ; 64-Bit integer +; CHECK-NOT: ; Note: extra DXIL module flags: +; CHECK-NOT: ; +; CHECK-NEXT: ; Shader Flags for Module Functions +; CHECK-NEXT: ; Function lifetimes : 0x00000000 + +define void @lifetimes() #0 { + %a = alloca [4 x i32], align 8 + call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %a) + call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %a) + ret void +} + +; Function Attrs: nounwind memory(argmem: readwrite) +declare void 
@llvm.lifetime.start.p0(i64, ptr) #1 + +; Function Attrs: nounwind memory(argmem: readwrite) +declare void @llvm.lifetime.end.p0(i64, ptr) #1 + +attributes #0 = { convergent norecurse nounwind "hlsl.export"} +attributes #1 = { nounwind memory(argmem: readwrite) } + +; DXC: - Name: SFI0 +; DXC-NEXT: Size: 8 +; DXC-NOT: Flags: +; DXC-NOT: Int64Ops: true +; DXC: ... diff --git a/llvm/test/CodeGen/DirectX/legalize-lifetimes-valver-1.6.ll b/llvm/test/CodeGen/DirectX/legalize-lifetimes-valver-1.6.ll index 6552ccddddab4..f77df2d812dfe 100644 --- a/llvm/test/CodeGen/DirectX/legalize-lifetimes-valver-1.6.ll +++ b/llvm/test/CodeGen/DirectX/legalize-lifetimes-valver-1.6.ll @@ -1,5 +1,6 @@ ; RUN: opt -S -passes='dxil-op-lower' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s --check-prefixes=CHECK,CHECK-SM63 ; RUN: opt -S -passes='dxil-op-lower' -mtriple=dxil-pc-shadermodel6.6-library %s | FileCheck %s --check-prefixes=CHECK,CHECK-SM66 +; RUN: opt -S -dxil-op-lower -dxil-prepare -mtriple=dxil-pc-shadermodel6.6-library %s | FileCheck %s --check-prefixes=CHECK,CHECK-PREPARE ; CHECK-LABEL: define void @test_legal_lifetime() { ; @@ -15,6 +16,14 @@ ; CHECK-SM66-NEXT: store i32 0, ptr [[GEP]], align 4 ; CHECK-SM66-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[ACCUM_I_FLAT]]) ; +; CHECK-PREPARE-NEXT: [[ACCUM_I_FLAT:%.*]] = alloca [1 x i32], align 4 +; CHECK-PREPARE-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[ACCUM_I_FLAT]], i32 0 +; CHECK-PREPARE-NEXT: [[BITCAST:%.*]] = bitcast ptr [[ACCUM_I_FLAT]] to ptr +; CHECK-PREPARE-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[BITCAST]]) +; CHECK-PREPARE-NEXT: store i32 0, ptr [[GEP]], align 4 +; CHECK-PREPARE-NEXT: [[BITCAST:%.*]] = bitcast ptr [[ACCUM_I_FLAT]] to ptr +; CHECK-PREPARE-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[BITCAST]]) +; ; CHECK-NEXT: ret void ; define void @test_legal_lifetime() { @@ -26,6 +35,22 @@ define void @test_legal_lifetime() { ret void } +; CHECK-PREPARE-DAG: 
attributes [[LIFETIME_ATTRS:#.*]] = { nounwind } + +; CHECK-PREPARE-DAG: ; Function Attrs: nounwind +; CHECK-PREPARE-DAG: declare void @llvm.lifetime.start.p0(i64, ptr) [[LIFETIME_ATTRS]] + +; CHECK-PREPARE-DAG: ; Function Attrs: nounwind +; CHECK-PREPARE-DAG: declare void @llvm.lifetime.end.p0(i64, ptr) [[LIFETIME_ATTRS]] + +; Function Attrs: nounwind memory(argmem: readwrite) +declare void @llvm.lifetime.end.p0(i64, ptr) #0 + +; Function Attrs: nounwind memory(argmem: readwrite) +declare void @llvm.lifetime.start.p0(i64, ptr) #0 + +attributes #0 = { nounwind memory(argmem: readwrite) } + ; Set the validator version to 1.6 !dx.valver = !{!0} !0 = !{i32 1, i32 6} diff --git a/llvm/test/CodeGen/Hexagon/addsat.ll b/llvm/test/CodeGen/Hexagon/addsat.ll new file mode 100644 index 0000000000000..489c7d5a0fdff --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/addsat.ll @@ -0,0 +1,157 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +; Test for saturating add instructions. + +; CHECK-LABEL: test1 +; CHECK: v{{.*}}.ub = vadd(v{{[0-9]+}}.ub,v{{[0-9]+}}.ub):sat +define <128 x i8> @test1(<128 x i8>* %a0, <128 x i8>* %a1) #0 { +entry: + %wide.load = load <128 x i8>, <128 x i8>* %a0, align 1 + %wide.load62 = load <128 x i8>, <128 x i8>* %a1, align 1 + %add = call <128 x i8> @llvm.uadd.sat.v128i8(<128 x i8> %wide.load, <128 x i8> %wide.load62) + ret <128 x i8> %add +} + +; CHECK-LABEL: test2 +; CHECK: v{{.*}}.b = vadd(v{{[0-9]+}}.b,v{{[0-9]+}}.b):sat +define <128 x i8> @test2(<128 x i8>* %a0, <128 x i8>* %a1) #0 { +entry: + %wide.load = load <128 x i8>, <128 x i8>* %a0, align 1 + %wide.load62 = load <128 x i8>, <128 x i8>* %a1, align 1 + %add = call <128 x i8> @llvm.sadd.sat.v128i8(<128 x i8> %wide.load, <128 x i8> %wide.load62) + ret <128 x i8> %add +} + +; CHECK-LABEL: test3 +; CHECK: v{{.*}}.uh = vadd(v{{[0-9]+}}.uh,v{{[0-9]+}}.uh):sat +define <64 x i16> @test3(<64 x i16>* %a0, <64 x i16>* %a1) #0 { +entry: + %wide.load = load <64 x i16>, <64 x i16>* %a0, align 1 + %wide.load62 
= load <64 x i16>, <64 x i16>* %a1, align 1 + %add = call <64 x i16> @llvm.uadd.sat.v64i16(<64 x i16> %wide.load, <64 x i16> %wide.load62) + ret <64 x i16> %add +} + +; CHECK-LABEL: test4 +; CHECK: v{{.*}}.h = vadd(v{{[0-9]+}}.h,v{{[0-9]+}}.h):sat +define <64 x i16> @test4(<64 x i16>* %a0, <64 x i16>* %a1) #0 { +entry: + %wide.load = load <64 x i16>, <64 x i16>* %a0, align 1 + %wide.load62 = load <64 x i16>, <64 x i16>* %a1, align 1 + %add = call <64 x i16> @llvm.sadd.sat.v64i16(<64 x i16> %wide.load, <64 x i16> %wide.load62) + ret <64 x i16> %add +} + +; CHECK-LABEL: test5 +; CHECK: v{{.*}}.uw = vadd(v{{[0-9]+}}.uw,v{{[0-9]+}}.uw):sat +define <32 x i32> @test5(<32 x i32>* %a0, <32 x i32>* %a1) #0 { +entry: + %wide.load = load <32 x i32>, <32 x i32>* %a0, align 1 + %wide.load62 = load <32 x i32>, <32 x i32>* %a1, align 1 + %add = call <32 x i32> @llvm.uadd.sat.v32i32(<32 x i32> %wide.load, <32 x i32> %wide.load62) + ret <32 x i32> %add +} + +; CHECK-LABEL: test6 +; CHECK: v{{.*}}.w = vadd(v{{[0-9]+}}.w,v{{[0-9]+}}.w):sat +define <32 x i32> @test6(<32 x i32>* %a0, <32 x i32>* %a1) #0 { +entry: + %wide.load = load <32 x i32>, <32 x i32>* %a0, align 1 + %wide.load62 = load <32 x i32>, <32 x i32>* %a1, align 1 + %add = call <32 x i32> @llvm.sadd.sat.v32i32(<32 x i32> %wide.load, <32 x i32> %wide.load62) + ret <32 x i32> %add +} + +; CHECK-LABEL: test7 +; CHECK: v{{[0-9]+}}:{{[0-9]+}}.ub = vadd(v{{[0-9]+}}:{{[0-9]+}}.ub,v{{[0-9]+}}:{{[0-9]+}}.ub):sat +define <256 x i8> @test7(<256 x i8>* %a0, <256 x i8>* %a1) #0 { +entry: + %wide.load = load <256 x i8>, <256 x i8>* %a0, align 1 + %wide.load62 = load <256 x i8>, <256 x i8>* %a1, align 1 + %add = call <256 x i8> @llvm.uadd.sat.v256i8(<256 x i8> %wide.load, <256 x i8> %wide.load62) + ret <256 x i8> %add +} + +; CHECK-LABEL: test8 +; CHECK: v{{[0-9]+}}:{{[0-9]+}}.b = vadd(v{{[0-9]+}}:{{[0-9]+}}.b,v{{[0-9]+}}:{{[0-9]+}}.b):sat +define <256 x i8> @test8(<256 x i8>* %a0, <256 x i8>* %a1) #0 { +entry: + %wide.load = load <256 x 
i8>, <256 x i8>* %a0, align 1 + %wide.load62 = load <256 x i8>, <256 x i8>* %a1, align 1 + %add = call <256 x i8> @llvm.sadd.sat.v256i8(<256 x i8> %wide.load, <256 x i8> %wide.load62) + ret <256 x i8> %add +} + +; CHECK-LABEL: test9 +; CHECK: v{{[0-9]+}}:{{[0-9]+}}.uh = vadd(v{{[0-9]+}}:{{[0-9]+}}.uh,v{{[0-9]+}}:{{[0-9]+}}.uh):sat +define <128 x i16> @test9(<128 x i16>* %a0, <128 x i16>* %a1) #0 { +entry: + %wide.load = load <128 x i16>, <128 x i16>* %a0, align 1 + %wide.load62 = load <128 x i16>, <128 x i16>* %a1, align 1 + %add = call <128 x i16> @llvm.uadd.sat.v128i16(<128 x i16> %wide.load, <128 x i16> %wide.load62) + ret <128 x i16> %add +} + +; CHECK-LABEL: test10 +; CHECK: v{{[0-9]+}}:{{[0-9]+}}.h = vadd(v{{[0-9]+}}:{{[0-9]+}}.h,v{{[0-9]+}}:{{[0-9]+}}.h):sat +define <128 x i16> @test10(<128 x i16>* %a0, <128 x i16>* %a1) #0 { +entry: + %wide.load = load <128 x i16>, <128 x i16>* %a0, align 1 + %wide.load62 = load <128 x i16>, <128 x i16>* %a1, align 1 + %add = call <128 x i16> @llvm.sadd.sat.v128i16(<128 x i16> %wide.load, <128 x i16> %wide.load62) + ret <128 x i16> %add +} + +; CHECK-LABEL: test11 +; CHECK: v{{[0-9]+}}:{{[0-9]+}}.uw = vadd(v{{[0-9]+}}:{{[0-9]+}}.uw,v{{[0-9]+}}:{{[0-9]+}}.uw):sat +define <64 x i32> @test11(<64 x i32>* %a0, <64 x i32>* %a1) #0 { +entry: + %wide.load = load <64 x i32>, <64 x i32>* %a0, align 1 + %wide.load62 = load <64 x i32>, <64 x i32>* %a1, align 1 + %add = call <64 x i32> @llvm.uadd.sat.v64i32(<64 x i32> %wide.load, <64 x i32> %wide.load62) + ret <64 x i32> %add +} + +; CHECK-LABEL: test12 +; CHECK: v{{[0-9]+}}:{{[0-9]+}}.w = vadd(v{{[0-9]+}}:{{[0-9]+}}.w,v{{[0-9]+}}:{{[0-9]+}}.w):sat +define <64 x i32> @test12(<64 x i32>* %a0, <64 x i32>* %a1) #0 { +entry: + %wide.load = load <64 x i32>, <64 x i32>* %a0, align 1 + %wide.load62 = load <64 x i32>, <64 x i32>* %a1, align 1 + %add = call <64 x i32> @llvm.sadd.sat.v64i32(<64 x i32> %wide.load, <64 x i32> %wide.load62) + ret <64 x i32> %add +} + +; CHECK-LABEL: test13 +; CHECK: 
r{{[0-9]+}} = add(r{{[0-9]+}},r{{[0-9]+}}):sat +define i32 @test13(i32 %a0, i32 %a1) #0 { +entry: + %add = call i32 @llvm.sadd.sat.i32(i32 %a0, i32 %a1) + ret i32 %add +} + +; CHECK-LABEL: test14 +; CHECK: r{{[0-9]+}}:{{[0-9]+}} = add(r{{[0-9]+}}:{{[0-9]+}},r{{[0-9]+}}:{{[0-9]+}}):sat +define i64 @test14(i64 %a0, i64 %a1) #0 { +entry: + %add = call i64 @llvm.sadd.sat.i64(i64 %a0, i64 %a1) + ret i64 %add +} + +declare <128 x i8> @llvm.uadd.sat.v128i8(<128 x i8>, <128 x i8>) #1 +declare <128 x i8> @llvm.sadd.sat.v128i8(<128 x i8>, <128 x i8>) #1 +declare <64 x i16> @llvm.uadd.sat.v64i16(<64 x i16>, <64 x i16>) #1 +declare <64 x i16> @llvm.sadd.sat.v64i16(<64 x i16>, <64 x i16>) #1 +declare <32 x i32> @llvm.uadd.sat.v32i32(<32 x i32>, <32 x i32>) #1 +declare <32 x i32> @llvm.sadd.sat.v32i32(<32 x i32>, <32 x i32>) #1 +declare <256 x i8> @llvm.uadd.sat.v256i8(<256 x i8>, <256 x i8>) #1 +declare <256 x i8> @llvm.sadd.sat.v256i8(<256 x i8>, <256 x i8>) #1 +declare <128 x i16> @llvm.uadd.sat.v128i16(<128 x i16>, <128 x i16>) #1 +declare <128 x i16> @llvm.sadd.sat.v128i16(<128 x i16>, <128 x i16>) #1 +declare <64 x i32> @llvm.uadd.sat.v64i32(<64 x i32>, <64 x i32>) #1 +declare <64 x i32> @llvm.sadd.sat.v64i32(<64 x i32>, <64 x i32>) #1 +declare i32 @llvm.sadd.sat.i32(i32, i32) +declare i64 @llvm.sadd.sat.i64(i64, i64) + +attributes #0 = { nounwind "target-cpu"="hexagonv73" "target-features"="+hvxv73,+hvx-length128b" } +attributes #1 = { nounwind readnone speculatable willreturn } diff --git a/llvm/test/CodeGen/Hexagon/bitcast-i32-to-v32i1.ll b/llvm/test/CodeGen/Hexagon/bitcast-i32-to-v32i1.ll new file mode 100644 index 0000000000000..741589d3cde74 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/bitcast-i32-to-v32i1.ll @@ -0,0 +1,20 @@ +; RUN: llc --mtriple=hexagon -mattr=+hvxv79,+hvx-length128b < %s | FileCheck %s + +; CHECK: [[VREG1:v([0-9]+)]] = vsplat(r{{[0-9]*}}) +; CHECK: [[VREG2:v([0-9]+)]] = vand([[VREG1]],v{{[0-9]+}}) +; CHECK: q[[QREG:[0-9]+]] = 
vand([[VREG2]],r{{[0-9]+}}) + +define void @bitcast_i32_to_v32i1_full(ptr %in, ptr %out) { +entry: + %load = load i32, ptr %in, align 4 + %bitcast = bitcast i32 %load to <32 x i1> + %e0 = extractelement <32 x i1> %bitcast, i32 0 + %e1 = extractelement <32 x i1> %bitcast, i32 1 + %z0 = zext i1 %e0 to i8 + %z1 = zext i1 %e1 to i8 + %ptr0 = getelementptr i8, ptr %out, i32 0 + %ptr1 = getelementptr i8, ptr %out, i32 1 + store i8 %z0, ptr %ptr0, align 1 + store i8 %z1, ptr %ptr1, align 1 + ret void +} diff --git a/llvm/test/CodeGen/Hexagon/bitcast-v2i16-to-v32i1.ll b/llvm/test/CodeGen/Hexagon/bitcast-v2i16-to-v32i1.ll new file mode 100644 index 0000000000000..45068e8e080b8 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/bitcast-v2i16-to-v32i1.ll @@ -0,0 +1,16 @@ +; RUN: llc --mtriple=hexagon -mattr=+hvxv79,+hvx-length128b < %s | FileCheck %s + +; CHECK: [[REG0:r[0-9]+]] = memw(r{{[0-9]+}}+#0) +; CHECK: [[VREG1:v([0-9]+)]] = vsplat([[REG0]]) +; CHECK: [[VREG2:v([0-9]+)]] = vand([[VREG1]],v{{[0-9]+}}) +; CHECK: q[[QREG:[0-9]+]] = vand([[VREG2]],r{{[0-9]+}}) + +define void @bitcast_v2i16_to_v32i1(ptr %in, ptr %out) { +entry: + %load = load <2 x i16>, ptr %in, align 4 + %bitcast = bitcast <2 x i16> %load to <32 x i1> + %extract = extractelement <32 x i1> %bitcast, i32 0 + %zext = zext i1 %extract to i8 + store i8 %zext, ptr %out, align 1 + ret void +} diff --git a/llvm/test/CodeGen/Hexagon/bitcast-v4i8-to-v32i1.ll b/llvm/test/CodeGen/Hexagon/bitcast-v4i8-to-v32i1.ll new file mode 100644 index 0000000000000..15219332856c5 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/bitcast-v4i8-to-v32i1.ll @@ -0,0 +1,16 @@ +; RUN: llc --mtriple=hexagon -mattr=+hvxv79,+hvx-length128b < %s | FileCheck %s + +; CHECK: [[REG0:r[0-9]+]] = memw(r{{[0-9]+}}+#0) +; CHECK: [[VREG1:v([0-9]+)]] = vsplat([[REG0]]) +; CHECK: [[VREG2:v([0-9]+)]] = vand([[VREG1]],v{{[0-9]+}}) +; CHECK: q[[QREG:[0-9]+]] = vand([[VREG2]],r{{[0-9]+}}) + +define void @bitcast_v4i8_to_v32i1(ptr %in, ptr %out) { +entry: + %load = 
load <4 x i8>, ptr %in, align 4 + %bitcast = bitcast <4 x i8> %load to <32 x i1> + %extract = extractelement <32 x i1> %bitcast, i32 0 + %zext = zext i1 %extract to i8 + store i8 %zext, ptr %out, align 1 + ret void +} diff --git a/llvm/test/CodeGen/Hexagon/isel/trunc-vNi1.ll b/llvm/test/CodeGen/Hexagon/isel/trunc-vNi1.ll index 1090b64fcad52..c91f16d91d1be 100644 --- a/llvm/test/CodeGen/Hexagon/isel/trunc-vNi1.ll +++ b/llvm/test/CodeGen/Hexagon/isel/trunc-vNi1.ll @@ -1,24 +1,11 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc -mtriple=hexagon < %s | FileCheck %s +; RUN: llc --mtriple=hexagon -mattr=+hvxv79,+hvx-length128b < %s | FileCheck %s define void @f0(<2 x i32> %a0, ptr %a1) { ; CHECK-LABEL: f0: -; CHECK: .cfi_startproc -; CHECK-NEXT: // %bb.0: // %b0 -; CHECK-NEXT: { -; CHECK-NEXT: r5:4 = combine(#1,#1) -; CHECK-NEXT: } -; CHECK-NEXT: { -; CHECK-NEXT: r1:0 = and(r1:0,r5:4) -; CHECK-NEXT: } -; CHECK-NEXT: { -; CHECK-NEXT: p0 = vcmpw.eq(r1:0,#1) -; CHECK-NEXT: } -; CHECK-NEXT: { -; CHECK-NEXT: r0 = p0 -; CHECK-NEXT: jumpr r31 -; CHECK-NEXT: memb(r2+#0) = r0.new -; CHECK-NEXT: } +; CHECK: r[[REG1H:([0-9]+)]]:[[REG1L:([0-9]+)]] = combine(#1,#1) +; CHECK: r[[REG2H:([0-9]+)]]:[[REG2L:([0-9]+)]] = and(r[[REG2H]]:[[REG2L]],r[[REG1H]]:[[REG1L]]) +; CHECK: p{{[0-9]+}} = vcmpw.eq(r[[REG2H]]:[[REG2L]],#1) b0: %v0 = trunc <2 x i32> %a0 to <2 x i1> store <2 x i1> %v0, ptr %a1, align 1 @@ -27,20 +14,9 @@ b0: define void @f1(<4 x i16> %a0, ptr %a1) { ; CHECK-LABEL: f1: -; CHECK: .cfi_startproc -; CHECK-NEXT: // %bb.0: // %b0 -; CHECK-NEXT: { -; CHECK-NEXT: r0 = and(r0,##65537) -; CHECK-NEXT: r1 = and(r1,##65537) -; CHECK-NEXT: } -; CHECK-NEXT: { -; CHECK-NEXT: p0 = vcmph.eq(r1:0,#1) -; CHECK-NEXT: } -; CHECK-NEXT: { -; CHECK-NEXT: r0 = p0 -; CHECK-NEXT: jumpr r31 -; CHECK-NEXT: memb(r2+#0) = r0.new -; CHECK-NEXT: } +; CHECK: [[REG0:r([0-9]+)]] = and([[REG0]],##65537) +; CHECK: [[REG1:r([0-9]+)]] = 
and([[REG1]],##65537) +; CHECK: p{{[0-9]+}} = vcmph.eq(r{{[0-9]+}}:{{[0-9]+}},#1) b0: %v0 = trunc <4 x i16> %a0 to <4 x i1> store <4 x i1> %v0, ptr %a1, align 1 @@ -49,22 +25,35 @@ b0: define void @f2(<8 x i8> %a0, ptr %a1) { ; CHECK-LABEL: f2: -; CHECK: .cfi_startproc -; CHECK-NEXT: // %bb.0: // %b0 -; CHECK-NEXT: { -; CHECK-NEXT: r0 = and(r0,##16843009) -; CHECK-NEXT: r1 = and(r1,##16843009) -; CHECK-NEXT: } -; CHECK-NEXT: { -; CHECK-NEXT: p0 = vcmpb.eq(r1:0,#1) -; CHECK-NEXT: } -; CHECK-NEXT: { -; CHECK-NEXT: r0 = p0 -; CHECK-NEXT: jumpr r31 -; CHECK-NEXT: memb(r2+#0) = r0.new -; CHECK-NEXT: } +; CHECK: [[REG0:r([0-9]+)]] = and([[REG0]],##16843009) +; CHECK: [[REG1:r([0-9]+)]] = and([[REG1]],##16843009) +; CHECK: p{{[0-9]+}} = vcmpb.eq(r{{[0-9]+}}:{{[0-9]+}},#1) b0: %v0 = trunc <8 x i8> %a0 to <8 x i1> store <8 x i1> %v0, ptr %a1, align 1 ret void } + +define void @f3(<4 x i8> %a0, ptr %a1) { +; CHECK-LABEL: f3: +; CHECK: r[[REGH:([0-9]+)]]:[[REGL:([0-9]+)]] = vzxtbh(r{{[0-9]+}}) +; CHECK: r[[REGL]] = and(r[[REGL]],##65537) +; CHECK: r[[REGH]] = and(r[[REGH]],##65537) +; CHECK: p{{[0-9]+}} = vcmph.eq(r[[REGH]]:[[REGL]],#1) +b0: + %v0 = trunc <4 x i8> %a0 to <4 x i1> + store <4 x i1> %v0, ptr %a1, align 1 + ret void +} + +define void @f4(<2 x i16> %a0, ptr %a1) { +; CHECK-LABEL: f4: +; CHECK: r[[REGH:([0-9]+)]]:[[REGL:([0-9]+)]] = vzxthw(r{{[0-9]+}}) +; CHECK: r[[REG1H:([0-9]+)]]:[[REG1L:([0-9]+)]] = combine(#1,#1) +; CHECK: r[[REGH]]:[[REGL]] = and(r[[REGH]]:[[REGL]],r[[REG1H]]:[[REG1L]]) +; CHECK: p{{[0-9]+}} = vcmpw.eq(r[[REGH]]:[[REGL]],#1) +b0: + %v0 = trunc <2 x i16> %a0 to <2 x i1> + store <2 x i1> %v0, ptr %a1, align 1 + ret void +} diff --git a/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep1.mir b/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep1.mir index 17ee07f49324a..7182e0a112560 100644 --- a/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep1.mir +++ b/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep1.mir @@ -14,16 +14,14 @@ # ``` # 
# Loop-carried dependencies exist from store for a[i+1] to load/store for a[i], but not vice versa. -# FIXME: Currently the following dependencies are missed. -# -# Loop carried edges from SU(6) -# Order -# SU(4) -# Loop carried edges from SU(8) -# Order -# SU(4) # CHECK: ===== Loop Carried Edges Begin ===== +# CHECK-NEXT: Loop carried edges from SU(6) +# CHECK-NEXT: Order +# CHECK-NEXT: SU(4) +# CHECK-NEXT: Loop carried edges from SU(8) +# CHECK-NEXT: Order +# CHECK-NEXT: SU(4) # CHECK-NEXT: ===== Loop Carried Edges End ===== --- | diff --git a/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep2.mir b/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep2.mir index 850e602c9146f..56485e04ad35c 100644 --- a/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep2.mir +++ b/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep2.mir @@ -14,16 +14,14 @@ # ``` # # Loop-carried dependencies exist from load/store for a[i] to store for a[i-1], but not vice versa. -# FIXME: Currently the following dependencies are missed. -# -# Loop carried edges from SU(5) -# Order -# SU(7) # CHECK: ===== Loop Carried Edges Begin ===== # CHECK-NEXT: Loop carried edges from SU(3) # CHECK-NEXT: Order # CHECK-NEXT: SU(7) +# CHECK-NEXT: Loop carried edges from SU(5) +# CHECK-NEXT: Order +# CHECK-NEXT: SU(7) # CHECK-NEXT: ===== Loop Carried Edges End ===== --- | diff --git a/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep3.mir b/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep3.mir index ca59b97dd11e9..69f56fa7934f2 100644 --- a/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep3.mir +++ b/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep3.mir @@ -14,13 +14,11 @@ # ``` # # Loop-carried dependencies exist from load for a[i+1] to store for a[i]. -# FIXME: Currently the following dependencies are missed. 
-# -# Loop carried edges from SU(7) -# Order -# SU(5) # CHECK: ===== Loop Carried Edges Begin ===== +# CHECK-NEXT: Loop carried edges from SU(7) +# CHECK-NEXT: Order +# CHECK-NEXT: SU(5) # CHECK-NEXT: ===== Loop Carried Edges End ===== --- | diff --git a/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep4.mir b/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep4.mir index 4bc4b48735947..cc4e9e1d67c5c 100644 --- a/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep4.mir +++ b/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep4.mir @@ -20,15 +20,15 @@ # # FIXME: Currently the following dependencies are missed. # -# Loop carried edges from SU(4) -# Order -# SU(3) # CHECK: ===== Loop Carried Edges Begin ===== # CHECK-NEXT: Loop carried edges from SU(2) # CHECK-NEXT: Order # CHECK-NEXT: SU(3) # CHECK-NEXT: SU(4) +# CHECK-NEXT: Loop carried edges from SU(4) +# CHECK-NEXT: Order +# CHECK-NEXT: SU(3) # CHECK-NEXT: ===== Loop Carried Edges End ===== --- | diff --git a/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep5.mir b/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep5.mir index 77c3d569db181..3c2e0c40680c8 100644 --- a/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep5.mir +++ b/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep5.mir @@ -23,20 +23,18 @@ # Note that if there is already a dependency between two instructions, we don't # add loop-carried on between them since non-loop-carried one imposes stronger # constraint than loop-carried one. -# -# FIXME: Currently the following dependencies are missed. 
-# Loop carried edges from SU(5) -# Order -# SU(2) -# Loop carried edges from SU(6) -# Order -# SU(5) -# Loop carried edges from SU(8) -# Order -# SU(3) -# SU(5) # CHECK: ===== Loop Carried Edges Begin ===== +# CHECK-NEXT: Loop carried edges from SU(5) +# CHECK-NEXT: Order +# CHECK-NEXT: SU(2) +# CHECK-NEXT: Loop carried edges from SU(6) +# CHECK-NEXT: Order +# CHECK-NEXT: SU(5) +# CHECK-NEXT: Loop carried edges from SU(8) +# CHECK-NEXT: Order +# CHECK-NEXT: SU(3) +# CHECK-NEXT: SU(5) # CHECK-NEXT: ===== Loop Carried Edges End ===== --- | diff --git a/llvm/test/CodeGen/LoongArch/lasx/concat-vectors.ll b/llvm/test/CodeGen/LoongArch/lasx/concat-vectors.ll new file mode 100644 index 0000000000000..231e82a6d53ac --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/concat-vectors.ll @@ -0,0 +1,218 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +define <32 x i8> @concat_poison_v32i8_1(<16 x i8> %a) { +; CHECK-LABEL: concat_poison_v32i8_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <16 x i8> %a, <16 x i8> poison, + <32 x i32> + ret <32 x i8> %1 +} + +define <32 x i8> @concat_poison_v32i8_2(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: concat_poison_v32i8_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vori.b $vr0, $vr1, 0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <16 x i8> %b, <16 x i8> poison, + <32 x i32> + ret <32 x i8> %1 +} + +define <32 x i8> @concat_vectors_v32i8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: concat_vectors_v32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <16 x i8> %a, <16 x i8> %b, + <32 x i32> + ret <32 x i8> %1 +} + +define <16 x i16> 
@concat_poison_v16i16_1(<8 x i16> %a) { +; CHECK-LABEL: concat_poison_v16i16_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <8 x i16> %a, <8 x i16> poison, + <16 x i32> + ret <16 x i16> %1 +} + +define <16 x i16> @concat_poison_v16i16_2(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: concat_poison_v16i16_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vori.b $vr0, $vr1, 0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <8 x i16> %b, <8 x i16> poison, + <16 x i32> + ret <16 x i16> %1 +} + +define <16 x i16> @concat_vectors_v16i16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: concat_vectors_v16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <8 x i16> %a, <8 x i16> %b, + <16 x i32> + ret <16 x i16> %1 +} + +define <8 x i32> @concat_poison_v8i32_1(<4 x i32> %a) { +; CHECK-LABEL: concat_poison_v8i32_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <4 x i32> %a, <4 x i32> poison, + <8 x i32> + ret <8 x i32> %1 +} + +define <8 x i32> @concat_poison_v8i32_2(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: concat_poison_v8i32_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vori.b $vr0, $vr1, 0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <4 x i32> %b, <4 x i32> poison, + <8 x i32> + ret <8 x i32> %1 +} + +define <8 x i32> @concat_vectors_v8i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: concat_vectors_v8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <4 x i32> %a, <4 x i32> %b, + <8 x i32> + ret <8 x i32> %1 +} + +define <8 x float> @concat_poison_v8f32_1(<4 x 
float> %a) { +; CHECK-LABEL: concat_poison_v8f32_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <4 x float> %a, <4 x float> poison, + <8 x i32> + ret <8 x float> %1 +} + +define <8 x float> @concat_poison_v8f32_2(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: concat_poison_v8f32_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vori.b $vr0, $vr1, 0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <4 x float> %b, <4 x float> poison, + <8 x i32> + ret <8 x float> %1 +} + +define <8 x float> @concat_vectors_v8f32(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: concat_vectors_v8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <4 x float> %a, <4 x float> %b, + <8 x i32> + ret <8 x float> %1 +} + +define <4 x i64> @concat_poison_v8i64_1(<2 x i64> %a) { +; CHECK-LABEL: concat_poison_v8i64_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <2 x i64> %a, <2 x i64> poison, <4 x i32> + ret <4 x i64> %1 +} + +define <4 x i64> @concat_poison_v8i64_2(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: concat_poison_v8i64_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vori.b $vr0, $vr1, 0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <2 x i64> %b, <2 x i64> poison, <4 x i32> + ret <4 x i64> %1 +} + +define <4 x i64> @concat_vectors_v8i64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: concat_vectors_v8i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <2 x i64> %a, <2 x i64> %b, <4 x i32> + ret <4 x i64> %1 +} + +define <4 x double> @concat_poison_v8f64_1(<2 x double> %a) { +; 
CHECK-LABEL: concat_poison_v8f64_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <2 x double> %a, <2 x double> poison, <4 x i32> + ret <4 x double> %1 +} + +define <4 x double> @concat_poison_v8f64_2(<2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: concat_poison_v8f64_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vori.b $vr0, $vr1, 0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <2 x double> %b, <2 x double> poison, <4 x i32> + ret <4 x double> %1 +} + +define <4 x double> @concat_vectors_v8f64(<2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: concat_vectors_v8f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <2 x double> %a, <2 x double> %b, <4 x i32> + ret <4 x double> %1 +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/insert-extract-subvector.ll b/llvm/test/CodeGen/LoongArch/lasx/insert-extract-subvector.ll new file mode 100644 index 0000000000000..7a90afca376db --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/insert-extract-subvector.ll @@ -0,0 +1,668 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32>, <4 x i32>, i64) + +define <8 x i32> @insert_lo128_v8i32_1(<4 x i32> %a) { +; CHECK-LABEL: insert_lo128_v8i32_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 +; CHECK-NEXT: ret +entry: + %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> poison, <4 x i32> %a, i64 0) + ret <8 x i32> %1 +} + +define <8 x i32> @insert_hi128_v8i32_1(<4 x i32> %a) { +; CHECK-LABEL: insert_hi128_v8i32_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr0 
killed $vr0 def $xr0 +; CHECK-NEXT: xvpermi.q $xr0, $xr0, 2 +; CHECK-NEXT: ret +entry: + %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> poison, <4 x i32> %a, i64 4) + ret <8 x i32> %1 +} + +define <8 x i32> @insert_lo128_v8i32_2(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: insert_lo128_v8i32_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vori.b $vr0, $vr1, 0 +; CHECK-NEXT: ret +entry: + %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> poison, <4 x i32> %b, i64 0) + ret <8 x i32> %1 +} + +define <8 x i32> @insert_hi128_v8i32_2(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: insert_hi128_v8i32_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ret +entry: + %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> poison, <4 x i32> %b, i64 4) + ret <8 x i32> %1 +} + +define <8 x i32> @insert_lo128_v8i32_3(<8 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: insert_lo128_v8i32_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 48 +; CHECK-NEXT: ret +entry: + %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> %a, <4 x i32> %b, i64 0) + ret <8 x i32> %1 +} + +define <8 x i32> @insert_hi128_v8i32_3(<8 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: insert_hi128_v8i32_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ret +entry: + %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> %a, <4 x i32> %b, i64 4) + ret <8 x i32> %1 +} + +declare <8 x float> @llvm.experimental.vector.insert.v8f32.v4f32(<8 x float>, <4 x float>, i64) + +define <8 x float> @insert_lo128_v8f32_1(<4 x float> %a) { +; CHECK-LABEL: insert_lo128_v8f32_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 +; CHECK-NEXT: ret +entry: 
+ %1 = call <8 x float> @llvm.experimental.vector.insert.v8f32.v4f32(<8 x float> poison, <4 x float> %a, i64 0) + ret <8 x float> %1 +} + +define <8 x float> @insert_hi128_v8f32_1(<4 x float> %a) { +; CHECK-LABEL: insert_hi128_v8f32_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 +; CHECK-NEXT: xvpermi.q $xr0, $xr0, 2 +; CHECK-NEXT: ret +entry: + %1 = call <8 x float> @llvm.experimental.vector.insert.v8f32.v4f32(<8 x float> poison, <4 x float> %a, i64 4) + ret <8 x float> %1 +} + +define <8 x float> @insert_lo128_v8f32_2(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: insert_lo128_v8f32_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vori.b $vr0, $vr1, 0 +; CHECK-NEXT: ret +entry: + %1 = call <8 x float> @llvm.experimental.vector.insert.v8f32.v4f32(<8 x float> poison, <4 x float> %b, i64 0) + ret <8 x float> %1 +} + +define <8 x float> @insert_hi128_v8f32_2(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: insert_hi128_v8f32_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ret +entry: + %1 = call <8 x float> @llvm.experimental.vector.insert.v8f32.v4f32(<8 x float> poison, <4 x float> %b, i64 4) + ret <8 x float> %1 +} + +define <8 x float> @insert_lo128_v8f32_3(<8 x float> %a, <4 x float> %b) { +; CHECK-LABEL: insert_lo128_v8f32_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 48 +; CHECK-NEXT: ret +entry: + %1 = call <8 x float> @llvm.experimental.vector.insert.v8f32.v4f32(<8 x float> %a, <4 x float> %b, i64 0) + ret <8 x float> %1 +} + +define <8 x float> @insert_hi128_v8f32_3(<8 x float> %a, <4 x float> %b) { +; CHECK-LABEL: insert_hi128_v8f32_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ret +entry: + %1 = call <8 x float> @llvm.experimental.vector.insert.v8f32.v4f32(<8 x 
float> %a, <4 x float> %b, i64 4) + ret <8 x float> %1 +} + +declare <4 x i64> @llvm.experimental.vector.insert.v4i64.v2i64(<4 x i64>, <2 x i64>, i64) + +define <4 x i64> @insert_lo128_v4i64_1(<2 x i64> %a) { +; CHECK-LABEL: insert_lo128_v4i64_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 +; CHECK-NEXT: ret +entry: + %1 = call <4 x i64> @llvm.experimental.vector.insert.v4i64.v2i64(<4 x i64> poison, <2 x i64> %a, i64 0) + ret <4 x i64> %1 +} + +define <4 x i64> @insert_hi128_v4i64_1(<2 x i64> %a) { +; CHECK-LABEL: insert_hi128_v4i64_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 +; CHECK-NEXT: xvpermi.q $xr0, $xr0, 2 +; CHECK-NEXT: ret +entry: + %1 = call <4 x i64> @llvm.experimental.vector.insert.v4i64.v2i64(<4 x i64> poison, <2 x i64> %a, i64 2) + ret <4 x i64> %1 +} + +define <4 x i64> @insert_lo128_v4i64_2(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: insert_lo128_v4i64_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vori.b $vr0, $vr1, 0 +; CHECK-NEXT: ret +entry: + %1 = call <4 x i64> @llvm.experimental.vector.insert.v4i64.v2i64(<4 x i64> poison, <2 x i64> %b, i64 0) + ret <4 x i64> %1 +} + +define <4 x i64> @insert_hi128_v4i64_2(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: insert_hi128_v4i64_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ret +entry: + %1 = call <4 x i64> @llvm.experimental.vector.insert.v4i64.v2i64(<4 x i64> poison, <2 x i64> %b, i64 2) + ret <4 x i64> %1 +} + +define <4 x i64> @insert_lo128_v4i64_3(<4 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: insert_lo128_v4i64_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 48 +; CHECK-NEXT: ret +entry: + %1 = call <4 x i64> @llvm.experimental.vector.insert.v4i64.v2i64(<4 x i64> %a, <2 x i64> %b, i64 0) + ret <4 x i64> %1 +} + +define <4 x i64> @insert_hi128_v4i64_3(<4 x 
i64> %a, <2 x i64> %b) { +; CHECK-LABEL: insert_hi128_v4i64_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ret +entry: + %1 = call <4 x i64> @llvm.experimental.vector.insert.v4i64.v2i64(<4 x i64> %a, <2 x i64> %b, i64 2) + ret <4 x i64> %1 +} + +declare <4 x double> @llvm.experimental.vector.insert.v4f64.v2f64(<4 x double>, <2 x double>, i64) + +define <4 x double> @insert_lo128_v4f64_1(<2 x double> %a) { +; CHECK-LABEL: insert_lo128_v4f64_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 +; CHECK-NEXT: ret +entry: + %1 = call <4 x double> @llvm.experimental.vector.insert.v4f64.v2f64(<4 x double> poison, <2 x double> %a, i64 0) + ret <4 x double> %1 +} + +define <4 x double> @insert_hi128_v4f64_1(<2 x double> %a) { +; CHECK-LABEL: insert_hi128_v4f64_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 +; CHECK-NEXT: xvpermi.q $xr0, $xr0, 2 +; CHECK-NEXT: ret +entry: + %1 = call <4 x double> @llvm.experimental.vector.insert.v4f64.v2f64(<4 x double> poison, <2 x double> %a, i64 2) + ret <4 x double> %1 +} + +define <4 x double> @insert_lo128_v4f64_2(<2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: insert_lo128_v4f64_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vori.b $vr0, $vr1, 0 +; CHECK-NEXT: ret +entry: + %1 = call <4 x double> @llvm.experimental.vector.insert.v4f64.v2f64(<4 x double> poison, <2 x double> %b, i64 0) + ret <4 x double> %1 +} + +define <4 x double> @insert_hi128_v4f64_2(<2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: insert_hi128_v4f64_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ret +entry: + %1 = call <4 x double> @llvm.experimental.vector.insert.v4f64.v2f64(<4 x double> poison, <2 x double> %b, i64 2) + ret <4 x double> %1 +} + +define <4 x double> @insert_lo128_v4f64_3(<4 x double> %a, <2 x 
double> %b) { +; CHECK-LABEL: insert_lo128_v4f64_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 48 +; CHECK-NEXT: ret +entry: + %1 = call <4 x double> @llvm.experimental.vector.insert.v4f64.v2f64(<4 x double> %a, <2 x double> %b, i64 0) + ret <4 x double> %1 +} + +define <4 x double> @insert_hi128_v4f64_3(<4 x double> %a, <2 x double> %b) { +; CHECK-LABEL: insert_hi128_v4f64_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ret +entry: + %1 = call <4 x double> @llvm.experimental.vector.insert.v4f64.v2f64(<4 x double> %a, <2 x double> %b, i64 2) + ret <4 x double> %1 +} + +declare <16 x i16> @llvm.experimental.vector.insert.v16i16.v8i16(<16 x i16>, <8 x i16>, i64) + +define <16 x i16> @insert_lo128_v16i16_1(<8 x i16> %a) { +; CHECK-LABEL: insert_lo128_v16i16_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 +; CHECK-NEXT: ret +entry: + %1 = call <16 x i16> @llvm.experimental.vector.insert.v16i16.v8i16(<16 x i16> poison, <8 x i16> %a, i64 0) + ret <16 x i16> %1 +} + +define <16 x i16> @insert_hi128_v16i16_1(<8 x i16> %a) { +; CHECK-LABEL: insert_hi128_v16i16_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 +; CHECK-NEXT: xvpermi.q $xr0, $xr0, 2 +; CHECK-NEXT: ret +entry: + %1 = call <16 x i16> @llvm.experimental.vector.insert.v16i16.v8i16(<16 x i16> poison, <8 x i16> %a, i64 8) + ret <16 x i16> %1 +} + +define <16 x i16> @insert_lo128_v16i16_2(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: insert_lo128_v16i16_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vori.b $vr0, $vr1, 0 +; CHECK-NEXT: ret +entry: + %1 = call <16 x i16> @llvm.experimental.vector.insert.v16i16.v8i16(<16 x i16> poison, <8 x i16> %b, i64 0) + ret <16 x i16> %1 +} + +define <16 x i16> @insert_hi128_v16i16_2(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: insert_hi128_v16i16_2: 
+; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ret +entry: + %1 = call <16 x i16> @llvm.experimental.vector.insert.v16i16.v8i16(<16 x i16> poison, <8 x i16> %b, i64 8) + ret <16 x i16> %1 +} + +define <16 x i16> @insert_lo128_v16i16_3(<16 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: insert_lo128_v16i16_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 48 +; CHECK-NEXT: ret +entry: + %1 = call <16 x i16> @llvm.experimental.vector.insert.v16i16.v8i16(<16 x i16> %a, <8 x i16> %b, i64 0) + ret <16 x i16> %1 +} + +define <16 x i16> @insert_hi128_v16i16_3(<16 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: insert_hi128_v16i16_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ret +entry: + %1 = call <16 x i16> @llvm.experimental.vector.insert.v16i16.v8i16(<16 x i16> %a, <8 x i16> %b, i64 8) + ret <16 x i16> %1 +} + +declare <32 x i8> @llvm.experimental.vector.insert.v32i8.v16i8(<32 x i8>, <16 x i8>, i64) + +define <32 x i8> @insert_lo128_v32i8_1(<16 x i8> %a) { +; CHECK-LABEL: insert_lo128_v32i8_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 +; CHECK-NEXT: ret +entry: + %1 = call <32 x i8> @llvm.experimental.vector.insert.v32i8.v16i8(<32 x i8> poison, <16 x i8> %a, i64 0) + ret <32 x i8> %1 +} + +define <32 x i8> @insert_hi128_v32i8_1(<16 x i8> %a) { +; CHECK-LABEL: insert_hi128_v32i8_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 +; CHECK-NEXT: xvpermi.q $xr0, $xr0, 2 +; CHECK-NEXT: ret +entry: + %1 = call <32 x i8> @llvm.experimental.vector.insert.v32i8.v16i8(<32 x i8> poison, <16 x i8> %a, i64 16) + ret <32 x i8> %1 +} + +define <32 x i8> @insert_lo128_v32i8_2(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: insert_lo128_v32i8_2: +; CHECK: # %bb.0: # %entry +; 
CHECK-NEXT: vori.b $vr0, $vr1, 0 +; CHECK-NEXT: ret +entry: + %1 = call <32 x i8> @llvm.experimental.vector.insert.v32i8.v16i8(<32 x i8> poison, <16 x i8> %b, i64 0) + ret <32 x i8> %1 +} + +define <32 x i8> @insert_hi128_v32i8_2(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: insert_hi128_v32i8_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ret +entry: + %1 = call <32 x i8> @llvm.experimental.vector.insert.v32i8.v16i8(<32 x i8> poison, <16 x i8> %b, i64 16) + ret <32 x i8> %1 +} + +define <32 x i8> @insert_lo128_v32i8_3(<32 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: insert_lo128_v32i8_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 48 +; CHECK-NEXT: ret +entry: + %1 = call <32 x i8> @llvm.experimental.vector.insert.v32i8.v16i8(<32 x i8> %a, <16 x i8> %b, i64 0) + ret <32 x i8> %1 +} + +define <32 x i8> @insert_hi128_v32i8_3(<32 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: insert_hi128_v32i8_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ret +entry: + %1 = call <32 x i8> @llvm.experimental.vector.insert.v32i8.v16i8(<32 x i8> %a, <16 x i8> %b, i64 16) + ret <32 x i8> %1 +} + +define <4 x i32> @extract_lo128_v8i32_1(<8 x i32> %a) { +; CHECK-LABEL: extract_lo128_v8i32_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <8 x i32> %a, <8 x i32> poison, <4 x i32> + ret <4 x i32> %1 +} + +define <4 x i32> @extract_hi128_v8i32_1(<8 x i32> %a) { +; CHECK-LABEL: extract_hi128_v8i32_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpermi.q $xr0, $xr0, 1 +; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <8 x i32> %a, <8 x i32> poison, <4 x i32> + ret <4 x i32> %1 +} + +define <4 x i32> 
@extract_lo128_v8i32_2(<8 x i32> %a, <8 x i32> %b) { +; CHECK-LABEL: extract_lo128_v8i32_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvori.b $xr0, $xr1, 0 +; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <8 x i32> %b, <8 x i32> poison, <4 x i32> + ret <4 x i32> %1 +} + +define <4 x i32> @extract_hi128_v8i32_2(<8 x i32> %a, <8 x i32> %b) { +; CHECK-LABEL: extract_hi128_v8i32_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 1 +; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <8 x i32> %b, <8 x i32> poison, <4 x i32> + ret <4 x i32> %1 +} + +define <4 x float> @extract_lo128_v8f32_1(<8 x float> %a) { +; CHECK-LABEL: extract_lo128_v8f32_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> + ret <4 x float> %1 +} + +define <4 x float> @extract_hi128_v8f32_1(<8 x float> %a) { +; CHECK-LABEL: extract_hi128_v8f32_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpermi.q $xr0, $xr0, 1 +; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> + ret <4 x float> %1 +} + +define <4 x float> @extract_lo128_v8f32_2(<8 x float> %a, <8 x float> %b) { +; CHECK-LABEL: extract_lo128_v8f32_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvori.b $xr0, $xr1, 0 +; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <8 x float> %b, <8 x float> poison, <4 x i32> + ret <4 x float> %1 +} + +define <4 x float> @extract_hi128_v8f32_2(<8 x float> %a, <8 x float> %b) { +; CHECK-LABEL: extract_hi128_v8f32_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 1 +; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <8 x float> %b, <8 x float> 
poison, <4 x i32> + ret <4 x float> %1 +} + +define <2 x i64> @extract_lo128_v4i64_1(<4 x i64> %a) { +; CHECK-LABEL: extract_lo128_v4i64_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <4 x i64> %a, <4 x i64> poison, <2 x i32> + ret <2 x i64> %1 +} + +define <2 x i64> @extract_hi128_v4i64_1(<4 x i64> %a) { +; CHECK-LABEL: extract_hi128_v4i64_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpermi.q $xr0, $xr0, 1 +; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <4 x i64> %a, <4 x i64> poison, <2 x i32> + ret <2 x i64> %1 +} + +define <2 x i64> @extract_lo128_v4i64_2(<4 x i64> %a, <4 x i64> %b) { +; CHECK-LABEL: extract_lo128_v4i64_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvori.b $xr0, $xr1, 0 +; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <4 x i64> %b, <4 x i64> poison, <2 x i32> + ret <2 x i64> %1 +} + +define <2 x i64> @extract_hi128_v4i64_2(<4 x i64> %a, <4 x i64> %b) { +; CHECK-LABEL: extract_hi128_v4i64_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 1 +; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <4 x i64> %b, <4 x i64> poison, <2 x i32> + ret <2 x i64> %1 +} + +define <2 x double> @extract_lo128_v4f64_a(<4 x double> %a) { +; CHECK-LABEL: extract_lo128_v4f64_a: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> + ret <2 x double> %1 +} + +define <2 x double> @extract_hi128_v4f64_1(<4 x double> %a) { +; CHECK-LABEL: extract_hi128_v4f64_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpermi.q $xr0, $xr0, 1 +; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> + ret 
<2 x double> %1 +} + +define <2 x double> @extract_lo128_v4f64_2(<4 x double> %a, <4 x double> %b) { +; CHECK-LABEL: extract_lo128_v4f64_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvori.b $xr0, $xr1, 0 +; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <4 x double> %b, <4 x double> poison, <2 x i32> + ret <2 x double> %1 +} + +define <2 x double> @extract_hi128_v4f64_2(<4 x double> %a, <4 x double> %b) { +; CHECK-LABEL: extract_hi128_v4f64_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 1 +; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <4 x double> %b, <4 x double> poison, <2 x i32> + ret <2 x double> %1 +} + +define <8 x i16> @extract_lo128_v16i16_1(<16 x i16> %a) { +; CHECK-LABEL: extract_lo128_v16i16_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <16 x i16> %a, <16 x i16> poison, + <8 x i32> + ret <8 x i16> %1 +} + +define <8 x i16> @extract_hi128_v16i16_1(<16 x i16> %a) { +; CHECK-LABEL: extract_hi128_v16i16_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpermi.q $xr0, $xr0, 1 +; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <16 x i16> %a, <16 x i16> poison, + <8 x i32> + ret <8 x i16> %1 +} + +define <8 x i16> @extract_lo128_v16i16_2(<16 x i16> %a, <16 x i16> %b) { +; CHECK-LABEL: extract_lo128_v16i16_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvori.b $xr0, $xr1, 0 +; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <16 x i16> %b, <16 x i16> poison, + <8 x i32> + ret <8 x i16> %1 +} + +define <8 x i16> @extract_hi128_v16i16_2(<16 x i16> %a, <16 x i16> %b) { +; CHECK-LABEL: extract_hi128_v16i16_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 1 +; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 +; CHECK-NEXT: ret 
+entry: + %1 = shufflevector <16 x i16> %b, <16 x i16> poison, + <8 x i32> + ret <8 x i16> %1 +} + +define <16 x i8> @extract_lo128_v32i8_1(<32 x i8> %a) { +; CHECK-LABEL: extract_lo128_v32i8_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <32 x i8> %a, <32 x i8> poison, + <16 x i32> + ret <16 x i8> %1 +} + +define <16 x i8> @extract_hi128_v32i8_1(<32 x i8> %a) { +; CHECK-LABEL: extract_hi128_v32i8_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpermi.q $xr0, $xr0, 1 +; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <32 x i8> %a, <32 x i8> poison, + <16 x i32> + ret <16 x i8> %1 +} + +define <16 x i8> @extract_lo128_v32i8_2(<32 x i8> %a, <32 x i8> %b) { +; CHECK-LABEL: extract_lo128_v32i8_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvori.b $xr0, $xr1, 0 +; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <32 x i8> %b, <32 x i8> poison, + <16 x i32> + ret <16 x i8> %1 +} + +define <16 x i8> @extract_hi128_v32i8_2(<32 x i8> %a, <32 x i8> %b) { +; CHECK-LABEL: extract_hi128_v32i8_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 1 +; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 +; CHECK-NEXT: ret +entry: + %1 = shufflevector <32 x i8> %b, <32 x i8> poison, + <16 x i32> + ret <16 x i8> %1 +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/issue107355.ll b/llvm/test/CodeGen/LoongArch/lasx/issue107355.ll index 818bd4311615d..506b5c1232f25 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/issue107355.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/issue107355.ll @@ -18,10 +18,10 @@ define void @foo() { ; CHECK-NEXT: ld.d $a3, $a3, %got_pc_lo12(g_813) ; CHECK-NEXT: st.w $zero, $a1, 0 ; CHECK-NEXT: st.w $a2, $a3, 0 +; CHECK-NEXT: xvrepli.b $xr0, 0 +; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: vrepli.b $vr0, 0 ; CHECK-NEXT: vst $vr0, $a0, 32 -; CHECK-NEXT: xvpermi.q 
$xr0, $xr0, 2 -; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: st.w $zero, $a0, 20 ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/NVPTX/aggregate-return.ll b/llvm/test/CodeGen/NVPTX/aggregate-return.ll index 1c8f019922e37..7f52e5293d964 100644 --- a/llvm/test/CodeGen/NVPTX/aggregate-return.ll +++ b/llvm/test/CodeGen/NVPTX/aggregate-return.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_35 | FileCheck %s ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_35 | %ptxas-verify %} @@ -7,57 +8,105 @@ declare [2 x float] @bara([2 x float] %input) declare {float, float} @bars({float, float} %input) define void @test_v2f32(<2 x float> %input, ptr %output) { -; CHECK-LABEL: @test_v2f32 +; CHECK-LABEL: test_v2f32( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<5>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_v2f32_param_0]; +; CHECK-NEXT: { // callseq 0, 0 +; CHECK-NEXT: .param .align 8 .b8 param0[8]; +; CHECK-NEXT: st.param.b64 [param0], %rd1; +; CHECK-NEXT: .param .align 8 .b8 retval0[8]; +; CHECK-NEXT: call.uni (retval0), barv, (param0); +; CHECK-NEXT: ld.param.b64 %rd2, [retval0]; +; CHECK-NEXT: } // callseq 0 +; CHECK-NEXT: ld.param.b64 %rd4, [test_v2f32_param_1]; +; CHECK-NEXT: st.b64 [%rd4], %rd2; +; CHECK-NEXT: ret; %call = tail call <2 x float> @barv(<2 x float> %input) -; CHECK: .param .align 8 .b8 retval0[8]; -; CHECK: ld.param.v2.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [retval0]; store <2 x float> %call, ptr %output, align 8 -; CHECK: st.v2.b32 [{{%rd[0-9]+}}], {[[E0]], [[E1]]} ret void } define void @test_v3f32(<3 x float> %input, ptr %output) { -; CHECK-LABEL: @test_v3f32 -; +; CHECK-LABEL: test_v3f32( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<10>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_v3f32_param_0]; +; CHECK-NEXT: ld.param.b32 
%r3, [test_v3f32_param_0+8]; +; CHECK-NEXT: { // callseq 1, 0 +; CHECK-NEXT: .param .align 16 .b8 param0[16]; +; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, %r2}; +; CHECK-NEXT: st.param.b32 [param0+8], %r3; +; CHECK-NEXT: .param .align 16 .b8 retval0[16]; +; CHECK-NEXT: call.uni (retval0), barv3, (param0); +; CHECK-NEXT: ld.param.v2.b32 {%r4, %r5}, [retval0]; +; CHECK-NEXT: ld.param.b32 %r6, [retval0+8]; +; CHECK-NEXT: } // callseq 1 +; CHECK-NEXT: ld.param.b64 %rd1, [test_v3f32_param_1]; +; CHECK-NEXT: st.v2.b32 [%rd1], {%r4, %r5}; +; CHECK-NEXT: st.b32 [%rd1+8], %r6; +; CHECK-NEXT: ret; %call = tail call <3 x float> @barv3(<3 x float> %input) -; CHECK: .param .align 16 .b8 retval0[16]; -; CHECK-DAG: ld.param.v2.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [retval0]; -; CHECK-DAG: ld.param.b32 [[E2:%r[0-9]+]], [retval0+8]; ; Make sure we don't load more values than than we need to. -; CHECK-NOT: ld.param.b32 [[E3:%r[0-9]+]], [retval0+12]; store <3 x float> %call, ptr %output, align 8 -; CHECK-DAG: st.b32 [{{%rd[0-9]}}+8], -; -- This is suboptimal. We should do st.v2.f32 instead -; of combining 2xf32 info i64. 
-; CHECK-DAG: st.b64 [{{%rd[0-9]}}], -; CHECK: ret; ret void } define void @test_a2f32([2 x float] %input, ptr %output) { -; CHECK-LABEL: @test_a2f32 +; CHECK-LABEL: test_a2f32( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<7>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [test_a2f32_param_0]; +; CHECK-NEXT: ld.param.b32 %r2, [test_a2f32_param_0+4]; +; CHECK-NEXT: { // callseq 2, 0 +; CHECK-NEXT: .param .align 4 .b8 param0[8]; +; CHECK-NEXT: st.param.b32 [param0], %r1; +; CHECK-NEXT: st.param.b32 [param0+4], %r2; +; CHECK-NEXT: .param .align 4 .b8 retval0[8]; +; CHECK-NEXT: call.uni (retval0), bara, (param0); +; CHECK-NEXT: ld.param.b32 %r3, [retval0]; +; CHECK-NEXT: ld.param.b32 %r4, [retval0+4]; +; CHECK-NEXT: } // callseq 2 +; CHECK-NEXT: ld.param.b64 %rd1, [test_a2f32_param_1]; +; CHECK-NEXT: st.b32 [%rd1+4], %r4; +; CHECK-NEXT: st.b32 [%rd1], %r3; +; CHECK-NEXT: ret; %call = tail call [2 x float] @bara([2 x float] %input) -; CHECK: .param .align 4 .b8 retval0[8]; -; CHECK-DAG: ld.param.b32 [[ELEMA1:%r[0-9]+]], [retval0]; -; CHECK-DAG: ld.param.b32 [[ELEMA2:%r[0-9]+]], [retval0+4]; store [2 x float] %call, ptr %output, align 4 -; CHECK: } -; CHECK-DAG: st.b32 [{{%rd[0-9]+}}], [[ELEMA1]] -; CHECK-DAG: st.b32 [{{%rd[0-9]+}}+4], [[ELEMA2]] ret void -; CHECK: ret } define void @test_s2f32({float, float} %input, ptr %output) { -; CHECK-LABEL: @test_s2f32 +; CHECK-LABEL: test_s2f32( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<7>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [test_s2f32_param_0]; +; CHECK-NEXT: ld.param.b32 %r2, [test_s2f32_param_0+4]; +; CHECK-NEXT: { // callseq 3, 0 +; CHECK-NEXT: .param .align 4 .b8 param0[8]; +; CHECK-NEXT: st.param.b32 [param0], %r1; +; CHECK-NEXT: st.param.b32 [param0+4], %r2; +; CHECK-NEXT: .param .align 4 .b8 retval0[8]; +; CHECK-NEXT: call.uni (retval0), bars, (param0); +; CHECK-NEXT: ld.param.b32 %r3, [retval0]; 
+; CHECK-NEXT: ld.param.b32 %r4, [retval0+4]; +; CHECK-NEXT: } // callseq 3 +; CHECK-NEXT: ld.param.b64 %rd1, [test_s2f32_param_1]; +; CHECK-NEXT: st.b32 [%rd1+4], %r4; +; CHECK-NEXT: st.b32 [%rd1], %r3; +; CHECK-NEXT: ret; %call = tail call {float, float} @bars({float, float} %input) -; CHECK: .param .align 4 .b8 retval0[8]; -; CHECK-DAG: ld.param.b32 [[ELEMS1:%r[0-9]+]], [retval0]; -; CHECK-DAG: ld.param.b32 [[ELEMS2:%r[0-9]+]], [retval0+4]; store {float, float} %call, ptr %output, align 4 -; CHECK: } -; CHECK-DAG: st.b32 [{{%rd[0-9]+}}], [[ELEMS1]] -; CHECK-DAG: st.b32 [{{%rd[0-9]+}}+4], [[ELEMS2]] ret void -; CHECK: ret } diff --git a/llvm/test/CodeGen/NVPTX/bf16-instructions.ll b/llvm/test/CodeGen/NVPTX/bf16-instructions.ll index a386e4292777b..aee58a044a986 100644 --- a/llvm/test/CodeGen/NVPTX/bf16-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/bf16-instructions.ll @@ -688,25 +688,25 @@ define <8 x float> @test_extload_bf16x8(ptr addrspace(3) noundef %arg) #0 { ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd1, [test_extload_bf16x8_param_0]; ; SM70-NEXT: ld.shared.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1]; -; SM70-NEXT: mov.b32 {%rs1, %rs2}, %r3; -; SM70-NEXT: mov.b32 {%rs3, %rs4}, %r4; -; SM70-NEXT: mov.b32 {%rs5, %rs6}, %r1; -; SM70-NEXT: mov.b32 {%rs7, %rs8}, %r2; -; SM70-NEXT: cvt.u32.u16 %r5, %rs8; +; SM70-NEXT: mov.b32 {%rs1, %rs2}, %r2; +; SM70-NEXT: cvt.u32.u16 %r5, %rs2; ; SM70-NEXT: shl.b32 %r6, %r5, 16; -; SM70-NEXT: cvt.u32.u16 %r7, %rs7; +; SM70-NEXT: cvt.u32.u16 %r7, %rs1; ; SM70-NEXT: shl.b32 %r8, %r7, 16; -; SM70-NEXT: cvt.u32.u16 %r9, %rs6; +; SM70-NEXT: mov.b32 {%rs3, %rs4}, %r1; +; SM70-NEXT: cvt.u32.u16 %r9, %rs4; ; SM70-NEXT: shl.b32 %r10, %r9, 16; -; SM70-NEXT: cvt.u32.u16 %r11, %rs5; +; SM70-NEXT: cvt.u32.u16 %r11, %rs3; ; SM70-NEXT: shl.b32 %r12, %r11, 16; -; SM70-NEXT: cvt.u32.u16 %r13, %rs4; +; SM70-NEXT: mov.b32 {%rs5, %rs6}, %r4; +; SM70-NEXT: cvt.u32.u16 %r13, %rs6; ; SM70-NEXT: shl.b32 %r14, %r13, 16; -; SM70-NEXT: cvt.u32.u16 %r15, 
%rs3; +; SM70-NEXT: cvt.u32.u16 %r15, %rs5; ; SM70-NEXT: shl.b32 %r16, %r15, 16; -; SM70-NEXT: cvt.u32.u16 %r17, %rs2; +; SM70-NEXT: mov.b32 {%rs7, %rs8}, %r3; +; SM70-NEXT: cvt.u32.u16 %r17, %rs8; ; SM70-NEXT: shl.b32 %r18, %r17, 16; -; SM70-NEXT: cvt.u32.u16 %r19, %rs1; +; SM70-NEXT: cvt.u32.u16 %r19, %rs7; ; SM70-NEXT: shl.b32 %r20, %r19, 16; ; SM70-NEXT: st.param.v4.b32 [func_retval0+16], {%r20, %r18, %r16, %r14}; ; SM70-NEXT: st.param.v4.b32 [func_retval0], {%r12, %r10, %r8, %r6}; @@ -721,18 +721,18 @@ define <8 x float> @test_extload_bf16x8(ptr addrspace(3) noundef %arg) #0 { ; SM80-NEXT: // %bb.0: ; SM80-NEXT: ld.param.b64 %rd1, [test_extload_bf16x8_param_0]; ; SM80-NEXT: ld.shared.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1]; -; SM80-NEXT: mov.b32 {%rs1, %rs2}, %r3; -; SM80-NEXT: mov.b32 {%rs3, %rs4}, %r4; -; SM80-NEXT: mov.b32 {%rs5, %rs6}, %r1; -; SM80-NEXT: mov.b32 {%rs7, %rs8}, %r2; -; SM80-NEXT: cvt.f32.bf16 %r5, %rs8; -; SM80-NEXT: cvt.f32.bf16 %r6, %rs7; -; SM80-NEXT: cvt.f32.bf16 %r7, %rs6; -; SM80-NEXT: cvt.f32.bf16 %r8, %rs5; -; SM80-NEXT: cvt.f32.bf16 %r9, %rs4; -; SM80-NEXT: cvt.f32.bf16 %r10, %rs3; -; SM80-NEXT: cvt.f32.bf16 %r11, %rs2; -; SM80-NEXT: cvt.f32.bf16 %r12, %rs1; +; SM80-NEXT: mov.b32 {%rs1, %rs2}, %r2; +; SM80-NEXT: cvt.f32.bf16 %r5, %rs2; +; SM80-NEXT: cvt.f32.bf16 %r6, %rs1; +; SM80-NEXT: mov.b32 {%rs3, %rs4}, %r1; +; SM80-NEXT: cvt.f32.bf16 %r7, %rs4; +; SM80-NEXT: cvt.f32.bf16 %r8, %rs3; +; SM80-NEXT: mov.b32 {%rs5, %rs6}, %r4; +; SM80-NEXT: cvt.f32.bf16 %r9, %rs6; +; SM80-NEXT: cvt.f32.bf16 %r10, %rs5; +; SM80-NEXT: mov.b32 {%rs7, %rs8}, %r3; +; SM80-NEXT: cvt.f32.bf16 %r11, %rs8; +; SM80-NEXT: cvt.f32.bf16 %r12, %rs7; ; SM80-NEXT: st.param.v4.b32 [func_retval0+16], {%r12, %r11, %r10, %r9}; ; SM80-NEXT: st.param.v4.b32 [func_retval0], {%r8, %r7, %r6, %r5}; ; SM80-NEXT: ret; @@ -746,18 +746,18 @@ define <8 x float> @test_extload_bf16x8(ptr addrspace(3) noundef %arg) #0 { ; SM80-FTZ-NEXT: // %bb.0: ; SM80-FTZ-NEXT: ld.param.b64 %rd1, 
[test_extload_bf16x8_param_0]; ; SM80-FTZ-NEXT: ld.shared.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1]; -; SM80-FTZ-NEXT: mov.b32 {%rs1, %rs2}, %r3; -; SM80-FTZ-NEXT: mov.b32 {%rs3, %rs4}, %r4; -; SM80-FTZ-NEXT: mov.b32 {%rs5, %rs6}, %r1; -; SM80-FTZ-NEXT: mov.b32 {%rs7, %rs8}, %r2; -; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %r5, %rs8; -; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %r6, %rs7; -; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %r7, %rs6; -; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %r8, %rs5; -; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %r9, %rs4; -; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %r10, %rs3; -; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %r11, %rs2; -; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %r12, %rs1; +; SM80-FTZ-NEXT: mov.b32 {%rs1, %rs2}, %r2; +; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %r5, %rs2; +; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %r6, %rs1; +; SM80-FTZ-NEXT: mov.b32 {%rs3, %rs4}, %r1; +; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %r7, %rs4; +; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %r8, %rs3; +; SM80-FTZ-NEXT: mov.b32 {%rs5, %rs6}, %r4; +; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %r9, %rs6; +; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %r10, %rs5; +; SM80-FTZ-NEXT: mov.b32 {%rs7, %rs8}, %r3; +; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %r11, %rs8; +; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %r12, %rs7; ; SM80-FTZ-NEXT: st.param.v4.b32 [func_retval0+16], {%r12, %r11, %r10, %r9}; ; SM80-FTZ-NEXT: st.param.v4.b32 [func_retval0], {%r8, %r7, %r6, %r5}; ; SM80-FTZ-NEXT: ret; @@ -771,18 +771,18 @@ define <8 x float> @test_extload_bf16x8(ptr addrspace(3) noundef %arg) #0 { ; SM90-NEXT: // %bb.0: ; SM90-NEXT: ld.param.b64 %rd1, [test_extload_bf16x8_param_0]; ; SM90-NEXT: ld.shared.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1]; -; SM90-NEXT: mov.b32 {%rs1, %rs2}, %r3; -; SM90-NEXT: mov.b32 {%rs3, %rs4}, %r4; -; SM90-NEXT: mov.b32 {%rs5, %rs6}, %r1; -; SM90-NEXT: mov.b32 {%rs7, %rs8}, %r2; -; SM90-NEXT: cvt.f32.bf16 %r5, %rs8; -; SM90-NEXT: cvt.f32.bf16 %r6, %rs7; -; SM90-NEXT: cvt.f32.bf16 %r7, %rs6; -; SM90-NEXT: cvt.f32.bf16 %r8, %rs5; -; SM90-NEXT: cvt.f32.bf16 %r9, %rs4; -; SM90-NEXT: cvt.f32.bf16 %r10, 
%rs3; -; SM90-NEXT: cvt.f32.bf16 %r11, %rs2; -; SM90-NEXT: cvt.f32.bf16 %r12, %rs1; +; SM90-NEXT: mov.b32 {%rs1, %rs2}, %r2; +; SM90-NEXT: cvt.f32.bf16 %r5, %rs2; +; SM90-NEXT: cvt.f32.bf16 %r6, %rs1; +; SM90-NEXT: mov.b32 {%rs3, %rs4}, %r1; +; SM90-NEXT: cvt.f32.bf16 %r7, %rs4; +; SM90-NEXT: cvt.f32.bf16 %r8, %rs3; +; SM90-NEXT: mov.b32 {%rs5, %rs6}, %r4; +; SM90-NEXT: cvt.f32.bf16 %r9, %rs6; +; SM90-NEXT: cvt.f32.bf16 %r10, %rs5; +; SM90-NEXT: mov.b32 {%rs7, %rs8}, %r3; +; SM90-NEXT: cvt.f32.bf16 %r11, %rs8; +; SM90-NEXT: cvt.f32.bf16 %r12, %rs7; ; SM90-NEXT: st.param.v4.b32 [func_retval0+16], {%r12, %r11, %r10, %r9}; ; SM90-NEXT: st.param.v4.b32 [func_retval0], {%r8, %r7, %r6, %r5}; ; SM90-NEXT: ret; diff --git a/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll index ba5813c869236..e2a914d8cfc36 100644 --- a/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll @@ -359,12 +359,11 @@ define <2 x bfloat> @test_select_cc_bf16_f32(<2 x bfloat> %a, <2 x bfloat> %b, define <2 x bfloat> @test_fptrunc_2xfloat(<2 x float> %a) #0 { ; CHECK-LABEL: test_fptrunc_2xfloat( ; CHECK: { -; CHECK-NEXT: .reg .b32 %r<4>; +; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fptrunc_2xfloat_param_0]; -; CHECK-NEXT: cvt.rn.bf16x2.f32 %r3, %r2, %r1; -; CHECK-NEXT: st.param.b32 [func_retval0], %r3; +; CHECK-NEXT: ld.param.b64 %rd1, [test_fptrunc_2xfloat_param_0]; +; CHECK-NEXT: st.param.b32 [func_retval0], %rd1; ; CHECK-NEXT: ret; %r = fptrunc <2 x float> %a to <2 x bfloat> ret <2 x bfloat> %r diff --git a/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-prefetch.ll b/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-prefetch.ll index 09dbe91d07513..cf166f83fb241 100644 --- a/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-prefetch.ll +++ b/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-prefetch.ll @@ -24,8 +24,8 @@ define void 
@cp_async_bulk_tensor_prefetch_tile_1d(ptr %tmap, i32 %d0, i64 %ch) ; CHECK-PTX-NEXT: // %bb.0: ; CHECK-PTX-NEXT: ld.param.b64 %rd1, [cp_async_bulk_tensor_prefetch_tile_1d_param_0]; ; CHECK-PTX-NEXT: ld.param.b32 %r1, [cp_async_bulk_tensor_prefetch_tile_1d_param_1]; -; CHECK-PTX-NEXT: cp.async.bulk.prefetch.tensor.1d.L2.global.tile [%rd1, {%r1}]; ; CHECK-PTX-NEXT: ld.param.b64 %rd2, [cp_async_bulk_tensor_prefetch_tile_1d_param_2]; +; CHECK-PTX-NEXT: cp.async.bulk.prefetch.tensor.1d.L2.global.tile [%rd1, {%r1}]; ; CHECK-PTX-NEXT: cp.async.bulk.prefetch.tensor.1d.L2.global.tile.L2::cache_hint [%rd1, {%r1}], %rd2; ; CHECK-PTX-NEXT: ret; tail call void @llvm.nvvm.cp.async.bulk.tensor.prefetch.tile.1d(ptr %tmap, i32 %d0, i64 %ch, i1 0) @@ -44,8 +44,8 @@ define void @cp_async_bulk_tensor_prefetch_tile_2d(i32 %flag, ptr %tmap, i32 %d0 ; CHECK-PTX-NEXT: ld.param.b64 %rd1, [cp_async_bulk_tensor_prefetch_tile_2d_param_1]; ; CHECK-PTX-NEXT: ld.param.b32 %r1, [cp_async_bulk_tensor_prefetch_tile_2d_param_2]; ; CHECK-PTX-NEXT: ld.param.b32 %r2, [cp_async_bulk_tensor_prefetch_tile_2d_param_3]; -; CHECK-PTX-NEXT: cp.async.bulk.prefetch.tensor.2d.L2.global.tile [%rd1, {%r1, %r2}]; ; CHECK-PTX-NEXT: ld.param.b64 %rd2, [cp_async_bulk_tensor_prefetch_tile_2d_param_4]; +; CHECK-PTX-NEXT: cp.async.bulk.prefetch.tensor.2d.L2.global.tile [%rd1, {%r1, %r2}]; ; CHECK-PTX-NEXT: cp.async.bulk.prefetch.tensor.2d.L2.global.tile.L2::cache_hint [%rd1, {%r1, %r2}], %rd2; ; CHECK-PTX-NEXT: ret; tail call void @llvm.nvvm.cp.async.bulk.tensor.prefetch.tile.2d(ptr %tmap, i32 %d0, i32 %d1, i64 %ch, i1 0) @@ -66,8 +66,8 @@ define void @cp_async_bulk_tensor_prefetch_3d(i32 %flag, ptr %tmap, i32 %d0, i32 ; CHECK-PTX-NEXT: ld.param.b32 %r1, [cp_async_bulk_tensor_prefetch_3d_param_2]; ; CHECK-PTX-NEXT: ld.param.b32 %r2, [cp_async_bulk_tensor_prefetch_3d_param_3]; ; CHECK-PTX-NEXT: ld.param.b32 %r3, [cp_async_bulk_tensor_prefetch_3d_param_4]; -; CHECK-PTX-NEXT: cp.async.bulk.prefetch.tensor.3d.L2.global.tile 
[%rd1, {%r1, %r2, %r3}]; ; CHECK-PTX-NEXT: ld.param.b64 %rd2, [cp_async_bulk_tensor_prefetch_3d_param_6]; +; CHECK-PTX-NEXT: cp.async.bulk.prefetch.tensor.3d.L2.global.tile [%rd1, {%r1, %r2, %r3}]; ; CHECK-PTX-NEXT: cp.async.bulk.prefetch.tensor.3d.L2.global.tile.L2::cache_hint [%rd1, {%r1, %r2, %r3}], %rd2; ; CHECK-PTX-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_prefetch_3d_param_5]; ; CHECK-PTX-NEXT: cp.async.bulk.prefetch.tensor.3d.L2.global.im2col [%rd1, {%r1, %r2, %r3}], {%rs1}; @@ -95,8 +95,8 @@ define void @cp_async_bulk_tensor_prefetch_4d(i32 %flag, ptr %tmap, i32 %d0, i32 ; CHECK-PTX-NEXT: ld.param.b32 %r2, [cp_async_bulk_tensor_prefetch_4d_param_3]; ; CHECK-PTX-NEXT: ld.param.b32 %r3, [cp_async_bulk_tensor_prefetch_4d_param_4]; ; CHECK-PTX-NEXT: ld.param.b32 %r4, [cp_async_bulk_tensor_prefetch_4d_param_5]; -; CHECK-PTX-NEXT: cp.async.bulk.prefetch.tensor.4d.L2.global.tile [%rd1, {%r1, %r2, %r3, %r4}]; ; CHECK-PTX-NEXT: ld.param.b64 %rd2, [cp_async_bulk_tensor_prefetch_4d_param_8]; +; CHECK-PTX-NEXT: cp.async.bulk.prefetch.tensor.4d.L2.global.tile [%rd1, {%r1, %r2, %r3, %r4}]; ; CHECK-PTX-NEXT: cp.async.bulk.prefetch.tensor.4d.L2.global.tile.L2::cache_hint [%rd1, {%r1, %r2, %r3, %r4}], %rd2; ; CHECK-PTX-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_prefetch_4d_param_6]; ; CHECK-PTX-NEXT: ld.param.b16 %rs2, [cp_async_bulk_tensor_prefetch_4d_param_7]; @@ -126,8 +126,8 @@ define void @cp_async_bulk_tensor_prefetch_5d(i32 %flag, ptr %tmap, i32 %d0, i32 ; CHECK-PTX-NEXT: ld.param.b32 %r3, [cp_async_bulk_tensor_prefetch_5d_param_4]; ; CHECK-PTX-NEXT: ld.param.b32 %r4, [cp_async_bulk_tensor_prefetch_5d_param_5]; ; CHECK-PTX-NEXT: ld.param.b32 %r5, [cp_async_bulk_tensor_prefetch_5d_param_6]; -; CHECK-PTX-NEXT: cp.async.bulk.prefetch.tensor.5d.L2.global.tile [%rd1, {%r1, %r2, %r3, %r4, %r5}]; ; CHECK-PTX-NEXT: ld.param.b64 %rd2, [cp_async_bulk_tensor_prefetch_5d_param_10]; +; CHECK-PTX-NEXT: cp.async.bulk.prefetch.tensor.5d.L2.global.tile [%rd1, {%r1, %r2, %r3, 
%r4, %r5}]; ; CHECK-PTX-NEXT: cp.async.bulk.prefetch.tensor.5d.L2.global.tile.L2::cache_hint [%rd1, {%r1, %r2, %r3, %r4, %r5}], %rd2; ; CHECK-PTX-NEXT: ld.param.b16 %rs1, [cp_async_bulk_tensor_prefetch_5d_param_7]; ; CHECK-PTX-NEXT: ld.param.b16 %rs2, [cp_async_bulk_tensor_prefetch_5d_param_8]; diff --git a/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-s2g.ll b/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-s2g.ll index 5998883f77ac1..3b5bd161896bc 100644 --- a/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-s2g.ll +++ b/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-s2g.ll @@ -27,8 +27,8 @@ define void @cp_async_bulk_tensor_s2g_tile_1d(ptr addrspace(3) %src, ptr %tmap, ; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [cp_async_bulk_tensor_s2g_tile_1d_param_0]; ; CHECK-PTX64-NEXT: ld.param.b64 %rd2, [cp_async_bulk_tensor_s2g_tile_1d_param_1]; ; CHECK-PTX64-NEXT: ld.param.b32 %r1, [cp_async_bulk_tensor_s2g_tile_1d_param_2]; -; CHECK-PTX64-NEXT: cp.async.bulk.tensor.1d.global.shared::cta.tile.bulk_group [%rd2, {%r1}], [%rd1]; ; CHECK-PTX64-NEXT: ld.param.b64 %rd3, [cp_async_bulk_tensor_s2g_tile_1d_param_3]; +; CHECK-PTX64-NEXT: cp.async.bulk.tensor.1d.global.shared::cta.tile.bulk_group [%rd2, {%r1}], [%rd1]; ; CHECK-PTX64-NEXT: cp.async.bulk.tensor.1d.global.shared::cta.tile.bulk_group.L2::cache_hint [%rd2, {%r1}], [%rd1], %rd3; ; CHECK-PTX64-NEXT: ret; ; @@ -41,8 +41,8 @@ define void @cp_async_bulk_tensor_s2g_tile_1d(ptr addrspace(3) %src, ptr %tmap, ; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r1, [cp_async_bulk_tensor_s2g_tile_1d_param_0]; ; CHECK-PTX-SHARED32-NEXT: ld.param.b64 %rd1, [cp_async_bulk_tensor_s2g_tile_1d_param_1]; ; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r2, [cp_async_bulk_tensor_s2g_tile_1d_param_2]; -; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.1d.global.shared::cta.tile.bulk_group [%rd1, {%r2}], [%r1]; ; CHECK-PTX-SHARED32-NEXT: ld.param.b64 %rd2, [cp_async_bulk_tensor_s2g_tile_1d_param_3]; +; CHECK-PTX-SHARED32-NEXT: 
cp.async.bulk.tensor.1d.global.shared::cta.tile.bulk_group [%rd1, {%r2}], [%r1]; ; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.1d.global.shared::cta.tile.bulk_group.L2::cache_hint [%rd1, {%r2}], [%r1], %rd2; ; CHECK-PTX-SHARED32-NEXT: ret; tail call void @llvm.nvvm.cp.async.bulk.tensor.s2g.tile.1d(ptr addrspace(3) %src, ptr %tmap, i32 %d0, i64 %ch, i1 0) @@ -62,8 +62,8 @@ define void @cp_async_bulk_tensor_s2g_tile_2d(i32 %flag, ptr addrspace(3) %src, ; CHECK-PTX64-NEXT: ld.param.b64 %rd2, [cp_async_bulk_tensor_s2g_tile_2d_param_2]; ; CHECK-PTX64-NEXT: ld.param.b32 %r1, [cp_async_bulk_tensor_s2g_tile_2d_param_3]; ; CHECK-PTX64-NEXT: ld.param.b32 %r2, [cp_async_bulk_tensor_s2g_tile_2d_param_4]; -; CHECK-PTX64-NEXT: cp.async.bulk.tensor.2d.global.shared::cta.tile.bulk_group [%rd2, {%r1, %r2}], [%rd1]; ; CHECK-PTX64-NEXT: ld.param.b64 %rd3, [cp_async_bulk_tensor_s2g_tile_2d_param_5]; +; CHECK-PTX64-NEXT: cp.async.bulk.tensor.2d.global.shared::cta.tile.bulk_group [%rd2, {%r1, %r2}], [%rd1]; ; CHECK-PTX64-NEXT: cp.async.bulk.tensor.2d.global.shared::cta.tile.bulk_group.L2::cache_hint [%rd2, {%r1, %r2}], [%rd1], %rd3; ; CHECK-PTX64-NEXT: ret; ; @@ -77,8 +77,8 @@ define void @cp_async_bulk_tensor_s2g_tile_2d(i32 %flag, ptr addrspace(3) %src, ; CHECK-PTX-SHARED32-NEXT: ld.param.b64 %rd1, [cp_async_bulk_tensor_s2g_tile_2d_param_2]; ; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r2, [cp_async_bulk_tensor_s2g_tile_2d_param_3]; ; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r3, [cp_async_bulk_tensor_s2g_tile_2d_param_4]; -; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.2d.global.shared::cta.tile.bulk_group [%rd1, {%r2, %r3}], [%r1]; ; CHECK-PTX-SHARED32-NEXT: ld.param.b64 %rd2, [cp_async_bulk_tensor_s2g_tile_2d_param_5]; +; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.2d.global.shared::cta.tile.bulk_group [%rd1, {%r2, %r3}], [%r1]; ; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.2d.global.shared::cta.tile.bulk_group.L2::cache_hint [%rd1, {%r2, %r3}], [%r1], %rd2; ; 
CHECK-PTX-SHARED32-NEXT: ret; tail call void @llvm.nvvm.cp.async.bulk.tensor.s2g.tile.2d(ptr addrspace(3) %src, ptr %tmap, i32 %d0, i32 %d1, i64 %ch, i1 0) @@ -99,8 +99,8 @@ define void @cp_async_bulk_tensor_s2g_3d(i32 %flag, ptr addrspace(3) %src, ptr % ; CHECK-PTX64-NEXT: ld.param.b32 %r1, [cp_async_bulk_tensor_s2g_3d_param_3]; ; CHECK-PTX64-NEXT: ld.param.b32 %r2, [cp_async_bulk_tensor_s2g_3d_param_4]; ; CHECK-PTX64-NEXT: ld.param.b32 %r3, [cp_async_bulk_tensor_s2g_3d_param_5]; -; CHECK-PTX64-NEXT: cp.async.bulk.tensor.3d.global.shared::cta.tile.bulk_group [%rd2, {%r1, %r2, %r3}], [%rd1]; ; CHECK-PTX64-NEXT: ld.param.b64 %rd3, [cp_async_bulk_tensor_s2g_3d_param_6]; +; CHECK-PTX64-NEXT: cp.async.bulk.tensor.3d.global.shared::cta.tile.bulk_group [%rd2, {%r1, %r2, %r3}], [%rd1]; ; CHECK-PTX64-NEXT: cp.async.bulk.tensor.3d.global.shared::cta.tile.bulk_group.L2::cache_hint [%rd2, {%r1, %r2, %r3}], [%rd1], %rd3; ; CHECK-PTX64-NEXT: cp.async.bulk.tensor.3d.global.shared::cta.im2col_no_offs.bulk_group [%rd2, {%r1, %r2, %r3}], [%rd1]; ; CHECK-PTX64-NEXT: cp.async.bulk.tensor.3d.global.shared::cta.im2col_no_offs.bulk_group.L2::cache_hint [%rd2, {%r1, %r2, %r3}], [%rd1], %rd3; @@ -117,8 +117,8 @@ define void @cp_async_bulk_tensor_s2g_3d(i32 %flag, ptr addrspace(3) %src, ptr % ; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r2, [cp_async_bulk_tensor_s2g_3d_param_3]; ; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r3, [cp_async_bulk_tensor_s2g_3d_param_4]; ; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r4, [cp_async_bulk_tensor_s2g_3d_param_5]; -; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.3d.global.shared::cta.tile.bulk_group [%rd1, {%r2, %r3, %r4}], [%r1]; ; CHECK-PTX-SHARED32-NEXT: ld.param.b64 %rd2, [cp_async_bulk_tensor_s2g_3d_param_6]; +; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.3d.global.shared::cta.tile.bulk_group [%rd1, {%r2, %r3, %r4}], [%r1]; ; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.3d.global.shared::cta.tile.bulk_group.L2::cache_hint [%rd1, {%r2, %r3, %r4}], [%r1], 
%rd2; ; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.3d.global.shared::cta.im2col_no_offs.bulk_group [%rd1, {%r2, %r3, %r4}], [%r1]; ; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.3d.global.shared::cta.im2col_no_offs.bulk_group.L2::cache_hint [%rd1, {%r2, %r3, %r4}], [%r1], %rd2; @@ -145,8 +145,8 @@ define void @cp_async_bulk_tensor_s2g_4d(i32 %flag, ptr addrspace(3) %src, ptr % ; CHECK-PTX64-NEXT: ld.param.b32 %r2, [cp_async_bulk_tensor_s2g_4d_param_4]; ; CHECK-PTX64-NEXT: ld.param.b32 %r3, [cp_async_bulk_tensor_s2g_4d_param_5]; ; CHECK-PTX64-NEXT: ld.param.b32 %r4, [cp_async_bulk_tensor_s2g_4d_param_6]; -; CHECK-PTX64-NEXT: cp.async.bulk.tensor.4d.global.shared::cta.tile.bulk_group [%rd2, {%r1, %r2, %r3, %r4}], [%rd1]; ; CHECK-PTX64-NEXT: ld.param.b64 %rd3, [cp_async_bulk_tensor_s2g_4d_param_7]; +; CHECK-PTX64-NEXT: cp.async.bulk.tensor.4d.global.shared::cta.tile.bulk_group [%rd2, {%r1, %r2, %r3, %r4}], [%rd1]; ; CHECK-PTX64-NEXT: cp.async.bulk.tensor.4d.global.shared::cta.tile.bulk_group.L2::cache_hint [%rd2, {%r1, %r2, %r3, %r4}], [%rd1], %rd3; ; CHECK-PTX64-NEXT: cp.async.bulk.tensor.4d.global.shared::cta.im2col_no_offs.bulk_group [%rd2, {%r1, %r2, %r3, %r4}], [%rd1]; ; CHECK-PTX64-NEXT: cp.async.bulk.tensor.4d.global.shared::cta.im2col_no_offs.bulk_group.L2::cache_hint [%rd2, {%r1, %r2, %r3, %r4}], [%rd1], %rd3; @@ -164,8 +164,8 @@ define void @cp_async_bulk_tensor_s2g_4d(i32 %flag, ptr addrspace(3) %src, ptr % ; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r3, [cp_async_bulk_tensor_s2g_4d_param_4]; ; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r4, [cp_async_bulk_tensor_s2g_4d_param_5]; ; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r5, [cp_async_bulk_tensor_s2g_4d_param_6]; -; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.4d.global.shared::cta.tile.bulk_group [%rd1, {%r2, %r3, %r4, %r5}], [%r1]; ; CHECK-PTX-SHARED32-NEXT: ld.param.b64 %rd2, [cp_async_bulk_tensor_s2g_4d_param_7]; +; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.4d.global.shared::cta.tile.bulk_group [%rd1, 
{%r2, %r3, %r4, %r5}], [%r1]; ; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.4d.global.shared::cta.tile.bulk_group.L2::cache_hint [%rd1, {%r2, %r3, %r4, %r5}], [%r1], %rd2; ; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.4d.global.shared::cta.im2col_no_offs.bulk_group [%rd1, {%r2, %r3, %r4, %r5}], [%r1]; ; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.4d.global.shared::cta.im2col_no_offs.bulk_group.L2::cache_hint [%rd1, {%r2, %r3, %r4, %r5}], [%r1], %rd2; @@ -193,8 +193,8 @@ define void @cp_async_bulk_tensor_s2g_5d(i32 %flag, ptr addrspace(3) %src, ptr % ; CHECK-PTX64-NEXT: ld.param.b32 %r3, [cp_async_bulk_tensor_s2g_5d_param_5]; ; CHECK-PTX64-NEXT: ld.param.b32 %r4, [cp_async_bulk_tensor_s2g_5d_param_6]; ; CHECK-PTX64-NEXT: ld.param.b32 %r5, [cp_async_bulk_tensor_s2g_5d_param_7]; -; CHECK-PTX64-NEXT: cp.async.bulk.tensor.5d.global.shared::cta.tile.bulk_group [%rd2, {%r1, %r2, %r3, %r4, %r5}], [%rd1]; ; CHECK-PTX64-NEXT: ld.param.b64 %rd3, [cp_async_bulk_tensor_s2g_5d_param_8]; +; CHECK-PTX64-NEXT: cp.async.bulk.tensor.5d.global.shared::cta.tile.bulk_group [%rd2, {%r1, %r2, %r3, %r4, %r5}], [%rd1]; ; CHECK-PTX64-NEXT: cp.async.bulk.tensor.5d.global.shared::cta.tile.bulk_group.L2::cache_hint [%rd2, {%r1, %r2, %r3, %r4, %r5}], [%rd1], %rd3; ; CHECK-PTX64-NEXT: cp.async.bulk.tensor.5d.global.shared::cta.im2col_no_offs.bulk_group [%rd2, {%r1, %r2, %r3, %r4, %r5}], [%rd1]; ; CHECK-PTX64-NEXT: cp.async.bulk.tensor.5d.global.shared::cta.im2col_no_offs.bulk_group.L2::cache_hint [%rd2, {%r1, %r2, %r3, %r4, %r5}], [%rd1], %rd3; @@ -213,8 +213,8 @@ define void @cp_async_bulk_tensor_s2g_5d(i32 %flag, ptr addrspace(3) %src, ptr % ; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r4, [cp_async_bulk_tensor_s2g_5d_param_5]; ; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r5, [cp_async_bulk_tensor_s2g_5d_param_6]; ; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r6, [cp_async_bulk_tensor_s2g_5d_param_7]; -; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.5d.global.shared::cta.tile.bulk_group [%rd1, {%r2, 
%r3, %r4, %r5, %r6}], [%r1]; ; CHECK-PTX-SHARED32-NEXT: ld.param.b64 %rd2, [cp_async_bulk_tensor_s2g_5d_param_8]; +; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.5d.global.shared::cta.tile.bulk_group [%rd1, {%r2, %r3, %r4, %r5, %r6}], [%r1]; ; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.5d.global.shared::cta.tile.bulk_group.L2::cache_hint [%rd1, {%r2, %r3, %r4, %r5, %r6}], [%r1], %rd2; ; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.5d.global.shared::cta.im2col_no_offs.bulk_group [%rd1, {%r2, %r3, %r4, %r5, %r6}], [%r1]; ; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.tensor.5d.global.shared::cta.im2col_no_offs.bulk_group.L2::cache_hint [%rd1, {%r2, %r3, %r4, %r5, %r6}], [%r1], %rd2; diff --git a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll index 093bc20547b85..d0e2c1817f696 100644 --- a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll @@ -45,12 +45,11 @@ define <2 x half> @test_ret_const() #0 { define half @test_extract_0(<2 x half> %a) #0 { ; CHECK-LABEL: test_extract_0( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_extract_0_param_0]; -; CHECK-NEXT: { .reg .b16 tmp; mov.b32 {%rs1, tmp}, %r1; } +; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_extract_0_param_0]; ; CHECK-NEXT: st.param.b16 [func_retval0], %rs1; ; CHECK-NEXT: ret; %e = extractelement <2 x half> %a, i32 0 @@ -60,13 +59,12 @@ define half @test_extract_0(<2 x half> %a) #0 { define half @test_extract_1(<2 x half> %a) #0 { ; CHECK-LABEL: test_extract_1( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_extract_1_param_0]; -; CHECK-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r1; } -; CHECK-NEXT: st.param.b16 [func_retval0], %rs1; +; 
CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_extract_1_param_0]; +; CHECK-NEXT: st.param.b16 [func_retval0], %rs2; ; CHECK-NEXT: ret; %e = extractelement <2 x half> %a, i32 1 ret half %e @@ -82,9 +80,8 @@ define half @test_extract_i(<2 x half> %a, i64 %idx) #0 { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [test_extract_i_param_1]; -; CHECK-NEXT: ld.param.b32 %r1, [test_extract_i_param_0]; +; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_extract_i_param_0]; ; CHECK-NEXT: setp.eq.b64 %p1, %rd1, 0; -; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; ; CHECK-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1; ; CHECK-NEXT: st.param.b16 [func_retval0], %rs3; ; CHECK-NEXT: ret; @@ -110,16 +107,14 @@ define <2 x half> @test_fadd(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-NEXT: .reg .b32 %r<10>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fadd_param_1]; -; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fadd_param_0]; -; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs2; -; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs4; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fadd_param_0]; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fadd_param_1]; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2; ; CHECK-NOF16-NEXT: add.rn.f32 %r5, %r4, %r3; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs5, %r5; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs1; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r7, %rs3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r7, %rs1; ; CHECK-NOF16-NEXT: add.rn.f32 %r8, %r7, %r6; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs6, %r8; ; CHECK-NOF16-NEXT: mov.b32 %r9, {%rs6, %rs5}; @@ -148,8 +143,7 @@ define <2 x half> @test_fadd_imm_0(<2 x half> %a) #0 { ; CHECK-NOF16-NEXT: .reg .b32 %r<7>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: 
ld.param.b32 %r1, [test_fadd_imm_0_param_0]; -; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r1; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fadd_imm_0_param_0]; ; CHECK-NOF16-NEXT: cvt.f32.f16 %r2, %rs2; ; CHECK-NOF16-NEXT: add.rn.f32 %r3, %r2, 0f40000000; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs3, %r3; @@ -181,8 +175,7 @@ define <2 x half> @test_fadd_imm_1(<2 x half> %a) #0 { ; CHECK-NOF16-NEXT: .reg .b32 %r<7>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fadd_imm_1_param_0]; -; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r1; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fadd_imm_1_param_0]; ; CHECK-NOF16-NEXT: cvt.f32.f16 %r2, %rs2; ; CHECK-NOF16-NEXT: add.rn.f32 %r3, %r2, 0f40000000; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs3, %r3; @@ -214,16 +207,14 @@ define <2 x half> @test_fsub(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-NEXT: .reg .b32 %r<10>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fsub_param_1]; -; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fsub_param_0]; -; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs2; -; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs4; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fsub_param_0]; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fsub_param_1]; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2; ; CHECK-NOF16-NEXT: sub.rn.f32 %r5, %r4, %r3; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs5, %r5; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs1; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r7, %rs3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r7, %rs1; ; CHECK-NOF16-NEXT: sub.rn.f32 %r8, %r7, %r6; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs6, %r8; ; CHECK-NOF16-NEXT: mov.b32 %r9, {%rs6, %rs5}; @@ -251,8 +242,7 @@ define <2 x half> @test_fneg(<2 x half> %a) #0 { ; 
CHECK-NOF16-NEXT: .reg .b32 %r<8>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fneg_param_0]; -; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r1; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fneg_param_0]; ; CHECK-NOF16-NEXT: cvt.f32.f16 %r2, %rs2; ; CHECK-NOF16-NEXT: mov.b32 %r3, 0f00000000; ; CHECK-NOF16-NEXT: sub.rn.f32 %r4, %r3, %r2; @@ -285,16 +275,14 @@ define <2 x half> @test_fmul(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-NEXT: .reg .b32 %r<10>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fmul_param_1]; -; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fmul_param_0]; -; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs2; -; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs4; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fmul_param_0]; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fmul_param_1]; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2; ; CHECK-NOF16-NEXT: mul.rn.f32 %r5, %r4, %r3; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs5, %r5; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs1; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r7, %rs3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r7, %rs1; ; CHECK-NOF16-NEXT: mul.rn.f32 %r8, %r7, %r6; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs6, %r8; ; CHECK-NOF16-NEXT: mov.b32 %r9, {%rs6, %rs5}; @@ -311,16 +299,14 @@ define <2 x half> @test_fdiv(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NEXT: .reg .b32 %r<10>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r2, [test_fdiv_param_1]; -; CHECK-NEXT: ld.param.b32 %r1, [test_fdiv_param_0]; -; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r2; -; CHECK-NEXT: cvt.f32.f16 %r3, %rs2; -; CHECK-NEXT: mov.b32 {%rs3, %rs4}, %r1; -; CHECK-NEXT: cvt.f32.f16 %r4, %rs4; +; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, 
[test_fdiv_param_0]; +; CHECK-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fdiv_param_1]; +; CHECK-NEXT: cvt.f32.f16 %r3, %rs4; +; CHECK-NEXT: cvt.f32.f16 %r4, %rs2; ; CHECK-NEXT: div.rn.f32 %r5, %r4, %r3; ; CHECK-NEXT: cvt.rn.f16.f32 %rs5, %r5; -; CHECK-NEXT: cvt.f32.f16 %r6, %rs1; -; CHECK-NEXT: cvt.f32.f16 %r7, %rs3; +; CHECK-NEXT: cvt.f32.f16 %r6, %rs3; +; CHECK-NEXT: cvt.f32.f16 %r7, %rs1; ; CHECK-NEXT: div.rn.f32 %r8, %r7, %r6; ; CHECK-NEXT: cvt.rn.f16.f32 %rs6, %r8; ; CHECK-NEXT: mov.b32 %r9, {%rs6, %rs5}; @@ -345,12 +331,10 @@ define <2 x half> @test_frem(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NEXT: .reg .b32 %r<18>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r2, [test_frem_param_1]; -; CHECK-NEXT: ld.param.b32 %r1, [test_frem_param_0]; -; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r2; -; CHECK-NEXT: cvt.f32.f16 %r3, %rs2; -; CHECK-NEXT: mov.b32 {%rs3, %rs4}, %r1; -; CHECK-NEXT: cvt.f32.f16 %r4, %rs4; +; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_frem_param_0]; +; CHECK-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_frem_param_1]; +; CHECK-NEXT: cvt.f32.f16 %r3, %rs4; +; CHECK-NEXT: cvt.f32.f16 %r4, %rs2; ; CHECK-NEXT: div.rn.f32 %r5, %r4, %r3; ; CHECK-NEXT: cvt.rzi.f32.f32 %r6, %r5; ; CHECK-NEXT: neg.f32 %r7, %r6; @@ -358,8 +342,8 @@ define <2 x half> @test_frem(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NEXT: testp.infinite.f32 %p1, %r3; ; CHECK-NEXT: selp.f32 %r9, %r4, %r8, %p1; ; CHECK-NEXT: cvt.rn.f16.f32 %rs5, %r9; -; CHECK-NEXT: cvt.f32.f16 %r10, %rs1; -; CHECK-NEXT: cvt.f32.f16 %r11, %rs3; +; CHECK-NEXT: cvt.f32.f16 %r10, %rs3; +; CHECK-NEXT: cvt.f32.f16 %r11, %rs1; ; CHECK-NEXT: div.rn.f32 %r12, %r11, %r10; ; CHECK-NEXT: cvt.rzi.f32.f32 %r13, %r12; ; CHECK-NEXT: neg.f32 %r14, %r13; @@ -551,13 +535,11 @@ define <2 x half> @test_select_cc(<2 x half> %a, <2 x half> %b, <2 x half> %c, < ; CHECK-F16-NEXT: // %bb.0: ; CHECK-F16-NEXT: ld.param.b32 %r4, [test_select_cc_param_3]; ; CHECK-F16-NEXT: ld.param.b32 %r3, 
[test_select_cc_param_2]; -; CHECK-F16-NEXT: ld.param.b32 %r2, [test_select_cc_param_1]; -; CHECK-F16-NEXT: ld.param.b32 %r1, [test_select_cc_param_0]; +; CHECK-F16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_select_cc_param_0]; ; CHECK-F16-NEXT: setp.neu.f16x2 %p1|%p2, %r3, %r4; -; CHECK-F16-NEXT: mov.b32 {%rs1, %rs2}, %r2; -; CHECK-F16-NEXT: mov.b32 {%rs3, %rs4}, %r1; -; CHECK-F16-NEXT: selp.b16 %rs5, %rs4, %rs2, %p2; -; CHECK-F16-NEXT: selp.b16 %rs6, %rs3, %rs1, %p1; +; CHECK-F16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_select_cc_param_1]; +; CHECK-F16-NEXT: selp.b16 %rs5, %rs2, %rs4, %p2; +; CHECK-F16-NEXT: selp.b16 %rs6, %rs1, %rs3, %p1; ; CHECK-F16-NEXT: st.param.v2.b16 [func_retval0], {%rs6, %rs5}; ; CHECK-F16-NEXT: ret; ; @@ -568,22 +550,18 @@ define <2 x half> @test_select_cc(<2 x half> %a, <2 x half> %b, <2 x half> %c, < ; CHECK-NOF16-NEXT: .reg .b32 %r<9>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.b32 %r4, [test_select_cc_param_3]; -; CHECK-NOF16-NEXT: ld.param.b32 %r3, [test_select_cc_param_2]; -; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_select_cc_param_1]; -; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_select_cc_param_0]; -; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r4; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs1; -; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs3; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_select_cc_param_0]; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_select_cc_param_3]; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs3; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs5, %rs6}, [test_select_cc_param_2]; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs5; ; CHECK-NOF16-NEXT: setp.neu.f32 %p1, %r6, %r5; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r7, %rs2; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r8, %rs4; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r7, %rs4; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r8, %rs6; ; CHECK-NOF16-NEXT: setp.neu.f32 %p2, %r8, %r7; -; CHECK-NOF16-NEXT: mov.b32 
{%rs5, %rs6}, %r2; -; CHECK-NOF16-NEXT: mov.b32 {%rs7, %rs8}, %r1; -; CHECK-NOF16-NEXT: selp.b16 %rs9, %rs8, %rs6, %p2; -; CHECK-NOF16-NEXT: selp.b16 %rs10, %rs7, %rs5, %p1; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs7, %rs8}, [test_select_cc_param_1]; +; CHECK-NOF16-NEXT: selp.b16 %rs9, %rs2, %rs8, %p2; +; CHECK-NOF16-NEXT: selp.b16 %rs10, %rs1, %rs7, %p1; ; CHECK-NOF16-NEXT: st.param.v2.b16 [func_retval0], {%rs10, %rs9}; ; CHECK-NOF16-NEXT: ret; %cc = fcmp une <2 x half> %c, %d @@ -596,15 +574,16 @@ define <2 x float> @test_select_cc_f32_f16(<2 x float> %a, <2 x float> %b, ; CHECK-F16: { ; CHECK-F16-NEXT: .reg .pred %p<3>; ; CHECK-F16-NEXT: .reg .b32 %r<9>; +; CHECK-F16-NEXT: .reg .b64 %rd<3>; ; CHECK-F16-EMPTY: ; CHECK-F16-NEXT: // %bb.0: -; CHECK-F16-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_select_cc_f32_f16_param_1]; -; CHECK-F16-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_select_cc_f32_f16_param_0]; -; CHECK-F16-NEXT: ld.param.b32 %r6, [test_select_cc_f32_f16_param_3]; -; CHECK-F16-NEXT: ld.param.b32 %r5, [test_select_cc_f32_f16_param_2]; -; CHECK-F16-NEXT: setp.neu.f16x2 %p1|%p2, %r5, %r6; -; CHECK-F16-NEXT: selp.f32 %r7, %r2, %r4, %p2; -; CHECK-F16-NEXT: selp.f32 %r8, %r1, %r3, %p1; +; CHECK-F16-NEXT: ld.param.b32 %r2, [test_select_cc_f32_f16_param_3]; +; CHECK-F16-NEXT: ld.param.b32 %r1, [test_select_cc_f32_f16_param_2]; +; CHECK-F16-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_select_cc_f32_f16_param_0]; +; CHECK-F16-NEXT: setp.neu.f16x2 %p1|%p2, %r1, %r2; +; CHECK-F16-NEXT: ld.param.v2.b32 {%r5, %r6}, [test_select_cc_f32_f16_param_1]; +; CHECK-F16-NEXT: selp.f32 %r7, %r4, %r6, %p2; +; CHECK-F16-NEXT: selp.f32 %r8, %r3, %r5, %p1; ; CHECK-F16-NEXT: st.param.v2.b32 [func_retval0], {%r8, %r7}; ; CHECK-F16-NEXT: ret; ; @@ -613,22 +592,21 @@ define <2 x float> @test_select_cc_f32_f16(<2 x float> %a, <2 x float> %b, ; CHECK-NOF16-NEXT: .reg .pred %p<3>; ; CHECK-NOF16-NEXT: .reg .b16 %rs<5>; ; CHECK-NOF16-NEXT: .reg .b32 %r<13>; +; CHECK-NOF16-NEXT: .reg .b64 %rd<3>; ; 
CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_select_cc_f32_f16_param_1]; -; CHECK-NOF16-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_select_cc_f32_f16_param_0]; -; CHECK-NOF16-NEXT: ld.param.b32 %r6, [test_select_cc_f32_f16_param_3]; -; CHECK-NOF16-NEXT: ld.param.b32 %r5, [test_select_cc_f32_f16_param_2]; -; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r6; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r7, %rs1; -; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r5; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r8, %rs3; -; CHECK-NOF16-NEXT: setp.neu.f32 %p1, %r8, %r7; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r9, %rs2; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r10, %rs4; -; CHECK-NOF16-NEXT: setp.neu.f32 %p2, %r10, %r9; -; CHECK-NOF16-NEXT: selp.f32 %r11, %r2, %r4, %p2; -; CHECK-NOF16-NEXT: selp.f32 %r12, %r1, %r3, %p1; +; CHECK-NOF16-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_select_cc_f32_f16_param_0]; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_select_cc_f32_f16_param_3]; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs1; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_select_cc_f32_f16_param_2]; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs3; +; CHECK-NOF16-NEXT: setp.neu.f32 %p1, %r6, %r5; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r7, %rs2; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r8, %rs4; +; CHECK-NOF16-NEXT: setp.neu.f32 %p2, %r8, %r7; +; CHECK-NOF16-NEXT: ld.param.v2.b32 {%r9, %r10}, [test_select_cc_f32_f16_param_1]; +; CHECK-NOF16-NEXT: selp.f32 %r11, %r4, %r10, %p2; +; CHECK-NOF16-NEXT: selp.f32 %r12, %r3, %r9, %p1; ; CHECK-NOF16-NEXT: st.param.v2.b32 [func_retval0], {%r12, %r11}; ; CHECK-NOF16-NEXT: ret; <2 x half> %c, <2 x half> %d) #0 { @@ -643,18 +621,17 @@ define <2 x half> @test_select_cc_f16_f32(<2 x half> %a, <2 x half> %b, ; CHECK-NEXT: .reg .pred %p<3>; ; CHECK-NEXT: .reg .b16 %rs<7>; ; CHECK-NEXT: .reg .b32 %r<7>; +; CHECK-NEXT: .reg .b64 %rd<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r5, %r6}, 
[test_select_cc_f16_f32_param_3]; +; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_select_cc_f16_f32_param_0]; ; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_select_cc_f16_f32_param_2]; -; CHECK-NEXT: ld.param.b32 %r2, [test_select_cc_f16_f32_param_1]; -; CHECK-NEXT: ld.param.b32 %r1, [test_select_cc_f16_f32_param_0]; +; CHECK-NEXT: ld.param.v2.b32 {%r5, %r6}, [test_select_cc_f16_f32_param_3]; ; CHECK-NEXT: setp.neu.f32 %p1, %r3, %r5; ; CHECK-NEXT: setp.neu.f32 %p2, %r4, %r6; -; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r2; -; CHECK-NEXT: mov.b32 {%rs3, %rs4}, %r1; -; CHECK-NEXT: selp.b16 %rs5, %rs4, %rs2, %p2; -; CHECK-NEXT: selp.b16 %rs6, %rs3, %rs1, %p1; +; CHECK-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_select_cc_f16_f32_param_1]; +; CHECK-NEXT: selp.b16 %rs5, %rs2, %rs4, %p2; +; CHECK-NEXT: selp.b16 %rs6, %rs1, %rs3, %p1; ; CHECK-NEXT: st.param.v2.b16 [func_retval0], {%rs6, %rs5}; ; CHECK-NEXT: ret; <2 x float> %c, <2 x float> %d) #0 { @@ -687,15 +664,13 @@ define <2 x i1> @test_fcmp_une(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-NEXT: .reg .b32 %r<7>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fcmp_une_param_1]; -; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fcmp_une_param_0]; -; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs2; -; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs4; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fcmp_une_param_0]; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fcmp_une_param_1]; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2; ; CHECK-NOF16-NEXT: setp.neu.f32 %p1, %r4, %r3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs1; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs1; ; CHECK-NOF16-NEXT: setp.neu.f32 %p2, %r6, %r5; ; CHECK-NOF16-NEXT: selp.b16 %rs5, -1, 0, %p2; ; 
CHECK-NOF16-NEXT: st.param.b8 [func_retval0], %rs5; @@ -730,15 +705,13 @@ define <2 x i1> @test_fcmp_ueq(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-NEXT: .reg .b32 %r<7>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fcmp_ueq_param_1]; -; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fcmp_ueq_param_0]; -; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs2; -; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs4; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fcmp_ueq_param_0]; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fcmp_ueq_param_1]; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2; ; CHECK-NOF16-NEXT: setp.equ.f32 %p1, %r4, %r3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs1; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs1; ; CHECK-NOF16-NEXT: setp.equ.f32 %p2, %r6, %r5; ; CHECK-NOF16-NEXT: selp.b16 %rs5, -1, 0, %p2; ; CHECK-NOF16-NEXT: st.param.b8 [func_retval0], %rs5; @@ -773,15 +746,13 @@ define <2 x i1> @test_fcmp_ugt(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-NEXT: .reg .b32 %r<7>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fcmp_ugt_param_1]; -; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fcmp_ugt_param_0]; -; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs2; -; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs4; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fcmp_ugt_param_0]; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fcmp_ugt_param_1]; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2; ; CHECK-NOF16-NEXT: setp.gtu.f32 %p1, %r4, %r3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs1; -; CHECK-NOF16-NEXT: 
cvt.f32.f16 %r6, %rs3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs1; ; CHECK-NOF16-NEXT: setp.gtu.f32 %p2, %r6, %r5; ; CHECK-NOF16-NEXT: selp.b16 %rs5, -1, 0, %p2; ; CHECK-NOF16-NEXT: st.param.b8 [func_retval0], %rs5; @@ -816,15 +787,13 @@ define <2 x i1> @test_fcmp_uge(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-NEXT: .reg .b32 %r<7>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fcmp_uge_param_1]; -; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fcmp_uge_param_0]; -; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs2; -; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs4; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fcmp_uge_param_0]; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fcmp_uge_param_1]; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2; ; CHECK-NOF16-NEXT: setp.geu.f32 %p1, %r4, %r3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs1; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs1; ; CHECK-NOF16-NEXT: setp.geu.f32 %p2, %r6, %r5; ; CHECK-NOF16-NEXT: selp.b16 %rs5, -1, 0, %p2; ; CHECK-NOF16-NEXT: st.param.b8 [func_retval0], %rs5; @@ -859,15 +828,13 @@ define <2 x i1> @test_fcmp_ult(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-NEXT: .reg .b32 %r<7>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fcmp_ult_param_1]; -; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fcmp_ult_param_0]; -; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs2; -; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs4; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fcmp_ult_param_0]; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, 
[test_fcmp_ult_param_1]; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2; ; CHECK-NOF16-NEXT: setp.ltu.f32 %p1, %r4, %r3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs1; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs1; ; CHECK-NOF16-NEXT: setp.ltu.f32 %p2, %r6, %r5; ; CHECK-NOF16-NEXT: selp.b16 %rs5, -1, 0, %p2; ; CHECK-NOF16-NEXT: st.param.b8 [func_retval0], %rs5; @@ -902,15 +869,13 @@ define <2 x i1> @test_fcmp_ule(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-NEXT: .reg .b32 %r<7>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fcmp_ule_param_1]; -; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fcmp_ule_param_0]; -; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs2; -; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs4; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fcmp_ule_param_0]; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fcmp_ule_param_1]; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2; ; CHECK-NOF16-NEXT: setp.leu.f32 %p1, %r4, %r3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs1; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs1; ; CHECK-NOF16-NEXT: setp.leu.f32 %p2, %r6, %r5; ; CHECK-NOF16-NEXT: selp.b16 %rs5, -1, 0, %p2; ; CHECK-NOF16-NEXT: st.param.b8 [func_retval0], %rs5; @@ -946,15 +911,13 @@ define <2 x i1> @test_fcmp_uno(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-NEXT: .reg .b32 %r<7>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fcmp_uno_param_1]; -; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fcmp_uno_param_0]; -; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs2; -; 
CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs4; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fcmp_uno_param_0]; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fcmp_uno_param_1]; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2; ; CHECK-NOF16-NEXT: setp.nan.f32 %p1, %r4, %r3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs1; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs1; ; CHECK-NOF16-NEXT: setp.nan.f32 %p2, %r6, %r5; ; CHECK-NOF16-NEXT: selp.b16 %rs5, -1, 0, %p2; ; CHECK-NOF16-NEXT: st.param.b8 [func_retval0], %rs5; @@ -989,15 +952,13 @@ define <2 x i1> @test_fcmp_one(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-NEXT: .reg .b32 %r<7>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fcmp_one_param_1]; -; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fcmp_one_param_0]; -; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs2; -; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs4; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fcmp_one_param_0]; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fcmp_one_param_1]; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2; ; CHECK-NOF16-NEXT: setp.ne.f32 %p1, %r4, %r3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs1; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs1; ; CHECK-NOF16-NEXT: setp.ne.f32 %p2, %r6, %r5; ; CHECK-NOF16-NEXT: selp.b16 %rs5, -1, 0, %p2; ; CHECK-NOF16-NEXT: st.param.b8 [func_retval0], %rs5; @@ -1032,15 +993,13 @@ define <2 x i1> @test_fcmp_oeq(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-NEXT: .reg .b32 %r<7>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: 
ld.param.b32 %r2, [test_fcmp_oeq_param_1]; -; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fcmp_oeq_param_0]; -; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs2; -; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs4; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fcmp_oeq_param_0]; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fcmp_oeq_param_1]; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2; ; CHECK-NOF16-NEXT: setp.eq.f32 %p1, %r4, %r3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs1; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs1; ; CHECK-NOF16-NEXT: setp.eq.f32 %p2, %r6, %r5; ; CHECK-NOF16-NEXT: selp.b16 %rs5, -1, 0, %p2; ; CHECK-NOF16-NEXT: st.param.b8 [func_retval0], %rs5; @@ -1075,15 +1034,13 @@ define <2 x i1> @test_fcmp_ogt(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-NEXT: .reg .b32 %r<7>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fcmp_ogt_param_1]; -; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fcmp_ogt_param_0]; -; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs2; -; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs4; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fcmp_ogt_param_0]; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fcmp_ogt_param_1]; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2; ; CHECK-NOF16-NEXT: setp.gt.f32 %p1, %r4, %r3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs1; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs1; ; CHECK-NOF16-NEXT: setp.gt.f32 %p2, %r6, %r5; ; CHECK-NOF16-NEXT: selp.b16 %rs5, -1, 0, %p2; ; CHECK-NOF16-NEXT: st.param.b8 [func_retval0], 
%rs5; @@ -1118,15 +1075,13 @@ define <2 x i1> @test_fcmp_oge(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-NEXT: .reg .b32 %r<7>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fcmp_oge_param_1]; -; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fcmp_oge_param_0]; -; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs2; -; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs4; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fcmp_oge_param_0]; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fcmp_oge_param_1]; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2; ; CHECK-NOF16-NEXT: setp.ge.f32 %p1, %r4, %r3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs1; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs1; ; CHECK-NOF16-NEXT: setp.ge.f32 %p2, %r6, %r5; ; CHECK-NOF16-NEXT: selp.b16 %rs5, -1, 0, %p2; ; CHECK-NOF16-NEXT: st.param.b8 [func_retval0], %rs5; @@ -1161,15 +1116,13 @@ define <2 x i1> @test_fcmp_olt(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-NEXT: .reg .b32 %r<7>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fcmp_olt_param_1]; -; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fcmp_olt_param_0]; -; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs2; -; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs4; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fcmp_olt_param_0]; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fcmp_olt_param_1]; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2; ; CHECK-NOF16-NEXT: setp.lt.f32 %p1, %r4, %r3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs1; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs3; +; CHECK-NOF16-NEXT: 
cvt.f32.f16 %r5, %rs3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs1; ; CHECK-NOF16-NEXT: setp.lt.f32 %p2, %r6, %r5; ; CHECK-NOF16-NEXT: selp.b16 %rs5, -1, 0, %p2; ; CHECK-NOF16-NEXT: st.param.b8 [func_retval0], %rs5; @@ -1204,15 +1157,13 @@ define <2 x i1> @test_fcmp_ole(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-NEXT: .reg .b32 %r<7>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fcmp_ole_param_1]; -; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fcmp_ole_param_0]; -; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs2; -; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs4; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fcmp_ole_param_0]; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fcmp_ole_param_1]; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2; ; CHECK-NOF16-NEXT: setp.le.f32 %p1, %r4, %r3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs1; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs1; ; CHECK-NOF16-NEXT: setp.le.f32 %p2, %r6, %r5; ; CHECK-NOF16-NEXT: selp.b16 %rs5, -1, 0, %p2; ; CHECK-NOF16-NEXT: st.param.b8 [func_retval0], %rs5; @@ -1247,15 +1198,13 @@ define <2 x i1> @test_fcmp_ord(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-NEXT: .reg .b32 %r<7>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fcmp_ord_param_1]; -; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fcmp_ord_param_0]; -; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs2; -; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs4; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fcmp_ord_param_0]; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fcmp_ord_param_1]; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, 
%rs4; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2; ; CHECK-NOF16-NEXT: setp.num.f32 %p1, %r4, %r3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs1; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs1; ; CHECK-NOF16-NEXT: setp.num.f32 %p2, %r6, %r5; ; CHECK-NOF16-NEXT: selp.b16 %rs5, -1, 0, %p2; ; CHECK-NOF16-NEXT: st.param.b8 [func_retval0], %rs5; @@ -1273,8 +1222,7 @@ define <2 x i32> @test_fptosi_i32(<2 x half> %a) #0 { ; CHECK-NEXT: .reg .b32 %r<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_fptosi_i32_param_0]; -; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; +; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fptosi_i32_param_0]; ; CHECK-NEXT: cvt.rzi.s32.f16 %r2, %rs2; ; CHECK-NEXT: cvt.rzi.s32.f16 %r3, %rs1; ; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r3, %r2}; @@ -1291,8 +1239,7 @@ define <2 x i64> @test_fptosi_i64(<2 x half> %a) #0 { ; CHECK-NEXT: .reg .b64 %rd<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_fptosi_i64_param_0]; -; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; +; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fptosi_i64_param_0]; ; CHECK-NEXT: cvt.rzi.s64.f16 %rd1, %rs2; ; CHECK-NEXT: cvt.rzi.s64.f16 %rd2, %rs1; ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd2, %rd1}; @@ -1308,8 +1255,7 @@ define <2 x i32> @test_fptoui_2xi32(<2 x half> %a) #0 { ; CHECK-NEXT: .reg .b32 %r<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_fptoui_2xi32_param_0]; -; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; +; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fptoui_2xi32_param_0]; ; CHECK-NEXT: cvt.rzi.u32.f16 %r2, %rs2; ; CHECK-NEXT: cvt.rzi.u32.f16 %r3, %rs1; ; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r3, %r2}; @@ -1326,8 +1272,7 @@ define <2 x i64> @test_fptoui_2xi64(<2 x half> %a) #0 { ; CHECK-NEXT: .reg .b64 %rd<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, 
[test_fptoui_2xi64_param_0]; -; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; +; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fptoui_2xi64_param_0]; ; CHECK-NEXT: cvt.rzi.u64.f16 %rd1, %rs2; ; CHECK-NEXT: cvt.rzi.u64.f16 %rd2, %rs1; ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd2, %rd1}; @@ -1424,17 +1369,16 @@ define <2 x half> @test_uitofp_2xi32_fadd(<2 x i32> %a, <2 x half> %b) #0 { ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: ; CHECK-NOF16-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_uitofp_2xi32_fadd_param_0]; -; CHECK-NOF16-NEXT: ld.param.b32 %r3, [test_uitofp_2xi32_fadd_param_1]; ; CHECK-NOF16-NEXT: cvt.rn.f16.u32 %rs1, %r1; ; CHECK-NOF16-NEXT: cvt.rn.f16.u32 %rs2, %r2; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2; -; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs4; -; CHECK-NOF16-NEXT: add.rn.f32 %r6, %r5, %r4; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_uitofp_2xi32_fadd_param_1]; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs4; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs2; +; CHECK-NOF16-NEXT: add.rn.f32 %r6, %r4, %r5; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs5, %r6; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r7, %rs1; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r8, %rs3; -; CHECK-NOF16-NEXT: add.rn.f32 %r9, %r8, %r7; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r7, %rs3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r8, %rs1; +; CHECK-NOF16-NEXT: add.rn.f32 %r9, %r7, %r8; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs6, %r9; ; CHECK-NOF16-NEXT: mov.b32 %r10, {%rs6, %rs5}; ; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r10; @@ -1467,17 +1411,16 @@ define <2 x half> @test_sitofp_2xi32_fadd(<2 x i32> %a, <2 x half> %b) #0 { ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: ; CHECK-NOF16-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_sitofp_2xi32_fadd_param_0]; -; CHECK-NOF16-NEXT: ld.param.b32 %r3, [test_sitofp_2xi32_fadd_param_1]; ; CHECK-NOF16-NEXT: cvt.rn.f16.s32 %rs1, %r1; ; CHECK-NOF16-NEXT: cvt.rn.f16.s32 %rs2, %r2; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2; -; 
CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs4; -; CHECK-NOF16-NEXT: add.rn.f32 %r6, %r5, %r4; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_sitofp_2xi32_fadd_param_1]; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs4; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs2; +; CHECK-NOF16-NEXT: add.rn.f32 %r6, %r4, %r5; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs5, %r6; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r7, %rs1; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r8, %rs3; -; CHECK-NOF16-NEXT: add.rn.f32 %r9, %r8, %r7; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r7, %rs3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r8, %rs1; +; CHECK-NOF16-NEXT: add.rn.f32 %r9, %r7, %r8; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs6, %r9; ; CHECK-NOF16-NEXT: mov.b32 %r10, {%rs6, %rs5}; ; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r10; @@ -1490,15 +1433,11 @@ define <2 x half> @test_sitofp_2xi32_fadd(<2 x i32> %a, <2 x half> %b) #0 { define <2 x half> @test_fptrunc_2xfloat(<2 x float> %a) #0 { ; CHECK-LABEL: test_fptrunc_2xfloat( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<3>; -; CHECK-NEXT: .reg .b32 %r<4>; +; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fptrunc_2xfloat_param_0]; -; CHECK-NEXT: cvt.rn.f16.f32 %rs1, %r2; -; CHECK-NEXT: cvt.rn.f16.f32 %rs2, %r1; -; CHECK-NEXT: mov.b32 %r3, {%rs2, %rs1}; -; CHECK-NEXT: st.param.b32 [func_retval0], %r3; +; CHECK-NEXT: ld.param.b64 %rd1, [test_fptrunc_2xfloat_param_0]; +; CHECK-NEXT: st.param.b32 [func_retval0], %rd1; ; CHECK-NEXT: ret; %r = fptrunc <2 x float> %a to <2 x half> ret <2 x half> %r @@ -1529,8 +1468,7 @@ define <2 x float> @test_fpext_2xfloat(<2 x half> %a) #0 { ; CHECK-NEXT: .reg .b32 %r<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_fpext_2xfloat_param_0]; -; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; +; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fpext_2xfloat_param_0]; ; CHECK-NEXT: cvt.f32.f16 %r2, %rs2; ; CHECK-NEXT: 
cvt.f32.f16 %r3, %rs1; ; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r3, %r2}; @@ -1547,8 +1485,7 @@ define <2 x double> @test_fpext_2xdouble(<2 x half> %a) #0 { ; CHECK-NEXT: .reg .b64 %rd<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_fpext_2xdouble_param_0]; -; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; +; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fpext_2xdouble_param_0]; ; CHECK-NEXT: cvt.f64.f16 %rd1, %rs2; ; CHECK-NEXT: cvt.f64.f16 %rd2, %rs1; ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd2, %rd1}; @@ -1641,8 +1578,7 @@ define <2 x half> @test_sqrt(<2 x half> %a) #0 { ; CHECK-NEXT: .reg .b32 %r<7>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_sqrt_param_0]; -; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; +; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_sqrt_param_0]; ; CHECK-NEXT: cvt.f32.f16 %r2, %rs2; ; CHECK-NEXT: sqrt.rn.f32 %r3, %r2; ; CHECK-NEXT: cvt.rn.f16.f32 %rs3, %r3; @@ -1670,8 +1606,7 @@ define <2 x half> @test_sin(<2 x half> %a) #0 #1 { ; CHECK-NEXT: .reg .b32 %r<7>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_sin_param_0]; -; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; +; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_sin_param_0]; ; CHECK-NEXT: cvt.f32.f16 %r2, %rs2; ; CHECK-NEXT: sin.approx.f32 %r3, %r2; ; CHECK-NEXT: cvt.rn.f16.f32 %rs3, %r3; @@ -1692,8 +1627,7 @@ define <2 x half> @test_cos(<2 x half> %a) #0 #1 { ; CHECK-NEXT: .reg .b32 %r<7>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_cos_param_0]; -; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; +; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_cos_param_0]; ; CHECK-NEXT: cvt.f32.f16 %r2, %rs2; ; CHECK-NEXT: cos.approx.f32 %r3, %r2; ; CHECK-NEXT: cvt.rn.f16.f32 %rs3, %r3; @@ -1769,20 +1703,17 @@ define <2 x half> @test_fma(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 { ; CHECK-NOF16-NEXT: .reg .b32 %r<13>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: 
-; CHECK-NOF16-NEXT: ld.param.b32 %r3, [test_fma_param_2]; -; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fma_param_1]; -; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fma_param_0]; -; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2; -; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r2; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs4; -; CHECK-NOF16-NEXT: mov.b32 {%rs5, %rs6}, %r1; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs6; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fma_param_0]; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fma_param_2]; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs4; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs5, %rs6}, [test_fma_param_1]; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs6; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs2; ; CHECK-NOF16-NEXT: fma.rn.f32 %r7, %r6, %r5, %r4; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs7, %r7; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r8, %rs1; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r9, %rs3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r10, %rs5; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r8, %rs3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r9, %rs5; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r10, %rs1; ; CHECK-NOF16-NEXT: fma.rn.f32 %r11, %r10, %r9, %r8; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs8, %r11; ; CHECK-NOF16-NEXT: mov.b32 %r12, {%rs8, %rs7}; @@ -1809,8 +1740,7 @@ define <2 x half> @test_fabs(<2 x half> %a) #0 { ; CHECK-NOF16-NEXT: .reg .b32 %r<7>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fabs_param_0]; -; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r1; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fabs_param_0]; ; CHECK-NOF16-NEXT: cvt.f32.f16 %r2, %rs2; ; CHECK-NOF16-NEXT: abs.f32 %r3, %r2; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs3, %r3; @@ -1831,16 +1761,14 @@ define <2 x half> @test_minnum(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NEXT: .reg .b32 %r<10>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r2, [test_minnum_param_1]; -; 
CHECK-NEXT: ld.param.b32 %r1, [test_minnum_param_0]; -; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r2; -; CHECK-NEXT: cvt.f32.f16 %r3, %rs2; -; CHECK-NEXT: mov.b32 {%rs3, %rs4}, %r1; -; CHECK-NEXT: cvt.f32.f16 %r4, %rs4; +; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_minnum_param_0]; +; CHECK-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_minnum_param_1]; +; CHECK-NEXT: cvt.f32.f16 %r3, %rs4; +; CHECK-NEXT: cvt.f32.f16 %r4, %rs2; ; CHECK-NEXT: min.f32 %r5, %r4, %r3; ; CHECK-NEXT: cvt.rn.f16.f32 %rs5, %r5; -; CHECK-NEXT: cvt.f32.f16 %r6, %rs1; -; CHECK-NEXT: cvt.f32.f16 %r7, %rs3; +; CHECK-NEXT: cvt.f32.f16 %r6, %rs3; +; CHECK-NEXT: cvt.f32.f16 %r7, %rs1; ; CHECK-NEXT: min.f32 %r8, %r7, %r6; ; CHECK-NEXT: cvt.rn.f16.f32 %rs6, %r8; ; CHECK-NEXT: mov.b32 %r9, {%rs6, %rs5}; @@ -1857,16 +1785,14 @@ define <2 x half> @test_maxnum(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NEXT: .reg .b32 %r<10>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r2, [test_maxnum_param_1]; -; CHECK-NEXT: ld.param.b32 %r1, [test_maxnum_param_0]; -; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r2; -; CHECK-NEXT: cvt.f32.f16 %r3, %rs2; -; CHECK-NEXT: mov.b32 {%rs3, %rs4}, %r1; -; CHECK-NEXT: cvt.f32.f16 %r4, %rs4; +; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_maxnum_param_0]; +; CHECK-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_maxnum_param_1]; +; CHECK-NEXT: cvt.f32.f16 %r3, %rs4; +; CHECK-NEXT: cvt.f32.f16 %r4, %rs2; ; CHECK-NEXT: max.f32 %r5, %r4, %r3; ; CHECK-NEXT: cvt.rn.f16.f32 %rs5, %r5; -; CHECK-NEXT: cvt.f32.f16 %r6, %rs1; -; CHECK-NEXT: cvt.f32.f16 %r7, %rs3; +; CHECK-NEXT: cvt.f32.f16 %r6, %rs3; +; CHECK-NEXT: cvt.f32.f16 %r7, %rs1; ; CHECK-NEXT: max.f32 %r8, %r7, %r6; ; CHECK-NEXT: cvt.rn.f16.f32 %rs6, %r8; ; CHECK-NEXT: mov.b32 %r9, {%rs6, %rs5}; @@ -1896,15 +1822,13 @@ define <2 x half> @test_copysign(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-NEXT: .reg .b32 %r<3>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.b32 %r2, 
[test_copysign_param_1]; -; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_copysign_param_0]; -; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; -; CHECK-NOF16-NEXT: and.b16 %rs3, %rs2, -32768; -; CHECK-NOF16-NEXT: mov.b32 {%rs4, %rs5}, %r1; -; CHECK-NOF16-NEXT: and.b16 %rs6, %rs5, 32767; -; CHECK-NOF16-NEXT: or.b16 %rs7, %rs6, %rs3; -; CHECK-NOF16-NEXT: and.b16 %rs8, %rs1, -32768; -; CHECK-NOF16-NEXT: and.b16 %rs9, %rs4, 32767; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_copysign_param_0]; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_copysign_param_1]; +; CHECK-NOF16-NEXT: and.b16 %rs5, %rs4, -32768; +; CHECK-NOF16-NEXT: and.b16 %rs6, %rs2, 32767; +; CHECK-NOF16-NEXT: or.b16 %rs7, %rs6, %rs5; +; CHECK-NOF16-NEXT: and.b16 %rs8, %rs3, -32768; +; CHECK-NOF16-NEXT: and.b16 %rs9, %rs1, 32767; ; CHECK-NOF16-NEXT: or.b16 %rs10, %rs9, %rs8; ; CHECK-NOF16-NEXT: st.param.v2.b16 [func_retval0], {%rs10, %rs7}; ; CHECK-NOF16-NEXT: ret; @@ -1917,10 +1841,11 @@ define <2 x half> @test_copysign_f32(<2 x half> %a, <2 x float> %b) #0 { ; CHECK-F16: { ; CHECK-F16-NEXT: .reg .b16 %rs<3>; ; CHECK-F16-NEXT: .reg .b32 %r<8>; +; CHECK-F16-NEXT: .reg .b64 %rd<2>; ; CHECK-F16-EMPTY: ; CHECK-F16-NEXT: // %bb.0: -; CHECK-F16-NEXT: ld.param.v2.b32 {%r2, %r3}, [test_copysign_f32_param_1]; ; CHECK-F16-NEXT: ld.param.b32 %r1, [test_copysign_f32_param_0]; +; CHECK-F16-NEXT: ld.param.v2.b32 {%r2, %r3}, [test_copysign_f32_param_1]; ; CHECK-F16-NEXT: cvt.rn.f16.f32 %rs1, %r3; ; CHECK-F16-NEXT: cvt.rn.f16.f32 %rs2, %r2; ; CHECK-F16-NEXT: mov.b32 %r4, {%rs2, %rs1}; @@ -1934,19 +1859,19 @@ define <2 x half> @test_copysign_f32(<2 x half> %a, <2 x float> %b) #0 { ; CHECK-NOF16: { ; CHECK-NOF16-NEXT: .reg .b16 %rs<9>; ; CHECK-NOF16-NEXT: .reg .b32 %r<6>; +; CHECK-NOF16-NEXT: .reg .b64 %rd<2>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_copysign_f32_param_0]; ; CHECK-NOF16-NEXT: ld.param.v2.b32 {%r2, %r3}, 
[test_copysign_f32_param_1]; -; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_copysign_f32_param_0]; +; CHECK-NOF16-NEXT: and.b16 %rs3, %rs2, 32767; ; CHECK-NOF16-NEXT: and.b32 %r4, %r3, -2147483648; -; CHECK-NOF16-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r4; } -; CHECK-NOF16-NEXT: mov.b32 {%rs2, %rs3}, %r1; -; CHECK-NOF16-NEXT: and.b16 %rs4, %rs3, 32767; -; CHECK-NOF16-NEXT: or.b16 %rs5, %rs4, %rs1; +; CHECK-NOF16-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs4}, %r4; } +; CHECK-NOF16-NEXT: or.b16 %rs5, %rs3, %rs4; +; CHECK-NOF16-NEXT: and.b16 %rs6, %rs1, 32767; ; CHECK-NOF16-NEXT: and.b32 %r5, %r2, -2147483648; -; CHECK-NOF16-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs6}, %r5; } -; CHECK-NOF16-NEXT: and.b16 %rs7, %rs2, 32767; -; CHECK-NOF16-NEXT: or.b16 %rs8, %rs7, %rs6; +; CHECK-NOF16-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs7}, %r5; } +; CHECK-NOF16-NEXT: or.b16 %rs8, %rs6, %rs7; ; CHECK-NOF16-NEXT: st.param.v2.b16 [func_retval0], {%rs8, %rs5}; ; CHECK-NOF16-NEXT: ret; %tb = fptrunc <2 x float> %b to <2 x half> @@ -1981,8 +1906,7 @@ define <2 x half> @test_copysign_f64(<2 x half> %a, <2 x double> %b) #0 { ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: ; CHECK-NOF16-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [test_copysign_f64_param_1]; -; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_copysign_f64_param_0]; -; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r1; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_copysign_f64_param_0]; ; CHECK-NOF16-NEXT: and.b16 %rs3, %rs2, 32767; ; CHECK-NOF16-NEXT: and.b64 %rd3, %rd2, -9223372036854775808; ; CHECK-NOF16-NEXT: shr.u64 %rd4, %rd3, 48; @@ -2024,15 +1948,13 @@ define <2 x float> @test_copysign_extended(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-NEXT: .reg .b32 %r<5>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_copysign_extended_param_1]; -; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_copysign_extended_param_0]; -; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; -; 
CHECK-NOF16-NEXT: and.b16 %rs3, %rs1, -32768; -; CHECK-NOF16-NEXT: mov.b32 {%rs4, %rs5}, %r1; -; CHECK-NOF16-NEXT: and.b16 %rs6, %rs4, 32767; -; CHECK-NOF16-NEXT: or.b16 %rs7, %rs6, %rs3; -; CHECK-NOF16-NEXT: and.b16 %rs8, %rs2, -32768; -; CHECK-NOF16-NEXT: and.b16 %rs9, %rs5, 32767; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_copysign_extended_param_0]; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_copysign_extended_param_1]; +; CHECK-NOF16-NEXT: and.b16 %rs5, %rs3, -32768; +; CHECK-NOF16-NEXT: and.b16 %rs6, %rs1, 32767; +; CHECK-NOF16-NEXT: or.b16 %rs7, %rs6, %rs5; +; CHECK-NOF16-NEXT: and.b16 %rs8, %rs4, -32768; +; CHECK-NOF16-NEXT: and.b16 %rs9, %rs2, 32767; ; CHECK-NOF16-NEXT: or.b16 %rs10, %rs9, %rs8; ; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs10; ; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs7; @@ -2050,8 +1972,7 @@ define <2 x half> @test_floor(<2 x half> %a) #0 { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_floor_param_0]; -; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; +; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_floor_param_0]; ; CHECK-NEXT: cvt.rmi.f16.f16 %rs3, %rs2; ; CHECK-NEXT: cvt.rmi.f16.f16 %rs4, %rs1; ; CHECK-NEXT: st.param.v2.b16 [func_retval0], {%rs4, %rs3}; @@ -2067,8 +1988,7 @@ define <2 x half> @test_ceil(<2 x half> %a) #0 { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_ceil_param_0]; -; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; +; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_ceil_param_0]; ; CHECK-NEXT: cvt.rpi.f16.f16 %rs3, %rs2; ; CHECK-NEXT: cvt.rpi.f16.f16 %rs4, %rs1; ; CHECK-NEXT: st.param.v2.b16 [func_retval0], {%rs4, %rs3}; @@ -2084,8 +2004,7 @@ define <2 x half> @test_trunc(<2 x half> %a) #0 { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_trunc_param_0]; -; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; +; CHECK-NEXT: ld.param.v2.b16 
{%rs1, %rs2}, [test_trunc_param_0]; ; CHECK-NEXT: cvt.rzi.f16.f16 %rs3, %rs2; ; CHECK-NEXT: cvt.rzi.f16.f16 %rs4, %rs1; ; CHECK-NEXT: st.param.v2.b16 [func_retval0], {%rs4, %rs3}; @@ -2101,8 +2020,7 @@ define <2 x half> @test_rint(<2 x half> %a) #0 { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_rint_param_0]; -; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; +; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_rint_param_0]; ; CHECK-NEXT: cvt.rni.f16.f16 %rs3, %rs2; ; CHECK-NEXT: cvt.rni.f16.f16 %rs4, %rs1; ; CHECK-NEXT: st.param.v2.b16 [func_retval0], {%rs4, %rs3}; @@ -2118,8 +2036,7 @@ define <2 x half> @test_nearbyint(<2 x half> %a) #0 { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_nearbyint_param_0]; -; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; +; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_nearbyint_param_0]; ; CHECK-NEXT: cvt.rni.f16.f16 %rs3, %rs2; ; CHECK-NEXT: cvt.rni.f16.f16 %rs4, %rs1; ; CHECK-NEXT: st.param.v2.b16 [func_retval0], {%rs4, %rs3}; @@ -2135,8 +2052,7 @@ define <2 x half> @test_roundeven(<2 x half> %a) #0 { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_roundeven_param_0]; -; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; +; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_roundeven_param_0]; ; CHECK-NEXT: cvt.rni.f16.f16 %rs3, %rs2; ; CHECK-NEXT: cvt.rni.f16.f16 %rs4, %rs1; ; CHECK-NEXT: st.param.v2.b16 [func_retval0], {%rs4, %rs3}; @@ -2154,8 +2070,7 @@ define <2 x half> @test_round(<2 x half> %a) #0 { ; CHECK-NEXT: .reg .b32 %r<21>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_round_param_0]; -; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; +; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_round_param_0]; ; CHECK-NEXT: cvt.f32.f16 %r2, %rs2; ; CHECK-NEXT: and.b32 %r3, %r2, -2147483648; ; CHECK-NEXT: or.b32 %r4, %r3, 1056964608; @@ -2206,20 
+2121,17 @@ define <2 x half> @test_fmuladd(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 ; CHECK-NOF16-NEXT: .reg .b32 %r<13>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.b32 %r3, [test_fmuladd_param_2]; -; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fmuladd_param_1]; -; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fmuladd_param_0]; -; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2; -; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r2; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs4; -; CHECK-NOF16-NEXT: mov.b32 {%rs5, %rs6}, %r1; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs6; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fmuladd_param_0]; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fmuladd_param_2]; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs4; +; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs5, %rs6}, [test_fmuladd_param_1]; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs6; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs2; ; CHECK-NOF16-NEXT: fma.rn.f32 %r7, %r6, %r5, %r4; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs7, %r7; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r8, %rs1; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r9, %rs3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r10, %rs5; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r8, %rs3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r9, %rs5; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r10, %rs1; ; CHECK-NOF16-NEXT: fma.rn.f32 %r11, %r10, %r9, %r8; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs8, %r11; ; CHECK-NOF16-NEXT: mov.b32 %r12, {%rs8, %rs7}; @@ -2236,8 +2148,7 @@ define <2 x half> @test_shufflevector(<2 x half> %a) #0 { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_shufflevector_param_0]; -; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; +; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_shufflevector_param_0]; ; CHECK-NEXT: st.param.v2.b16 [func_retval0], {%rs2, %rs1}; ; CHECK-NEXT: ret; %s = shufflevector <2 x half> %a, <2 x half> undef, <2 x i32> @@ -2247,13 
+2158,12 @@ define <2 x half> @test_shufflevector(<2 x half> %a) #0 { define <2 x half> @test_insertelement(<2 x half> %a, half %x) #0 { ; CHECK-LABEL: test_insertelement( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b16 %rs<4>; ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b16 %rs1, [test_insertelement_param_1]; -; CHECK-NEXT: ld.param.b32 %r1, [test_insertelement_param_0]; -; CHECK-NEXT: { .reg .b16 tmp; mov.b32 {%rs2, tmp}, %r1; } +; CHECK-NEXT: ld.param.v2.b16 {%rs2, %rs3}, [test_insertelement_param_0]; ; CHECK-NEXT: st.param.v2.b16 [func_retval0], {%rs2, %rs1}; ; CHECK-NEXT: ret; %i = insertelement <2 x half> %a, half %x, i64 1 @@ -2267,8 +2177,7 @@ define <2 x half> @test_sitofp_2xi16_to_2xhalf(<2 x i16> %a) #0 { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_sitofp_2xi16_to_2xhalf_param_0]; -; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; +; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_sitofp_2xi16_to_2xhalf_param_0]; ; CHECK-NEXT: cvt.rn.f16.s16 %rs3, %rs2; ; CHECK-NEXT: cvt.rn.f16.s16 %rs4, %rs1; ; CHECK-NEXT: st.param.v2.b16 [func_retval0], {%rs4, %rs3}; @@ -2284,8 +2193,7 @@ define <2 x half> @test_uitofp_2xi16_to_2xhalf(<2 x i16> %a) #0 { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_uitofp_2xi16_to_2xhalf_param_0]; -; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; +; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_uitofp_2xi16_to_2xhalf_param_0]; ; CHECK-NEXT: cvt.rn.f16.u16 %rs3, %rs2; ; CHECK-NEXT: cvt.rn.f16.u16 %rs4, %rs1; ; CHECK-NEXT: st.param.v2.b16 [func_retval0], {%rs4, %rs3}; diff --git a/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll new file mode 100644 index 0000000000000..af3cb63082e78 --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll @@ -0,0 +1,1962 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; ## Full FP32x2 support enabled by default. +; RUN: llc < %s -mcpu=sm_80 -O0 -disable-post-ra -frame-pointer=all \ +; RUN: -verify-machineinstrs | FileCheck --check-prefixes=CHECK,CHECK-NOF32X2 %s +; RUN: %if ptxas-12.7 %{ \ +; RUN: llc < %s -mcpu=sm_80 -O0 -disable-post-ra -frame-pointer=all \ +; RUN: -verify-machineinstrs | %ptxas-verify -arch=sm_80 \ +; RUN: %} +; RUN: llc < %s -mcpu=sm_100 -O0 -disable-post-ra -frame-pointer=all \ +; RUN: -verify-machineinstrs | FileCheck --check-prefixes=CHECK,CHECK-F32X2 %s +; RUN: %if ptxas-12.7 %{ \ +; RUN: llc < %s -mcpu=sm_100 -O0 -disable-post-ra -frame-pointer=all \ +; RUN: -verify-machineinstrs | %ptxas-verify -arch=sm_100 \ +; RUN: %} + +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "nvptx64-nvidia-cuda" + +define <2 x float> @test_ret_const() #0 { +; CHECK-LABEL: test_ret_const( +; CHECK: { +; CHECK-EMPTY: +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {0f3F800000, 0f40000000}; +; CHECK-NEXT: ret; + ret <2 x float> +} + +define float @test_extract_0(<2 x float> %a) #0 { +; CHECK-LABEL: test_extract_0( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_extract_0_param_0]; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; + %e = extractelement <2 x float> %a, i32 0 + ret float %e +} + +define float @test_extract_1(<2 x float> %a) #0 { +; CHECK-LABEL: test_extract_1( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_extract_1_param_0]; +; CHECK-NEXT: st.param.b32 [func_retval0], %r2; +; CHECK-NEXT: ret; + %e = extractelement <2 x float> %a, i32 1 + ret float %e +} + +; NOTE: disabled as -O3 miscompiles this into pointer arithmetic on +; 
test_extract_i_param_0 where the symbol's address is not taken first (that +; is, moved to a temporary) +; define float @test_extract_i(<2 x float> %a, i64 %idx) #0 { +; %e = extractelement <2 x float> %a, i64 %idx +; ret float %e +; } + +define <2 x float> @test_fadd(<2 x float> %a, <2 x float> %b) #0 { +; CHECK-NOF32X2-LABEL: test_fadd( +; CHECK-NOF32X2: { +; CHECK-NOF32X2-NEXT: .reg .b32 %r<7>; +; CHECK-NOF32X2-NEXT: .reg .b64 %rd<3>; +; CHECK-NOF32X2-EMPTY: +; CHECK-NOF32X2-NEXT: // %bb.0: +; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fadd_param_0]; +; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fadd_param_1]; +; CHECK-NOF32X2-NEXT: add.rn.f32 %r5, %r2, %r4; +; CHECK-NOF32X2-NEXT: add.rn.f32 %r6, %r1, %r3; +; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; +; CHECK-NOF32X2-NEXT: ret; +; +; CHECK-F32X2-LABEL: test_fadd( +; CHECK-F32X2: { +; CHECK-F32X2-NEXT: .reg .b64 %rd<4>; +; CHECK-F32X2-EMPTY: +; CHECK-F32X2-NEXT: // %bb.0: +; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_fadd_param_1]; +; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fadd_param_0]; +; CHECK-F32X2-NEXT: add.rn.f32x2 %rd3, %rd1, %rd2; +; CHECK-F32X2-NEXT: st.param.b64 [func_retval0], %rd3; +; CHECK-F32X2-NEXT: ret; + %r = fadd <2 x float> %a, %b + ret <2 x float> %r +} + +define <2 x float> @test_fadd_imm_0(<2 x float> %a) #0 { +; CHECK-NOF32X2-LABEL: test_fadd_imm_0( +; CHECK-NOF32X2: { +; CHECK-NOF32X2-NEXT: .reg .b32 %r<5>; +; CHECK-NOF32X2-NEXT: .reg .b64 %rd<2>; +; CHECK-NOF32X2-EMPTY: +; CHECK-NOF32X2-NEXT: // %bb.0: +; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fadd_imm_0_param_0]; +; CHECK-NOF32X2-NEXT: add.rn.f32 %r3, %r2, 0f40000000; +; CHECK-NOF32X2-NEXT: add.rn.f32 %r4, %r1, 0f3F800000; +; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; +; CHECK-NOF32X2-NEXT: ret; +; +; CHECK-F32X2-LABEL: test_fadd_imm_0( +; CHECK-F32X2: { +; CHECK-F32X2-NEXT: .reg .b32 %r<3>; +; CHECK-F32X2-NEXT: .reg .b64 %rd<4>; +; CHECK-F32X2-EMPTY: +; 
CHECK-F32X2-NEXT: // %bb.0: +; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fadd_imm_0_param_0]; +; CHECK-F32X2-NEXT: mov.b32 %r1, 0f40000000; +; CHECK-F32X2-NEXT: mov.b32 %r2, 0f3F800000; +; CHECK-F32X2-NEXT: mov.b64 %rd2, {%r2, %r1}; +; CHECK-F32X2-NEXT: add.rn.f32x2 %rd3, %rd1, %rd2; +; CHECK-F32X2-NEXT: st.param.b64 [func_retval0], %rd3; +; CHECK-F32X2-NEXT: ret; + %r = fadd <2 x float> , %a + ret <2 x float> %r +} + +define <2 x float> @test_fadd_imm_1(<2 x float> %a) #0 { +; CHECK-NOF32X2-LABEL: test_fadd_imm_1( +; CHECK-NOF32X2: { +; CHECK-NOF32X2-NEXT: .reg .b32 %r<5>; +; CHECK-NOF32X2-NEXT: .reg .b64 %rd<2>; +; CHECK-NOF32X2-EMPTY: +; CHECK-NOF32X2-NEXT: // %bb.0: +; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fadd_imm_1_param_0]; +; CHECK-NOF32X2-NEXT: add.rn.f32 %r3, %r2, 0f40000000; +; CHECK-NOF32X2-NEXT: add.rn.f32 %r4, %r1, 0f3F800000; +; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; +; CHECK-NOF32X2-NEXT: ret; +; +; CHECK-F32X2-LABEL: test_fadd_imm_1( +; CHECK-F32X2: { +; CHECK-F32X2-NEXT: .reg .b32 %r<3>; +; CHECK-F32X2-NEXT: .reg .b64 %rd<4>; +; CHECK-F32X2-EMPTY: +; CHECK-F32X2-NEXT: // %bb.0: +; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fadd_imm_1_param_0]; +; CHECK-F32X2-NEXT: mov.b32 %r1, 0f40000000; +; CHECK-F32X2-NEXT: mov.b32 %r2, 0f3F800000; +; CHECK-F32X2-NEXT: mov.b64 %rd2, {%r2, %r1}; +; CHECK-F32X2-NEXT: add.rn.f32x2 %rd3, %rd1, %rd2; +; CHECK-F32X2-NEXT: st.param.b64 [func_retval0], %rd3; +; CHECK-F32X2-NEXT: ret; + %r = fadd <2 x float> %a, + ret <2 x float> %r +} + +define <4 x float> @test_fadd_v4(<4 x float> %a, <4 x float> %b) #0 { +; CHECK-NOF32X2-LABEL: test_fadd_v4( +; CHECK-NOF32X2: { +; CHECK-NOF32X2-NEXT: .reg .b32 %r<13>; +; CHECK-NOF32X2-NEXT: .reg .b64 %rd<5>; +; CHECK-NOF32X2-EMPTY: +; CHECK-NOF32X2-NEXT: // %bb.0: +; CHECK-NOF32X2-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [test_fadd_v4_param_0]; +; CHECK-NOF32X2-NEXT: ld.param.v4.b32 {%r5, %r6, %r7, %r8}, [test_fadd_v4_param_1]; +; 
CHECK-NOF32X2-NEXT: add.rn.f32 %r9, %r4, %r8; +; CHECK-NOF32X2-NEXT: add.rn.f32 %r10, %r3, %r7; +; CHECK-NOF32X2-NEXT: add.rn.f32 %r11, %r2, %r6; +; CHECK-NOF32X2-NEXT: add.rn.f32 %r12, %r1, %r5; +; CHECK-NOF32X2-NEXT: st.param.v4.b32 [func_retval0], {%r12, %r11, %r10, %r9}; +; CHECK-NOF32X2-NEXT: ret; +; +; CHECK-F32X2-LABEL: test_fadd_v4( +; CHECK-F32X2: { +; CHECK-F32X2-NEXT: .reg .b64 %rd<7>; +; CHECK-F32X2-EMPTY: +; CHECK-F32X2-NEXT: // %bb.0: +; CHECK-F32X2-NEXT: ld.param.v2.b64 {%rd3, %rd4}, [test_fadd_v4_param_1]; +; CHECK-F32X2-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [test_fadd_v4_param_0]; +; CHECK-F32X2-NEXT: add.rn.f32x2 %rd5, %rd2, %rd4; +; CHECK-F32X2-NEXT: add.rn.f32x2 %rd6, %rd1, %rd3; +; CHECK-F32X2-NEXT: st.param.v2.b64 [func_retval0], {%rd6, %rd5}; +; CHECK-F32X2-NEXT: ret; + %r = fadd <4 x float> %a, %b + ret <4 x float> %r +} + +define <4 x float> @test_fadd_imm_0_v4(<4 x float> %a) #0 { +; CHECK-NOF32X2-LABEL: test_fadd_imm_0_v4( +; CHECK-NOF32X2: { +; CHECK-NOF32X2-NEXT: .reg .b32 %r<9>; +; CHECK-NOF32X2-NEXT: .reg .b64 %rd<3>; +; CHECK-NOF32X2-EMPTY: +; CHECK-NOF32X2-NEXT: // %bb.0: +; CHECK-NOF32X2-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [test_fadd_imm_0_v4_param_0]; +; CHECK-NOF32X2-NEXT: add.rn.f32 %r5, %r4, 0f40800000; +; CHECK-NOF32X2-NEXT: add.rn.f32 %r6, %r3, 0f40400000; +; CHECK-NOF32X2-NEXT: add.rn.f32 %r7, %r2, 0f40000000; +; CHECK-NOF32X2-NEXT: add.rn.f32 %r8, %r1, 0f3F800000; +; CHECK-NOF32X2-NEXT: st.param.v4.b32 [func_retval0], {%r8, %r7, %r6, %r5}; +; CHECK-NOF32X2-NEXT: ret; +; +; CHECK-F32X2-LABEL: test_fadd_imm_0_v4( +; CHECK-F32X2: { +; CHECK-F32X2-NEXT: .reg .b32 %r<5>; +; CHECK-F32X2-NEXT: .reg .b64 %rd<7>; +; CHECK-F32X2-EMPTY: +; CHECK-F32X2-NEXT: // %bb.0: +; CHECK-F32X2-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [test_fadd_imm_0_v4_param_0]; +; CHECK-F32X2-NEXT: mov.b32 %r1, 0f40800000; +; CHECK-F32X2-NEXT: mov.b32 %r2, 0f40400000; +; CHECK-F32X2-NEXT: mov.b64 %rd3, {%r2, %r1}; +; CHECK-F32X2-NEXT: add.rn.f32x2 %rd4, %rd2, 
%rd3; +; CHECK-F32X2-NEXT: mov.b32 %r3, 0f40000000; +; CHECK-F32X2-NEXT: mov.b32 %r4, 0f3F800000; +; CHECK-F32X2-NEXT: mov.b64 %rd5, {%r4, %r3}; +; CHECK-F32X2-NEXT: add.rn.f32x2 %rd6, %rd1, %rd5; +; CHECK-F32X2-NEXT: st.param.v2.b64 [func_retval0], {%rd6, %rd4}; +; CHECK-F32X2-NEXT: ret; + %r = fadd <4 x float> , %a + ret <4 x float> %r +} + +define <4 x float> @test_fadd_imm_1_v4(<4 x float> %a) #0 { +; CHECK-NOF32X2-LABEL: test_fadd_imm_1_v4( +; CHECK-NOF32X2: { +; CHECK-NOF32X2-NEXT: .reg .b32 %r<9>; +; CHECK-NOF32X2-NEXT: .reg .b64 %rd<3>; +; CHECK-NOF32X2-EMPTY: +; CHECK-NOF32X2-NEXT: // %bb.0: +; CHECK-NOF32X2-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [test_fadd_imm_1_v4_param_0]; +; CHECK-NOF32X2-NEXT: add.rn.f32 %r5, %r4, 0f40800000; +; CHECK-NOF32X2-NEXT: add.rn.f32 %r6, %r3, 0f40400000; +; CHECK-NOF32X2-NEXT: add.rn.f32 %r7, %r2, 0f40000000; +; CHECK-NOF32X2-NEXT: add.rn.f32 %r8, %r1, 0f3F800000; +; CHECK-NOF32X2-NEXT: st.param.v4.b32 [func_retval0], {%r8, %r7, %r6, %r5}; +; CHECK-NOF32X2-NEXT: ret; +; +; CHECK-F32X2-LABEL: test_fadd_imm_1_v4( +; CHECK-F32X2: { +; CHECK-F32X2-NEXT: .reg .b32 %r<5>; +; CHECK-F32X2-NEXT: .reg .b64 %rd<7>; +; CHECK-F32X2-EMPTY: +; CHECK-F32X2-NEXT: // %bb.0: +; CHECK-F32X2-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [test_fadd_imm_1_v4_param_0]; +; CHECK-F32X2-NEXT: mov.b32 %r1, 0f40800000; +; CHECK-F32X2-NEXT: mov.b32 %r2, 0f40400000; +; CHECK-F32X2-NEXT: mov.b64 %rd3, {%r2, %r1}; +; CHECK-F32X2-NEXT: add.rn.f32x2 %rd4, %rd2, %rd3; +; CHECK-F32X2-NEXT: mov.b32 %r3, 0f40000000; +; CHECK-F32X2-NEXT: mov.b32 %r4, 0f3F800000; +; CHECK-F32X2-NEXT: mov.b64 %rd5, {%r4, %r3}; +; CHECK-F32X2-NEXT: add.rn.f32x2 %rd6, %rd1, %rd5; +; CHECK-F32X2-NEXT: st.param.v2.b64 [func_retval0], {%rd6, %rd4}; +; CHECK-F32X2-NEXT: ret; + %r = fadd <4 x float> %a, + ret <4 x float> %r +} + +define <2 x float> @test_fsub(<2 x float> %a, <2 x float> %b) #0 { +; CHECK-NOF32X2-LABEL: test_fsub( +; CHECK-NOF32X2: { +; CHECK-NOF32X2-NEXT: .reg .b32 %r<7>; +; 
CHECK-NOF32X2-NEXT: .reg .b64 %rd<3>; +; CHECK-NOF32X2-EMPTY: +; CHECK-NOF32X2-NEXT: // %bb.0: +; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fsub_param_0]; +; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fsub_param_1]; +; CHECK-NOF32X2-NEXT: sub.rn.f32 %r5, %r2, %r4; +; CHECK-NOF32X2-NEXT: sub.rn.f32 %r6, %r1, %r3; +; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; +; CHECK-NOF32X2-NEXT: ret; +; +; CHECK-F32X2-LABEL: test_fsub( +; CHECK-F32X2: { +; CHECK-F32X2-NEXT: .reg .b64 %rd<4>; +; CHECK-F32X2-EMPTY: +; CHECK-F32X2-NEXT: // %bb.0: +; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_fsub_param_1]; +; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fsub_param_0]; +; CHECK-F32X2-NEXT: sub.rn.f32x2 %rd3, %rd1, %rd2; +; CHECK-F32X2-NEXT: st.param.b64 [func_retval0], %rd3; +; CHECK-F32X2-NEXT: ret; + %r = fsub <2 x float> %a, %b + ret <2 x float> %r +} + +define <2 x float> @test_fneg(<2 x float> %a) #0 { +; CHECK-LABEL: test_fneg( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fneg_param_0]; +; CHECK-NEXT: neg.f32 %r3, %r2; +; CHECK-NEXT: neg.f32 %r4, %r1; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; +; CHECK-NEXT: ret; + %r = fneg <2 x float> %a + ret <2 x float> %r +} + +define <2 x float> @test_fmul(<2 x float> %a, <2 x float> %b) #0 { +; CHECK-NOF32X2-LABEL: test_fmul( +; CHECK-NOF32X2: { +; CHECK-NOF32X2-NEXT: .reg .b32 %r<7>; +; CHECK-NOF32X2-NEXT: .reg .b64 %rd<3>; +; CHECK-NOF32X2-EMPTY: +; CHECK-NOF32X2-NEXT: // %bb.0: +; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fmul_param_0]; +; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fmul_param_1]; +; CHECK-NOF32X2-NEXT: mul.rn.f32 %r5, %r2, %r4; +; CHECK-NOF32X2-NEXT: mul.rn.f32 %r6, %r1, %r3; +; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; +; CHECK-NOF32X2-NEXT: ret; +; +; CHECK-F32X2-LABEL: test_fmul( +; CHECK-F32X2: 
{ +; CHECK-F32X2-NEXT: .reg .b64 %rd<4>; +; CHECK-F32X2-EMPTY: +; CHECK-F32X2-NEXT: // %bb.0: +; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_fmul_param_1]; +; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fmul_param_0]; +; CHECK-F32X2-NEXT: mul.rn.f32x2 %rd3, %rd1, %rd2; +; CHECK-F32X2-NEXT: st.param.b64 [func_retval0], %rd3; +; CHECK-F32X2-NEXT: ret; + %r = fmul <2 x float> %a, %b + ret <2 x float> %r +} + +define <2 x float> @test_fdiv(<2 x float> %a, <2 x float> %b) #0 { +; CHECK-LABEL: test_fdiv( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<7>; +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fdiv_param_0]; +; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fdiv_param_1]; +; CHECK-NEXT: div.rn.f32 %r5, %r2, %r4; +; CHECK-NEXT: div.rn.f32 %r6, %r1, %r3; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; +; CHECK-NEXT: ret; + %r = fdiv <2 x float> %a, %b + ret <2 x float> %r +} + +define <2 x float> @test_frem(<2 x float> %a, <2 x float> %b) #0 { +; CHECK-LABEL: test_frem( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<3>; +; CHECK-NEXT: .reg .b32 %r<15>; +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_frem_param_0]; +; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_frem_param_1]; +; CHECK-NEXT: div.rn.f32 %r5, %r2, %r4; +; CHECK-NEXT: cvt.rzi.f32.f32 %r6, %r5; +; CHECK-NEXT: neg.f32 %r7, %r6; +; CHECK-NEXT: fma.rn.f32 %r8, %r7, %r4, %r2; +; CHECK-NEXT: testp.infinite.f32 %p1, %r4; +; CHECK-NEXT: selp.f32 %r9, %r2, %r8, %p1; +; CHECK-NEXT: div.rn.f32 %r10, %r1, %r3; +; CHECK-NEXT: cvt.rzi.f32.f32 %r11, %r10; +; CHECK-NEXT: neg.f32 %r12, %r11; +; CHECK-NEXT: fma.rn.f32 %r13, %r12, %r3, %r1; +; CHECK-NEXT: testp.infinite.f32 %p2, %r3; +; CHECK-NEXT: selp.f32 %r14, %r1, %r13, %p2; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r14, %r9}; +; CHECK-NEXT: ret; + %r = frem <2 x float> %a, %b + ret <2 x float> %r +} + +define <2 x 
float> @test_fadd_ftz(<2 x float> %a, <2 x float> %b) #2 { +; CHECK-NOF32X2-LABEL: test_fadd_ftz( +; CHECK-NOF32X2: { +; CHECK-NOF32X2-NEXT: .reg .b32 %r<7>; +; CHECK-NOF32X2-NEXT: .reg .b64 %rd<3>; +; CHECK-NOF32X2-EMPTY: +; CHECK-NOF32X2-NEXT: // %bb.0: +; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fadd_ftz_param_0]; +; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fadd_ftz_param_1]; +; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r5, %r2, %r4; +; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r6, %r1, %r3; +; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; +; CHECK-NOF32X2-NEXT: ret; +; +; CHECK-F32X2-LABEL: test_fadd_ftz( +; CHECK-F32X2: { +; CHECK-F32X2-NEXT: .reg .b64 %rd<4>; +; CHECK-F32X2-EMPTY: +; CHECK-F32X2-NEXT: // %bb.0: +; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_fadd_ftz_param_1]; +; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fadd_ftz_param_0]; +; CHECK-F32X2-NEXT: add.rn.ftz.f32x2 %rd3, %rd1, %rd2; +; CHECK-F32X2-NEXT: st.param.b64 [func_retval0], %rd3; +; CHECK-F32X2-NEXT: ret; + %r = fadd <2 x float> %a, %b + ret <2 x float> %r +} + +define <2 x float> @test_fadd_imm_0_ftz(<2 x float> %a) #2 { +; CHECK-NOF32X2-LABEL: test_fadd_imm_0_ftz( +; CHECK-NOF32X2: { +; CHECK-NOF32X2-NEXT: .reg .b32 %r<5>; +; CHECK-NOF32X2-NEXT: .reg .b64 %rd<2>; +; CHECK-NOF32X2-EMPTY: +; CHECK-NOF32X2-NEXT: // %bb.0: +; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fadd_imm_0_ftz_param_0]; +; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r3, %r2, 0f40000000; +; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r4, %r1, 0f3F800000; +; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; +; CHECK-NOF32X2-NEXT: ret; +; +; CHECK-F32X2-LABEL: test_fadd_imm_0_ftz( +; CHECK-F32X2: { +; CHECK-F32X2-NEXT: .reg .b32 %r<3>; +; CHECK-F32X2-NEXT: .reg .b64 %rd<4>; +; CHECK-F32X2-EMPTY: +; CHECK-F32X2-NEXT: // %bb.0: +; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fadd_imm_0_ftz_param_0]; +; CHECK-F32X2-NEXT: mov.b32 %r1, 0f40000000; +; CHECK-F32X2-NEXT: mov.b32 %r2, 
0f3F800000; +; CHECK-F32X2-NEXT: mov.b64 %rd2, {%r2, %r1}; +; CHECK-F32X2-NEXT: add.rn.ftz.f32x2 %rd3, %rd1, %rd2; +; CHECK-F32X2-NEXT: st.param.b64 [func_retval0], %rd3; +; CHECK-F32X2-NEXT: ret; + %r = fadd <2 x float> , %a + ret <2 x float> %r +} + +define <2 x float> @test_fadd_imm_1_ftz(<2 x float> %a) #2 { +; CHECK-NOF32X2-LABEL: test_fadd_imm_1_ftz( +; CHECK-NOF32X2: { +; CHECK-NOF32X2-NEXT: .reg .b32 %r<5>; +; CHECK-NOF32X2-NEXT: .reg .b64 %rd<2>; +; CHECK-NOF32X2-EMPTY: +; CHECK-NOF32X2-NEXT: // %bb.0: +; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fadd_imm_1_ftz_param_0]; +; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r3, %r2, 0f40000000; +; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r4, %r1, 0f3F800000; +; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; +; CHECK-NOF32X2-NEXT: ret; +; +; CHECK-F32X2-LABEL: test_fadd_imm_1_ftz( +; CHECK-F32X2: { +; CHECK-F32X2-NEXT: .reg .b32 %r<3>; +; CHECK-F32X2-NEXT: .reg .b64 %rd<4>; +; CHECK-F32X2-EMPTY: +; CHECK-F32X2-NEXT: // %bb.0: +; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fadd_imm_1_ftz_param_0]; +; CHECK-F32X2-NEXT: mov.b32 %r1, 0f40000000; +; CHECK-F32X2-NEXT: mov.b32 %r2, 0f3F800000; +; CHECK-F32X2-NEXT: mov.b64 %rd2, {%r2, %r1}; +; CHECK-F32X2-NEXT: add.rn.ftz.f32x2 %rd3, %rd1, %rd2; +; CHECK-F32X2-NEXT: st.param.b64 [func_retval0], %rd3; +; CHECK-F32X2-NEXT: ret; + %r = fadd <2 x float> %a, + ret <2 x float> %r +} + +define <4 x float> @test_fadd_v4_ftz(<4 x float> %a, <4 x float> %b) #2 { +; CHECK-NOF32X2-LABEL: test_fadd_v4_ftz( +; CHECK-NOF32X2: { +; CHECK-NOF32X2-NEXT: .reg .b32 %r<13>; +; CHECK-NOF32X2-NEXT: .reg .b64 %rd<5>; +; CHECK-NOF32X2-EMPTY: +; CHECK-NOF32X2-NEXT: // %bb.0: +; CHECK-NOF32X2-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [test_fadd_v4_ftz_param_0]; +; CHECK-NOF32X2-NEXT: ld.param.v4.b32 {%r5, %r6, %r7, %r8}, [test_fadd_v4_ftz_param_1]; +; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r9, %r4, %r8; +; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r10, %r3, %r7; +; CHECK-NOF32X2-NEXT: 
add.rn.ftz.f32 %r11, %r2, %r6; +; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r12, %r1, %r5; +; CHECK-NOF32X2-NEXT: st.param.v4.b32 [func_retval0], {%r12, %r11, %r10, %r9}; +; CHECK-NOF32X2-NEXT: ret; +; +; CHECK-F32X2-LABEL: test_fadd_v4_ftz( +; CHECK-F32X2: { +; CHECK-F32X2-NEXT: .reg .b64 %rd<7>; +; CHECK-F32X2-EMPTY: +; CHECK-F32X2-NEXT: // %bb.0: +; CHECK-F32X2-NEXT: ld.param.v2.b64 {%rd3, %rd4}, [test_fadd_v4_ftz_param_1]; +; CHECK-F32X2-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [test_fadd_v4_ftz_param_0]; +; CHECK-F32X2-NEXT: add.rn.ftz.f32x2 %rd5, %rd2, %rd4; +; CHECK-F32X2-NEXT: add.rn.ftz.f32x2 %rd6, %rd1, %rd3; +; CHECK-F32X2-NEXT: st.param.v2.b64 [func_retval0], {%rd6, %rd5}; +; CHECK-F32X2-NEXT: ret; + %r = fadd <4 x float> %a, %b + ret <4 x float> %r +} + +define <4 x float> @test_fadd_imm_0_v4_ftz(<4 x float> %a) #2 { +; CHECK-NOF32X2-LABEL: test_fadd_imm_0_v4_ftz( +; CHECK-NOF32X2: { +; CHECK-NOF32X2-NEXT: .reg .b32 %r<9>; +; CHECK-NOF32X2-NEXT: .reg .b64 %rd<3>; +; CHECK-NOF32X2-EMPTY: +; CHECK-NOF32X2-NEXT: // %bb.0: +; CHECK-NOF32X2-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [test_fadd_imm_0_v4_ftz_param_0]; +; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r5, %r4, 0f40800000; +; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r6, %r3, 0f40400000; +; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r7, %r2, 0f40000000; +; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r8, %r1, 0f3F800000; +; CHECK-NOF32X2-NEXT: st.param.v4.b32 [func_retval0], {%r8, %r7, %r6, %r5}; +; CHECK-NOF32X2-NEXT: ret; +; +; CHECK-F32X2-LABEL: test_fadd_imm_0_v4_ftz( +; CHECK-F32X2: { +; CHECK-F32X2-NEXT: .reg .b32 %r<5>; +; CHECK-F32X2-NEXT: .reg .b64 %rd<7>; +; CHECK-F32X2-EMPTY: +; CHECK-F32X2-NEXT: // %bb.0: +; CHECK-F32X2-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [test_fadd_imm_0_v4_ftz_param_0]; +; CHECK-F32X2-NEXT: mov.b32 %r1, 0f40800000; +; CHECK-F32X2-NEXT: mov.b32 %r2, 0f40400000; +; CHECK-F32X2-NEXT: mov.b64 %rd3, {%r2, %r1}; +; CHECK-F32X2-NEXT: add.rn.ftz.f32x2 %rd4, %rd2, %rd3; +; CHECK-F32X2-NEXT: mov.b32 %r3, 0f40000000; 
+; CHECK-F32X2-NEXT: mov.b32 %r4, 0f3F800000; +; CHECK-F32X2-NEXT: mov.b64 %rd5, {%r4, %r3}; +; CHECK-F32X2-NEXT: add.rn.ftz.f32x2 %rd6, %rd1, %rd5; +; CHECK-F32X2-NEXT: st.param.v2.b64 [func_retval0], {%rd6, %rd4}; +; CHECK-F32X2-NEXT: ret; + %r = fadd <4 x float> , %a + ret <4 x float> %r +} + +define <4 x float> @test_fadd_imm_1_v4_ftz(<4 x float> %a) #2 { +; CHECK-NOF32X2-LABEL: test_fadd_imm_1_v4_ftz( +; CHECK-NOF32X2: { +; CHECK-NOF32X2-NEXT: .reg .b32 %r<9>; +; CHECK-NOF32X2-NEXT: .reg .b64 %rd<3>; +; CHECK-NOF32X2-EMPTY: +; CHECK-NOF32X2-NEXT: // %bb.0: +; CHECK-NOF32X2-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [test_fadd_imm_1_v4_ftz_param_0]; +; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r5, %r4, 0f40800000; +; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r6, %r3, 0f40400000; +; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r7, %r2, 0f40000000; +; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r8, %r1, 0f3F800000; +; CHECK-NOF32X2-NEXT: st.param.v4.b32 [func_retval0], {%r8, %r7, %r6, %r5}; +; CHECK-NOF32X2-NEXT: ret; +; +; CHECK-F32X2-LABEL: test_fadd_imm_1_v4_ftz( +; CHECK-F32X2: { +; CHECK-F32X2-NEXT: .reg .b32 %r<5>; +; CHECK-F32X2-NEXT: .reg .b64 %rd<7>; +; CHECK-F32X2-EMPTY: +; CHECK-F32X2-NEXT: // %bb.0: +; CHECK-F32X2-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [test_fadd_imm_1_v4_ftz_param_0]; +; CHECK-F32X2-NEXT: mov.b32 %r1, 0f40800000; +; CHECK-F32X2-NEXT: mov.b32 %r2, 0f40400000; +; CHECK-F32X2-NEXT: mov.b64 %rd3, {%r2, %r1}; +; CHECK-F32X2-NEXT: add.rn.ftz.f32x2 %rd4, %rd2, %rd3; +; CHECK-F32X2-NEXT: mov.b32 %r3, 0f40000000; +; CHECK-F32X2-NEXT: mov.b32 %r4, 0f3F800000; +; CHECK-F32X2-NEXT: mov.b64 %rd5, {%r4, %r3}; +; CHECK-F32X2-NEXT: add.rn.ftz.f32x2 %rd6, %rd1, %rd5; +; CHECK-F32X2-NEXT: st.param.v2.b64 [func_retval0], {%rd6, %rd4}; +; CHECK-F32X2-NEXT: ret; + %r = fadd <4 x float> %a, + ret <4 x float> %r +} + +define <2 x float> @test_fsub_ftz(<2 x float> %a, <2 x float> %b) #2 { +; CHECK-NOF32X2-LABEL: test_fsub_ftz( +; CHECK-NOF32X2: { +; CHECK-NOF32X2-NEXT: .reg .b32 %r<7>; 
+; CHECK-NOF32X2-NEXT: .reg .b64 %rd<3>; +; CHECK-NOF32X2-EMPTY: +; CHECK-NOF32X2-NEXT: // %bb.0: +; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fsub_ftz_param_0]; +; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fsub_ftz_param_1]; +; CHECK-NOF32X2-NEXT: sub.rn.ftz.f32 %r5, %r2, %r4; +; CHECK-NOF32X2-NEXT: sub.rn.ftz.f32 %r6, %r1, %r3; +; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; +; CHECK-NOF32X2-NEXT: ret; +; +; CHECK-F32X2-LABEL: test_fsub_ftz( +; CHECK-F32X2: { +; CHECK-F32X2-NEXT: .reg .b64 %rd<4>; +; CHECK-F32X2-EMPTY: +; CHECK-F32X2-NEXT: // %bb.0: +; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_fsub_ftz_param_1]; +; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fsub_ftz_param_0]; +; CHECK-F32X2-NEXT: sub.rn.ftz.f32x2 %rd3, %rd1, %rd2; +; CHECK-F32X2-NEXT: st.param.b64 [func_retval0], %rd3; +; CHECK-F32X2-NEXT: ret; + %r = fsub <2 x float> %a, %b + ret <2 x float> %r +} + +define <2 x float> @test_fneg_ftz(<2 x float> %a) #2 { +; CHECK-LABEL: test_fneg_ftz( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fneg_ftz_param_0]; +; CHECK-NEXT: neg.ftz.f32 %r3, %r2; +; CHECK-NEXT: neg.ftz.f32 %r4, %r1; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; +; CHECK-NEXT: ret; + %r = fneg <2 x float> %a + ret <2 x float> %r +} + +define <2 x float> @test_fmul_ftz(<2 x float> %a, <2 x float> %b) #2 { +; CHECK-NOF32X2-LABEL: test_fmul_ftz( +; CHECK-NOF32X2: { +; CHECK-NOF32X2-NEXT: .reg .b32 %r<7>; +; CHECK-NOF32X2-NEXT: .reg .b64 %rd<3>; +; CHECK-NOF32X2-EMPTY: +; CHECK-NOF32X2-NEXT: // %bb.0: +; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fmul_ftz_param_0]; +; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fmul_ftz_param_1]; +; CHECK-NOF32X2-NEXT: mul.rn.ftz.f32 %r5, %r2, %r4; +; CHECK-NOF32X2-NEXT: mul.rn.ftz.f32 %r6, %r1, %r3; +; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; 
+; CHECK-NOF32X2-NEXT: ret; +; +; CHECK-F32X2-LABEL: test_fmul_ftz( +; CHECK-F32X2: { +; CHECK-F32X2-NEXT: .reg .b64 %rd<4>; +; CHECK-F32X2-EMPTY: +; CHECK-F32X2-NEXT: // %bb.0: +; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_fmul_ftz_param_1]; +; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fmul_ftz_param_0]; +; CHECK-F32X2-NEXT: mul.rn.ftz.f32x2 %rd3, %rd1, %rd2; +; CHECK-F32X2-NEXT: st.param.b64 [func_retval0], %rd3; +; CHECK-F32X2-NEXT: ret; + %r = fmul <2 x float> %a, %b + ret <2 x float> %r +} + +define <2 x float> @test_fma_ftz(<2 x float> %a, <2 x float> %b, <2 x float> %c) #2 { +; CHECK-NOF32X2-LABEL: test_fma_ftz( +; CHECK-NOF32X2: { +; CHECK-NOF32X2-NEXT: .reg .b32 %r<9>; +; CHECK-NOF32X2-NEXT: .reg .b64 %rd<4>; +; CHECK-NOF32X2-EMPTY: +; CHECK-NOF32X2-NEXT: // %bb.0: +; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fma_ftz_param_0]; +; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fma_ftz_param_1]; +; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r5, %r6}, [test_fma_ftz_param_2]; +; CHECK-NOF32X2-NEXT: fma.rn.ftz.f32 %r7, %r2, %r4, %r6; +; CHECK-NOF32X2-NEXT: fma.rn.ftz.f32 %r8, %r1, %r3, %r5; +; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r8, %r7}; +; CHECK-NOF32X2-NEXT: ret; +; +; CHECK-F32X2-LABEL: test_fma_ftz( +; CHECK-F32X2: { +; CHECK-F32X2-NEXT: .reg .b64 %rd<5>; +; CHECK-F32X2-EMPTY: +; CHECK-F32X2-NEXT: // %bb.0: +; CHECK-F32X2-NEXT: ld.param.b64 %rd3, [test_fma_ftz_param_2]; +; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_fma_ftz_param_1]; +; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fma_ftz_param_0]; +; CHECK-F32X2-NEXT: fma.rn.ftz.f32x2 %rd4, %rd1, %rd2, %rd3; +; CHECK-F32X2-NEXT: st.param.b64 [func_retval0], %rd4; +; CHECK-F32X2-NEXT: ret; + %r = call <2 x float> @llvm.fma(<2 x float> %a, <2 x float> %b, <2 x float> %c) + ret <2 x float> %r +} + +define <2 x float> @test_fdiv_ftz(<2 x float> %a, <2 x float> %b) #2 { +; CHECK-LABEL: test_fdiv_ftz( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<7>; +; CHECK-NEXT: .reg .b64 %rd<3>; +; 
CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fdiv_ftz_param_0]; +; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fdiv_ftz_param_1]; +; CHECK-NEXT: div.rn.ftz.f32 %r5, %r2, %r4; +; CHECK-NEXT: div.rn.ftz.f32 %r6, %r1, %r3; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; +; CHECK-NEXT: ret; + %r = fdiv <2 x float> %a, %b + ret <2 x float> %r +} + +define <2 x float> @test_frem_ftz(<2 x float> %a, <2 x float> %b) #2 { +; CHECK-LABEL: test_frem_ftz( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<3>; +; CHECK-NEXT: .reg .b32 %r<15>; +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_frem_ftz_param_0]; +; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_frem_ftz_param_1]; +; CHECK-NEXT: div.rn.ftz.f32 %r5, %r2, %r4; +; CHECK-NEXT: cvt.rzi.ftz.f32.f32 %r6, %r5; +; CHECK-NEXT: neg.ftz.f32 %r7, %r6; +; CHECK-NEXT: fma.rn.ftz.f32 %r8, %r7, %r4, %r2; +; CHECK-NEXT: testp.infinite.f32 %p1, %r4; +; CHECK-NEXT: selp.f32 %r9, %r2, %r8, %p1; +; CHECK-NEXT: div.rn.ftz.f32 %r10, %r1, %r3; +; CHECK-NEXT: cvt.rzi.ftz.f32.f32 %r11, %r10; +; CHECK-NEXT: neg.ftz.f32 %r12, %r11; +; CHECK-NEXT: fma.rn.ftz.f32 %r13, %r12, %r3, %r1; +; CHECK-NEXT: testp.infinite.f32 %p2, %r3; +; CHECK-NEXT: selp.f32 %r14, %r1, %r13, %p2; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r14, %r9}; +; CHECK-NEXT: ret; + %r = frem <2 x float> %a, %b + ret <2 x float> %r +} + +define void @test_ldst_v2f32(ptr %a, ptr %b) #0 { +; CHECK-LABEL: test_ldst_v2f32( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd2, [test_ldst_v2f32_param_1]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_ldst_v2f32_param_0]; +; CHECK-NEXT: ld.b64 %rd3, [%rd1]; +; CHECK-NEXT: st.b64 [%rd2], %rd3; +; CHECK-NEXT: ret; + %t1 = load <2 x float>, ptr %a + store <2 x float> %t1, ptr %b, align 32 + ret void +} + +define void @test_ldst_v3f32(ptr %a, ptr %b) #0 { +; 
CHECK-LABEL: test_ldst_v3f32( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd2, [test_ldst_v3f32_param_1]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_ldst_v3f32_param_0]; +; CHECK-NEXT: ld.b64 %rd3, [%rd1]; +; CHECK-NEXT: ld.b32 %r1, [%rd1+8]; +; CHECK-NEXT: st.b32 [%rd2+8], %r1; +; CHECK-NEXT: st.b64 [%rd2], %rd3; +; CHECK-NEXT: ret; + %t1 = load <3 x float>, ptr %a + store <3 x float> %t1, ptr %b, align 32 + ret void +} + +define void @test_ldst_v4f32(ptr %a, ptr %b) #0 { +; CHECK-LABEL: test_ldst_v4f32( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<5>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd2, [test_ldst_v4f32_param_1]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_ldst_v4f32_param_0]; +; CHECK-NEXT: ld.v2.b64 {%rd3, %rd4}, [%rd1]; +; CHECK-NEXT: st.v2.b64 [%rd2], {%rd3, %rd4}; +; CHECK-NEXT: ret; + %t1 = load <4 x float>, ptr %a + store <4 x float> %t1, ptr %b, align 32 + ret void +} + +define void @test_ldst_v8f32(ptr %a, ptr %b) #0 { +; CHECK-LABEL: test_ldst_v8f32( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<7>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd2, [test_ldst_v8f32_param_1]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_ldst_v8f32_param_0]; +; CHECK-NEXT: ld.v2.b64 {%rd3, %rd4}, [%rd1]; +; CHECK-NEXT: ld.v2.b64 {%rd5, %rd6}, [%rd1+16]; +; CHECK-NEXT: st.v2.b64 [%rd2+16], {%rd5, %rd6}; +; CHECK-NEXT: st.v2.b64 [%rd2], {%rd3, %rd4}; +; CHECK-NEXT: ret; + %t1 = load <8 x float>, ptr %a + store <8 x float> %t1, ptr %b, align 32 + ret void +} + +declare <2 x float> @test_callee(<2 x float> %a, <2 x float> %b) #0 + +define <2 x float> @test_call(<2 x float> %a, <2 x float> %b) #0 { +; CHECK-LABEL: test_call( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<5>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd2, [test_call_param_1]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_call_param_0]; +; CHECK-NEXT: { 
// callseq 0, 0 +; CHECK-NEXT: .param .align 8 .b8 param0[8]; +; CHECK-NEXT: st.param.b64 [param0], %rd1; +; CHECK-NEXT: .param .align 8 .b8 param1[8]; +; CHECK-NEXT: st.param.b64 [param1], %rd2; +; CHECK-NEXT: .param .align 8 .b8 retval0[8]; +; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1); +; CHECK-NEXT: ld.param.b64 %rd3, [retval0]; +; CHECK-NEXT: } // callseq 0 +; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; +; CHECK-NEXT: ret; + %r = call <2 x float> @test_callee(<2 x float> %a, <2 x float> %b) + ret <2 x float> %r +} + +define <2 x float> @test_call_flipped(<2 x float> %a, <2 x float> %b) #0 { +; CHECK-LABEL: test_call_flipped( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<5>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd2, [test_call_flipped_param_1]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_call_flipped_param_0]; +; CHECK-NEXT: { // callseq 1, 0 +; CHECK-NEXT: .param .align 8 .b8 param0[8]; +; CHECK-NEXT: st.param.b64 [param0], %rd2; +; CHECK-NEXT: .param .align 8 .b8 param1[8]; +; CHECK-NEXT: st.param.b64 [param1], %rd1; +; CHECK-NEXT: .param .align 8 .b8 retval0[8]; +; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1); +; CHECK-NEXT: ld.param.b64 %rd3, [retval0]; +; CHECK-NEXT: } // callseq 1 +; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; +; CHECK-NEXT: ret; + %r = call <2 x float> @test_callee(<2 x float> %b, <2 x float> %a) + ret <2 x float> %r +} + +define <2 x float> @test_tailcall_flipped(<2 x float> %a, <2 x float> %b) #0 { +; CHECK-LABEL: test_tailcall_flipped( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<5>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd2, [test_tailcall_flipped_param_1]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tailcall_flipped_param_0]; +; CHECK-NEXT: { // callseq 2, 0 +; CHECK-NEXT: .param .align 8 .b8 param0[8]; +; CHECK-NEXT: st.param.b64 [param0], %rd2; +; CHECK-NEXT: .param .align 8 .b8 param1[8]; +; CHECK-NEXT: st.param.b64 [param1], %rd1; +; 
CHECK-NEXT: .param .align 8 .b8 retval0[8]; +; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1); +; CHECK-NEXT: ld.param.b64 %rd3, [retval0]; +; CHECK-NEXT: } // callseq 2 +; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; +; CHECK-NEXT: ret; + %r = tail call <2 x float> @test_callee(<2 x float> %b, <2 x float> %a) + ret <2 x float> %r +} + +define <2 x float> @test_select(<2 x float> %a, <2 x float> %b, i1 zeroext %c) #0 { +; CHECK-LABEL: test_select( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b8 %rs1, [test_select_param_2]; +; CHECK-NEXT: and.b16 %rs2, %rs1, 1; +; CHECK-NEXT: setp.ne.b16 %p1, %rs2, 0; +; CHECK-NEXT: ld.param.b64 %rd2, [test_select_param_1]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_select_param_0]; +; CHECK-NEXT: selp.b64 %rd3, %rd1, %rd2, %p1; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; +; CHECK-NEXT: ret; + %r = select i1 %c, <2 x float> %a, <2 x float> %b + ret <2 x float> %r +} + +define <2 x float> @test_select_cc(<2 x float> %a, <2 x float> %b, <2 x float> %c, <2 x float> %d) #0 { +; CHECK-LABEL: test_select_cc( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<3>; +; CHECK-NEXT: .reg .b32 %r<11>; +; CHECK-NEXT: .reg .b64 %rd<5>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_select_cc_param_0]; +; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_select_cc_param_2]; +; CHECK-NEXT: ld.param.v2.b32 {%r5, %r6}, [test_select_cc_param_3]; +; CHECK-NEXT: setp.neu.f32 %p1, %r3, %r5; +; CHECK-NEXT: setp.neu.f32 %p2, %r4, %r6; +; CHECK-NEXT: ld.param.v2.b32 {%r7, %r8}, [test_select_cc_param_1]; +; CHECK-NEXT: selp.f32 %r9, %r2, %r8, %p2; +; CHECK-NEXT: selp.f32 %r10, %r1, %r7, %p1; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r10, %r9}; +; CHECK-NEXT: ret; + %cc = fcmp une <2 x float> %c, %d + %r = select <2 x i1> %cc, <2 x float> %a, <2 x float> %b + ret <2 x float> 
%r +} + +define <2 x double> @test_select_cc_f64_f32(<2 x double> %a, <2 x double> %b, <2 x float> %c, <2 x float> %d) #0 { +; CHECK-LABEL: test_select_cc_f64_f32( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<3>; +; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-NEXT: .reg .b64 %rd<9>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b64 {%rd3, %rd4}, [test_select_cc_f64_f32_param_1]; +; CHECK-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [test_select_cc_f64_f32_param_0]; +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_select_cc_f64_f32_param_2]; +; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_select_cc_f64_f32_param_3]; +; CHECK-NEXT: setp.neu.f32 %p1, %r1, %r3; +; CHECK-NEXT: setp.neu.f32 %p2, %r2, %r4; +; CHECK-NEXT: selp.f64 %rd7, %rd2, %rd4, %p2; +; CHECK-NEXT: selp.f64 %rd8, %rd1, %rd3, %p1; +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd8, %rd7}; +; CHECK-NEXT: ret; + %cc = fcmp une <2 x float> %c, %d + %r = select <2 x i1> %cc, <2 x double> %a, <2 x double> %b + ret <2 x double> %r +} + +define <2 x float> @test_select_cc_f32_f64(<2 x float> %a, <2 x float> %b, <2 x double> %c, <2 x double> %d) #0 { +; CHECK-LABEL: test_select_cc_f32_f64( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<3>; +; CHECK-NEXT: .reg .b32 %r<7>; +; CHECK-NEXT: .reg .b64 %rd<7>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b64 {%rd5, %rd6}, [test_select_cc_f32_f64_param_3]; +; CHECK-NEXT: ld.param.v2.b64 {%rd3, %rd4}, [test_select_cc_f32_f64_param_2]; +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_select_cc_f32_f64_param_0]; +; CHECK-NEXT: setp.neu.f64 %p1, %rd3, %rd5; +; CHECK-NEXT: setp.neu.f64 %p2, %rd4, %rd6; +; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_select_cc_f32_f64_param_1]; +; CHECK-NEXT: selp.f32 %r5, %r2, %r4, %p2; +; CHECK-NEXT: selp.f32 %r6, %r1, %r3, %p1; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; +; CHECK-NEXT: ret; + %cc = fcmp une <2 x double> %c, %d + %r = select <2 x i1> %cc, <2 x float> %a, <2 x float> %b + ret <2 x 
float> %r +} + +define <2 x i1> @test_fcmp_une(<2 x float> %a, <2 x float> %b) #0 { +; CHECK-LABEL: test_fcmp_une( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<3>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fcmp_une_param_0]; +; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fcmp_une_param_1]; +; CHECK-NEXT: setp.neu.f32 %p1, %r2, %r4; +; CHECK-NEXT: setp.neu.f32 %p2, %r1, %r3; +; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p2; +; CHECK-NEXT: st.param.b8 [func_retval0], %rs1; +; CHECK-NEXT: selp.b16 %rs2, -1, 0, %p1; +; CHECK-NEXT: st.param.b8 [func_retval0+1], %rs2; +; CHECK-NEXT: ret; + %r = fcmp une <2 x float> %a, %b + ret <2 x i1> %r +} + +define <2 x i1> @test_fcmp_ueq(<2 x float> %a, <2 x float> %b) #0 { +; CHECK-LABEL: test_fcmp_ueq( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<3>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fcmp_ueq_param_0]; +; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fcmp_ueq_param_1]; +; CHECK-NEXT: setp.equ.f32 %p1, %r2, %r4; +; CHECK-NEXT: setp.equ.f32 %p2, %r1, %r3; +; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p2; +; CHECK-NEXT: st.param.b8 [func_retval0], %rs1; +; CHECK-NEXT: selp.b16 %rs2, -1, 0, %p1; +; CHECK-NEXT: st.param.b8 [func_retval0+1], %rs2; +; CHECK-NEXT: ret; + %r = fcmp ueq <2 x float> %a, %b + ret <2 x i1> %r +} + +define <2 x i1> @test_fcmp_ugt(<2 x float> %a, <2 x float> %b) #0 { +; CHECK-LABEL: test_fcmp_ugt( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<3>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fcmp_ugt_param_0]; +; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fcmp_ugt_param_1]; +; CHECK-NEXT: setp.gtu.f32 
%p1, %r2, %r4; +; CHECK-NEXT: setp.gtu.f32 %p2, %r1, %r3; +; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p2; +; CHECK-NEXT: st.param.b8 [func_retval0], %rs1; +; CHECK-NEXT: selp.b16 %rs2, -1, 0, %p1; +; CHECK-NEXT: st.param.b8 [func_retval0+1], %rs2; +; CHECK-NEXT: ret; + %r = fcmp ugt <2 x float> %a, %b + ret <2 x i1> %r +} + +define <2 x i1> @test_fcmp_uge(<2 x float> %a, <2 x float> %b) #0 { +; CHECK-LABEL: test_fcmp_uge( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<3>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fcmp_uge_param_0]; +; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fcmp_uge_param_1]; +; CHECK-NEXT: setp.geu.f32 %p1, %r2, %r4; +; CHECK-NEXT: setp.geu.f32 %p2, %r1, %r3; +; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p2; +; CHECK-NEXT: st.param.b8 [func_retval0], %rs1; +; CHECK-NEXT: selp.b16 %rs2, -1, 0, %p1; +; CHECK-NEXT: st.param.b8 [func_retval0+1], %rs2; +; CHECK-NEXT: ret; + %r = fcmp uge <2 x float> %a, %b + ret <2 x i1> %r +} + +define <2 x i1> @test_fcmp_ult(<2 x float> %a, <2 x float> %b) #0 { +; CHECK-LABEL: test_fcmp_ult( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<3>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fcmp_ult_param_0]; +; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fcmp_ult_param_1]; +; CHECK-NEXT: setp.ltu.f32 %p1, %r2, %r4; +; CHECK-NEXT: setp.ltu.f32 %p2, %r1, %r3; +; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p2; +; CHECK-NEXT: st.param.b8 [func_retval0], %rs1; +; CHECK-NEXT: selp.b16 %rs2, -1, 0, %p1; +; CHECK-NEXT: st.param.b8 [func_retval0+1], %rs2; +; CHECK-NEXT: ret; + %r = fcmp ult <2 x float> %a, %b + ret <2 x i1> %r +} + +define <2 x i1> @test_fcmp_ule(<2 x float> %a, <2 x float> %b) #0 { +; CHECK-LABEL: test_fcmp_ule( +; CHECK: { +; CHECK-NEXT: .reg .pred 
%p<3>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fcmp_ule_param_0]; +; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fcmp_ule_param_1]; +; CHECK-NEXT: setp.leu.f32 %p1, %r2, %r4; +; CHECK-NEXT: setp.leu.f32 %p2, %r1, %r3; +; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p2; +; CHECK-NEXT: st.param.b8 [func_retval0], %rs1; +; CHECK-NEXT: selp.b16 %rs2, -1, 0, %p1; +; CHECK-NEXT: st.param.b8 [func_retval0+1], %rs2; +; CHECK-NEXT: ret; + %r = fcmp ule <2 x float> %a, %b + ret <2 x i1> %r +} + +define <2 x i1> @test_fcmp_uno(<2 x float> %a, <2 x float> %b) #0 { +; CHECK-LABEL: test_fcmp_uno( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<3>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fcmp_uno_param_0]; +; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fcmp_uno_param_1]; +; CHECK-NEXT: setp.nan.f32 %p1, %r2, %r4; +; CHECK-NEXT: setp.nan.f32 %p2, %r1, %r3; +; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p2; +; CHECK-NEXT: st.param.b8 [func_retval0], %rs1; +; CHECK-NEXT: selp.b16 %rs2, -1, 0, %p1; +; CHECK-NEXT: st.param.b8 [func_retval0+1], %rs2; +; CHECK-NEXT: ret; + %r = fcmp uno <2 x float> %a, %b + ret <2 x i1> %r +} + +define <2 x i1> @test_fcmp_one(<2 x float> %a, <2 x float> %b) #0 { +; CHECK-LABEL: test_fcmp_one( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<3>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fcmp_one_param_0]; +; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fcmp_one_param_1]; +; CHECK-NEXT: setp.ne.f32 %p1, %r2, %r4; +; CHECK-NEXT: setp.ne.f32 %p2, %r1, %r3; +; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p2; +; CHECK-NEXT: st.param.b8 [func_retval0], %rs1; +; 
CHECK-NEXT: selp.b16 %rs2, -1, 0, %p1; +; CHECK-NEXT: st.param.b8 [func_retval0+1], %rs2; +; CHECK-NEXT: ret; + %r = fcmp one <2 x float> %a, %b + ret <2 x i1> %r +} + +define <2 x i1> @test_fcmp_oeq(<2 x float> %a, <2 x float> %b) #0 { +; CHECK-LABEL: test_fcmp_oeq( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<3>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fcmp_oeq_param_0]; +; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fcmp_oeq_param_1]; +; CHECK-NEXT: setp.eq.f32 %p1, %r2, %r4; +; CHECK-NEXT: setp.eq.f32 %p2, %r1, %r3; +; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p2; +; CHECK-NEXT: st.param.b8 [func_retval0], %rs1; +; CHECK-NEXT: selp.b16 %rs2, -1, 0, %p1; +; CHECK-NEXT: st.param.b8 [func_retval0+1], %rs2; +; CHECK-NEXT: ret; + %r = fcmp oeq <2 x float> %a, %b + ret <2 x i1> %r +} + +define <2 x i1> @test_fcmp_ogt(<2 x float> %a, <2 x float> %b) #0 { +; CHECK-LABEL: test_fcmp_ogt( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<3>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fcmp_ogt_param_0]; +; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fcmp_ogt_param_1]; +; CHECK-NEXT: setp.gt.f32 %p1, %r2, %r4; +; CHECK-NEXT: setp.gt.f32 %p2, %r1, %r3; +; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p2; +; CHECK-NEXT: st.param.b8 [func_retval0], %rs1; +; CHECK-NEXT: selp.b16 %rs2, -1, 0, %p1; +; CHECK-NEXT: st.param.b8 [func_retval0+1], %rs2; +; CHECK-NEXT: ret; + %r = fcmp ogt <2 x float> %a, %b + ret <2 x i1> %r +} + +define <2 x i1> @test_fcmp_oge(<2 x float> %a, <2 x float> %b) #0 { +; CHECK-LABEL: test_fcmp_oge( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<3>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: 
ld.param.v2.b32 {%r1, %r2}, [test_fcmp_oge_param_0]; +; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fcmp_oge_param_1]; +; CHECK-NEXT: setp.ge.f32 %p1, %r2, %r4; +; CHECK-NEXT: setp.ge.f32 %p2, %r1, %r3; +; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p2; +; CHECK-NEXT: st.param.b8 [func_retval0], %rs1; +; CHECK-NEXT: selp.b16 %rs2, -1, 0, %p1; +; CHECK-NEXT: st.param.b8 [func_retval0+1], %rs2; +; CHECK-NEXT: ret; + %r = fcmp oge <2 x float> %a, %b + ret <2 x i1> %r +} + +define <2 x i1> @test_fcmp_olt(<2 x float> %a, <2 x float> %b) #0 { +; CHECK-LABEL: test_fcmp_olt( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<3>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fcmp_olt_param_0]; +; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fcmp_olt_param_1]; +; CHECK-NEXT: setp.lt.f32 %p1, %r2, %r4; +; CHECK-NEXT: setp.lt.f32 %p2, %r1, %r3; +; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p2; +; CHECK-NEXT: st.param.b8 [func_retval0], %rs1; +; CHECK-NEXT: selp.b16 %rs2, -1, 0, %p1; +; CHECK-NEXT: st.param.b8 [func_retval0+1], %rs2; +; CHECK-NEXT: ret; + %r = fcmp olt <2 x float> %a, %b + ret <2 x i1> %r +} + +define <2 x i1> @test_fcmp_ole(<2 x float> %a, <2 x float> %b) #0 { +; CHECK-LABEL: test_fcmp_ole( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<3>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fcmp_ole_param_0]; +; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fcmp_ole_param_1]; +; CHECK-NEXT: setp.le.f32 %p1, %r2, %r4; +; CHECK-NEXT: setp.le.f32 %p2, %r1, %r3; +; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p2; +; CHECK-NEXT: st.param.b8 [func_retval0], %rs1; +; CHECK-NEXT: selp.b16 %rs2, -1, 0, %p1; +; CHECK-NEXT: st.param.b8 [func_retval0+1], %rs2; +; CHECK-NEXT: ret; + %r = fcmp ole <2 x float> %a, %b + ret <2 x i1> %r +} + 
+define <2 x i1> @test_fcmp_ord(<2 x float> %a, <2 x float> %b) #0 { +; CHECK-LABEL: test_fcmp_ord( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<3>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fcmp_ord_param_0]; +; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fcmp_ord_param_1]; +; CHECK-NEXT: setp.num.f32 %p1, %r2, %r4; +; CHECK-NEXT: setp.num.f32 %p2, %r1, %r3; +; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p2; +; CHECK-NEXT: st.param.b8 [func_retval0], %rs1; +; CHECK-NEXT: selp.b16 %rs2, -1, 0, %p1; +; CHECK-NEXT: st.param.b8 [func_retval0+1], %rs2; +; CHECK-NEXT: ret; + %r = fcmp ord <2 x float> %a, %b + ret <2 x i1> %r +} + +define <2 x i32> @test_fptosi_i32(<2 x float> %a) #0 { +; CHECK-LABEL: test_fptosi_i32( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fptosi_i32_param_0]; +; CHECK-NEXT: cvt.rzi.s32.f32 %r3, %r2; +; CHECK-NEXT: cvt.rzi.s32.f32 %r4, %r1; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; +; CHECK-NEXT: ret; + %r = fptosi <2 x float> %a to <2 x i32> + ret <2 x i32> %r +} + +define <2 x i64> @test_fptosi_i64(<2 x float> %a) #0 { +; CHECK-LABEL: test_fptosi_i64( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fptosi_i64_param_0]; +; CHECK-NEXT: cvt.rzi.s64.f32 %rd2, %r2; +; CHECK-NEXT: cvt.rzi.s64.f32 %rd3, %r1; +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd3, %rd2}; +; CHECK-NEXT: ret; + %r = fptosi <2 x float> %a to <2 x i64> + ret <2 x i64> %r +} + +define <2 x i32> @test_fptoui_2xi32(<2 x float> %a) #0 { +; CHECK-LABEL: test_fptoui_2xi32( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; 
CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fptoui_2xi32_param_0]; +; CHECK-NEXT: cvt.rzi.u32.f32 %r3, %r2; +; CHECK-NEXT: cvt.rzi.u32.f32 %r4, %r1; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; +; CHECK-NEXT: ret; + %r = fptoui <2 x float> %a to <2 x i32> + ret <2 x i32> %r +} + +define <2 x i64> @test_fptoui_2xi64(<2 x float> %a) #0 { +; CHECK-LABEL: test_fptoui_2xi64( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fptoui_2xi64_param_0]; +; CHECK-NEXT: cvt.rzi.u64.f32 %rd2, %r2; +; CHECK-NEXT: cvt.rzi.u64.f32 %rd3, %r1; +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd3, %rd2}; +; CHECK-NEXT: ret; + %r = fptoui <2 x float> %a to <2 x i64> + ret <2 x i64> %r +} + +define <2 x float> @test_uitofp_2xi32(<2 x i32> %a) #0 { +; CHECK-LABEL: test_uitofp_2xi32( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_uitofp_2xi32_param_0]; +; CHECK-NEXT: cvt.rn.f32.u32 %r3, %r2; +; CHECK-NEXT: cvt.rn.f32.u32 %r4, %r1; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; +; CHECK-NEXT: ret; + %r = uitofp <2 x i32> %a to <2 x float> + ret <2 x float> %r +} + +define <2 x float> @test_uitofp_2xi64(<2 x i64> %a) #0 { +; CHECK-LABEL: test_uitofp_2xi64( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [test_uitofp_2xi64_param_0]; +; CHECK-NEXT: cvt.rn.f32.u64 %r1, %rd2; +; CHECK-NEXT: cvt.rn.f32.u64 %r2, %rd1; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r2, %r1}; +; CHECK-NEXT: ret; + %r = uitofp <2 x i64> %a to <2 x float> + ret <2 x float> %r +} + +define <2 x float> @test_sitofp_2xi32(<2 x i32> %a) #0 { +; CHECK-LABEL: test_sitofp_2xi32( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: 
ld.param.v2.b32 {%r1, %r2}, [test_sitofp_2xi32_param_0]; +; CHECK-NEXT: cvt.rn.f32.s32 %r3, %r2; +; CHECK-NEXT: cvt.rn.f32.s32 %r4, %r1; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; +; CHECK-NEXT: ret; + %r = sitofp <2 x i32> %a to <2 x float> + ret <2 x float> %r +} + +define <2 x float> @test_sitofp_2xi64(<2 x i64> %a) #0 { +; CHECK-LABEL: test_sitofp_2xi64( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [test_sitofp_2xi64_param_0]; +; CHECK-NEXT: cvt.rn.f32.s64 %r1, %rd2; +; CHECK-NEXT: cvt.rn.f32.s64 %r2, %rd1; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r2, %r1}; +; CHECK-NEXT: ret; + %r = sitofp <2 x i64> %a to <2 x float> + ret <2 x float> %r +} + +define <2 x float> @test_uitofp_2xi32_fadd(<2 x i32> %a, <2 x float> %b) #0 { +; CHECK-NOF32X2-LABEL: test_uitofp_2xi32_fadd( +; CHECK-NOF32X2: { +; CHECK-NOF32X2-NEXT: .reg .b32 %r<9>; +; CHECK-NOF32X2-NEXT: .reg .b64 %rd<2>; +; CHECK-NOF32X2-EMPTY: +; CHECK-NOF32X2-NEXT: // %bb.0: +; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_uitofp_2xi32_fadd_param_0]; +; CHECK-NOF32X2-NEXT: cvt.rn.f32.u32 %r3, %r1; +; CHECK-NOF32X2-NEXT: cvt.rn.f32.u32 %r4, %r2; +; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r5, %r6}, [test_uitofp_2xi32_fadd_param_1]; +; CHECK-NOF32X2-NEXT: add.rn.f32 %r7, %r6, %r4; +; CHECK-NOF32X2-NEXT: add.rn.f32 %r8, %r5, %r3; +; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r8, %r7}; +; CHECK-NOF32X2-NEXT: ret; +; +; CHECK-F32X2-LABEL: test_uitofp_2xi32_fadd( +; CHECK-F32X2: { +; CHECK-F32X2-NEXT: .reg .b32 %r<5>; +; CHECK-F32X2-NEXT: .reg .b64 %rd<4>; +; CHECK-F32X2-EMPTY: +; CHECK-F32X2-NEXT: // %bb.0: +; CHECK-F32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_uitofp_2xi32_fadd_param_0]; +; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_uitofp_2xi32_fadd_param_1]; +; CHECK-F32X2-NEXT: cvt.rn.f32.u32 %r3, %r2; +; CHECK-F32X2-NEXT: cvt.rn.f32.u32 %r4, %r1; +; 
CHECK-F32X2-NEXT: mov.b64 %rd2, {%r4, %r3}; +; CHECK-F32X2-NEXT: add.rn.f32x2 %rd3, %rd1, %rd2; +; CHECK-F32X2-NEXT: st.param.b64 [func_retval0], %rd3; +; CHECK-F32X2-NEXT: ret; + %c = uitofp <2 x i32> %a to <2 x float> + %r = fadd <2 x float> %b, %c + ret <2 x float> %r +} + +define <2 x float> @test_fptrunc_2xdouble(<2 x double> %a) #0 { +; CHECK-LABEL: test_fptrunc_2xdouble( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [test_fptrunc_2xdouble_param_0]; +; CHECK-NEXT: cvt.rn.f32.f64 %r1, %rd2; +; CHECK-NEXT: cvt.rn.f32.f64 %r2, %rd1; +; CHECK-NEXT: mov.b64 %rd3, {%r2, %r1}; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; +; CHECK-NEXT: ret; + %r = fptrunc <2 x double> %a to <2 x float> + ret <2 x float> %r +} + +define <2 x double> @test_fpext_2xdouble(<2 x float> %a) #0 { +; CHECK-LABEL: test_fpext_2xdouble( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fpext_2xdouble_param_0]; +; CHECK-NEXT: cvt.f64.f32 %rd2, %r2; +; CHECK-NEXT: cvt.f64.f32 %rd3, %r1; +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd3, %rd2}; +; CHECK-NEXT: ret; + %r = fpext <2 x float> %a to <2 x double> + ret <2 x double> %r +} + +define <2 x i32> @test_bitcast_2xfloat_to_2xi32(<2 x float> %a) #0 { +; CHECK-LABEL: test_bitcast_2xfloat_to_2xi32( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_bitcast_2xfloat_to_2xi32_param_0]; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd1; +; CHECK-NEXT: ret; + %r = bitcast <2 x float> %a to <2 x i32> + ret <2 x i32> %r +} + +define <2 x float> @test_bitcast_2xi32_to_2xfloat(<2 x i32> %a) #0 { +; CHECK-LABEL: test_bitcast_2xi32_to_2xfloat( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: 
ld.param.v2.b32 {%r1, %r2}, [test_bitcast_2xi32_to_2xfloat_param_0]; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r1, %r2}; +; CHECK-NEXT: ret; + %r = bitcast <2 x i32> %a to <2 x float> + ret <2 x float> %r +} + +define <2 x float> @test_bitcast_double_to_2xfloat(double %a) #0 { +; CHECK-LABEL: test_bitcast_double_to_2xfloat( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_bitcast_double_to_2xfloat_param_0]; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd1; +; CHECK-NEXT: ret; + %r = bitcast double %a to <2 x float> + ret <2 x float> %r +} + +define double @test_bitcast_2xfloat_to_double(<2 x float> %a) #0 { +; CHECK-LABEL: test_bitcast_2xfloat_to_double( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_bitcast_2xfloat_to_double_param_0]; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd1; +; CHECK-NEXT: ret; + %r = bitcast <2 x float> %a to double + ret double %r +} + +define <2 x float> @test_sqrt(<2 x float> %a) #0 { +; CHECK-LABEL: test_sqrt( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_sqrt_param_0]; +; CHECK-NEXT: sqrt.rn.f32 %r3, %r2; +; CHECK-NEXT: sqrt.rn.f32 %r4, %r1; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; +; CHECK-NEXT: ret; + %r = call <2 x float> @llvm.sqrt(<2 x float> %a) + ret <2 x float> %r +} + +;;; Can't do this yet: requires libcall. 
+; XCHECK-LABEL: test_powi( +;define <2 x float> @test_powi(<2 x float> %a, <2 x i32> %b) #0 { +; %r = call <2 x float> @llvm.powi.i32(<2 x float> %a, <2 x i32> %b) +; ret <2 x float> %r +;} + +define <2 x float> @test_sin(<2 x float> %a) #0 #1 { +; CHECK-LABEL: test_sin( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_sin_param_0]; +; CHECK-NEXT: sin.approx.f32 %r3, %r2; +; CHECK-NEXT: sin.approx.f32 %r4, %r1; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; +; CHECK-NEXT: ret; + %r = call <2 x float> @llvm.sin(<2 x float> %a) + ret <2 x float> %r +} + +define <2 x float> @test_cos(<2 x float> %a) #0 #1 { +; CHECK-LABEL: test_cos( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_cos_param_0]; +; CHECK-NEXT: cos.approx.f32 %r3, %r2; +; CHECK-NEXT: cos.approx.f32 %r4, %r1; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; +; CHECK-NEXT: ret; + %r = call <2 x float> @llvm.cos(<2 x float> %a) + ret <2 x float> %r +} + +;;; Can't do this yet: requires libcall. +; XCHECK-LABEL: test_pow( +;define <2 x float> @test_pow(<2 x float> %a, <2 x float> %b) #0 { +; %r = call <2 x float> @llvm.pow(<2 x float> %a, <2 x float> %b) +; ret <2 x float> %r +;} + +;;; Can't do this yet: requires libcall. +; XCHECK-LABEL: test_exp( +;define <2 x float> @test_exp(<2 x float> %a) #0 { +; %r = call <2 x float> @llvm.exp(<2 x float> %a) +; ret <2 x float> %r +;} + +;;; Can't do this yet: requires libcall. +; XCHECK-LABEL: test_exp2( +;define <2 x float> @test_exp2(<2 x float> %a) #0 { +; %r = call <2 x float> @llvm.exp2(<2 x float> %a) +; ret <2 x float> %r +;} + +;;; Can't do this yet: requires libcall. 
+; XCHECK-LABEL: test_log( +;define <2 x float> @test_log(<2 x float> %a) #0 { +; %r = call <2 x float> @llvm.log(<2 x float> %a) +; ret <2 x float> %r +;} + +;;; Can't do this yet: requires libcall. +; XCHECK-LABEL: test_log10( +;define <2 x float> @test_log10(<2 x float> %a) #0 { +; %r = call <2 x float> @llvm.log10(<2 x float> %a) +; ret <2 x float> %r +;} + +;;; Can't do this yet: requires libcall. +; XCHECK-LABEL: test_log2( +;define <2 x float> @test_log2(<2 x float> %a) #0 { +; %r = call <2 x float> @llvm.log2(<2 x float> %a) +; ret <2 x float> %r +;} + + +define <2 x float> @test_fma(<2 x float> %a, <2 x float> %b, <2 x float> %c) #0 { +; CHECK-NOF32X2-LABEL: test_fma( +; CHECK-NOF32X2: { +; CHECK-NOF32X2-NEXT: .reg .b32 %r<9>; +; CHECK-NOF32X2-NEXT: .reg .b64 %rd<4>; +; CHECK-NOF32X2-EMPTY: +; CHECK-NOF32X2-NEXT: // %bb.0: +; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fma_param_0]; +; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fma_param_1]; +; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r5, %r6}, [test_fma_param_2]; +; CHECK-NOF32X2-NEXT: fma.rn.f32 %r7, %r2, %r4, %r6; +; CHECK-NOF32X2-NEXT: fma.rn.f32 %r8, %r1, %r3, %r5; +; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r8, %r7}; +; CHECK-NOF32X2-NEXT: ret; +; +; CHECK-F32X2-LABEL: test_fma( +; CHECK-F32X2: { +; CHECK-F32X2-NEXT: .reg .b64 %rd<5>; +; CHECK-F32X2-EMPTY: +; CHECK-F32X2-NEXT: // %bb.0: +; CHECK-F32X2-NEXT: ld.param.b64 %rd3, [test_fma_param_2]; +; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_fma_param_1]; +; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fma_param_0]; +; CHECK-F32X2-NEXT: fma.rn.f32x2 %rd4, %rd1, %rd2, %rd3; +; CHECK-F32X2-NEXT: st.param.b64 [func_retval0], %rd4; +; CHECK-F32X2-NEXT: ret; + %r = call <2 x float> @llvm.fma(<2 x float> %a, <2 x float> %b, <2 x float> %c) + ret <2 x float> %r +} + +define <2 x float> @test_fabs(<2 x float> %a) #0 { +; CHECK-LABEL: test_fabs( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; 
CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fabs_param_0]; +; CHECK-NEXT: abs.f32 %r3, %r2; +; CHECK-NEXT: abs.f32 %r4, %r1; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; +; CHECK-NEXT: ret; + %r = call <2 x float> @llvm.fabs(<2 x float> %a) + ret <2 x float> %r +} + +define <2 x float> @test_minnum(<2 x float> %a, <2 x float> %b) #0 { +; CHECK-LABEL: test_minnum( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<7>; +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_minnum_param_0]; +; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_minnum_param_1]; +; CHECK-NEXT: min.f32 %r5, %r2, %r4; +; CHECK-NEXT: min.f32 %r6, %r1, %r3; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; +; CHECK-NEXT: ret; + %r = call <2 x float> @llvm.minnum(<2 x float> %a, <2 x float> %b) + ret <2 x float> %r +} + +define <2 x float> @test_maxnum(<2 x float> %a, <2 x float> %b) #0 { +; CHECK-LABEL: test_maxnum( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<7>; +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_maxnum_param_0]; +; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_maxnum_param_1]; +; CHECK-NEXT: max.f32 %r5, %r2, %r4; +; CHECK-NEXT: max.f32 %r6, %r1, %r3; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; +; CHECK-NEXT: ret; + %r = call <2 x float> @llvm.maxnum(<2 x float> %a, <2 x float> %b) + ret <2 x float> %r +} + +define <2 x float> @test_copysign(<2 x float> %a, <2 x float> %b) #0 { +; CHECK-LABEL: test_copysign( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<7>; +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_copysign_param_0]; +; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_copysign_param_1]; +; CHECK-NEXT: copysign.f32 %r5, %r4, %r2; +; CHECK-NEXT: copysign.f32 %r6, %r3, %r1; +; CHECK-NEXT: st.param.v2.b32 
[func_retval0], {%r6, %r5}; +; CHECK-NEXT: ret; + %r = call <2 x float> @llvm.copysign(<2 x float> %a, <2 x float> %b) + ret <2 x float> %r +} + +define <2 x float> @test_copysign_f64(<2 x float> %a, <2 x double> %b) #0 { +; CHECK-LABEL: test_copysign_f64( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<3>; +; CHECK-NEXT: .reg .b32 %r<9>; +; CHECK-NEXT: .reg .b64 %rd<8>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_copysign_f64_param_1]; +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_copysign_f64_param_0]; +; CHECK-NEXT: abs.f32 %r3, %r2; +; CHECK-NEXT: neg.f32 %r4, %r3; +; CHECK-NEXT: shr.u64 %rd4, %rd3, 63; +; CHECK-NEXT: and.b64 %rd5, %rd4, 1; +; CHECK-NEXT: setp.ne.b64 %p1, %rd5, 0; +; CHECK-NEXT: selp.f32 %r5, %r4, %r3, %p1; +; CHECK-NEXT: abs.f32 %r6, %r1; +; CHECK-NEXT: neg.f32 %r7, %r6; +; CHECK-NEXT: shr.u64 %rd6, %rd2, 63; +; CHECK-NEXT: and.b64 %rd7, %rd6, 1; +; CHECK-NEXT: setp.ne.b64 %p2, %rd7, 0; +; CHECK-NEXT: selp.f32 %r8, %r7, %r6, %p2; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r8, %r5}; +; CHECK-NEXT: ret; + %tb = fptrunc <2 x double> %b to <2 x float> + %r = call <2 x float> @llvm.copysign(<2 x float> %a, <2 x float> %tb) + ret <2 x float> %r +} + +define <2 x double> @test_copysign_extended(<2 x float> %a, <2 x float> %b) #0 { +; CHECK-LABEL: test_copysign_extended( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<7>; +; CHECK-NEXT: .reg .b64 %rd<5>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_copysign_extended_param_0]; +; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_copysign_extended_param_1]; +; CHECK-NEXT: copysign.f32 %r5, %r3, %r1; +; CHECK-NEXT: copysign.f32 %r6, %r4, %r2; +; CHECK-NEXT: cvt.f64.f32 %rd3, %r6; +; CHECK-NEXT: cvt.f64.f32 %rd4, %r5; +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd3}; +; CHECK-NEXT: ret; + %r = call <2 x float> @llvm.copysign(<2 x float> %a, <2 x float> %b) + %xr = fpext <2 x float> %r to <2 x double> + ret <2 x 
double> %xr +} + +define <2 x float> @test_floor(<2 x float> %a) #0 { +; CHECK-LABEL: test_floor( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_floor_param_0]; +; CHECK-NEXT: cvt.rmi.f32.f32 %r3, %r2; +; CHECK-NEXT: cvt.rmi.f32.f32 %r4, %r1; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; +; CHECK-NEXT: ret; + %r = call <2 x float> @llvm.floor(<2 x float> %a) + ret <2 x float> %r +} + +define <2 x float> @test_ceil(<2 x float> %a) #0 { +; CHECK-LABEL: test_ceil( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_ceil_param_0]; +; CHECK-NEXT: cvt.rpi.f32.f32 %r3, %r2; +; CHECK-NEXT: cvt.rpi.f32.f32 %r4, %r1; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; +; CHECK-NEXT: ret; + %r = call <2 x float> @llvm.ceil(<2 x float> %a) + ret <2 x float> %r +} + +define <2 x float> @test_trunc(<2 x float> %a) #0 { +; CHECK-LABEL: test_trunc( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_trunc_param_0]; +; CHECK-NEXT: cvt.rzi.f32.f32 %r3, %r2; +; CHECK-NEXT: cvt.rzi.f32.f32 %r4, %r1; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; +; CHECK-NEXT: ret; + %r = call <2 x float> @llvm.trunc(<2 x float> %a) + ret <2 x float> %r +} + +define <2 x float> @test_rint(<2 x float> %a) #0 { +; CHECK-LABEL: test_rint( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_rint_param_0]; +; CHECK-NEXT: cvt.rni.f32.f32 %r3, %r2; +; CHECK-NEXT: cvt.rni.f32.f32 %r4, %r1; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; +; CHECK-NEXT: ret; + %r = call <2 x float> @llvm.rint(<2 x float> %a) + 
ret <2 x float> %r +} + +define <2 x float> @test_nearbyint(<2 x float> %a) #0 { +; CHECK-LABEL: test_nearbyint( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_nearbyint_param_0]; +; CHECK-NEXT: cvt.rni.f32.f32 %r3, %r2; +; CHECK-NEXT: cvt.rni.f32.f32 %r4, %r1; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; +; CHECK-NEXT: ret; + %r = call <2 x float> @llvm.nearbyint(<2 x float> %a) + ret <2 x float> %r +} + +define <2 x float> @test_roundeven(<2 x float> %a) #0 { +; CHECK-LABEL: test_roundeven( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_roundeven_param_0]; +; CHECK-NEXT: cvt.rni.f32.f32 %r3, %r2; +; CHECK-NEXT: cvt.rni.f32.f32 %r4, %r1; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; +; CHECK-NEXT: ret; + %r = call <2 x float> @llvm.roundeven(<2 x float> %a) + ret <2 x float> %r +} + +; check the use of sign mask and 0.5 to implement round +define <2 x float> @test_round(<2 x float> %a) #0 { +; CHECK-LABEL: test_round( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<5>; +; CHECK-NEXT: .reg .b32 %r<19>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_round_param_0]; +; CHECK-NEXT: and.b32 %r3, %r2, -2147483648; +; CHECK-NEXT: or.b32 %r4, %r3, 1056964608; +; CHECK-NEXT: add.rn.f32 %r5, %r2, %r4; +; CHECK-NEXT: cvt.rzi.f32.f32 %r6, %r5; +; CHECK-NEXT: abs.f32 %r7, %r2; +; CHECK-NEXT: setp.gt.f32 %p1, %r7, 0f4B000000; +; CHECK-NEXT: selp.f32 %r8, %r2, %r6, %p1; +; CHECK-NEXT: cvt.rzi.f32.f32 %r9, %r2; +; CHECK-NEXT: setp.lt.f32 %p2, %r7, 0f3F000000; +; CHECK-NEXT: selp.f32 %r10, %r9, %r8, %p2; +; CHECK-NEXT: and.b32 %r11, %r1, -2147483648; +; CHECK-NEXT: or.b32 %r12, %r11, 1056964608; +; CHECK-NEXT: add.rn.f32 %r13, %r1, %r12; +; 
CHECK-NEXT: cvt.rzi.f32.f32 %r14, %r13; +; CHECK-NEXT: abs.f32 %r15, %r1; +; CHECK-NEXT: setp.gt.f32 %p3, %r15, 0f4B000000; +; CHECK-NEXT: selp.f32 %r16, %r1, %r14, %p3; +; CHECK-NEXT: cvt.rzi.f32.f32 %r17, %r1; +; CHECK-NEXT: setp.lt.f32 %p4, %r15, 0f3F000000; +; CHECK-NEXT: selp.f32 %r18, %r17, %r16, %p4; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r18, %r10}; +; CHECK-NEXT: ret; + %r = call <2 x float> @llvm.round(<2 x float> %a) + ret <2 x float> %r +} + +define <2 x float> @test_fmuladd(<2 x float> %a, <2 x float> %b, <2 x float> %c) #0 { +; CHECK-NOF32X2-LABEL: test_fmuladd( +; CHECK-NOF32X2: { +; CHECK-NOF32X2-NEXT: .reg .b32 %r<9>; +; CHECK-NOF32X2-NEXT: .reg .b64 %rd<4>; +; CHECK-NOF32X2-EMPTY: +; CHECK-NOF32X2-NEXT: // %bb.0: +; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fmuladd_param_0]; +; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fmuladd_param_1]; +; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r5, %r6}, [test_fmuladd_param_2]; +; CHECK-NOF32X2-NEXT: fma.rn.f32 %r7, %r2, %r4, %r6; +; CHECK-NOF32X2-NEXT: fma.rn.f32 %r8, %r1, %r3, %r5; +; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r8, %r7}; +; CHECK-NOF32X2-NEXT: ret; +; +; CHECK-F32X2-LABEL: test_fmuladd( +; CHECK-F32X2: { +; CHECK-F32X2-NEXT: .reg .b64 %rd<5>; +; CHECK-F32X2-EMPTY: +; CHECK-F32X2-NEXT: // %bb.0: +; CHECK-F32X2-NEXT: ld.param.b64 %rd3, [test_fmuladd_param_2]; +; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_fmuladd_param_1]; +; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fmuladd_param_0]; +; CHECK-F32X2-NEXT: fma.rn.f32x2 %rd4, %rd1, %rd2, %rd3; +; CHECK-F32X2-NEXT: st.param.b64 [func_retval0], %rd4; +; CHECK-F32X2-NEXT: ret; + %r = call <2 x float> @llvm.fmuladd(<2 x float> %a, <2 x float> %b, <2 x float> %c) + ret <2 x float> %r +} + +define <2 x float> @test_shufflevector(<2 x float> %a) #0 { +; CHECK-LABEL: test_shufflevector( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; 
CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_shufflevector_param_0]; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r2, %r1}; +; CHECK-NEXT: ret; + %s = shufflevector <2 x float> %a, <2 x float> poison, <2 x i32> + ret <2 x float> %s +} + +define <2 x float> @test_insertelement(<2 x float> %a, float %x) #0 { +; CHECK-LABEL: test_insertelement( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<4>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [test_insertelement_param_1]; +; CHECK-NEXT: ld.param.v2.b32 {%r2, %r3}, [test_insertelement_param_0]; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r2, %r1}; +; CHECK-NEXT: ret; + %i = insertelement <2 x float> %a, float %x, i64 1 + ret <2 x float> %i +} + +define <2 x float> @test_sitofp_2xi32_to_2xfloat(<2 x i32> %a) #0 { +; CHECK-LABEL: test_sitofp_2xi32_to_2xfloat( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_sitofp_2xi32_to_2xfloat_param_0]; +; CHECK-NEXT: cvt.rn.f32.s32 %r3, %r2; +; CHECK-NEXT: cvt.rn.f32.s32 %r4, %r1; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; +; CHECK-NEXT: ret; + %r = sitofp <2 x i32> %a to <2 x float> + ret <2 x float> %r +} + +define <2 x float> @test_uitofp_2xi32_to_2xfloat(<2 x i32> %a) #0 { +; CHECK-LABEL: test_uitofp_2xi32_to_2xfloat( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_uitofp_2xi32_to_2xfloat_param_0]; +; CHECK-NEXT: cvt.rn.f32.u32 %r3, %r2; +; CHECK-NEXT: cvt.rn.f32.u32 %r4, %r1; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; +; CHECK-NEXT: ret; + %r = uitofp <2 x i32> %a to <2 x float> + ret <2 x float> %r +} + +attributes #0 = { nounwind } +attributes #1 = { "unsafe-fp-math" = "true" } +attributes #2 = { "denormal-fp-math"="preserve-sign" } diff --git a/llvm/test/CodeGen/NVPTX/fp-contract-f32x2.ll 
b/llvm/test/CodeGen/NVPTX/fp-contract-f32x2.ll new file mode 100644 index 0000000000000..dc0ec0ff7bb0b --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/fp-contract-f32x2.ll @@ -0,0 +1,112 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,FAST +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100 | FileCheck %s --check-prefixes=CHECK,DEFAULT +; RUN: %if ptxas-12.8 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_100 -fp-contract=fast | %ptxas-verify -arch sm_100 %} +; RUN: %if ptxas-12.8 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_100 | %ptxas-verify -arch sm_100 %} + +target triple = "nvptx64-unknown-cuda" + +;; FAST-LABEL: @t0 +;; DEFAULT-LABEL: @t0 +define <2 x float> @t0(<2 x float> %a, <2 x float> %b, <2 x float> %c) { +; FAST-LABEL: t0( +; FAST: { +; FAST-NEXT: .reg .b64 %rd<5>; +; FAST-EMPTY: +; FAST-NEXT: // %bb.0: +; FAST-NEXT: ld.param.b64 %rd1, [t0_param_0]; +; FAST-NEXT: ld.param.b64 %rd2, [t0_param_1]; +; FAST-NEXT: ld.param.b64 %rd3, [t0_param_2]; +; FAST-NEXT: fma.rn.f32x2 %rd4, %rd1, %rd2, %rd3; +; FAST-NEXT: st.param.b64 [func_retval0], %rd4; +; FAST-NEXT: ret; +; +; DEFAULT-LABEL: t0( +; DEFAULT: { +; DEFAULT-NEXT: .reg .b64 %rd<6>; +; DEFAULT-EMPTY: +; DEFAULT-NEXT: // %bb.0: +; DEFAULT-NEXT: ld.param.b64 %rd1, [t0_param_0]; +; DEFAULT-NEXT: ld.param.b64 %rd2, [t0_param_1]; +; DEFAULT-NEXT: mul.rn.f32x2 %rd3, %rd1, %rd2; +; DEFAULT-NEXT: ld.param.b64 %rd4, [t0_param_2]; +; DEFAULT-NEXT: add.rn.f32x2 %rd5, %rd3, %rd4; +; DEFAULT-NEXT: st.param.b64 [func_retval0], %rd5; +; DEFAULT-NEXT: ret; + %v0 = fmul <2 x float> %a, %b + %v1 = fadd <2 x float> %v0, %c + ret <2 x float> %v1 +} + +;; We cannot form an fma here, but make sure we explicitly emit add.rn.f32x2 +;; to prevent ptxas from fusing this with anything else. 
+define <2 x float> @t1(<2 x float> %a, <2 x float> %b) { +; FAST-LABEL: t1( +; FAST: { +; FAST-NEXT: .reg .b64 %rd<6>; +; FAST-EMPTY: +; FAST-NEXT: // %bb.0: +; FAST-NEXT: ld.param.b64 %rd1, [t1_param_0]; +; FAST-NEXT: ld.param.b64 %rd2, [t1_param_1]; +; FAST-NEXT: add.f32x2 %rd3, %rd1, %rd2; +; FAST-NEXT: sub.f32x2 %rd4, %rd1, %rd2; +; FAST-NEXT: mul.f32x2 %rd5, %rd3, %rd4; +; FAST-NEXT: st.param.b64 [func_retval0], %rd5; +; FAST-NEXT: ret; +; +; DEFAULT-LABEL: t1( +; DEFAULT: { +; DEFAULT-NEXT: .reg .b64 %rd<6>; +; DEFAULT-EMPTY: +; DEFAULT-NEXT: // %bb.0: +; DEFAULT-NEXT: ld.param.b64 %rd1, [t1_param_0]; +; DEFAULT-NEXT: ld.param.b64 %rd2, [t1_param_1]; +; DEFAULT-NEXT: add.rn.f32x2 %rd3, %rd1, %rd2; +; DEFAULT-NEXT: sub.rn.f32x2 %rd4, %rd1, %rd2; +; DEFAULT-NEXT: mul.rn.f32x2 %rd5, %rd3, %rd4; +; DEFAULT-NEXT: st.param.b64 [func_retval0], %rd5; +; DEFAULT-NEXT: ret; + %v1 = fadd <2 x float> %a, %b + %v2 = fsub <2 x float> %a, %b + %v3 = fmul <2 x float> %v1, %v2 + ret <2 x float> %v3 +} + +;; Make sure we generate the non ".rn" version when the "contract" flag is +;; present on the instructions +define <2 x float> @t2(<2 x float> %a, <2 x float> %b) { +; CHECK-LABEL: t2( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<6>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [t2_param_0]; +; CHECK-NEXT: ld.param.b64 %rd2, [t2_param_1]; +; CHECK-NEXT: add.f32x2 %rd3, %rd1, %rd2; +; CHECK-NEXT: sub.f32x2 %rd4, %rd1, %rd2; +; CHECK-NEXT: mul.f32x2 %rd5, %rd3, %rd4; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd5; +; CHECK-NEXT: ret; + %v1 = fadd contract <2 x float> %a, %b + %v2 = fsub contract <2 x float> %a, %b + %v3 = fmul contract <2 x float> %v1, %v2 + ret <2 x float> %v3 +} + +;; Make sure we always fold to fma when the "contract" flag is present +define <2 x float> @t3(<2 x float> %a, <2 x float> %b, <2 x float> %c) { +; CHECK-LABEL: t3( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<5>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: 
ld.param.b64 %rd1, [t3_param_0]; +; CHECK-NEXT: ld.param.b64 %rd2, [t3_param_1]; +; CHECK-NEXT: ld.param.b64 %rd3, [t3_param_2]; +; CHECK-NEXT: fma.rn.f32x2 %rd4, %rd1, %rd2, %rd3; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd4; +; CHECK-NEXT: ret; + %v0 = fmul contract <2 x float> %a, %b + %v1 = fadd contract <2 x float> %v0, %c + ret <2 x float> %v1 +} diff --git a/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll index 2b7a06c33d948..1a61498b10142 100644 --- a/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll @@ -32,57 +32,31 @@ define <2 x i16> @test_ret_const() #0 { } define i16 @test_extract_0(<2 x i16> %a) #0 { -; I16x2-LABEL: test_extract_0( -; I16x2: { -; I16x2-NEXT: .reg .b16 %rs<2>; -; I16x2-NEXT: .reg .b32 %r<3>; -; I16x2-EMPTY: -; I16x2-NEXT: // %bb.0: -; I16x2-NEXT: ld.param.b32 %r1, [test_extract_0_param_0]; -; I16x2-NEXT: mov.b32 {%rs1, _}, %r1; -; I16x2-NEXT: cvt.u32.u16 %r2, %rs1; -; I16x2-NEXT: st.param.b32 [func_retval0], %r2; -; I16x2-NEXT: ret; -; -; NO-I16x2-LABEL: test_extract_0( -; NO-I16x2: { -; NO-I16x2-NEXT: .reg .b16 %rs<2>; -; NO-I16x2-NEXT: .reg .b32 %r<3>; -; NO-I16x2-EMPTY: -; NO-I16x2-NEXT: // %bb.0: -; NO-I16x2-NEXT: ld.param.b32 %r1, [test_extract_0_param_0]; -; NO-I16x2-NEXT: { .reg .b16 tmp; mov.b32 {%rs1, tmp}, %r1; } -; NO-I16x2-NEXT: cvt.u32.u16 %r2, %rs1; -; NO-I16x2-NEXT: st.param.b32 [func_retval0], %r2; -; NO-I16x2-NEXT: ret; +; COMMON-LABEL: test_extract_0( +; COMMON: { +; COMMON-NEXT: .reg .b16 %rs<3>; +; COMMON-NEXT: .reg .b32 %r<3>; +; COMMON-EMPTY: +; COMMON-NEXT: // %bb.0: +; COMMON-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_extract_0_param_0]; +; COMMON-NEXT: cvt.u32.u16 %r2, %rs1; +; COMMON-NEXT: st.param.b32 [func_retval0], %r2; +; COMMON-NEXT: ret; %e = extractelement <2 x i16> %a, i32 0 ret i16 %e } define i16 @test_extract_1(<2 x i16> %a) #0 { -; I16x2-LABEL: test_extract_1( -; I16x2: { -; I16x2-NEXT: .reg .b16 
%rs<2>; -; I16x2-NEXT: .reg .b32 %r<3>; -; I16x2-EMPTY: -; I16x2-NEXT: // %bb.0: -; I16x2-NEXT: ld.param.b32 %r1, [test_extract_1_param_0]; -; I16x2-NEXT: mov.b32 {_, %rs1}, %r1; -; I16x2-NEXT: cvt.u32.u16 %r2, %rs1; -; I16x2-NEXT: st.param.b32 [func_retval0], %r2; -; I16x2-NEXT: ret; -; -; NO-I16x2-LABEL: test_extract_1( -; NO-I16x2: { -; NO-I16x2-NEXT: .reg .b16 %rs<2>; -; NO-I16x2-NEXT: .reg .b32 %r<3>; -; NO-I16x2-EMPTY: -; NO-I16x2-NEXT: // %bb.0: -; NO-I16x2-NEXT: ld.param.b32 %r1, [test_extract_1_param_0]; -; NO-I16x2-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r1; } -; NO-I16x2-NEXT: cvt.u32.u16 %r2, %rs1; -; NO-I16x2-NEXT: st.param.b32 [func_retval0], %r2; -; NO-I16x2-NEXT: ret; +; COMMON-LABEL: test_extract_1( +; COMMON: { +; COMMON-NEXT: .reg .b16 %rs<3>; +; COMMON-NEXT: .reg .b32 %r<3>; +; COMMON-EMPTY: +; COMMON-NEXT: // %bb.0: +; COMMON-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_extract_1_param_0]; +; COMMON-NEXT: cvt.u32.u16 %r2, %rs2; +; COMMON-NEXT: st.param.b32 [func_retval0], %r2; +; COMMON-NEXT: ret; %e = extractelement <2 x i16> %a, i32 1 ret i16 %e } @@ -97,9 +71,8 @@ define i16 @test_extract_i(<2 x i16> %a, i64 %idx) #0 { ; COMMON-EMPTY: ; COMMON-NEXT: // %bb.0: ; COMMON-NEXT: ld.param.b64 %rd1, [test_extract_i_param_1]; -; COMMON-NEXT: ld.param.b32 %r1, [test_extract_i_param_0]; +; COMMON-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_extract_i_param_0]; ; COMMON-NEXT: setp.eq.b64 %p1, %rd1, 0; -; COMMON-NEXT: mov.b32 {%rs1, %rs2}, %r1; ; COMMON-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1; ; COMMON-NEXT: cvt.u32.u16 %r2, %rs3; ; COMMON-NEXT: st.param.b32 [func_retval0], %r2; @@ -126,12 +99,10 @@ define <2 x i16> @test_add(<2 x i16> %a, <2 x i16> %b) #0 { ; NO-I16x2-NEXT: .reg .b32 %r<3>; ; NO-I16x2-EMPTY: ; NO-I16x2-NEXT: // %bb.0: -; NO-I16x2-NEXT: ld.param.b32 %r2, [test_add_param_1]; -; NO-I16x2-NEXT: ld.param.b32 %r1, [test_add_param_0]; -; NO-I16x2-NEXT: mov.b32 {%rs1, %rs2}, %r2; -; NO-I16x2-NEXT: mov.b32 {%rs3, %rs4}, %r1; -; NO-I16x2-NEXT: 
add.s16 %rs5, %rs4, %rs2; -; NO-I16x2-NEXT: add.s16 %rs6, %rs3, %rs1; +; NO-I16x2-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_add_param_0]; +; NO-I16x2-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_add_param_1]; +; NO-I16x2-NEXT: add.s16 %rs5, %rs2, %rs4; +; NO-I16x2-NEXT: add.s16 %rs6, %rs1, %rs3; ; NO-I16x2-NEXT: st.param.v2.b16 [func_retval0], {%rs6, %rs5}; ; NO-I16x2-NEXT: ret; %r = add <2 x i16> %a, %b @@ -157,8 +128,7 @@ define <2 x i16> @test_add_imm_0(<2 x i16> %a) #0 { ; NO-I16x2-NEXT: .reg .b32 %r<2>; ; NO-I16x2-EMPTY: ; NO-I16x2-NEXT: // %bb.0: -; NO-I16x2-NEXT: ld.param.b32 %r1, [test_add_imm_0_param_0]; -; NO-I16x2-NEXT: mov.b32 {%rs1, %rs2}, %r1; +; NO-I16x2-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_add_imm_0_param_0]; ; NO-I16x2-NEXT: add.s16 %rs3, %rs2, 2; ; NO-I16x2-NEXT: add.s16 %rs4, %rs1, 1; ; NO-I16x2-NEXT: st.param.v2.b16 [func_retval0], {%rs4, %rs3}; @@ -185,8 +155,7 @@ define <2 x i16> @test_add_imm_1(<2 x i16> %a) #0 { ; NO-I16x2-NEXT: .reg .b32 %r<2>; ; NO-I16x2-EMPTY: ; NO-I16x2-NEXT: // %bb.0: -; NO-I16x2-NEXT: ld.param.b32 %r1, [test_add_imm_1_param_0]; -; NO-I16x2-NEXT: mov.b32 {%rs1, %rs2}, %r1; +; NO-I16x2-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_add_imm_1_param_0]; ; NO-I16x2-NEXT: add.s16 %rs3, %rs2, 2; ; NO-I16x2-NEXT: add.s16 %rs4, %rs1, 1; ; NO-I16x2-NEXT: st.param.v2.b16 [func_retval0], {%rs4, %rs3}; @@ -202,12 +171,10 @@ define <2 x i16> @test_sub(<2 x i16> %a, <2 x i16> %b) #0 { ; COMMON-NEXT: .reg .b32 %r<3>; ; COMMON-EMPTY: ; COMMON-NEXT: // %bb.0: -; COMMON-NEXT: ld.param.b32 %r2, [test_sub_param_1]; -; COMMON-NEXT: ld.param.b32 %r1, [test_sub_param_0]; -; COMMON-NEXT: mov.b32 {%rs1, %rs2}, %r2; -; COMMON-NEXT: mov.b32 {%rs3, %rs4}, %r1; -; COMMON-NEXT: sub.s16 %rs5, %rs4, %rs2; -; COMMON-NEXT: sub.s16 %rs6, %rs3, %rs1; +; COMMON-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_sub_param_0]; +; COMMON-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_sub_param_1]; +; COMMON-NEXT: sub.s16 %rs5, %rs2, %rs4; +; COMMON-NEXT: sub.s16 %rs6, %rs1, 
%rs3; ; COMMON-NEXT: st.param.v2.b16 [func_retval0], {%rs6, %rs5}; ; COMMON-NEXT: ret; %r = sub <2 x i16> %a, %b @@ -232,12 +199,10 @@ define <2 x i16> @test_smax(<2 x i16> %a, <2 x i16> %b) #0 { ; NO-I16x2-NEXT: .reg .b32 %r<3>; ; NO-I16x2-EMPTY: ; NO-I16x2-NEXT: // %bb.0: -; NO-I16x2-NEXT: ld.param.b32 %r2, [test_smax_param_1]; -; NO-I16x2-NEXT: ld.param.b32 %r1, [test_smax_param_0]; -; NO-I16x2-NEXT: mov.b32 {%rs1, %rs2}, %r2; -; NO-I16x2-NEXT: mov.b32 {%rs3, %rs4}, %r1; -; NO-I16x2-NEXT: max.s16 %rs5, %rs4, %rs2; -; NO-I16x2-NEXT: max.s16 %rs6, %rs3, %rs1; +; NO-I16x2-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_smax_param_0]; +; NO-I16x2-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_smax_param_1]; +; NO-I16x2-NEXT: max.s16 %rs5, %rs2, %rs4; +; NO-I16x2-NEXT: max.s16 %rs6, %rs1, %rs3; ; NO-I16x2-NEXT: st.param.v2.b16 [func_retval0], {%rs6, %rs5}; ; NO-I16x2-NEXT: ret; %cmp = icmp sgt <2 x i16> %a, %b @@ -263,12 +228,10 @@ define <2 x i16> @test_umax(<2 x i16> %a, <2 x i16> %b) #0 { ; NO-I16x2-NEXT: .reg .b32 %r<3>; ; NO-I16x2-EMPTY: ; NO-I16x2-NEXT: // %bb.0: -; NO-I16x2-NEXT: ld.param.b32 %r2, [test_umax_param_1]; -; NO-I16x2-NEXT: ld.param.b32 %r1, [test_umax_param_0]; -; NO-I16x2-NEXT: mov.b32 {%rs1, %rs2}, %r2; -; NO-I16x2-NEXT: mov.b32 {%rs3, %rs4}, %r1; -; NO-I16x2-NEXT: max.u16 %rs5, %rs4, %rs2; -; NO-I16x2-NEXT: max.u16 %rs6, %rs3, %rs1; +; NO-I16x2-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_umax_param_0]; +; NO-I16x2-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_umax_param_1]; +; NO-I16x2-NEXT: max.u16 %rs5, %rs2, %rs4; +; NO-I16x2-NEXT: max.u16 %rs6, %rs1, %rs3; ; NO-I16x2-NEXT: st.param.v2.b16 [func_retval0], {%rs6, %rs5}; ; NO-I16x2-NEXT: ret; %cmp = icmp ugt <2 x i16> %a, %b @@ -294,12 +257,10 @@ define <2 x i16> @test_smin(<2 x i16> %a, <2 x i16> %b) #0 { ; NO-I16x2-NEXT: .reg .b32 %r<3>; ; NO-I16x2-EMPTY: ; NO-I16x2-NEXT: // %bb.0: -; NO-I16x2-NEXT: ld.param.b32 %r2, [test_smin_param_1]; -; NO-I16x2-NEXT: ld.param.b32 %r1, [test_smin_param_0]; -; 
NO-I16x2-NEXT: mov.b32 {%rs1, %rs2}, %r2; -; NO-I16x2-NEXT: mov.b32 {%rs3, %rs4}, %r1; -; NO-I16x2-NEXT: min.s16 %rs5, %rs4, %rs2; -; NO-I16x2-NEXT: min.s16 %rs6, %rs3, %rs1; +; NO-I16x2-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_smin_param_0]; +; NO-I16x2-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_smin_param_1]; +; NO-I16x2-NEXT: min.s16 %rs5, %rs2, %rs4; +; NO-I16x2-NEXT: min.s16 %rs6, %rs1, %rs3; ; NO-I16x2-NEXT: st.param.v2.b16 [func_retval0], {%rs6, %rs5}; ; NO-I16x2-NEXT: ret; %cmp = icmp sle <2 x i16> %a, %b @@ -325,12 +286,10 @@ define <2 x i16> @test_umin(<2 x i16> %a, <2 x i16> %b) #0 { ; NO-I16x2-NEXT: .reg .b32 %r<3>; ; NO-I16x2-EMPTY: ; NO-I16x2-NEXT: // %bb.0: -; NO-I16x2-NEXT: ld.param.b32 %r2, [test_umin_param_1]; -; NO-I16x2-NEXT: ld.param.b32 %r1, [test_umin_param_0]; -; NO-I16x2-NEXT: mov.b32 {%rs1, %rs2}, %r2; -; NO-I16x2-NEXT: mov.b32 {%rs3, %rs4}, %r1; -; NO-I16x2-NEXT: min.u16 %rs5, %rs4, %rs2; -; NO-I16x2-NEXT: min.u16 %rs6, %rs3, %rs1; +; NO-I16x2-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_umin_param_0]; +; NO-I16x2-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_umin_param_1]; +; NO-I16x2-NEXT: min.u16 %rs5, %rs2, %rs4; +; NO-I16x2-NEXT: min.u16 %rs6, %rs1, %rs3; ; NO-I16x2-NEXT: st.param.v2.b16 [func_retval0], {%rs6, %rs5}; ; NO-I16x2-NEXT: ret; %cmp = icmp ule <2 x i16> %a, %b @@ -345,12 +304,10 @@ define <2 x i16> @test_mul(<2 x i16> %a, <2 x i16> %b) #0 { ; COMMON-NEXT: .reg .b32 %r<3>; ; COMMON-EMPTY: ; COMMON-NEXT: // %bb.0: -; COMMON-NEXT: ld.param.b32 %r2, [test_mul_param_1]; -; COMMON-NEXT: ld.param.b32 %r1, [test_mul_param_0]; -; COMMON-NEXT: mov.b32 {%rs1, %rs2}, %r2; -; COMMON-NEXT: mov.b32 {%rs3, %rs4}, %r1; -; COMMON-NEXT: mul.lo.s16 %rs5, %rs4, %rs2; -; COMMON-NEXT: mul.lo.s16 %rs6, %rs3, %rs1; +; COMMON-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_mul_param_0]; +; COMMON-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_mul_param_1]; +; COMMON-NEXT: mul.lo.s16 %rs5, %rs2, %rs4; +; COMMON-NEXT: mul.lo.s16 %rs6, %rs1, %rs3; ; COMMON-NEXT: 
st.param.v2.b16 [func_retval0], {%rs6, %rs5}; ; COMMON-NEXT: ret; %r = mul <2 x i16> %a, %b @@ -729,18 +686,14 @@ define <2 x i16> @test_select_cc(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c, <2 x ; COMMON-NEXT: .reg .b32 %r<5>; ; COMMON-EMPTY: ; COMMON-NEXT: // %bb.0: -; COMMON-NEXT: ld.param.b32 %r4, [test_select_cc_param_3]; -; COMMON-NEXT: ld.param.b32 %r3, [test_select_cc_param_2]; -; COMMON-NEXT: ld.param.b32 %r2, [test_select_cc_param_1]; -; COMMON-NEXT: ld.param.b32 %r1, [test_select_cc_param_0]; -; COMMON-NEXT: mov.b32 {%rs1, %rs2}, %r4; -; COMMON-NEXT: mov.b32 {%rs3, %rs4}, %r3; -; COMMON-NEXT: setp.ne.b16 %p1, %rs3, %rs1; -; COMMON-NEXT: setp.ne.b16 %p2, %rs4, %rs2; -; COMMON-NEXT: mov.b32 {%rs5, %rs6}, %r2; -; COMMON-NEXT: mov.b32 {%rs7, %rs8}, %r1; -; COMMON-NEXT: selp.b16 %rs9, %rs8, %rs6, %p2; -; COMMON-NEXT: selp.b16 %rs10, %rs7, %rs5, %p1; +; COMMON-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_select_cc_param_0]; +; COMMON-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_select_cc_param_2]; +; COMMON-NEXT: ld.param.v2.b16 {%rs5, %rs6}, [test_select_cc_param_3]; +; COMMON-NEXT: setp.ne.b16 %p1, %rs3, %rs5; +; COMMON-NEXT: setp.ne.b16 %p2, %rs4, %rs6; +; COMMON-NEXT: ld.param.v2.b16 {%rs7, %rs8}, [test_select_cc_param_1]; +; COMMON-NEXT: selp.b16 %rs9, %rs2, %rs8, %p2; +; COMMON-NEXT: selp.b16 %rs10, %rs1, %rs7, %p1; ; COMMON-NEXT: st.param.v2.b16 [func_retval0], {%rs10, %rs9}; ; COMMON-NEXT: ret; %cc = icmp ne <2 x i16> %c, %d @@ -758,12 +711,10 @@ define <2 x i32> @test_select_cc_i32_i16(<2 x i32> %a, <2 x i32> %b, ; COMMON-NEXT: // %bb.0: ; COMMON-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_select_cc_i32_i16_param_1]; ; COMMON-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_select_cc_i32_i16_param_0]; -; COMMON-NEXT: ld.param.b32 %r6, [test_select_cc_i32_i16_param_3]; -; COMMON-NEXT: ld.param.b32 %r5, [test_select_cc_i32_i16_param_2]; -; COMMON-NEXT: mov.b32 {%rs1, %rs2}, %r6; -; COMMON-NEXT: mov.b32 {%rs3, %rs4}, %r5; -; COMMON-NEXT: setp.ne.b16 %p1, %rs3, %rs1; -; 
COMMON-NEXT: setp.ne.b16 %p2, %rs4, %rs2; +; COMMON-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_select_cc_i32_i16_param_2]; +; COMMON-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_select_cc_i32_i16_param_3]; +; COMMON-NEXT: setp.ne.b16 %p1, %rs1, %rs3; +; COMMON-NEXT: setp.ne.b16 %p2, %rs2, %rs4; ; COMMON-NEXT: selp.b32 %r7, %r2, %r4, %p2; ; COMMON-NEXT: selp.b32 %r8, %r1, %r3, %p1; ; COMMON-NEXT: st.param.v2.b32 [func_retval0], {%r8, %r7}; @@ -784,14 +735,12 @@ define <2 x i16> @test_select_cc_i16_i32(<2 x i16> %a, <2 x i16> %b, ; COMMON-NEXT: // %bb.0: ; COMMON-NEXT: ld.param.v2.b32 {%r5, %r6}, [test_select_cc_i16_i32_param_3]; ; COMMON-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_select_cc_i16_i32_param_2]; -; COMMON-NEXT: ld.param.b32 %r2, [test_select_cc_i16_i32_param_1]; -; COMMON-NEXT: ld.param.b32 %r1, [test_select_cc_i16_i32_param_0]; +; COMMON-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_select_cc_i16_i32_param_0]; ; COMMON-NEXT: setp.ne.b32 %p1, %r3, %r5; ; COMMON-NEXT: setp.ne.b32 %p2, %r4, %r6; -; COMMON-NEXT: mov.b32 {%rs1, %rs2}, %r2; -; COMMON-NEXT: mov.b32 {%rs3, %rs4}, %r1; -; COMMON-NEXT: selp.b16 %rs5, %rs4, %rs2, %p2; -; COMMON-NEXT: selp.b16 %rs6, %rs3, %rs1, %p1; +; COMMON-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_select_cc_i16_i32_param_1]; +; COMMON-NEXT: selp.b16 %rs5, %rs2, %rs4, %p2; +; COMMON-NEXT: selp.b16 %rs6, %rs1, %rs3, %p1; ; COMMON-NEXT: st.param.v2.b16 [func_retval0], {%rs6, %rs5}; ; COMMON-NEXT: ret; <2 x i32> %c, <2 x i32> %d) #0 { @@ -902,8 +851,7 @@ define <2 x i32> @test_zext_2xi32(<2 x i16> %a) #0 { ; COMMON-NEXT: .reg .b32 %r<4>; ; COMMON-EMPTY: ; COMMON-NEXT: // %bb.0: -; COMMON-NEXT: ld.param.b32 %r1, [test_zext_2xi32_param_0]; -; COMMON-NEXT: mov.b32 {%rs1, %rs2}, %r1; +; COMMON-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_zext_2xi32_param_0]; ; COMMON-NEXT: cvt.u32.u16 %r2, %rs2; ; COMMON-NEXT: cvt.u32.u16 %r3, %rs1; ; COMMON-NEXT: st.param.v2.b32 [func_retval0], {%r3, %r2}; @@ -920,8 +868,7 @@ define <2 x i64> @test_zext_2xi64(<2 x 
i16> %a) #0 { ; COMMON-NEXT: .reg .b64 %rd<3>; ; COMMON-EMPTY: ; COMMON-NEXT: // %bb.0: -; COMMON-NEXT: ld.param.b32 %r1, [test_zext_2xi64_param_0]; -; COMMON-NEXT: mov.b32 {%rs1, %rs2}, %r1; +; COMMON-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_zext_2xi64_param_0]; ; COMMON-NEXT: cvt.u64.u16 %rd1, %rs2; ; COMMON-NEXT: cvt.u64.u16 %rd2, %rs1; ; COMMON-NEXT: st.param.v2.b64 [func_retval0], {%rd2, %rd1}; @@ -979,8 +926,7 @@ define <2 x i16> @test_shufflevector(<2 x i16> %a) #0 { ; COMMON-NEXT: .reg .b32 %r<2>; ; COMMON-EMPTY: ; COMMON-NEXT: // %bb.0: -; COMMON-NEXT: ld.param.b32 %r1, [test_shufflevector_param_0]; -; COMMON-NEXT: mov.b32 {%rs1, %rs2}, %r1; +; COMMON-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_shufflevector_param_0]; ; COMMON-NEXT: st.param.v2.b16 [func_retval0], {%rs2, %rs1}; ; COMMON-NEXT: ret; %s = shufflevector <2 x i16> %a, <2 x i16> undef, <2 x i32> @@ -988,29 +934,16 @@ define <2 x i16> @test_shufflevector(<2 x i16> %a) #0 { } define <2 x i16> @test_insertelement(<2 x i16> %a, i16 %x) #0 { -; I16x2-LABEL: test_insertelement( -; I16x2: { -; I16x2-NEXT: .reg .b16 %rs<3>; -; I16x2-NEXT: .reg .b32 %r<2>; -; I16x2-EMPTY: -; I16x2-NEXT: // %bb.0: -; I16x2-NEXT: ld.param.b16 %rs1, [test_insertelement_param_1]; -; I16x2-NEXT: ld.param.b32 %r1, [test_insertelement_param_0]; -; I16x2-NEXT: mov.b32 {%rs2, _}, %r1; -; I16x2-NEXT: st.param.v2.b16 [func_retval0], {%rs2, %rs1}; -; I16x2-NEXT: ret; -; -; NO-I16x2-LABEL: test_insertelement( -; NO-I16x2: { -; NO-I16x2-NEXT: .reg .b16 %rs<3>; -; NO-I16x2-NEXT: .reg .b32 %r<2>; -; NO-I16x2-EMPTY: -; NO-I16x2-NEXT: // %bb.0: -; NO-I16x2-NEXT: ld.param.b16 %rs1, [test_insertelement_param_1]; -; NO-I16x2-NEXT: ld.param.b32 %r1, [test_insertelement_param_0]; -; NO-I16x2-NEXT: { .reg .b16 tmp; mov.b32 {%rs2, tmp}, %r1; } -; NO-I16x2-NEXT: st.param.v2.b16 [func_retval0], {%rs2, %rs1}; -; NO-I16x2-NEXT: ret; +; COMMON-LABEL: test_insertelement( +; COMMON: { +; COMMON-NEXT: .reg .b16 %rs<4>; +; COMMON-NEXT: .reg .b32 %r<2>; +; 
COMMON-EMPTY: +; COMMON-NEXT: // %bb.0: +; COMMON-NEXT: ld.param.b16 %rs1, [test_insertelement_param_1]; +; COMMON-NEXT: ld.param.v2.b16 {%rs2, %rs3}, [test_insertelement_param_0]; +; COMMON-NEXT: st.param.v2.b16 [func_retval0], {%rs2, %rs1}; +; COMMON-NEXT: ret; %i = insertelement <2 x i16> %a, i16 %x, i64 1 ret <2 x i16> %i } @@ -1022,8 +955,7 @@ define <2 x i16> @test_fptosi_2xhalf_to_2xi16(<2 x half> %a) #0 { ; COMMON-NEXT: .reg .b32 %r<2>; ; COMMON-EMPTY: ; COMMON-NEXT: // %bb.0: -; COMMON-NEXT: ld.param.b32 %r1, [test_fptosi_2xhalf_to_2xi16_param_0]; -; COMMON-NEXT: mov.b32 {%rs1, %rs2}, %r1; +; COMMON-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fptosi_2xhalf_to_2xi16_param_0]; ; COMMON-NEXT: cvt.rzi.s16.f16 %rs3, %rs2; ; COMMON-NEXT: cvt.rzi.s16.f16 %rs4, %rs1; ; COMMON-NEXT: st.param.v2.b16 [func_retval0], {%rs4, %rs3}; @@ -1039,8 +971,7 @@ define <2 x i16> @test_fptoui_2xhalf_to_2xi16(<2 x half> %a) #0 { ; COMMON-NEXT: .reg .b32 %r<2>; ; COMMON-EMPTY: ; COMMON-NEXT: // %bb.0: -; COMMON-NEXT: ld.param.b32 %r1, [test_fptoui_2xhalf_to_2xi16_param_0]; -; COMMON-NEXT: mov.b32 {%rs1, %rs2}, %r1; +; COMMON-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fptoui_2xhalf_to_2xi16_param_0]; ; COMMON-NEXT: cvt.rzi.u16.f16 %rs3, %rs2; ; COMMON-NEXT: cvt.rzi.u16.f16 %rs4, %rs1; ; COMMON-NEXT: st.param.v2.b16 [func_retval0], {%rs4, %rs3}; diff --git a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll index 328da60a1f783..1fc42d6cc02c0 100644 --- a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll @@ -1240,18 +1240,16 @@ define <4 x i8> @test_fptosi_4xhalf_to_4xi8(<4 x half> %a) #0 { ; CHECK-NEXT: .reg .b32 %r<12>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fptosi_4xhalf_to_4xi8_param_0]; -; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r2; -; CHECK-NEXT: cvt.rzi.s16.f16 %rs3, %rs2; -; CHECK-NEXT: cvt.rzi.s16.f16 %rs4, %rs1; -; CHECK-NEXT: mov.b32 %r3, {%rs4, 
%rs3}; -; CHECK-NEXT: mov.b32 {%rs5, %rs6}, %r3; -; CHECK-NEXT: cvt.u32.u16 %r4, %rs6; -; CHECK-NEXT: cvt.u32.u16 %r5, %rs5; +; CHECK-NEXT: ld.param.v4.b16 {%rs1, %rs2, %rs3, %rs4}, [test_fptosi_4xhalf_to_4xi8_param_0]; +; CHECK-NEXT: cvt.rzi.s16.f16 %rs5, %rs4; +; CHECK-NEXT: cvt.rzi.s16.f16 %rs6, %rs3; +; CHECK-NEXT: mov.b32 %r3, {%rs6, %rs5}; +; CHECK-NEXT: mov.b32 {%rs7, %rs8}, %r3; +; CHECK-NEXT: cvt.u32.u16 %r4, %rs8; +; CHECK-NEXT: cvt.u32.u16 %r5, %rs7; ; CHECK-NEXT: prmt.b32 %r6, %r5, %r4, 0x3340U; -; CHECK-NEXT: mov.b32 {%rs7, %rs8}, %r1; -; CHECK-NEXT: cvt.rzi.s16.f16 %rs9, %rs8; -; CHECK-NEXT: cvt.rzi.s16.f16 %rs10, %rs7; +; CHECK-NEXT: cvt.rzi.s16.f16 %rs9, %rs2; +; CHECK-NEXT: cvt.rzi.s16.f16 %rs10, %rs1; ; CHECK-NEXT: mov.b32 %r7, {%rs10, %rs9}; ; CHECK-NEXT: mov.b32 {%rs11, %rs12}, %r7; ; CHECK-NEXT: cvt.u32.u16 %r8, %rs12; @@ -1271,18 +1269,16 @@ define <4 x i8> @test_fptoui_4xhalf_to_4xi8(<4 x half> %a) #0 { ; CHECK-NEXT: .reg .b32 %r<12>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fptoui_4xhalf_to_4xi8_param_0]; -; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r2; -; CHECK-NEXT: cvt.rzi.u16.f16 %rs3, %rs2; -; CHECK-NEXT: cvt.rzi.u16.f16 %rs4, %rs1; -; CHECK-NEXT: mov.b32 %r3, {%rs4, %rs3}; -; CHECK-NEXT: mov.b32 {%rs5, %rs6}, %r3; -; CHECK-NEXT: cvt.u32.u16 %r4, %rs6; -; CHECK-NEXT: cvt.u32.u16 %r5, %rs5; +; CHECK-NEXT: ld.param.v4.b16 {%rs1, %rs2, %rs3, %rs4}, [test_fptoui_4xhalf_to_4xi8_param_0]; +; CHECK-NEXT: cvt.rzi.u16.f16 %rs5, %rs4; +; CHECK-NEXT: cvt.rzi.u16.f16 %rs6, %rs3; +; CHECK-NEXT: mov.b32 %r3, {%rs6, %rs5}; +; CHECK-NEXT: mov.b32 {%rs7, %rs8}, %r3; +; CHECK-NEXT: cvt.u32.u16 %r4, %rs8; +; CHECK-NEXT: cvt.u32.u16 %r5, %rs7; ; CHECK-NEXT: prmt.b32 %r6, %r5, %r4, 0x3340U; -; CHECK-NEXT: mov.b32 {%rs7, %rs8}, %r1; -; CHECK-NEXT: cvt.rzi.u16.f16 %rs9, %rs8; -; CHECK-NEXT: cvt.rzi.u16.f16 %rs10, %rs7; +; CHECK-NEXT: cvt.rzi.u16.f16 %rs9, %rs2; +; CHECK-NEXT: cvt.rzi.u16.f16 %rs10, %rs1; ; CHECK-NEXT: 
mov.b32 %r7, {%rs10, %rs9}; ; CHECK-NEXT: mov.b32 {%rs11, %rs12}, %r7; ; CHECK-NEXT: cvt.u32.u16 %r8, %rs12; diff --git a/llvm/test/CodeGen/NVPTX/ldparam-v4.ll b/llvm/test/CodeGen/NVPTX/ldparam-v4.ll index 9e9705709f2bd..efa2666090ccc 100644 --- a/llvm/test/CodeGen/NVPTX/ldparam-v4.ll +++ b/llvm/test/CodeGen/NVPTX/ldparam-v4.ll @@ -7,17 +7,16 @@ declare <4 x float> @bar() define void @foo(ptr %ptr) { ; CHECK-LABEL: foo( ; CHECK: { -; CHECK-NEXT: .reg .b32 %r<9>; -; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-NEXT: .reg .b64 %rd<6>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [foo_param_0]; ; CHECK-NEXT: { // callseq 0, 0 ; CHECK-NEXT: .param .align 16 .b8 retval0[16]; ; CHECK-NEXT: call.uni (retval0), bar, (); -; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [retval0]; +; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [retval0]; ; CHECK-NEXT: } // callseq 0 -; CHECK-NEXT: st.v4.b32 [%rd1], {%r1, %r2, %r3, %r4}; +; CHECK-NEXT: st.v2.b64 [%rd1], {%rd2, %rd3}; ; CHECK-NEXT: ret; %val = tail call <4 x float> @bar() store <4 x float> %val, ptr %ptr diff --git a/llvm/test/CodeGen/NVPTX/load-store-256-addressing-invariant.ll b/llvm/test/CodeGen/NVPTX/load-store-256-addressing-invariant.ll index a9bd3c1caebe5..187ccc9cd89f7 100644 --- a/llvm/test/CodeGen/NVPTX/load-store-256-addressing-invariant.ll +++ b/llvm/test/CodeGen/NVPTX/load-store-256-addressing-invariant.ll @@ -110,11 +110,11 @@ define void @avar_i64() { define void @avar_float() { ; PTX-LABEL: avar_float( ; PTX: { -; PTX-NEXT: .reg .b32 %r<9>; +; PTX-NEXT: .reg .b64 %rd<5>; ; PTX-EMPTY: ; PTX-NEXT: // %bb.0: -; PTX-NEXT: ld.global.nc.v8.b32 {%r1, %r2, %r3, %r4, %r5, %r6, %r7, %r8}, [globalin]; -; PTX-NEXT: st.global.v8.b32 [globalout], {%r1, %r2, %r3, %r4, %r5, %r6, %r7, %r8}; +; PTX-NEXT: ld.global.nc.v4.b64 {%rd1, %rd2, %rd3, %rd4}, [globalin]; +; PTX-NEXT: st.global.v4.b64 [globalout], {%rd1, %rd2, %rd3, %rd4}; ; PTX-NEXT: ret; %load = load <8 x float>, ptr addrspace(1) @globalin, 
!invariant.load !0 store <8 x float> %load, ptr addrspace(1) @globalout @@ -234,11 +234,11 @@ define void @asi_i64() { define void @asi_float() { ; PTX-LABEL: asi_float( ; PTX: { -; PTX-NEXT: .reg .b32 %r<9>; +; PTX-NEXT: .reg .b64 %rd<5>; ; PTX-EMPTY: ; PTX-NEXT: // %bb.0: -; PTX-NEXT: ld.global.nc.v8.b32 {%r1, %r2, %r3, %r4, %r5, %r6, %r7, %r8}, [globalin+32]; -; PTX-NEXT: st.global.v8.b32 [globalout+32], {%r1, %r2, %r3, %r4, %r5, %r6, %r7, %r8}; +; PTX-NEXT: ld.global.nc.v4.b64 {%rd1, %rd2, %rd3, %rd4}, [globalin+32]; +; PTX-NEXT: st.global.v4.b64 [globalout+32], {%rd1, %rd2, %rd3, %rd4}; ; PTX-NEXT: ret; %in.offset = getelementptr inbounds i8, ptr addrspace(1) @globalin, i32 32 %load = load <8 x float>, ptr addrspace(1) %in.offset, !invariant.load !0 @@ -364,14 +364,13 @@ define void @areg_64_i64(ptr addrspace(1) %in, ptr addrspace(1) %out) { define void @areg_64_float(ptr addrspace(1) %in, ptr addrspace(1) %out) { ; PTX-LABEL: areg_64_float( ; PTX: { -; PTX-NEXT: .reg .b32 %r<9>; -; PTX-NEXT: .reg .b64 %rd<3>; +; PTX-NEXT: .reg .b64 %rd<7>; ; PTX-EMPTY: ; PTX-NEXT: // %bb.0: ; PTX-NEXT: ld.param.b64 %rd1, [areg_64_float_param_0]; -; PTX-NEXT: ld.global.nc.v8.b32 {%r1, %r2, %r3, %r4, %r5, %r6, %r7, %r8}, [%rd1]; -; PTX-NEXT: ld.param.b64 %rd2, [areg_64_float_param_1]; -; PTX-NEXT: st.global.v8.b32 [%rd2], {%r1, %r2, %r3, %r4, %r5, %r6, %r7, %r8}; +; PTX-NEXT: ld.global.nc.v4.b64 {%rd2, %rd3, %rd4, %rd5}, [%rd1]; +; PTX-NEXT: ld.param.b64 %rd6, [areg_64_float_param_1]; +; PTX-NEXT: st.global.v4.b64 [%rd6], {%rd2, %rd3, %rd4, %rd5}; ; PTX-NEXT: ret; %load = load <8 x float>, ptr addrspace(1) %in, !invariant.load !0 store <8 x float> %load, ptr addrspace(1) %out @@ -510,14 +509,13 @@ define void @ari_64_i64(ptr addrspace(1) %in, ptr addrspace(1) %out) { define void @ari_64_float(ptr addrspace(1) %in, ptr addrspace(1) %out) { ; PTX-LABEL: ari_64_float( ; PTX: { -; PTX-NEXT: .reg .b32 %r<9>; -; PTX-NEXT: .reg .b64 %rd<3>; +; PTX-NEXT: .reg .b64 %rd<7>; ; PTX-EMPTY: 
; PTX-NEXT: // %bb.0: ; PTX-NEXT: ld.param.b64 %rd1, [ari_64_float_param_0]; ; PTX-NEXT: ld.param.b64 %rd2, [ari_64_float_param_1]; -; PTX-NEXT: ld.global.nc.v8.b32 {%r1, %r2, %r3, %r4, %r5, %r6, %r7, %r8}, [%rd1+32]; -; PTX-NEXT: st.global.v8.b32 [%rd2+32], {%r1, %r2, %r3, %r4, %r5, %r6, %r7, %r8}; +; PTX-NEXT: ld.global.nc.v4.b64 {%rd3, %rd4, %rd5, %rd6}, [%rd1+32]; +; PTX-NEXT: st.global.v4.b64 [%rd2+32], {%rd3, %rd4, %rd5, %rd6}; ; PTX-NEXT: ret; %in.offset = getelementptr inbounds i8, ptr addrspace(1) %in, i32 32 %load = load <8 x float>, ptr addrspace(1) %in.offset, !invariant.load !0 diff --git a/llvm/test/CodeGen/NVPTX/load-store-256-addressing.ll b/llvm/test/CodeGen/NVPTX/load-store-256-addressing.ll index 45e17016d8ee8..a17df1ee39883 100644 --- a/llvm/test/CodeGen/NVPTX/load-store-256-addressing.ll +++ b/llvm/test/CodeGen/NVPTX/load-store-256-addressing.ll @@ -106,11 +106,11 @@ define void @avar_i64() { define void @avar_float() { ; PTX-LABEL: avar_float( ; PTX: { -; PTX-NEXT: .reg .b32 %r<9>; +; PTX-NEXT: .reg .b64 %rd<5>; ; PTX-EMPTY: ; PTX-NEXT: // %bb.0: -; PTX-NEXT: ld.global.v8.b32 {%r1, %r2, %r3, %r4, %r5, %r6, %r7, %r8}, [globalin]; -; PTX-NEXT: st.global.v8.b32 [globalout], {%r1, %r2, %r3, %r4, %r5, %r6, %r7, %r8}; +; PTX-NEXT: ld.global.v4.b64 {%rd1, %rd2, %rd3, %rd4}, [globalin]; +; PTX-NEXT: st.global.v4.b64 [globalout], {%rd1, %rd2, %rd3, %rd4}; ; PTX-NEXT: ret; %load = load <8 x float>, ptr addrspace(1) @globalin store <8 x float> %load, ptr addrspace(1) @globalout @@ -230,11 +230,11 @@ define void @asi_i64() { define void @asi_float() { ; PTX-LABEL: asi_float( ; PTX: { -; PTX-NEXT: .reg .b32 %r<9>; +; PTX-NEXT: .reg .b64 %rd<5>; ; PTX-EMPTY: ; PTX-NEXT: // %bb.0: -; PTX-NEXT: ld.global.v8.b32 {%r1, %r2, %r3, %r4, %r5, %r6, %r7, %r8}, [globalin+32]; -; PTX-NEXT: st.global.v8.b32 [globalout+32], {%r1, %r2, %r3, %r4, %r5, %r6, %r7, %r8}; +; PTX-NEXT: ld.global.v4.b64 {%rd1, %rd2, %rd3, %rd4}, [globalin+32]; +; PTX-NEXT: st.global.v4.b64 
[globalout+32], {%rd1, %rd2, %rd3, %rd4}; ; PTX-NEXT: ret; %in.offset = getelementptr inbounds i8, ptr addrspace(1) @globalin, i32 32 %load = load <8 x float>, ptr addrspace(1) %in.offset @@ -360,14 +360,13 @@ define void @areg_64_i64(ptr addrspace(1) %in, ptr addrspace(1) %out) { define void @areg_64_float(ptr addrspace(1) %in, ptr addrspace(1) %out) { ; PTX-LABEL: areg_64_float( ; PTX: { -; PTX-NEXT: .reg .b32 %r<9>; -; PTX-NEXT: .reg .b64 %rd<3>; +; PTX-NEXT: .reg .b64 %rd<7>; ; PTX-EMPTY: ; PTX-NEXT: // %bb.0: ; PTX-NEXT: ld.param.b64 %rd1, [areg_64_float_param_0]; -; PTX-NEXT: ld.global.v8.b32 {%r1, %r2, %r3, %r4, %r5, %r6, %r7, %r8}, [%rd1]; -; PTX-NEXT: ld.param.b64 %rd2, [areg_64_float_param_1]; -; PTX-NEXT: st.global.v8.b32 [%rd2], {%r1, %r2, %r3, %r4, %r5, %r6, %r7, %r8}; +; PTX-NEXT: ld.global.v4.b64 {%rd2, %rd3, %rd4, %rd5}, [%rd1]; +; PTX-NEXT: ld.param.b64 %rd6, [areg_64_float_param_1]; +; PTX-NEXT: st.global.v4.b64 [%rd6], {%rd2, %rd3, %rd4, %rd5}; ; PTX-NEXT: ret; %load = load <8 x float>, ptr addrspace(1) %in store <8 x float> %load, ptr addrspace(1) %out @@ -506,14 +505,13 @@ define void @ari_64_i64(ptr addrspace(1) %in, ptr addrspace(1) %out) { define void @ari_64_float(ptr addrspace(1) %in, ptr addrspace(1) %out) { ; PTX-LABEL: ari_64_float( ; PTX: { -; PTX-NEXT: .reg .b32 %r<9>; -; PTX-NEXT: .reg .b64 %rd<3>; +; PTX-NEXT: .reg .b64 %rd<7>; ; PTX-EMPTY: ; PTX-NEXT: // %bb.0: ; PTX-NEXT: ld.param.b64 %rd1, [ari_64_float_param_0]; ; PTX-NEXT: ld.param.b64 %rd2, [ari_64_float_param_1]; -; PTX-NEXT: ld.global.v8.b32 {%r1, %r2, %r3, %r4, %r5, %r6, %r7, %r8}, [%rd1+32]; -; PTX-NEXT: st.global.v8.b32 [%rd2+32], {%r1, %r2, %r3, %r4, %r5, %r6, %r7, %r8}; +; PTX-NEXT: ld.global.v4.b64 {%rd3, %rd4, %rd5, %rd6}, [%rd1+32]; +; PTX-NEXT: st.global.v4.b64 [%rd2+32], {%rd3, %rd4, %rd5, %rd6}; ; PTX-NEXT: ret; %in.offset = getelementptr inbounds i8, ptr addrspace(1) %in, i32 32 %load = load <8 x float>, ptr addrspace(1) %in.offset diff --git 
a/llvm/test/CodeGen/NVPTX/load-store-vectors-256.ll b/llvm/test/CodeGen/NVPTX/load-store-vectors-256.ll index dfbc2c34b15d4..68c53cde7f9ac 100644 --- a/llvm/test/CodeGen/NVPTX/load-store-vectors-256.ll +++ b/llvm/test/CodeGen/NVPTX/load-store-vectors-256.ll @@ -139,16 +139,15 @@ define void @generic_4xi64(ptr %a, ptr %b) { define void @generic_8xfloat(ptr %a, ptr %b) { ; CHECK-LABEL: generic_8xfloat( ; CHECK: { -; CHECK-NEXT: .reg .b32 %r<9>; -; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-NEXT: .reg .b64 %rd<7>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [generic_8xfloat_param_0]; -; CHECK-NEXT: ld.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1]; -; CHECK-NEXT: ld.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1+16]; -; CHECK-NEXT: ld.param.b64 %rd2, [generic_8xfloat_param_1]; -; CHECK-NEXT: st.v4.b32 [%rd2+16], {%r5, %r6, %r7, %r8}; -; CHECK-NEXT: st.v4.b32 [%rd2], {%r1, %r2, %r3, %r4}; +; CHECK-NEXT: ld.v2.b64 {%rd2, %rd3}, [%rd1]; +; CHECK-NEXT: ld.v2.b64 {%rd4, %rd5}, [%rd1+16]; +; CHECK-NEXT: ld.param.b64 %rd6, [generic_8xfloat_param_1]; +; CHECK-NEXT: st.v2.b64 [%rd6+16], {%rd4, %rd5}; +; CHECK-NEXT: st.v2.b64 [%rd6], {%rd2, %rd3}; ; CHECK-NEXT: ret; %a.load = load <8 x float>, ptr %a store <8 x float> %a.load, ptr %b @@ -291,16 +290,15 @@ define void @generic_volatile_4xi64(ptr %a, ptr %b) { define void @generic_volatile_8xfloat(ptr %a, ptr %b) { ; CHECK-LABEL: generic_volatile_8xfloat( ; CHECK: { -; CHECK-NEXT: .reg .b32 %r<9>; -; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-NEXT: .reg .b64 %rd<7>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [generic_volatile_8xfloat_param_0]; -; CHECK-NEXT: ld.volatile.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1]; -; CHECK-NEXT: ld.volatile.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1+16]; -; CHECK-NEXT: ld.param.b64 %rd2, [generic_volatile_8xfloat_param_1]; -; CHECK-NEXT: st.volatile.v4.b32 [%rd2+16], {%r5, %r6, %r7, %r8}; -; CHECK-NEXT: st.volatile.v4.b32 [%rd2], {%r1, %r2, %r3, %r4}; +; CHECK-NEXT: ld.volatile.v2.b64 
{%rd2, %rd3}, [%rd1]; +; CHECK-NEXT: ld.volatile.v2.b64 {%rd4, %rd5}, [%rd1+16]; +; CHECK-NEXT: ld.param.b64 %rd6, [generic_volatile_8xfloat_param_1]; +; CHECK-NEXT: st.volatile.v2.b64 [%rd6+16], {%rd4, %rd5}; +; CHECK-NEXT: st.volatile.v2.b64 [%rd6], {%rd2, %rd3}; ; CHECK-NEXT: ret; %a.load = load volatile <8 x float>, ptr %a store volatile <8 x float> %a.load, ptr %b @@ -516,28 +514,26 @@ define void @global_4xi64(ptr addrspace(1) %a, ptr addrspace(1) %b) { define void @global_8xfloat(ptr addrspace(1) %a, ptr addrspace(1) %b) { ; SM90-LABEL: global_8xfloat( ; SM90: { -; SM90-NEXT: .reg .b32 %r<9>; -; SM90-NEXT: .reg .b64 %rd<3>; +; SM90-NEXT: .reg .b64 %rd<7>; ; SM90-EMPTY: ; SM90-NEXT: // %bb.0: ; SM90-NEXT: ld.param.b64 %rd1, [global_8xfloat_param_0]; -; SM90-NEXT: ld.global.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1]; -; SM90-NEXT: ld.global.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1+16]; -; SM90-NEXT: ld.param.b64 %rd2, [global_8xfloat_param_1]; -; SM90-NEXT: st.global.v4.b32 [%rd2+16], {%r5, %r6, %r7, %r8}; -; SM90-NEXT: st.global.v4.b32 [%rd2], {%r1, %r2, %r3, %r4}; +; SM90-NEXT: ld.global.v2.b64 {%rd2, %rd3}, [%rd1]; +; SM90-NEXT: ld.global.v2.b64 {%rd4, %rd5}, [%rd1+16]; +; SM90-NEXT: ld.param.b64 %rd6, [global_8xfloat_param_1]; +; SM90-NEXT: st.global.v2.b64 [%rd6+16], {%rd4, %rd5}; +; SM90-NEXT: st.global.v2.b64 [%rd6], {%rd2, %rd3}; ; SM90-NEXT: ret; ; ; SM100-LABEL: global_8xfloat( ; SM100: { -; SM100-NEXT: .reg .b32 %r<9>; -; SM100-NEXT: .reg .b64 %rd<3>; +; SM100-NEXT: .reg .b64 %rd<7>; ; SM100-EMPTY: ; SM100-NEXT: // %bb.0: ; SM100-NEXT: ld.param.b64 %rd1, [global_8xfloat_param_0]; -; SM100-NEXT: ld.global.v8.b32 {%r1, %r2, %r3, %r4, %r5, %r6, %r7, %r8}, [%rd1]; -; SM100-NEXT: ld.param.b64 %rd2, [global_8xfloat_param_1]; -; SM100-NEXT: st.global.v8.b32 [%rd2], {%r1, %r2, %r3, %r4, %r5, %r6, %r7, %r8}; +; SM100-NEXT: ld.global.v4.b64 {%rd2, %rd3, %rd4, %rd5}, [%rd1]; +; SM100-NEXT: ld.param.b64 %rd6, [global_8xfloat_param_1]; +; SM100-NEXT: st.global.v4.b64 [%rd6], 
{%rd2, %rd3, %rd4, %rd5}; ; SM100-NEXT: ret; %a.load = load <8 x float>, ptr addrspace(1) %a store <8 x float> %a.load, ptr addrspace(1) %b @@ -762,28 +758,26 @@ define void @global_volatile_4xi64(ptr addrspace(1) %a, ptr addrspace(1) %b) { define void @global_volatile_8xfloat(ptr addrspace(1) %a, ptr addrspace(1) %b) { ; SM90-LABEL: global_volatile_8xfloat( ; SM90: { -; SM90-NEXT: .reg .b32 %r<9>; -; SM90-NEXT: .reg .b64 %rd<3>; +; SM90-NEXT: .reg .b64 %rd<7>; ; SM90-EMPTY: ; SM90-NEXT: // %bb.0: ; SM90-NEXT: ld.param.b64 %rd1, [global_volatile_8xfloat_param_0]; -; SM90-NEXT: ld.volatile.global.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1]; -; SM90-NEXT: ld.volatile.global.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1+16]; -; SM90-NEXT: ld.param.b64 %rd2, [global_volatile_8xfloat_param_1]; -; SM90-NEXT: st.volatile.global.v4.b32 [%rd2+16], {%r5, %r6, %r7, %r8}; -; SM90-NEXT: st.volatile.global.v4.b32 [%rd2], {%r1, %r2, %r3, %r4}; +; SM90-NEXT: ld.volatile.global.v2.b64 {%rd2, %rd3}, [%rd1]; +; SM90-NEXT: ld.volatile.global.v2.b64 {%rd4, %rd5}, [%rd1+16]; +; SM90-NEXT: ld.param.b64 %rd6, [global_volatile_8xfloat_param_1]; +; SM90-NEXT: st.volatile.global.v2.b64 [%rd6+16], {%rd4, %rd5}; +; SM90-NEXT: st.volatile.global.v2.b64 [%rd6], {%rd2, %rd3}; ; SM90-NEXT: ret; ; ; SM100-LABEL: global_volatile_8xfloat( ; SM100: { -; SM100-NEXT: .reg .b32 %r<9>; -; SM100-NEXT: .reg .b64 %rd<3>; +; SM100-NEXT: .reg .b64 %rd<7>; ; SM100-EMPTY: ; SM100-NEXT: // %bb.0: ; SM100-NEXT: ld.param.b64 %rd1, [global_volatile_8xfloat_param_0]; -; SM100-NEXT: ld.volatile.global.v8.b32 {%r1, %r2, %r3, %r4, %r5, %r6, %r7, %r8}, [%rd1]; -; SM100-NEXT: ld.param.b64 %rd2, [global_volatile_8xfloat_param_1]; -; SM100-NEXT: st.volatile.global.v8.b32 [%rd2], {%r1, %r2, %r3, %r4, %r5, %r6, %r7, %r8}; +; SM100-NEXT: ld.volatile.global.v4.b64 {%rd2, %rd3, %rd4, %rd5}, [%rd1]; +; SM100-NEXT: ld.param.b64 %rd6, [global_volatile_8xfloat_param_1]; +; SM100-NEXT: st.volatile.global.v4.b64 [%rd6], {%rd2, %rd3, %rd4, %rd5}; ; 
SM100-NEXT: ret; %a.load = load volatile <8 x float>, ptr addrspace(1) %a store volatile <8 x float> %a.load, ptr addrspace(1) %b @@ -939,16 +933,15 @@ define void @shared_4xi64(ptr addrspace(3) %a, ptr addrspace(3) %b) { define void @shared_8xfloat(ptr addrspace(3) %a, ptr addrspace(3) %b) { ; CHECK-LABEL: shared_8xfloat( ; CHECK: { -; CHECK-NEXT: .reg .b32 %r<9>; -; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-NEXT: .reg .b64 %rd<7>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [shared_8xfloat_param_0]; -; CHECK-NEXT: ld.shared.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1]; -; CHECK-NEXT: ld.shared.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1+16]; -; CHECK-NEXT: ld.param.b64 %rd2, [shared_8xfloat_param_1]; -; CHECK-NEXT: st.shared.v4.b32 [%rd2+16], {%r5, %r6, %r7, %r8}; -; CHECK-NEXT: st.shared.v4.b32 [%rd2], {%r1, %r2, %r3, %r4}; +; CHECK-NEXT: ld.shared.v2.b64 {%rd2, %rd3}, [%rd1]; +; CHECK-NEXT: ld.shared.v2.b64 {%rd4, %rd5}, [%rd1+16]; +; CHECK-NEXT: ld.param.b64 %rd6, [shared_8xfloat_param_1]; +; CHECK-NEXT: st.shared.v2.b64 [%rd6+16], {%rd4, %rd5}; +; CHECK-NEXT: st.shared.v2.b64 [%rd6], {%rd2, %rd3}; ; CHECK-NEXT: ret; %a.load = load <8 x float>, ptr addrspace(3) %a store <8 x float> %a.load, ptr addrspace(3) %b @@ -1091,16 +1084,15 @@ define void @shared_volatile_4xi64(ptr addrspace(3) %a, ptr addrspace(3) %b) { define void @shared_volatile_8xfloat(ptr addrspace(3) %a, ptr addrspace(3) %b) { ; CHECK-LABEL: shared_volatile_8xfloat( ; CHECK: { -; CHECK-NEXT: .reg .b32 %r<9>; -; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-NEXT: .reg .b64 %rd<7>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [shared_volatile_8xfloat_param_0]; -; CHECK-NEXT: ld.volatile.shared.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1]; -; CHECK-NEXT: ld.volatile.shared.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1+16]; -; CHECK-NEXT: ld.param.b64 %rd2, [shared_volatile_8xfloat_param_1]; -; CHECK-NEXT: st.volatile.shared.v4.b32 [%rd2+16], {%r5, %r6, %r7, %r8}; -; CHECK-NEXT: 
st.volatile.shared.v4.b32 [%rd2], {%r1, %r2, %r3, %r4}; +; CHECK-NEXT: ld.volatile.shared.v2.b64 {%rd2, %rd3}, [%rd1]; +; CHECK-NEXT: ld.volatile.shared.v2.b64 {%rd4, %rd5}, [%rd1+16]; +; CHECK-NEXT: ld.param.b64 %rd6, [shared_volatile_8xfloat_param_1]; +; CHECK-NEXT: st.volatile.shared.v2.b64 [%rd6+16], {%rd4, %rd5}; +; CHECK-NEXT: st.volatile.shared.v2.b64 [%rd6], {%rd2, %rd3}; ; CHECK-NEXT: ret; %a.load = load volatile <8 x float>, ptr addrspace(3) %a store volatile <8 x float> %a.load, ptr addrspace(3) %b @@ -1245,16 +1237,15 @@ define void @local_4xi64(ptr addrspace(5) %a, ptr addrspace(5) %b) { define void @local_8xfloat(ptr addrspace(5) %a, ptr addrspace(5) %b) { ; CHECK-LABEL: local_8xfloat( ; CHECK: { -; CHECK-NEXT: .reg .b32 %r<9>; -; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-NEXT: .reg .b64 %rd<7>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_8xfloat_param_0]; -; CHECK-NEXT: ld.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1]; -; CHECK-NEXT: ld.local.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1+16]; -; CHECK-NEXT: ld.param.b64 %rd2, [local_8xfloat_param_1]; -; CHECK-NEXT: st.local.v4.b32 [%rd2+16], {%r5, %r6, %r7, %r8}; -; CHECK-NEXT: st.local.v4.b32 [%rd2], {%r1, %r2, %r3, %r4}; +; CHECK-NEXT: ld.local.v2.b64 {%rd2, %rd3}, [%rd1]; +; CHECK-NEXT: ld.local.v2.b64 {%rd4, %rd5}, [%rd1+16]; +; CHECK-NEXT: ld.param.b64 %rd6, [local_8xfloat_param_1]; +; CHECK-NEXT: st.local.v2.b64 [%rd6+16], {%rd4, %rd5}; +; CHECK-NEXT: st.local.v2.b64 [%rd6], {%rd2, %rd3}; ; CHECK-NEXT: ret; %a.load = load <8 x float>, ptr addrspace(5) %a store <8 x float> %a.load, ptr addrspace(5) %b @@ -1397,16 +1388,15 @@ define void @local_volatile_4xi64(ptr addrspace(5) %a, ptr addrspace(5) %b) { define void @local_volatile_8xfloat(ptr addrspace(5) %a, ptr addrspace(5) %b) { ; CHECK-LABEL: local_volatile_8xfloat( ; CHECK: { -; CHECK-NEXT: .reg .b32 %r<9>; -; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-NEXT: .reg .b64 %rd<7>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: 
ld.param.b64 %rd1, [local_volatile_8xfloat_param_0]; -; CHECK-NEXT: ld.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1]; -; CHECK-NEXT: ld.local.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1+16]; -; CHECK-NEXT: ld.param.b64 %rd2, [local_volatile_8xfloat_param_1]; -; CHECK-NEXT: st.local.v4.b32 [%rd2+16], {%r5, %r6, %r7, %r8}; -; CHECK-NEXT: st.local.v4.b32 [%rd2], {%r1, %r2, %r3, %r4}; +; CHECK-NEXT: ld.local.v2.b64 {%rd2, %rd3}, [%rd1]; +; CHECK-NEXT: ld.local.v2.b64 {%rd4, %rd5}, [%rd1+16]; +; CHECK-NEXT: ld.param.b64 %rd6, [local_volatile_8xfloat_param_1]; +; CHECK-NEXT: st.local.v2.b64 [%rd6+16], {%rd4, %rd5}; +; CHECK-NEXT: st.local.v2.b64 [%rd6], {%rd2, %rd3}; ; CHECK-NEXT: ret; %a.load = load volatile <8 x float>, ptr addrspace(5) %a store volatile <8 x float> %a.load, ptr addrspace(5) %b diff --git a/llvm/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll b/llvm/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll index b6a00e03a80ab..ec8dd0c5c9350 100644 --- a/llvm/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll +++ b/llvm/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll @@ -333,30 +333,28 @@ define ptx_kernel void @foo10(ptr noalias readonly %from, ptr %to) { define ptx_kernel void @foo11(ptr noalias readonly %from, ptr %to) { ; SM20-LABEL: foo11( ; SM20: { -; SM20-NEXT: .reg .b32 %r<3>; -; SM20-NEXT: .reg .b64 %rd<5>; +; SM20-NEXT: .reg .b64 %rd<6>; ; SM20-EMPTY: ; SM20-NEXT: // %bb.0: ; SM20-NEXT: ld.param.b64 %rd1, [foo11_param_0]; ; SM20-NEXT: cvta.to.global.u64 %rd2, %rd1; ; SM20-NEXT: ld.param.b64 %rd3, [foo11_param_1]; ; SM20-NEXT: cvta.to.global.u64 %rd4, %rd3; -; SM20-NEXT: ld.global.v2.b32 {%r1, %r2}, [%rd2]; -; SM20-NEXT: st.global.v2.b32 [%rd4], {%r1, %r2}; +; SM20-NEXT: ld.global.b64 %rd5, [%rd2]; +; SM20-NEXT: st.global.b64 [%rd4], %rd5; ; SM20-NEXT: ret; ; ; SM35-LABEL: foo11( ; SM35: { -; SM35-NEXT: .reg .b32 %r<3>; -; SM35-NEXT: .reg .b64 %rd<5>; +; SM35-NEXT: .reg .b64 %rd<6>; ; SM35-EMPTY: ; SM35-NEXT: // %bb.0: ; SM35-NEXT: ld.param.b64 %rd1, 
[foo11_param_0]; ; SM35-NEXT: cvta.to.global.u64 %rd2, %rd1; ; SM35-NEXT: ld.param.b64 %rd3, [foo11_param_1]; ; SM35-NEXT: cvta.to.global.u64 %rd4, %rd3; -; SM35-NEXT: ld.global.nc.v2.b32 {%r1, %r2}, [%rd2]; -; SM35-NEXT: st.global.v2.b32 [%rd4], {%r1, %r2}; +; SM35-NEXT: ld.global.nc.b64 %rd5, [%rd2]; +; SM35-NEXT: st.global.b64 [%rd4], %rd5; ; SM35-NEXT: ret; %1 = load <2 x float>, ptr %from store <2 x float> %1, ptr %to @@ -496,30 +494,28 @@ define ptx_kernel void @foo15(ptr noalias readonly %from, ptr %to) { define ptx_kernel void @foo16(ptr noalias readonly %from, ptr %to) { ; SM20-LABEL: foo16( ; SM20: { -; SM20-NEXT: .reg .b32 %r<5>; -; SM20-NEXT: .reg .b64 %rd<5>; +; SM20-NEXT: .reg .b64 %rd<7>; ; SM20-EMPTY: ; SM20-NEXT: // %bb.0: ; SM20-NEXT: ld.param.b64 %rd1, [foo16_param_0]; ; SM20-NEXT: cvta.to.global.u64 %rd2, %rd1; ; SM20-NEXT: ld.param.b64 %rd3, [foo16_param_1]; ; SM20-NEXT: cvta.to.global.u64 %rd4, %rd3; -; SM20-NEXT: ld.global.v4.b32 {%r1, %r2, %r3, %r4}, [%rd2]; -; SM20-NEXT: st.global.v4.b32 [%rd4], {%r1, %r2, %r3, %r4}; +; SM20-NEXT: ld.global.v2.b64 {%rd5, %rd6}, [%rd2]; +; SM20-NEXT: st.global.v2.b64 [%rd4], {%rd5, %rd6}; ; SM20-NEXT: ret; ; ; SM35-LABEL: foo16( ; SM35: { -; SM35-NEXT: .reg .b32 %r<5>; -; SM35-NEXT: .reg .b64 %rd<5>; +; SM35-NEXT: .reg .b64 %rd<7>; ; SM35-EMPTY: ; SM35-NEXT: // %bb.0: ; SM35-NEXT: ld.param.b64 %rd1, [foo16_param_0]; ; SM35-NEXT: cvta.to.global.u64 %rd2, %rd1; ; SM35-NEXT: ld.param.b64 %rd3, [foo16_param_1]; ; SM35-NEXT: cvta.to.global.u64 %rd4, %rd3; -; SM35-NEXT: ld.global.nc.v4.b32 {%r1, %r2, %r3, %r4}, [%rd2]; -; SM35-NEXT: st.global.v4.b32 [%rd4], {%r1, %r2, %r3, %r4}; +; SM35-NEXT: ld.global.nc.v2.b64 {%rd5, %rd6}, [%rd2]; +; SM35-NEXT: st.global.v2.b64 [%rd4], {%rd5, %rd6}; ; SM35-NEXT: ret; %1 = load <4 x float>, ptr %from store <4 x float> %1, ptr %to diff --git a/llvm/test/CodeGen/NVPTX/misaligned-vector-ldst.ll b/llvm/test/CodeGen/NVPTX/misaligned-vector-ldst.ll index db8733da5b7e4..dfdb33852305b 
100644 --- a/llvm/test/CodeGen/NVPTX/misaligned-vector-ldst.ll +++ b/llvm/test/CodeGen/NVPTX/misaligned-vector-ldst.ll @@ -1,131 +1,278 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %} target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" target triple = "nvptx64-nvidia-cuda" -; CHECK-LABEL: t1 define <4 x float> @t1(ptr %p1) { -; CHECK-NOT: ld.v4 -; CHECK-NOT: ld.v2 -; CHECK-NOT: ld.b32 -; CHECK: ld.b8 +; CHECK-LABEL: t1( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<46>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [t1_param_0]; +; CHECK-NEXT: ld.b8 %rd2, [%rd1+8]; +; CHECK-NEXT: ld.b8 %rd3, [%rd1+9]; +; CHECK-NEXT: shl.b64 %rd4, %rd3, 8; +; CHECK-NEXT: or.b64 %rd5, %rd4, %rd2; +; CHECK-NEXT: ld.b8 %rd6, [%rd1+10]; +; CHECK-NEXT: shl.b64 %rd7, %rd6, 16; +; CHECK-NEXT: ld.b8 %rd8, [%rd1+11]; +; CHECK-NEXT: shl.b64 %rd9, %rd8, 24; +; CHECK-NEXT: or.b64 %rd10, %rd9, %rd7; +; CHECK-NEXT: or.b64 %rd11, %rd10, %rd5; +; CHECK-NEXT: ld.b8 %rd12, [%rd1+12]; +; CHECK-NEXT: ld.b8 %rd13, [%rd1+13]; +; CHECK-NEXT: shl.b64 %rd14, %rd13, 8; +; CHECK-NEXT: or.b64 %rd15, %rd14, %rd12; +; CHECK-NEXT: ld.b8 %rd16, [%rd1+14]; +; CHECK-NEXT: shl.b64 %rd17, %rd16, 16; +; CHECK-NEXT: ld.b8 %rd18, [%rd1+15]; +; CHECK-NEXT: shl.b64 %rd19, %rd18, 24; +; CHECK-NEXT: or.b64 %rd20, %rd19, %rd17; +; CHECK-NEXT: or.b64 %rd21, %rd20, %rd15; +; CHECK-NEXT: shl.b64 %rd22, %rd21, 32; +; CHECK-NEXT: or.b64 %rd23, %rd22, %rd11; +; CHECK-NEXT: ld.b8 %rd24, [%rd1]; +; CHECK-NEXT: ld.b8 %rd25, [%rd1+1]; +; CHECK-NEXT: shl.b64 %rd26, %rd25, 8; +; CHECK-NEXT: or.b64 %rd27, %rd26, %rd24; +; CHECK-NEXT: ld.b8 %rd28, [%rd1+2]; +; CHECK-NEXT: shl.b64 %rd29, %rd28, 16; +; CHECK-NEXT: ld.b8 %rd30, 
[%rd1+3]; +; CHECK-NEXT: shl.b64 %rd31, %rd30, 24; +; CHECK-NEXT: or.b64 %rd32, %rd31, %rd29; +; CHECK-NEXT: or.b64 %rd33, %rd32, %rd27; +; CHECK-NEXT: ld.b8 %rd34, [%rd1+4]; +; CHECK-NEXT: ld.b8 %rd35, [%rd1+5]; +; CHECK-NEXT: shl.b64 %rd36, %rd35, 8; +; CHECK-NEXT: or.b64 %rd37, %rd36, %rd34; +; CHECK-NEXT: ld.b8 %rd38, [%rd1+6]; +; CHECK-NEXT: shl.b64 %rd39, %rd38, 16; +; CHECK-NEXT: ld.b8 %rd40, [%rd1+7]; +; CHECK-NEXT: shl.b64 %rd41, %rd40, 24; +; CHECK-NEXT: or.b64 %rd42, %rd41, %rd39; +; CHECK-NEXT: or.b64 %rd43, %rd42, %rd37; +; CHECK-NEXT: shl.b64 %rd44, %rd43, 32; +; CHECK-NEXT: or.b64 %rd45, %rd44, %rd33; +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd45, %rd23}; +; CHECK-NEXT: ret; %r = load <4 x float>, ptr %p1, align 1 ret <4 x float> %r } -; CHECK-LABEL: t2 define <4 x float> @t2(ptr %p1) { -; CHECK-NOT: ld.v4 -; CHECK-NOT: ld.v2 -; CHECK: ld.b32 +; CHECK-LABEL: t2( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<10>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [t2_param_0]; +; CHECK-NEXT: ld.b32 %rd2, [%rd1+8]; +; CHECK-NEXT: ld.b32 %rd3, [%rd1+12]; +; CHECK-NEXT: shl.b64 %rd4, %rd3, 32; +; CHECK-NEXT: or.b64 %rd5, %rd4, %rd2; +; CHECK-NEXT: ld.b32 %rd6, [%rd1]; +; CHECK-NEXT: ld.b32 %rd7, [%rd1+4]; +; CHECK-NEXT: shl.b64 %rd8, %rd7, 32; +; CHECK-NEXT: or.b64 %rd9, %rd8, %rd6; +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd9, %rd5}; +; CHECK-NEXT: ret; %r = load <4 x float>, ptr %p1, align 4 ret <4 x float> %r } -; CHECK-LABEL: t3 define <4 x float> @t3(ptr %p1) { -; CHECK-NOT: ld.v4 -; CHECK: ld.v2 +; CHECK-LABEL: t3( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [t3_param_0]; +; CHECK-NEXT: ld.b64 %rd2, [%rd1+8]; +; CHECK-NEXT: ld.b64 %rd3, [%rd1]; +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd3, %rd2}; +; CHECK-NEXT: ret; %r = load <4 x float>, ptr %p1, align 8 ret <4 x float> %r } -; CHECK-LABEL: t4 define <4 x float> @t4(ptr %p1) { -; 
CHECK: ld.v4 +; CHECK-LABEL: t4( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [t4_param_0]; +; CHECK-NEXT: ld.v2.b64 {%rd2, %rd3}, [%rd1]; +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd2, %rd3}; +; CHECK-NEXT: ret; %r = load <4 x float>, ptr %p1, align 16 ret <4 x float> %r } -; CHECK-LABEL: .visible .func test_v1halfp0a1( -; CHECK-DAG: ld.param.b64 %[[FROM:rd?[0-9]+]], [test_v1halfp0a1_param_0]; -; CHECK-DAG: ld.param.b64 %[[TO:rd?[0-9]+]], [test_v1halfp0a1_param_1]; -; CHECK-DAG: ld.b8 [[B0:%r[sd]?[0-9]+]], [%[[FROM]]] -; CHECK-DAG: st.b8 [%[[TO]]], [[B0]] -; CHECK-DAG: ld.b8 [[B1:%r[sd]?[0-9]+]], [%[[FROM]]+1] -; CHECK-DAG: st.b8 [%[[TO]]+1], [[B1]] -; CHECK: ret define void @test_v1halfp0a1(ptr noalias readonly %from, ptr %to) { +; CHECK-LABEL: test_v1halfp0a1( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_v1halfp0a1_param_0]; +; CHECK-NEXT: ld.b8 %rs1, [%rd1]; +; CHECK-NEXT: ld.b8 %rs2, [%rd1+1]; +; CHECK-NEXT: ld.param.b64 %rd2, [test_v1halfp0a1_param_1]; +; CHECK-NEXT: st.b8 [%rd2+1], %rs2; +; CHECK-NEXT: st.b8 [%rd2], %rs1; +; CHECK-NEXT: ret; %1 = load <1 x half>, ptr %from , align 1 store <1 x half> %1, ptr %to , align 1 ret void } -; CHECK-LABEL: .visible .func test_v2halfp0a1( -; CHECK-DAG: ld.param.b64 %[[FROM:rd?[0-9]+]], [test_v2halfp0a1_param_0]; -; CHECK-DAG: ld.param.b64 %[[TO:rd?[0-9]+]], [test_v2halfp0a1_param_1]; -; CHECK-DAG: ld.b8 [[B0:%r[sd]?[0-9]+]], [%[[FROM]]] -; CHECK-DAG: st.b8 [%[[TO]]], -; CHECK-DAG: ld.b8 [[B1:%r[sd]?[0-9]+]], [%[[FROM]]+1] -; CHECK-DAG: st.b8 [%[[TO]]+1], -; CHECK-DAG: ld.b8 [[B2:%r[sd]?[0-9]+]], [%[[FROM]]+2] -; CHECK-DAG: st.b8 [%[[TO]]+2], -; CHECK-DAG: ld.b8 [[B3:%r[sd]?[0-9]+]], [%[[FROM]]+3] -; CHECK-DAG: st.b8 [%[[TO]]+3], -; CHECK: ret define void @test_v2halfp0a1(ptr noalias readonly %from, ptr %to) { +; CHECK-LABEL: 
test_v2halfp0a1( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_v2halfp0a1_param_0]; +; CHECK-NEXT: ld.b8 %r1, [%rd1+1]; +; CHECK-NEXT: ld.b8 %r2, [%rd1]; +; CHECK-NEXT: ld.b8 %r3, [%rd1+3]; +; CHECK-NEXT: ld.b8 %r4, [%rd1+2]; +; CHECK-NEXT: ld.param.b64 %rd2, [test_v2halfp0a1_param_1]; +; CHECK-NEXT: st.b8 [%rd2+2], %r4; +; CHECK-NEXT: st.b8 [%rd2+3], %r3; +; CHECK-NEXT: st.b8 [%rd2], %r2; +; CHECK-NEXT: st.b8 [%rd2+1], %r1; +; CHECK-NEXT: ret; %1 = load <2 x half>, ptr %from , align 1 store <2 x half> %1, ptr %to , align 1 ret void } -; CHECK-LABEL: .visible .func test_v4halfp0a1( -; CHECK-DAG: ld.param.b64 %[[FROM:rd?[0-9]+]], [test_v4halfp0a1_param_0]; -; CHECK-DAG: ld.param.b64 %[[TO:rd?[0-9]+]], [test_v4halfp0a1_param_1]; -; CHECK-DAG: ld.b8 [[B0:%r[sd]?[0-9]+]], [%[[FROM]]] -; CHECK-DAG: st.b8 [%[[TO]]], [[B0]] -; CHECK-DAG: ld.b8 [[B1:%r[sd]?[0-9]+]], [%[[FROM]]+1] -; CHECK-DAG: st.b8 [%[[TO]]+1], [[B1]] -; CHECK-DAG: ld.b8 [[B2:%r[sd]?[0-9]+]], [%[[FROM]]+2] -; CHECK-DAG: st.b8 [%[[TO]]+2], [[B2]] -; CHECK-DAG: ld.b8 [[B3:%r[sd]?[0-9]+]], [%[[FROM]]+3] -; CHECK-DAG: st.b8 [%[[TO]]+3], [[B3]] -; CHECK-DAG: ld.b8 [[B4:%r[sd]?[0-9]+]], [%[[FROM]]+4] -; CHECK-DAG: st.b8 [%[[TO]]+4], [[B4]] -; CHECK-DAG: ld.b8 [[B5:%r[sd]?[0-9]+]], [%[[FROM]]+5] -; CHECK-DAG: st.b8 [%[[TO]]+5], [[B5]] -; CHECK-DAG: ld.b8 [[B6:%r[sd]?[0-9]+]], [%[[FROM]]+6] -; CHECK-DAG: st.b8 [%[[TO]]+6], [[B6]] -; CHECK-DAG: ld.b8 [[B7:%r[sd]?[0-9]+]], [%[[FROM]]+7] -; CHECK-DAG: st.b8 [%[[TO]]+7], [[B7]] -; CHECK: ret define void @test_v4halfp0a1(ptr noalias readonly %from, ptr %to) { +; CHECK-LABEL: test_v4halfp0a1( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<9>; +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_v4halfp0a1_param_0]; +; CHECK-NEXT: ld.b8 %r1, [%rd1+1]; +; CHECK-NEXT: ld.b8 %r2, [%rd1]; +; CHECK-NEXT: ld.b8 
%r3, [%rd1+3]; +; CHECK-NEXT: ld.b8 %r4, [%rd1+2]; +; CHECK-NEXT: ld.b8 %r5, [%rd1+5]; +; CHECK-NEXT: ld.b8 %r6, [%rd1+4]; +; CHECK-NEXT: ld.b8 %r7, [%rd1+7]; +; CHECK-NEXT: ld.b8 %r8, [%rd1+6]; +; CHECK-NEXT: ld.param.b64 %rd2, [test_v4halfp0a1_param_1]; +; CHECK-NEXT: st.b8 [%rd2+6], %r8; +; CHECK-NEXT: st.b8 [%rd2+7], %r7; +; CHECK-NEXT: st.b8 [%rd2+4], %r6; +; CHECK-NEXT: st.b8 [%rd2+5], %r5; +; CHECK-NEXT: st.b8 [%rd2+2], %r4; +; CHECK-NEXT: st.b8 [%rd2+3], %r3; +; CHECK-NEXT: st.b8 [%rd2], %r2; +; CHECK-NEXT: st.b8 [%rd2+1], %r1; +; CHECK-NEXT: ret; %1 = load <4 x half>, ptr %from , align 1 store <4 x half> %1, ptr %to , align 1 ret void } -; CHECK-LABEL: s1 define void @s1(ptr %p1, <4 x float> %v) { -; CHECK-NOT: st.v4 -; CHECK-NOT: st.v2 -; CHECK-NOT: st.b32 -; CHECK: st.b8 +; CHECK-LABEL: s1( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<18>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [s1_param_0]; +; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [s1_param_1]; +; CHECK-NEXT: st.b8 [%rd1+8], %rd3; +; CHECK-NEXT: st.b8 [%rd1], %rd2; +; CHECK-NEXT: shr.u64 %rd4, %rd3, 56; +; CHECK-NEXT: st.b8 [%rd1+15], %rd4; +; CHECK-NEXT: shr.u64 %rd5, %rd3, 48; +; CHECK-NEXT: st.b8 [%rd1+14], %rd5; +; CHECK-NEXT: shr.u64 %rd6, %rd3, 40; +; CHECK-NEXT: st.b8 [%rd1+13], %rd6; +; CHECK-NEXT: shr.u64 %rd7, %rd3, 32; +; CHECK-NEXT: st.b8 [%rd1+12], %rd7; +; CHECK-NEXT: shr.u64 %rd8, %rd3, 24; +; CHECK-NEXT: st.b8 [%rd1+11], %rd8; +; CHECK-NEXT: shr.u64 %rd9, %rd3, 16; +; CHECK-NEXT: st.b8 [%rd1+10], %rd9; +; CHECK-NEXT: shr.u64 %rd10, %rd3, 8; +; CHECK-NEXT: st.b8 [%rd1+9], %rd10; +; CHECK-NEXT: shr.u64 %rd11, %rd2, 56; +; CHECK-NEXT: st.b8 [%rd1+7], %rd11; +; CHECK-NEXT: shr.u64 %rd12, %rd2, 48; +; CHECK-NEXT: st.b8 [%rd1+6], %rd12; +; CHECK-NEXT: shr.u64 %rd13, %rd2, 40; +; CHECK-NEXT: st.b8 [%rd1+5], %rd13; +; CHECK-NEXT: shr.u64 %rd14, %rd2, 32; +; CHECK-NEXT: st.b8 [%rd1+4], %rd14; +; CHECK-NEXT: shr.u64 %rd15, %rd2, 24; +; CHECK-NEXT: st.b8 
[%rd1+3], %rd15; +; CHECK-NEXT: shr.u64 %rd16, %rd2, 16; +; CHECK-NEXT: st.b8 [%rd1+2], %rd16; +; CHECK-NEXT: shr.u64 %rd17, %rd2, 8; +; CHECK-NEXT: st.b8 [%rd1+1], %rd17; +; CHECK-NEXT: ret; store <4 x float> %v, ptr %p1, align 1 ret void } -; CHECK-LABEL: s2 define void @s2(ptr %p1, <4 x float> %v) { -; CHECK-NOT: st.v4 -; CHECK-NOT: st.v2 -; CHECK: st.b32 +; CHECK-LABEL: s2( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<6>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [s2_param_0]; +; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [s2_param_1]; +; CHECK-NEXT: st.b32 [%rd1+8], %rd3; +; CHECK-NEXT: st.b32 [%rd1], %rd2; +; CHECK-NEXT: shr.u64 %rd4, %rd3, 32; +; CHECK-NEXT: st.b32 [%rd1+12], %rd4; +; CHECK-NEXT: shr.u64 %rd5, %rd2, 32; +; CHECK-NEXT: st.b32 [%rd1+4], %rd5; +; CHECK-NEXT: ret; store <4 x float> %v, ptr %p1, align 4 ret void } -; CHECK-LABEL: s3 define void @s3(ptr %p1, <4 x float> %v) { -; CHECK-NOT: st.v4 +; CHECK-LABEL: s3( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [s3_param_0]; +; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [s3_param_1]; +; CHECK-NEXT: st.b64 [%rd1+8], %rd3; +; CHECK-NEXT: st.b64 [%rd1], %rd2; +; CHECK-NEXT: ret; store <4 x float> %v, ptr %p1, align 8 ret void } -; CHECK-LABEL: s4 define void @s4(ptr %p1, <4 x float> %v) { -; CHECK: st.v4 +; CHECK-LABEL: s4( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [s4_param_0]; +; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [s4_param_1]; +; CHECK-NEXT: st.v2.b64 [%rd1], {%rd2, %rd3}; +; CHECK-NEXT: ret; store <4 x float> %v, ptr %p1, align 16 ret void } diff --git a/llvm/test/CodeGen/NVPTX/read-global-variable-constant.ll b/llvm/test/CodeGen/NVPTX/read-global-variable-constant.ll index aa463b510fe84..c78fcddb7ed0f 100644 --- a/llvm/test/CodeGen/NVPTX/read-global-variable-constant.ll +++ 
b/llvm/test/CodeGen/NVPTX/read-global-variable-constant.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_35 -verify-machineinstrs | FileCheck %s ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_35 | %ptxas-verify %} @@ -8,23 +9,41 @@ @gv_float2 = external constant <2 x float> @gv_float4 = external constant <4 x float> -; CHECK-LABEL: test_gv_float() define float @test_gv_float() { -; CHECK: ld.global.nc.b32 +; CHECK-LABEL: test_gv_float( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.global.nc.b32 %r1, [gv_float]; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %v = load float, ptr @gv_float ret float %v } -; CHECK-LABEL: test_gv_float2() define <2 x float> @test_gv_float2() { -; CHECK: ld.global.nc.v2.b32 +; CHECK-LABEL: test_gv_float2( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.global.nc.b64 %rd1, [gv_float2]; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd1; +; CHECK-NEXT: ret; %v = load <2 x float>, ptr @gv_float2 ret <2 x float> %v } -; CHECK-LABEL: test_gv_float4() define <4 x float> @test_gv_float4() { -; CHECK: ld.global.nc.v4.b32 +; CHECK-LABEL: test_gv_float4( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.global.nc.v2.b64 {%rd1, %rd2}, [gv_float4]; +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2}; +; CHECK-NEXT: ret; %v = load <4 x float>, ptr @gv_float4 ret <4 x float> %v } diff --git a/llvm/test/CodeGen/NVPTX/reduction-intrinsics.ll b/llvm/test/CodeGen/NVPTX/reduction-intrinsics.ll index e10949f95fac4..87f965c84b6b6 100644 --- a/llvm/test/CodeGen/NVPTX/reduction-intrinsics.ll +++ b/llvm/test/CodeGen/NVPTX/reduction-intrinsics.ll @@ -114,18 +114,19 @@ define float @reduce_fadd_float(<8 x float> %in) { ; CHECK-LABEL: reduce_fadd_float( 
; CHECK: { ; CHECK-NEXT: .reg .b32 %r<17>; +; CHECK-NEXT: .reg .b64 %rd<5>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v4.b32 {%r5, %r6, %r7, %r8}, [reduce_fadd_float_param_0+16]; -; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [reduce_fadd_float_param_0]; -; CHECK-NEXT: add.rn.f32 %r9, %r1, 0f00000000; -; CHECK-NEXT: add.rn.f32 %r10, %r9, %r2; -; CHECK-NEXT: add.rn.f32 %r11, %r10, %r3; -; CHECK-NEXT: add.rn.f32 %r12, %r11, %r4; -; CHECK-NEXT: add.rn.f32 %r13, %r12, %r5; -; CHECK-NEXT: add.rn.f32 %r14, %r13, %r6; -; CHECK-NEXT: add.rn.f32 %r15, %r14, %r7; -; CHECK-NEXT: add.rn.f32 %r16, %r15, %r8; +; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [reduce_fadd_float_param_0+16]; +; CHECK-NEXT: ld.param.v4.b32 {%r5, %r6, %r7, %r8}, [reduce_fadd_float_param_0]; +; CHECK-NEXT: add.rn.f32 %r9, %r5, 0f00000000; +; CHECK-NEXT: add.rn.f32 %r10, %r9, %r6; +; CHECK-NEXT: add.rn.f32 %r11, %r10, %r7; +; CHECK-NEXT: add.rn.f32 %r12, %r11, %r8; +; CHECK-NEXT: add.rn.f32 %r13, %r12, %r1; +; CHECK-NEXT: add.rn.f32 %r14, %r13, %r2; +; CHECK-NEXT: add.rn.f32 %r15, %r14, %r3; +; CHECK-NEXT: add.rn.f32 %r16, %r15, %r4; ; CHECK-NEXT: st.param.b32 [func_retval0], %r16; ; CHECK-NEXT: ret; %res = call float @llvm.vector.reduce.fadd(float 0.0, <8 x float> %in) @@ -133,45 +134,89 @@ define float @reduce_fadd_float(<8 x float> %in) { } define float @reduce_fadd_float_reassoc(<8 x float> %in) { -; CHECK-LABEL: reduce_fadd_float_reassoc( -; CHECK: { -; CHECK-NEXT: .reg .b32 %r<17>; -; CHECK-EMPTY: -; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v4.b32 {%r5, %r6, %r7, %r8}, [reduce_fadd_float_reassoc_param_0+16]; -; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [reduce_fadd_float_reassoc_param_0]; -; CHECK-NEXT: add.rn.f32 %r9, %r3, %r7; -; CHECK-NEXT: add.rn.f32 %r10, %r1, %r5; -; CHECK-NEXT: add.rn.f32 %r11, %r4, %r8; -; CHECK-NEXT: add.rn.f32 %r12, %r2, %r6; -; CHECK-NEXT: add.rn.f32 %r13, %r12, %r11; -; CHECK-NEXT: add.rn.f32 %r14, %r10, %r9; -; CHECK-NEXT: 
add.rn.f32 %r15, %r14, %r13; -; CHECK-NEXT: add.rn.f32 %r16, %r15, 0f00000000; -; CHECK-NEXT: st.param.b32 [func_retval0], %r16; -; CHECK-NEXT: ret; +; CHECK-SM80-LABEL: reduce_fadd_float_reassoc( +; CHECK-SM80: { +; CHECK-SM80-NEXT: .reg .b32 %r<17>; +; CHECK-SM80-NEXT: .reg .b64 %rd<5>; +; CHECK-SM80-EMPTY: +; CHECK-SM80-NEXT: // %bb.0: +; CHECK-SM80-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [reduce_fadd_float_reassoc_param_0+16]; +; CHECK-SM80-NEXT: ld.param.v4.b32 {%r5, %r6, %r7, %r8}, [reduce_fadd_float_reassoc_param_0]; +; CHECK-SM80-NEXT: add.rn.f32 %r9, %r7, %r3; +; CHECK-SM80-NEXT: add.rn.f32 %r10, %r5, %r1; +; CHECK-SM80-NEXT: add.rn.f32 %r11, %r8, %r4; +; CHECK-SM80-NEXT: add.rn.f32 %r12, %r6, %r2; +; CHECK-SM80-NEXT: add.rn.f32 %r13, %r12, %r11; +; CHECK-SM80-NEXT: add.rn.f32 %r14, %r10, %r9; +; CHECK-SM80-NEXT: add.rn.f32 %r15, %r14, %r13; +; CHECK-SM80-NEXT: add.rn.f32 %r16, %r15, 0f00000000; +; CHECK-SM80-NEXT: st.param.b32 [func_retval0], %r16; +; CHECK-SM80-NEXT: ret; +; +; CHECK-SM100-LABEL: reduce_fadd_float_reassoc( +; CHECK-SM100: { +; CHECK-SM100-NEXT: .reg .b32 %r<5>; +; CHECK-SM100-NEXT: .reg .b64 %rd<10>; +; CHECK-SM100-EMPTY: +; CHECK-SM100-NEXT: // %bb.0: +; CHECK-SM100-NEXT: ld.param.v2.b64 {%rd3, %rd4}, [reduce_fadd_float_reassoc_param_0+16]; +; CHECK-SM100-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [reduce_fadd_float_reassoc_param_0]; +; CHECK-SM100-NEXT: add.rn.f32x2 %rd5, %rd2, %rd4; +; CHECK-SM100-NEXT: add.rn.f32x2 %rd6, %rd1, %rd3; +; CHECK-SM100-NEXT: add.rn.f32x2 %rd7, %rd6, %rd5; +; CHECK-SM100-NEXT: mov.b64 {_, %r1}, %rd7; +; CHECK-SM100-NEXT: // implicit-def: %r2 +; CHECK-SM100-NEXT: mov.b64 %rd8, {%r1, %r2}; +; CHECK-SM100-NEXT: add.rn.f32x2 %rd9, %rd7, %rd8; +; CHECK-SM100-NEXT: mov.b64 {%r3, _}, %rd9; +; CHECK-SM100-NEXT: add.rn.f32 %r4, %r3, 0f00000000; +; CHECK-SM100-NEXT: st.param.b32 [func_retval0], %r4; +; CHECK-SM100-NEXT: ret; %res = call reassoc float @llvm.vector.reduce.fadd(float 0.0, <8 x float> %in) ret float %res } 
define float @reduce_fadd_float_reassoc_nonpow2(<7 x float> %in) { -; CHECK-LABEL: reduce_fadd_float_reassoc_nonpow2( -; CHECK: { -; CHECK-NEXT: .reg .b32 %r<15>; -; CHECK-EMPTY: -; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r7, [reduce_fadd_float_reassoc_nonpow2_param_0+24]; -; CHECK-NEXT: ld.param.v2.b32 {%r5, %r6}, [reduce_fadd_float_reassoc_nonpow2_param_0+16]; -; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [reduce_fadd_float_reassoc_nonpow2_param_0]; -; CHECK-NEXT: add.rn.f32 %r8, %r3, %r7; -; CHECK-NEXT: add.rn.f32 %r9, %r1, %r5; -; CHECK-NEXT: add.rn.f32 %r10, %r9, %r8; -; CHECK-NEXT: add.rn.f32 %r11, %r2, %r6; -; CHECK-NEXT: add.rn.f32 %r12, %r11, %r4; -; CHECK-NEXT: add.rn.f32 %r13, %r10, %r12; -; CHECK-NEXT: add.rn.f32 %r14, %r13, 0f00000000; -; CHECK-NEXT: st.param.b32 [func_retval0], %r14; -; CHECK-NEXT: ret; +; CHECK-SM80-LABEL: reduce_fadd_float_reassoc_nonpow2( +; CHECK-SM80: { +; CHECK-SM80-NEXT: .reg .b32 %r<15>; +; CHECK-SM80-EMPTY: +; CHECK-SM80-NEXT: // %bb.0: +; CHECK-SM80-NEXT: ld.param.b32 %r7, [reduce_fadd_float_reassoc_nonpow2_param_0+24]; +; CHECK-SM80-NEXT: ld.param.v2.b32 {%r5, %r6}, [reduce_fadd_float_reassoc_nonpow2_param_0+16]; +; CHECK-SM80-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [reduce_fadd_float_reassoc_nonpow2_param_0]; +; CHECK-SM80-NEXT: add.rn.f32 %r8, %r3, %r7; +; CHECK-SM80-NEXT: add.rn.f32 %r9, %r1, %r5; +; CHECK-SM80-NEXT: add.rn.f32 %r10, %r9, %r8; +; CHECK-SM80-NEXT: add.rn.f32 %r11, %r2, %r6; +; CHECK-SM80-NEXT: add.rn.f32 %r12, %r11, %r4; +; CHECK-SM80-NEXT: add.rn.f32 %r13, %r10, %r12; +; CHECK-SM80-NEXT: add.rn.f32 %r14, %r13, 0f00000000; +; CHECK-SM80-NEXT: st.param.b32 [func_retval0], %r14; +; CHECK-SM80-NEXT: ret; +; +; CHECK-SM100-LABEL: reduce_fadd_float_reassoc_nonpow2( +; CHECK-SM100: { +; CHECK-SM100-NEXT: .reg .b32 %r<13>; +; CHECK-SM100-NEXT: .reg .b64 %rd<8>; +; CHECK-SM100-EMPTY: +; CHECK-SM100-NEXT: // %bb.0: +; CHECK-SM100-NEXT: ld.param.b64 %rd1, 
[reduce_fadd_float_reassoc_nonpow2_param_0+16]; +; CHECK-SM100-NEXT: mov.b64 {%r5, %r6}, %rd1; +; CHECK-SM100-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [reduce_fadd_float_reassoc_nonpow2_param_0]; +; CHECK-SM100-NEXT: mov.b64 {%r3, %r4}, %rd3; +; CHECK-SM100-NEXT: mov.b64 {%r1, %r2}, %rd2; +; CHECK-SM100-NEXT: ld.param.b32 %r7, [reduce_fadd_float_reassoc_nonpow2_param_0+24]; +; CHECK-SM100-NEXT: add.rn.f32x2 %rd4, %rd2, %rd1; +; CHECK-SM100-NEXT: mov.b32 %r8, 0f80000000; +; CHECK-SM100-NEXT: mov.b64 %rd5, {%r7, %r8}; +; CHECK-SM100-NEXT: add.rn.f32x2 %rd6, %rd3, %rd5; +; CHECK-SM100-NEXT: add.rn.f32x2 %rd7, %rd4, %rd6; +; CHECK-SM100-NEXT: mov.b64 {%r9, %r10}, %rd7; +; CHECK-SM100-NEXT: add.rn.f32 %r11, %r9, %r10; +; CHECK-SM100-NEXT: add.rn.f32 %r12, %r11, 0f00000000; +; CHECK-SM100-NEXT: st.param.b32 [func_retval0], %r12; +; CHECK-SM100-NEXT: ret; %res = call reassoc float @llvm.vector.reduce.fadd(float 0.0, <7 x float> %in) ret float %res } @@ -273,17 +318,18 @@ define float @reduce_fmul_float(<8 x float> %in) { ; CHECK-LABEL: reduce_fmul_float( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<16>; +; CHECK-NEXT: .reg .b64 %rd<5>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v4.b32 {%r5, %r6, %r7, %r8}, [reduce_fmul_float_param_0+16]; -; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [reduce_fmul_float_param_0]; -; CHECK-NEXT: mul.rn.f32 %r9, %r1, %r2; -; CHECK-NEXT: mul.rn.f32 %r10, %r9, %r3; -; CHECK-NEXT: mul.rn.f32 %r11, %r10, %r4; -; CHECK-NEXT: mul.rn.f32 %r12, %r11, %r5; -; CHECK-NEXT: mul.rn.f32 %r13, %r12, %r6; -; CHECK-NEXT: mul.rn.f32 %r14, %r13, %r7; -; CHECK-NEXT: mul.rn.f32 %r15, %r14, %r8; +; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [reduce_fmul_float_param_0+16]; +; CHECK-NEXT: ld.param.v4.b32 {%r5, %r6, %r7, %r8}, [reduce_fmul_float_param_0]; +; CHECK-NEXT: mul.rn.f32 %r9, %r5, %r6; +; CHECK-NEXT: mul.rn.f32 %r10, %r9, %r7; +; CHECK-NEXT: mul.rn.f32 %r11, %r10, %r8; +; CHECK-NEXT: mul.rn.f32 %r12, %r11, %r1; +; CHECK-NEXT: 
mul.rn.f32 %r13, %r12, %r2; +; CHECK-NEXT: mul.rn.f32 %r14, %r13, %r3; +; CHECK-NEXT: mul.rn.f32 %r15, %r14, %r4; ; CHECK-NEXT: st.param.b32 [func_retval0], %r15; ; CHECK-NEXT: ret; %res = call float @llvm.vector.reduce.fmul(float 1.0, <8 x float> %in) @@ -291,43 +337,85 @@ define float @reduce_fmul_float(<8 x float> %in) { } define float @reduce_fmul_float_reassoc(<8 x float> %in) { -; CHECK-LABEL: reduce_fmul_float_reassoc( -; CHECK: { -; CHECK-NEXT: .reg .b32 %r<16>; -; CHECK-EMPTY: -; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v4.b32 {%r5, %r6, %r7, %r8}, [reduce_fmul_float_reassoc_param_0+16]; -; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [reduce_fmul_float_reassoc_param_0]; -; CHECK-NEXT: mul.rn.f32 %r9, %r3, %r7; -; CHECK-NEXT: mul.rn.f32 %r10, %r1, %r5; -; CHECK-NEXT: mul.rn.f32 %r11, %r4, %r8; -; CHECK-NEXT: mul.rn.f32 %r12, %r2, %r6; -; CHECK-NEXT: mul.rn.f32 %r13, %r12, %r11; -; CHECK-NEXT: mul.rn.f32 %r14, %r10, %r9; -; CHECK-NEXT: mul.rn.f32 %r15, %r14, %r13; -; CHECK-NEXT: st.param.b32 [func_retval0], %r15; -; CHECK-NEXT: ret; +; CHECK-SM80-LABEL: reduce_fmul_float_reassoc( +; CHECK-SM80: { +; CHECK-SM80-NEXT: .reg .b32 %r<16>; +; CHECK-SM80-NEXT: .reg .b64 %rd<5>; +; CHECK-SM80-EMPTY: +; CHECK-SM80-NEXT: // %bb.0: +; CHECK-SM80-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [reduce_fmul_float_reassoc_param_0+16]; +; CHECK-SM80-NEXT: ld.param.v4.b32 {%r5, %r6, %r7, %r8}, [reduce_fmul_float_reassoc_param_0]; +; CHECK-SM80-NEXT: mul.rn.f32 %r9, %r7, %r3; +; CHECK-SM80-NEXT: mul.rn.f32 %r10, %r5, %r1; +; CHECK-SM80-NEXT: mul.rn.f32 %r11, %r8, %r4; +; CHECK-SM80-NEXT: mul.rn.f32 %r12, %r6, %r2; +; CHECK-SM80-NEXT: mul.rn.f32 %r13, %r12, %r11; +; CHECK-SM80-NEXT: mul.rn.f32 %r14, %r10, %r9; +; CHECK-SM80-NEXT: mul.rn.f32 %r15, %r14, %r13; +; CHECK-SM80-NEXT: st.param.b32 [func_retval0], %r15; +; CHECK-SM80-NEXT: ret; +; +; CHECK-SM100-LABEL: reduce_fmul_float_reassoc( +; CHECK-SM100: { +; CHECK-SM100-NEXT: .reg .b32 %r<4>; +; CHECK-SM100-NEXT: .reg 
.b64 %rd<10>; +; CHECK-SM100-EMPTY: +; CHECK-SM100-NEXT: // %bb.0: +; CHECK-SM100-NEXT: ld.param.v2.b64 {%rd3, %rd4}, [reduce_fmul_float_reassoc_param_0+16]; +; CHECK-SM100-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [reduce_fmul_float_reassoc_param_0]; +; CHECK-SM100-NEXT: mul.rn.f32x2 %rd5, %rd2, %rd4; +; CHECK-SM100-NEXT: mul.rn.f32x2 %rd6, %rd1, %rd3; +; CHECK-SM100-NEXT: mul.rn.f32x2 %rd7, %rd6, %rd5; +; CHECK-SM100-NEXT: mov.b64 {_, %r1}, %rd7; +; CHECK-SM100-NEXT: // implicit-def: %r2 +; CHECK-SM100-NEXT: mov.b64 %rd8, {%r1, %r2}; +; CHECK-SM100-NEXT: mul.rn.f32x2 %rd9, %rd7, %rd8; +; CHECK-SM100-NEXT: mov.b64 {%r3, _}, %rd9; +; CHECK-SM100-NEXT: st.param.b32 [func_retval0], %r3; +; CHECK-SM100-NEXT: ret; %res = call reassoc float @llvm.vector.reduce.fmul(float 1.0, <8 x float> %in) ret float %res } define float @reduce_fmul_float_reassoc_nonpow2(<7 x float> %in) { -; CHECK-LABEL: reduce_fmul_float_reassoc_nonpow2( -; CHECK: { -; CHECK-NEXT: .reg .b32 %r<14>; -; CHECK-EMPTY: -; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r7, [reduce_fmul_float_reassoc_nonpow2_param_0+24]; -; CHECK-NEXT: ld.param.v2.b32 {%r5, %r6}, [reduce_fmul_float_reassoc_nonpow2_param_0+16]; -; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [reduce_fmul_float_reassoc_nonpow2_param_0]; -; CHECK-NEXT: mul.rn.f32 %r8, %r3, %r7; -; CHECK-NEXT: mul.rn.f32 %r9, %r1, %r5; -; CHECK-NEXT: mul.rn.f32 %r10, %r9, %r8; -; CHECK-NEXT: mul.rn.f32 %r11, %r2, %r6; -; CHECK-NEXT: mul.rn.f32 %r12, %r11, %r4; -; CHECK-NEXT: mul.rn.f32 %r13, %r10, %r12; -; CHECK-NEXT: st.param.b32 [func_retval0], %r13; -; CHECK-NEXT: ret; +; CHECK-SM80-LABEL: reduce_fmul_float_reassoc_nonpow2( +; CHECK-SM80: { +; CHECK-SM80-NEXT: .reg .b32 %r<14>; +; CHECK-SM80-EMPTY: +; CHECK-SM80-NEXT: // %bb.0: +; CHECK-SM80-NEXT: ld.param.b32 %r7, [reduce_fmul_float_reassoc_nonpow2_param_0+24]; +; CHECK-SM80-NEXT: ld.param.v2.b32 {%r5, %r6}, [reduce_fmul_float_reassoc_nonpow2_param_0+16]; +; CHECK-SM80-NEXT: ld.param.v4.b32 {%r1, %r2, 
%r3, %r4}, [reduce_fmul_float_reassoc_nonpow2_param_0]; +; CHECK-SM80-NEXT: mul.rn.f32 %r8, %r3, %r7; +; CHECK-SM80-NEXT: mul.rn.f32 %r9, %r1, %r5; +; CHECK-SM80-NEXT: mul.rn.f32 %r10, %r9, %r8; +; CHECK-SM80-NEXT: mul.rn.f32 %r11, %r2, %r6; +; CHECK-SM80-NEXT: mul.rn.f32 %r12, %r11, %r4; +; CHECK-SM80-NEXT: mul.rn.f32 %r13, %r10, %r12; +; CHECK-SM80-NEXT: st.param.b32 [func_retval0], %r13; +; CHECK-SM80-NEXT: ret; +; +; CHECK-SM100-LABEL: reduce_fmul_float_reassoc_nonpow2( +; CHECK-SM100: { +; CHECK-SM100-NEXT: .reg .b32 %r<12>; +; CHECK-SM100-NEXT: .reg .b64 %rd<8>; +; CHECK-SM100-EMPTY: +; CHECK-SM100-NEXT: // %bb.0: +; CHECK-SM100-NEXT: ld.param.b64 %rd1, [reduce_fmul_float_reassoc_nonpow2_param_0+16]; +; CHECK-SM100-NEXT: mov.b64 {%r5, %r6}, %rd1; +; CHECK-SM100-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [reduce_fmul_float_reassoc_nonpow2_param_0]; +; CHECK-SM100-NEXT: mov.b64 {%r3, %r4}, %rd3; +; CHECK-SM100-NEXT: mov.b64 {%r1, %r2}, %rd2; +; CHECK-SM100-NEXT: ld.param.b32 %r7, [reduce_fmul_float_reassoc_nonpow2_param_0+24]; +; CHECK-SM100-NEXT: mul.rn.f32x2 %rd4, %rd2, %rd1; +; CHECK-SM100-NEXT: mov.b32 %r8, 0f3F800000; +; CHECK-SM100-NEXT: mov.b64 %rd5, {%r7, %r8}; +; CHECK-SM100-NEXT: mul.rn.f32x2 %rd6, %rd3, %rd5; +; CHECK-SM100-NEXT: mul.rn.f32x2 %rd7, %rd4, %rd6; +; CHECK-SM100-NEXT: mov.b64 {%r9, %r10}, %rd7; +; CHECK-SM100-NEXT: mul.rn.f32 %r11, %r9, %r10; +; CHECK-SM100-NEXT: st.param.b32 [func_retval0], %r11; +; CHECK-SM100-NEXT: ret; %res = call reassoc float @llvm.vector.reduce.fmul(float 1.0, <7 x float> %in) ret float %res } @@ -403,15 +491,16 @@ define float @reduce_fmax_float(<8 x float> %in) { ; CHECK-LABEL: reduce_fmax_float( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<16>; +; CHECK-NEXT: .reg .b64 %rd<5>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v4.b32 {%r5, %r6, %r7, %r8}, [reduce_fmax_float_param_0+16]; -; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [reduce_fmax_float_param_0]; -; CHECK-NEXT: max.f32 %r9, %r4, %r8; -; 
CHECK-NEXT: max.f32 %r10, %r2, %r6; +; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [reduce_fmax_float_param_0+16]; +; CHECK-NEXT: ld.param.v4.b32 {%r5, %r6, %r7, %r8}, [reduce_fmax_float_param_0]; +; CHECK-NEXT: max.f32 %r9, %r8, %r4; +; CHECK-NEXT: max.f32 %r10, %r6, %r2; ; CHECK-NEXT: max.f32 %r11, %r10, %r9; -; CHECK-NEXT: max.f32 %r12, %r3, %r7; -; CHECK-NEXT: max.f32 %r13, %r1, %r5; +; CHECK-NEXT: max.f32 %r12, %r7, %r3; +; CHECK-NEXT: max.f32 %r13, %r5, %r1; ; CHECK-NEXT: max.f32 %r14, %r13, %r12; ; CHECK-NEXT: max.f32 %r15, %r14, %r11; ; CHECK-NEXT: st.param.b32 [func_retval0], %r15; @@ -425,15 +514,16 @@ define float @reduce_fmax_float_reassoc(<8 x float> %in) { ; CHECK-LABEL: reduce_fmax_float_reassoc( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<16>; +; CHECK-NEXT: .reg .b64 %rd<5>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v4.b32 {%r5, %r6, %r7, %r8}, [reduce_fmax_float_reassoc_param_0+16]; -; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [reduce_fmax_float_reassoc_param_0]; -; CHECK-NEXT: max.f32 %r9, %r4, %r8; -; CHECK-NEXT: max.f32 %r10, %r2, %r6; +; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [reduce_fmax_float_reassoc_param_0+16]; +; CHECK-NEXT: ld.param.v4.b32 {%r5, %r6, %r7, %r8}, [reduce_fmax_float_reassoc_param_0]; +; CHECK-NEXT: max.f32 %r9, %r8, %r4; +; CHECK-NEXT: max.f32 %r10, %r6, %r2; ; CHECK-NEXT: max.f32 %r11, %r10, %r9; -; CHECK-NEXT: max.f32 %r12, %r3, %r7; -; CHECK-NEXT: max.f32 %r13, %r1, %r5; +; CHECK-NEXT: max.f32 %r12, %r7, %r3; +; CHECK-NEXT: max.f32 %r13, %r5, %r1; ; CHECK-NEXT: max.f32 %r14, %r13, %r12; ; CHECK-NEXT: max.f32 %r15, %r14, %r11; ; CHECK-NEXT: st.param.b32 [func_retval0], %r15; @@ -535,15 +625,16 @@ define float @reduce_fmin_float(<8 x float> %in) { ; CHECK-LABEL: reduce_fmin_float( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<16>; +; CHECK-NEXT: .reg .b64 %rd<5>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v4.b32 {%r5, %r6, %r7, %r8}, [reduce_fmin_float_param_0+16]; -; 
CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [reduce_fmin_float_param_0]; -; CHECK-NEXT: min.f32 %r9, %r4, %r8; -; CHECK-NEXT: min.f32 %r10, %r2, %r6; +; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [reduce_fmin_float_param_0+16]; +; CHECK-NEXT: ld.param.v4.b32 {%r5, %r6, %r7, %r8}, [reduce_fmin_float_param_0]; +; CHECK-NEXT: min.f32 %r9, %r8, %r4; +; CHECK-NEXT: min.f32 %r10, %r6, %r2; ; CHECK-NEXT: min.f32 %r11, %r10, %r9; -; CHECK-NEXT: min.f32 %r12, %r3, %r7; -; CHECK-NEXT: min.f32 %r13, %r1, %r5; +; CHECK-NEXT: min.f32 %r12, %r7, %r3; +; CHECK-NEXT: min.f32 %r13, %r5, %r1; ; CHECK-NEXT: min.f32 %r14, %r13, %r12; ; CHECK-NEXT: min.f32 %r15, %r14, %r11; ; CHECK-NEXT: st.param.b32 [func_retval0], %r15; @@ -557,15 +648,16 @@ define float @reduce_fmin_float_reassoc(<8 x float> %in) { ; CHECK-LABEL: reduce_fmin_float_reassoc( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<16>; +; CHECK-NEXT: .reg .b64 %rd<5>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v4.b32 {%r5, %r6, %r7, %r8}, [reduce_fmin_float_reassoc_param_0+16]; -; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [reduce_fmin_float_reassoc_param_0]; -; CHECK-NEXT: min.f32 %r9, %r4, %r8; -; CHECK-NEXT: min.f32 %r10, %r2, %r6; +; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [reduce_fmin_float_reassoc_param_0+16]; +; CHECK-NEXT: ld.param.v4.b32 {%r5, %r6, %r7, %r8}, [reduce_fmin_float_reassoc_param_0]; +; CHECK-NEXT: min.f32 %r9, %r8, %r4; +; CHECK-NEXT: min.f32 %r10, %r6, %r2; ; CHECK-NEXT: min.f32 %r11, %r10, %r9; -; CHECK-NEXT: min.f32 %r12, %r3, %r7; -; CHECK-NEXT: min.f32 %r13, %r1, %r5; +; CHECK-NEXT: min.f32 %r12, %r7, %r3; +; CHECK-NEXT: min.f32 %r13, %r5, %r1; ; CHECK-NEXT: min.f32 %r14, %r13, %r12; ; CHECK-NEXT: min.f32 %r15, %r14, %r11; ; CHECK-NEXT: st.param.b32 [func_retval0], %r15; @@ -667,15 +759,16 @@ define float @reduce_fmaximum_float(<8 x float> %in) { ; CHECK-LABEL: reduce_fmaximum_float( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<16>; +; CHECK-NEXT: .reg .b64 %rd<5>; ; 
CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v4.b32 {%r5, %r6, %r7, %r8}, [reduce_fmaximum_float_param_0+16]; -; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [reduce_fmaximum_float_param_0]; -; CHECK-NEXT: max.NaN.f32 %r9, %r4, %r8; -; CHECK-NEXT: max.NaN.f32 %r10, %r2, %r6; +; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [reduce_fmaximum_float_param_0+16]; +; CHECK-NEXT: ld.param.v4.b32 {%r5, %r6, %r7, %r8}, [reduce_fmaximum_float_param_0]; +; CHECK-NEXT: max.NaN.f32 %r9, %r8, %r4; +; CHECK-NEXT: max.NaN.f32 %r10, %r6, %r2; ; CHECK-NEXT: max.NaN.f32 %r11, %r10, %r9; -; CHECK-NEXT: max.NaN.f32 %r12, %r3, %r7; -; CHECK-NEXT: max.NaN.f32 %r13, %r1, %r5; +; CHECK-NEXT: max.NaN.f32 %r12, %r7, %r3; +; CHECK-NEXT: max.NaN.f32 %r13, %r5, %r1; ; CHECK-NEXT: max.NaN.f32 %r14, %r13, %r12; ; CHECK-NEXT: max.NaN.f32 %r15, %r14, %r11; ; CHECK-NEXT: st.param.b32 [func_retval0], %r15; @@ -689,15 +782,16 @@ define float @reduce_fmaximum_float_reassoc(<8 x float> %in) { ; CHECK-LABEL: reduce_fmaximum_float_reassoc( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<16>; +; CHECK-NEXT: .reg .b64 %rd<5>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v4.b32 {%r5, %r6, %r7, %r8}, [reduce_fmaximum_float_reassoc_param_0+16]; -; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [reduce_fmaximum_float_reassoc_param_0]; -; CHECK-NEXT: max.NaN.f32 %r9, %r4, %r8; -; CHECK-NEXT: max.NaN.f32 %r10, %r2, %r6; +; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [reduce_fmaximum_float_reassoc_param_0+16]; +; CHECK-NEXT: ld.param.v4.b32 {%r5, %r6, %r7, %r8}, [reduce_fmaximum_float_reassoc_param_0]; +; CHECK-NEXT: max.NaN.f32 %r9, %r8, %r4; +; CHECK-NEXT: max.NaN.f32 %r10, %r6, %r2; ; CHECK-NEXT: max.NaN.f32 %r11, %r10, %r9; -; CHECK-NEXT: max.NaN.f32 %r12, %r3, %r7; -; CHECK-NEXT: max.NaN.f32 %r13, %r1, %r5; +; CHECK-NEXT: max.NaN.f32 %r12, %r7, %r3; +; CHECK-NEXT: max.NaN.f32 %r13, %r5, %r1; ; CHECK-NEXT: max.NaN.f32 %r14, %r13, %r12; ; CHECK-NEXT: max.NaN.f32 %r15, 
%r14, %r11; ; CHECK-NEXT: st.param.b32 [func_retval0], %r15; @@ -799,15 +893,16 @@ define float @reduce_fminimum_float(<8 x float> %in) { ; CHECK-LABEL: reduce_fminimum_float( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<16>; +; CHECK-NEXT: .reg .b64 %rd<5>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v4.b32 {%r5, %r6, %r7, %r8}, [reduce_fminimum_float_param_0+16]; -; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [reduce_fminimum_float_param_0]; -; CHECK-NEXT: min.NaN.f32 %r9, %r4, %r8; -; CHECK-NEXT: min.NaN.f32 %r10, %r2, %r6; +; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [reduce_fminimum_float_param_0+16]; +; CHECK-NEXT: ld.param.v4.b32 {%r5, %r6, %r7, %r8}, [reduce_fminimum_float_param_0]; +; CHECK-NEXT: min.NaN.f32 %r9, %r8, %r4; +; CHECK-NEXT: min.NaN.f32 %r10, %r6, %r2; ; CHECK-NEXT: min.NaN.f32 %r11, %r10, %r9; -; CHECK-NEXT: min.NaN.f32 %r12, %r3, %r7; -; CHECK-NEXT: min.NaN.f32 %r13, %r1, %r5; +; CHECK-NEXT: min.NaN.f32 %r12, %r7, %r3; +; CHECK-NEXT: min.NaN.f32 %r13, %r5, %r1; ; CHECK-NEXT: min.NaN.f32 %r14, %r13, %r12; ; CHECK-NEXT: min.NaN.f32 %r15, %r14, %r11; ; CHECK-NEXT: st.param.b32 [func_retval0], %r15; @@ -821,15 +916,16 @@ define float @reduce_fminimum_float_reassoc(<8 x float> %in) { ; CHECK-LABEL: reduce_fminimum_float_reassoc( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<16>; +; CHECK-NEXT: .reg .b64 %rd<5>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v4.b32 {%r5, %r6, %r7, %r8}, [reduce_fminimum_float_reassoc_param_0+16]; -; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [reduce_fminimum_float_reassoc_param_0]; -; CHECK-NEXT: min.NaN.f32 %r9, %r4, %r8; -; CHECK-NEXT: min.NaN.f32 %r10, %r2, %r6; +; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [reduce_fminimum_float_reassoc_param_0+16]; +; CHECK-NEXT: ld.param.v4.b32 {%r5, %r6, %r7, %r8}, [reduce_fminimum_float_reassoc_param_0]; +; CHECK-NEXT: min.NaN.f32 %r9, %r8, %r4; +; CHECK-NEXT: min.NaN.f32 %r10, %r6, %r2; ; CHECK-NEXT: min.NaN.f32 %r11, %r10, 
%r9; -; CHECK-NEXT: min.NaN.f32 %r12, %r3, %r7; -; CHECK-NEXT: min.NaN.f32 %r13, %r1, %r5; +; CHECK-NEXT: min.NaN.f32 %r12, %r7, %r3; +; CHECK-NEXT: min.NaN.f32 %r13, %r5, %r1; ; CHECK-NEXT: min.NaN.f32 %r14, %r13, %r12; ; CHECK-NEXT: min.NaN.f32 %r15, %r14, %r11; ; CHECK-NEXT: st.param.b32 [func_retval0], %r15; diff --git a/llvm/test/CodeGen/NVPTX/vec-param-load.ll b/llvm/test/CodeGen/NVPTX/vec-param-load.ll index 765e50554c8d2..29939e323b4b1 100644 --- a/llvm/test/CodeGen/NVPTX/vec-param-load.ll +++ b/llvm/test/CodeGen/NVPTX/vec-param-load.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %} @@ -5,75 +6,104 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 define <16 x float> @test_v16f32(<16 x float> %a) { ; CHECK-LABEL: test_v16f32( -; CHECK-DAG: ld.param.v4.b32 {[[V_12_15:(%r[0-9]+[, ]*){4}]]}, [test_v16f32_param_0+48]; -; CHECK-DAG: ld.param.v4.b32 {[[V_8_11:(%r[0-9]+[, ]*){4}]]}, [test_v16f32_param_0+32]; -; CHECK-DAG: ld.param.v4.b32 {[[V_4_7:(%r[0-9]+[, ]*){4}]]}, [test_v16f32_param_0+16]; -; CHECK-DAG: ld.param.v4.b32 {[[V_0_3:(%r[0-9]+[, ]*){4}]]}, [test_v16f32_param_0]; -; CHECK-DAG: st.param.v4.b32 [func_retval0], {[[V_0_3]]} -; CHECK-DAG: st.param.v4.b32 [func_retval0+16], {[[V_4_7]]} -; CHECK-DAG: st.param.v4.b32 [func_retval0+32], {[[V_8_11]]} -; CHECK-DAG: st.param.v4.b32 [func_retval0+48], {[[V_12_15]]} -; CHECK: ret; +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<9>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [test_v16f32_param_0]; +; CHECK-NEXT: ld.param.v2.b64 {%rd3, %rd4}, [test_v16f32_param_0+16]; +; CHECK-NEXT: ld.param.v2.b64 {%rd5, %rd6}, [test_v16f32_param_0+32]; +; CHECK-NEXT: ld.param.v2.b64 {%rd7, %rd8}, [test_v16f32_param_0+48]; +; CHECK-NEXT: st.param.v2.b64 
[func_retval0+48], {%rd7, %rd8}; +; CHECK-NEXT: st.param.v2.b64 [func_retval0+32], {%rd5, %rd6}; +; CHECK-NEXT: st.param.v2.b64 [func_retval0+16], {%rd3, %rd4}; +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2}; +; CHECK-NEXT: ret; ret <16 x float> %a } define <8 x float> @test_v8f32(<8 x float> %a) { ; CHECK-LABEL: test_v8f32( -; CHECK-DAG: ld.param.v4.b32 {[[V_4_7:(%r[0-9]+[, ]*){4}]]}, [test_v8f32_param_0+16]; -; CHECK-DAG: ld.param.v4.b32 {[[V_0_3:(%r[0-9]+[, ]*){4}]]}, [test_v8f32_param_0]; -; CHECK-DAG: st.param.v4.b32 [func_retval0], {[[V_0_3]]} -; CHECK-DAG: st.param.v4.b32 [func_retval0+16], {[[V_4_7]]} -; CHECK: ret; +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<5>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [test_v8f32_param_0]; +; CHECK-NEXT: ld.param.v2.b64 {%rd3, %rd4}, [test_v8f32_param_0+16]; +; CHECK-NEXT: st.param.v2.b64 [func_retval0+16], {%rd3, %rd4}; +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2}; +; CHECK-NEXT: ret; ret <8 x float> %a } define <4 x float> @test_v4f32(<4 x float> %a) { ; CHECK-LABEL: test_v4f32( -; CHECK-DAG: ld.param.v4.b32 {[[V_0_3:(%r[0-9]+[, ]*){4}]]}, [test_v4f32_param_0]; -; CHECK-DAG: st.param.v4.b32 [func_retval0], {[[V_0_3]]} -; CHECK: ret; +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [test_v4f32_param_0]; +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2}; +; CHECK-NEXT: ret; ret <4 x float> %a } define <2 x float> @test_v2f32(<2 x float> %a) { ; CHECK-LABEL: test_v2f32( -; CHECK-DAG: ld.param.v2.b32 {[[V_0_3:(%r[0-9]+[, ]*){2}]]}, [test_v2f32_param_0]; -; CHECK-DAG: st.param.v2.b32 [func_retval0], {[[V_0_3]]} -; CHECK: ret; +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_v2f32_param_0]; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd1; +; CHECK-NEXT: ret; ret <2 x float> %a } ; Oddly 
shaped vectors should not load any extra elements. define <3 x float> @test_v3f32(<3 x float> %a) { ; CHECK-LABEL: test_v3f32( -; CHECK-DAG: ld.param.b32 [[V_2:%r[0-9]+]], [test_v3f32_param_0+8]; -; CHECK-DAG: ld.param.v2.b32 {[[V_0_1:(%r[0-9]+[, ]*){2}]]}, [test_v3f32_param_0]; -; CHECK-DAG: st.param.v2.b32 [func_retval0], {[[V_0_1]]} -; CHECK-DAG: st.param.b32 [func_retval0+8], [[V_2]] -; CHECK: ret; +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_v3f32_param_0]; +; CHECK-NEXT: ld.param.b32 %r1, [test_v3f32_param_0+8]; +; CHECK-NEXT: st.param.b32 [func_retval0+8], %r1; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd1; +; CHECK-NEXT: ret; ret <3 x float> %a } define <8 x i64> @test_v8i64(<8 x i64> %a) { ; CHECK-LABEL: test_v8i64( -; CHECK-DAG: ld.param.v2.b64 {[[V_6_7:(%rd[0-9]+[, ]*){2}]]}, [test_v8i64_param_0+48]; -; CHECK-DAG: ld.param.v2.b64 {[[V_4_5:(%rd[0-9]+[, ]*){2}]]}, [test_v8i64_param_0+32]; -; CHECK-DAG: ld.param.v2.b64 {[[V_2_3:(%rd[0-9]+[, ]*){2}]]}, [test_v8i64_param_0+16]; -; CHECK-DAG: ld.param.v2.b64 {[[V_0_1:(%rd[0-9]+[, ]*){2}]]}, [test_v8i64_param_0]; -; CHECK-DAG: st.param.v2.b64 [func_retval0], {[[V_0_1]]} -; CHECK-DAG: st.param.v2.b64 [func_retval0+16], {[[V_2_3]]} -; CHECK-DAG: st.param.v2.b64 [func_retval0+32], {[[V_4_5]]} -; CHECK-DAG: st.param.v2.b64 [func_retval0+48], {[[V_6_7]]} -; CHECK: ret; +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<9>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [test_v8i64_param_0]; +; CHECK-NEXT: ld.param.v2.b64 {%rd3, %rd4}, [test_v8i64_param_0+16]; +; CHECK-NEXT: ld.param.v2.b64 {%rd5, %rd6}, [test_v8i64_param_0+32]; +; CHECK-NEXT: ld.param.v2.b64 {%rd7, %rd8}, [test_v8i64_param_0+48]; +; CHECK-NEXT: st.param.v2.b64 [func_retval0+48], {%rd7, %rd8}; +; CHECK-NEXT: st.param.v2.b64 [func_retval0+32], {%rd5, %rd6}; +; CHECK-NEXT: st.param.v2.b64 [func_retval0+16], 
{%rd3, %rd4}; +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2}; +; CHECK-NEXT: ret; ret <8 x i64> %a } define <16 x i16> @test_v16i16(<16 x i16> %a) { ; CHECK-LABEL: test_v16i16( -; CHECK-DAG: ld.param.v4.b32 {[[V_8_15:(%r[0-9]+[, ]*){4}]]}, [test_v16i16_param_0+16]; -; CHECK-DAG: ld.param.v4.b32 {[[V_0_7:(%r[0-9]+[, ]*){4}]]}, [test_v16i16_param_0]; -; CHECK-DAG: st.param.v4.b32 [func_retval0], {[[V_0_7]]} -; CHECK-DAG: st.param.v4.b32 [func_retval0+16], {[[V_8_15]]} -; CHECK: ret; +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<9>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [test_v16i16_param_0]; +; CHECK-NEXT: ld.param.v4.b32 {%r5, %r6, %r7, %r8}, [test_v16i16_param_0+16]; +; CHECK-NEXT: st.param.v4.b32 [func_retval0+16], {%r5, %r6, %r7, %r8}; +; CHECK-NEXT: st.param.v4.b32 [func_retval0], {%r1, %r2, %r3, %r4}; +; CHECK-NEXT: ret; ret <16 x i16> %a } diff --git a/llvm/test/CodeGen/NVPTX/vector-args.ll b/llvm/test/CodeGen/NVPTX/vector-args.ll index b08c19206a0b8..17468d56aa574 100644 --- a/llvm/test/CodeGen/NVPTX/vector-args.ll +++ b/llvm/test/CodeGen/NVPTX/vector-args.ll @@ -1,10 +1,19 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %} define float @foo(<2 x float> %a) { -; CHECK: .func (.param .b32 func_retval0) foo -; CHECK: .param .align 8 .b8 foo_param_0[8] -; CHECK: ld.param.v2.b32 {%r{{[0-9]+}}, %r{{[0-9]+}}} +; CHECK-LABEL: foo( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<6>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [foo_param_0]; +; CHECK-NEXT: mul.rn.f32 %r3, %r2, %r2; +; CHECK-NEXT: mul.rn.f32 %r4, %r1, %r1; +; CHECK-NEXT: add.rn.f32 %r5, %r4, %r3; +; CHECK-NEXT: st.param.b32 [func_retval0], %r5; +; CHECK-NEXT: ret; %t1 = fmul <2 x float> %a, %a %t2 = extractelement <2 x 
float> %t1, i32 0 %t3 = extractelement <2 x float> %t1, i32 1 @@ -14,9 +23,17 @@ define float @foo(<2 x float> %a) { define float @bar(<4 x float> %a) { -; CHECK: .func (.param .b32 func_retval0) bar -; CHECK: .param .align 16 .b8 bar_param_0[16] -; CHECK: ld.param.v4.b32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}} +; CHECK-LABEL: bar( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<8>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [bar_param_0]; +; CHECK-NEXT: mul.rn.f32 %r5, %r2, %r2; +; CHECK-NEXT: mul.rn.f32 %r6, %r1, %r1; +; CHECK-NEXT: add.rn.f32 %r7, %r6, %r5; +; CHECK-NEXT: st.param.b32 [func_retval0], %r7; +; CHECK-NEXT: ret; %t1 = fmul <4 x float> %a, %a %t2 = extractelement <4 x float> %t1, i32 0 %t3 = extractelement <4 x float> %t1, i32 1 @@ -26,10 +43,18 @@ define float @bar(<4 x float> %a) { define <4 x float> @baz(<4 x float> %a) { -; CHECK: .func (.param .align 16 .b8 func_retval0[16]) baz -; CHECK: .param .align 16 .b8 baz_param_0[16] -; CHECK: ld.param.v4.b32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}} -; CHECK: st.param.v4.b32 [func_retval0], {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}} +; CHECK-LABEL: baz( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<9>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [baz_param_0]; +; CHECK-NEXT: mul.rn.f32 %r5, %r4, %r4; +; CHECK-NEXT: mul.rn.f32 %r6, %r3, %r3; +; CHECK-NEXT: mul.rn.f32 %r7, %r2, %r2; +; CHECK-NEXT: mul.rn.f32 %r8, %r1, %r1; +; CHECK-NEXT: st.param.v4.b32 [func_retval0], {%r8, %r7, %r6, %r5}; +; CHECK-NEXT: ret; %t1 = fmul <4 x float> %a, %a ret <4 x float> %t1 } diff --git a/llvm/test/CodeGen/NVPTX/vector-loads.ll b/llvm/test/CodeGen/NVPTX/vector-loads.ll index 1ae6f6bcd748f..e16fc74325416 100644 --- a/llvm/test/CodeGen/NVPTX/vector-loads.ll +++ b/llvm/test/CodeGen/NVPTX/vector-loads.ll @@ -207,18 +207,18 @@ define void @extv8f16_global_a16(ptr addrspace(1) noalias readonly 
align 16 %dst ; CHECK-NEXT: ld.param.b64 %rd1, [extv8f16_global_a16_param_0]; ; CHECK-NEXT: ld.param.b64 %rd2, [extv8f16_global_a16_param_1]; ; CHECK-NEXT: ld.global.v4.b32 {%r1, %r2, %r3, %r4}, [%rd2]; -; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r3; -; CHECK-NEXT: mov.b32 {%rs3, %rs4}, %r4; -; CHECK-NEXT: mov.b32 {%rs5, %rs6}, %r1; -; CHECK-NEXT: mov.b32 {%rs7, %rs8}, %r2; -; CHECK-NEXT: cvt.f32.f16 %r5, %rs8; -; CHECK-NEXT: cvt.f32.f16 %r6, %rs7; -; CHECK-NEXT: cvt.f32.f16 %r7, %rs6; -; CHECK-NEXT: cvt.f32.f16 %r8, %rs5; -; CHECK-NEXT: cvt.f32.f16 %r9, %rs4; -; CHECK-NEXT: cvt.f32.f16 %r10, %rs3; -; CHECK-NEXT: cvt.f32.f16 %r11, %rs2; -; CHECK-NEXT: cvt.f32.f16 %r12, %rs1; +; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r2; +; CHECK-NEXT: cvt.f32.f16 %r5, %rs2; +; CHECK-NEXT: cvt.f32.f16 %r6, %rs1; +; CHECK-NEXT: mov.b32 {%rs3, %rs4}, %r1; +; CHECK-NEXT: cvt.f32.f16 %r7, %rs4; +; CHECK-NEXT: cvt.f32.f16 %r8, %rs3; +; CHECK-NEXT: mov.b32 {%rs5, %rs6}, %r4; +; CHECK-NEXT: cvt.f32.f16 %r9, %rs6; +; CHECK-NEXT: cvt.f32.f16 %r10, %rs5; +; CHECK-NEXT: mov.b32 {%rs7, %rs8}, %r3; +; CHECK-NEXT: cvt.f32.f16 %r11, %rs8; +; CHECK-NEXT: cvt.f32.f16 %r12, %rs7; ; CHECK-NEXT: st.global.v4.b32 [%rd1+16], {%r12, %r11, %r10, %r9}; ; CHECK-NEXT: st.global.v4.b32 [%rd1], {%r8, %r7, %r6, %r5}; ; CHECK-NEXT: ret; @@ -271,18 +271,18 @@ define void @extv8f16_generic_a16(ptr noalias readonly align 16 %dst, ptr noalia ; CHECK-NEXT: ld.param.b64 %rd1, [extv8f16_generic_a16_param_0]; ; CHECK-NEXT: ld.param.b64 %rd2, [extv8f16_generic_a16_param_1]; ; CHECK-NEXT: ld.v4.b32 {%r1, %r2, %r3, %r4}, [%rd2]; -; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r3; -; CHECK-NEXT: mov.b32 {%rs3, %rs4}, %r4; -; CHECK-NEXT: mov.b32 {%rs5, %rs6}, %r1; -; CHECK-NEXT: mov.b32 {%rs7, %rs8}, %r2; -; CHECK-NEXT: cvt.f32.f16 %r5, %rs8; -; CHECK-NEXT: cvt.f32.f16 %r6, %rs7; -; CHECK-NEXT: cvt.f32.f16 %r7, %rs6; -; CHECK-NEXT: cvt.f32.f16 %r8, %rs5; -; CHECK-NEXT: cvt.f32.f16 %r9, %rs4; -; CHECK-NEXT: cvt.f32.f16 %r10, %rs3; -; CHECK-NEXT: 
cvt.f32.f16 %r11, %rs2; -; CHECK-NEXT: cvt.f32.f16 %r12, %rs1; +; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r2; +; CHECK-NEXT: cvt.f32.f16 %r5, %rs2; +; CHECK-NEXT: cvt.f32.f16 %r6, %rs1; +; CHECK-NEXT: mov.b32 {%rs3, %rs4}, %r1; +; CHECK-NEXT: cvt.f32.f16 %r7, %rs4; +; CHECK-NEXT: cvt.f32.f16 %r8, %rs3; +; CHECK-NEXT: mov.b32 {%rs5, %rs6}, %r4; +; CHECK-NEXT: cvt.f32.f16 %r9, %rs6; +; CHECK-NEXT: cvt.f32.f16 %r10, %rs5; +; CHECK-NEXT: mov.b32 {%rs7, %rs8}, %r3; +; CHECK-NEXT: cvt.f32.f16 %r11, %rs8; +; CHECK-NEXT: cvt.f32.f16 %r12, %rs7; ; CHECK-NEXT: st.v4.b32 [%rd1+16], {%r12, %r11, %r10, %r9}; ; CHECK-NEXT: st.v4.b32 [%rd1], {%r8, %r7, %r6, %r5}; ; CHECK-NEXT: ret; diff --git a/llvm/test/CodeGen/NVPTX/vector-stores.ll b/llvm/test/CodeGen/NVPTX/vector-stores.ll index f3b1015070085..d07c740d32a72 100644 --- a/llvm/test/CodeGen/NVPTX/vector-stores.ll +++ b/llvm/test/CodeGen/NVPTX/vector-stores.ll @@ -1,38 +1,77 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %} -; CHECK-LABEL: .visible .func foo1 -; CHECK: st.v2.b32 define void @foo1(<2 x float> %val, ptr %ptr) { +; CHECK-LABEL: foo1( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [foo1_param_0]; +; CHECK-NEXT: ld.param.b64 %rd2, [foo1_param_1]; +; CHECK-NEXT: st.b64 [%rd2], %rd1; +; CHECK-NEXT: ret; store <2 x float> %val, ptr %ptr ret void } -; CHECK-LABEL: .visible .func foo2 -; CHECK: st.v4.b32 define void @foo2(<4 x float> %val, ptr %ptr) { +; CHECK-LABEL: foo2( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [foo2_param_0]; +; CHECK-NEXT: ld.param.b64 %rd3, [foo2_param_1]; +; CHECK-NEXT: st.v2.b64 [%rd3], {%rd1, %rd2}; +; CHECK-NEXT: ret; store <4 x float> %val, ptr %ptr ret 
void } -; CHECK-LABEL: .visible .func foo3 -; CHECK: st.v2.b32 define void @foo3(<2 x i32> %val, ptr %ptr) { +; CHECK-LABEL: foo3( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [foo3_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [foo3_param_1]; +; CHECK-NEXT: st.v2.b32 [%rd1], {%r1, %r2}; +; CHECK-NEXT: ret; store <2 x i32> %val, ptr %ptr ret void } -; CHECK-LABEL: .visible .func foo4 -; CHECK: st.v4.b32 define void @foo4(<4 x i32> %val, ptr %ptr) { +; CHECK-LABEL: foo4( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [foo4_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [foo4_param_1]; +; CHECK-NEXT: st.v4.b32 [%rd1], {%r1, %r2, %r3, %r4}; +; CHECK-NEXT: ret; store <4 x i32> %val, ptr %ptr ret void } -; CHECK-LABEL: .visible .func v16i8 define void @v16i8(ptr %a, ptr %b) { -; CHECK: ld.v4.b32 -; CHECK: st.v4.b32 +; CHECK-LABEL: v16i8( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [v16i8_param_0]; +; CHECK-NEXT: ld.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1]; +; CHECK-NEXT: ld.param.b64 %rd2, [v16i8_param_1]; +; CHECK-NEXT: st.v4.b32 [%rd2], {%r1, %r2, %r3, %r4}; +; CHECK-NEXT: ret; %v = load <16 x i8>, ptr %a store <16 x i8> %v, ptr %b ret void diff --git a/llvm/test/CodeGen/PowerPC/check-zero-vector.ll b/llvm/test/CodeGen/PowerPC/check-zero-vector.ll new file mode 100644 index 0000000000000..59173e22edf26 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/check-zero-vector.ll @@ -0,0 +1,246 @@ +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: < %s | FileCheck %s --check-prefix=POWERPC_64LE + +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64-ibm-aix \ +; RUN: < %s | FileCheck %s 
--check-prefix=POWERPC_64 + +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc-ibm-aix \ +; RUN: < %s | FileCheck %s --check-prefix=POWERPC_32 + +define i32 @test_Greater_than(ptr %colauths, i32 signext %ncols) { +; This testcase is manually reduced to isolate the critical code blocks. +; It is designed to check for vector comparison specifically for zero vectors. +; In the vector.body section, we are expecting a comparison instruction (vcmpequh), +; merge instructions (vmrghh and vmrglh) which use exactly 2 vectors. +; The output of the merge instruction is being used by xxland and finally +; accumulated by vadduwm instruction. + +; POWERPC_64LE-LABEL: test_Greater_than: +; POWERPC_64LE: .LBB0_6: # %vector.body +; POWERPC_64LE-NEXT: # +; POWERPC_64LE-NEXT: lxv [[R1:[0-9]+]], -64(4) +; POWERPC_64LE-NEXT: vcmpequh [[R2:[0-9]+]], [[R2]], [[R3:[0-9]+]] +; POWERPC_64LE-NEXT: xxlnor [[R1]], [[R1]], [[R1]] +; POWERPC_64LE-NEXT: vmrghh [[R4:[0-9]+]], [[R2]], [[R2]] +; POWERPC_64LE-NEXT: vmrglh [[R2]], [[R2]], [[R2]] +; POWERPC_64LE-NEXT: xxland [[R5:[0-9]+]], [[R5]], [[R6:[0-9]+]] +; POWERPC_64LE-NEXT: xxland [[R1]], [[R1]], [[R6]] +; POWERPC_64LE-NEXT: vadduwm [[R7:[0-9]+]], [[R7]], [[R4]] +; POWERPC_64LE: .LBB0_10: # %vec.epilog.vector.body +; POWERPC_64LE-NEXT: # +; POWERPC_64LE-NEXT: lxv [[R8:[0-9]+]], 0(4) +; POWERPC_64LE-NEXT: addi 4, 4, 16 +; POWERPC_64LE-NEXT: vcmpequh [[R9:[0-9]+]], [[R9]], [[R10:[0-9]+]] +; POWERPC_64LE-NEXT: xxlnor [[R8]], [[R8]], [[R8]] +; POWERPC_64LE-NEXT: vmrglh [[R11:[0-9]+]], [[R9]], [[R9]] +; POWERPC_64LE-NEXT: vmrghh [[R9]], [[R9]], [[R9]] +; POWERPC_64LE-NEXT: xxland [[R12:[0-9]+]], [[R12]], [[R6]] +; POWERPC_64LE-NEXT: xxland [[R8]], [[R8]], [[R6]] +; POWERPC_64LE-NEXT: vadduwm [[R7]], [[R7]], [[R9]] +; POWERPC_64LE-NEXT: vadduwm [[R3]], [[R3]], [[R11]] +; POWERPC_64LE-NEXT: bdnz .LBB0_10 +; POWERPC_64LE: blr +; +; POWERPC_64-LABEL: test_Greater_than: +; POWERPC_64: L..BB0_6: # %vector.body +; POWERPC_64-NEXT: # +; 
POWERPC_64-NEXT: lxv [[R1:[0-9]+]], -64(4) +; POWERPC_64-NEXT: vcmpequh [[R2:[0-9]+]], [[R2]], [[R3:[0-9]+]] +; POWERPC_64-NEXT: xxlnor [[R1]], [[R1]], [[R1]] +; POWERPC_64-NEXT: vmrglh [[R4:[0-9]+]], [[R2]], [[R2]] +; POWERPC_64-NEXT: vmrghh [[R2]], [[R2]], [[R2]] +; POWERPC_64-NEXT: xxland [[R5:[0-9]+]], [[R5]], [[R6:[0-9]+]] +; POWERPC_64-NEXT: xxland [[R1]], [[R1]], [[R6]] +; POWERPC_64-NEXT: vadduwm [[R7:[0-9]+]], [[R7]], [[R4]] +; POWERPC_64: L..BB0_10: # %vec.epilog.vector.body +; POWERPC_64-NEXT: # +; POWERPC_64-NEXT: lxv [[R8:[0-9]+]], 0(4) +; POWERPC_64-NEXT: addi 4, 4, 16 +; POWERPC_64-NEXT: vcmpequh [[R9:[0-9]+]], [[R9]], [[R10:[0-9]+]] +; POWERPC_64-NEXT: xxlnor [[R8]], [[R8]], [[R8]] +; POWERPC_64-NEXT: vmrghh [[R11:[0-9]+]], [[R9]], [[R9]] +; POWERPC_64-NEXT: vmrglh [[R9]], [[R9]], [[R9]] +; POWERPC_64-NEXT: xxland [[R12:[0-9]+]], [[R12]], [[R6]] +; POWERPC_64-NEXT: xxland [[R8]], [[R8]], [[R6]] +; POWERPC_64-NEXT: vadduwm [[R7]], [[R7]], [[R9]] +; POWERPC_64-NEXT: vadduwm [[R3]], [[R3]], [[R11]] +; POWERPC_64-NEXT: bdnz L..BB0_10 +; POWERPC_64: blr +; +; POWERPC_32-LABEL: test_Greater_than: +; POWERPC_32: L..BB0_7: # %vector.body +; POWERPC_32-NEXT: # +; POWERPC_32-NEXT: lxv [[R1:[0-9]+]], 0(10) +; POWERPC_32-NEXT: addic [[R13:[0-9]+]], [[R13]], 64 +; POWERPC_32-NEXT: addze [[R14:[0-9]+]], [[R14]] +; POWERPC_32-NEXT: xor [[R15:[0-9]+]], [[R13]], [[R16:[0-9]+]] +; POWERPC_32-NEXT: or. 
[[R15]], [[R15]], [[R14]] +; POWERPC_32-NEXT: vcmpequh [[R2:[0-9]+]], [[R2]], [[R3:[0-9]+]] +; POWERPC_32-NEXT: xxlnor [[R1]], [[R1]], [[R1]] +; POWERPC_32-NEXT: vmrglh [[R4:[0-9]+]], [[R2]], [[R2]] +; POWERPC_32-NEXT: vmrghh [[R2]], [[R2]], [[R2]] +; POWERPC_32-NEXT: xxland [[R5:[0-9]+]], [[R5]], [[R6:[0-9]+]] +; POWERPC_32-NEXT: xxland [[R1]], [[R1]], [[R6]] +; POWERPC_32-NEXT: vadduwm [[R7:[0-9]+]], [[R7]], [[R4]] +; POWERPC_32: L..BB0_11: # %vec.epilog.vector.body +; POWERPC_32-NEXT: # +; POWERPC_32-NEXT: slwi [[R14]], [[R13]], 1 +; POWERPC_32-NEXT: addic [[R13]], [[R13]], 8 +; POWERPC_32-NEXT: addze [[R17:[0-9]+]], [[R17]] +; POWERPC_32-NEXT: lxvx [[R8:[0-9]+]], [[R18:[0-9]+]], [[R14]] +; POWERPC_32-NEXT: xor [[R14]], [[R13]], [[R16]] +; POWERPC_32-NEXT: or. [[R14]], [[R14]], [[R17]] +; POWERPC_32-NEXT: vcmpequh [[R9:[0-9]+]], [[R9]], [[R3]] +; POWERPC_32-NEXT: xxlnor [[R8]], [[R8]], [[R8]] +; POWERPC_32-NEXT: vmrghh [[R11:[0-9]+]], [[R9]], [[R9]] +; POWERPC_32-NEXT: vmrglh [[R9]], [[R9]], [[R9]] +; POWERPC_32-NEXT: xxland [[R12:[0-9]+]], [[R12]], [[R6]] +; POWERPC_32-NEXT: xxland [[R8]], [[R8]], [[R6]] +; POWERPC_32-NEXT: vadduwm [[R7]], [[R7]], [[R9]] +; POWERPC_32-NEXT: vadduwm [[R19:[0-9]+]], [[R19]], [[R11]] +; POWERPC_32-NEXT: bne 0, L..BB0_11 +; POWERPC_32: blr + entry: + %cmp5 = icmp sgt i32 %ncols, 0 + br i1 %cmp5, label %iter.check, label %for.cond.cleanup + +iter.check: ; preds = %entry + %wide.trip.count = zext nneg i32 %ncols to i64 + %min.iters.check = icmp ult i32 %ncols, 8 + br i1 %min.iters.check, label %for.body.preheader, label %vector.main.loop.iter.check + +for.body.preheader: ; preds = %vec.epilog.iter.check, %vec.epilog.middle.block, %iter.check + %indvars.iv.ph = phi i64 [ 0, %iter.check ], [ %n.vec, %vec.epilog.iter.check ], [ %n.vec31, %vec.epilog.middle.block ] + %num_cols_needed.06.ph = phi i32 [ 0, %iter.check ], [ %33, %vec.epilog.iter.check ], [ %40, %vec.epilog.middle.block ] + br label %for.body + +vector.main.loop.iter.check: 
; preds = %iter.check + %min.iters.check9 = icmp ult i32 %ncols, 64 + br i1 %min.iters.check9, label %vec.epilog.ph, label %vector.ph + +vector.ph: ; preds = %vector.main.loop.iter.check + %n.vec = and i64 %wide.trip.count, 2147483584 + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %vec.phi = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %24, %vector.body ] + %vec.phi10 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %25, %vector.body ] + %vec.phi11 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %26, %vector.body ] + %vec.phi12 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %27, %vector.body ] + %vec.phi13 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %28, %vector.body ] + %vec.phi14 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %29, %vector.body ] + %vec.phi15 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %30, %vector.body ] + %vec.phi16 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %31, %vector.body ] + %0 = getelementptr inbounds nuw i16, ptr %colauths, i64 %index + %1 = getelementptr inbounds nuw i8, ptr %0, i64 16 + %2 = getelementptr inbounds nuw i8, ptr %0, i64 32 + %3 = getelementptr inbounds nuw i8, ptr %0, i64 48 + %4 = getelementptr inbounds nuw i8, ptr %0, i64 64 + %5 = getelementptr inbounds nuw i8, ptr %0, i64 80 + %6 = getelementptr inbounds nuw i8, ptr %0, i64 96 + %7 = getelementptr inbounds nuw i8, ptr %0, i64 112 + %wide.load = load <8 x i16>, ptr %0, align 2, !tbaa !5 + %wide.load17 = load <8 x i16>, ptr %1, align 2, !tbaa !5 + %wide.load18 = load <8 x i16>, ptr %2, align 2, !tbaa !5 + %wide.load19 = load <8 x i16>, ptr %3, align 2, !tbaa !5 + %wide.load20 = load <8 x i16>, ptr %4, align 2, !tbaa !5 + %wide.load21 = load <8 x i16>, ptr %5, align 2, !tbaa !5 + %wide.load22 = load <8 x i16>, ptr %6, align 2, !tbaa !5 + %wide.load23 = load <8 x i16>, ptr %7, align 2, !tbaa !5 + %8 = icmp ne <8 x i16> %wide.load, 
zeroinitializer + %9 = icmp ne <8 x i16> %wide.load17, zeroinitializer + %10 = icmp ne <8 x i16> %wide.load18, zeroinitializer + %11 = icmp ne <8 x i16> %wide.load19, zeroinitializer + %12 = icmp ne <8 x i16> %wide.load20, zeroinitializer + %13 = icmp ne <8 x i16> %wide.load21, zeroinitializer + %14 = icmp ne <8 x i16> %wide.load22, zeroinitializer + %15 = icmp ne <8 x i16> %wide.load23, zeroinitializer + %16 = zext <8 x i1> %8 to <8 x i32> + %17 = zext <8 x i1> %9 to <8 x i32> + %18 = zext <8 x i1> %10 to <8 x i32> + %19 = zext <8 x i1> %11 to <8 x i32> + %20 = zext <8 x i1> %12 to <8 x i32> + %21 = zext <8 x i1> %13 to <8 x i32> + %22 = zext <8 x i1> %14 to <8 x i32> + %23 = zext <8 x i1> %15 to <8 x i32> + %24 = add <8 x i32> %vec.phi, %16 + %25 = add <8 x i32> %vec.phi10, %17 + %26 = add <8 x i32> %vec.phi11, %18 + %27 = add <8 x i32> %vec.phi12, %19 + %28 = add <8 x i32> %vec.phi13, %20 + %29 = add <8 x i32> %vec.phi14, %21 + %30 = add <8 x i32> %vec.phi15, %22 + %31 = add <8 x i32> %vec.phi16, %23 + %index.next = add nuw i64 %index, 64 + %32 = icmp eq i64 %index.next, %n.vec + br i1 %32, label %middle.block, label %vector.body, !llvm.loop !9 + +middle.block: ; preds = %vector.body + %bin.rdx = add <8 x i32> %25, %24 + %bin.rdx24 = add <8 x i32> %26, %bin.rdx + %bin.rdx25 = add <8 x i32> %27, %bin.rdx24 + %bin.rdx26 = add <8 x i32> %28, %bin.rdx25 + %bin.rdx27 = add <8 x i32> %29, %bin.rdx26 + %bin.rdx28 = add <8 x i32> %30, %bin.rdx27 + %bin.rdx29 = add <8 x i32> %31, %bin.rdx28 + %33 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %bin.rdx29) + %cmp.n = icmp eq i64 %n.vec, %wide.trip.count + br i1 %cmp.n, label %for.cond.cleanup, label %vec.epilog.iter.check + +vec.epilog.iter.check: ; preds = %middle.block + %n.vec.remaining = and i64 %wide.trip.count, 56 + %min.epilog.iters.check = icmp eq i64 %n.vec.remaining, 0 + br i1 %min.epilog.iters.check, label %for.body.preheader, label %vec.epilog.ph + +vec.epilog.ph: ; preds = %vec.epilog.iter.check, 
%vector.main.loop.iter.check + %vec.epilog.resume.val = phi i64 [ %n.vec, %vec.epilog.iter.check ], [ 0, %vector.main.loop.iter.check ] + %bc.merge.rdx = phi i32 [ %33, %vec.epilog.iter.check ], [ 0, %vector.main.loop.iter.check ] + %n.vec31 = and i64 %wide.trip.count, 2147483640 + %34 = insertelement <8 x i32> , i32 %bc.merge.rdx, i64 0 + br label %vec.epilog.vector.body + +vec.epilog.vector.body: ; preds = %vec.epilog.vector.body, %vec.epilog.ph + %index32 = phi i64 [ %vec.epilog.resume.val, %vec.epilog.ph ], [ %index.next35, %vec.epilog.vector.body ] + %vec.phi33 = phi <8 x i32> [ %34, %vec.epilog.ph ], [ %38, %vec.epilog.vector.body ] + %35 = getelementptr inbounds nuw i16, ptr %colauths, i64 %index32 + %wide.load34 = load <8 x i16>, ptr %35, align 2, !tbaa !5 + %36 = icmp ne <8 x i16> %wide.load34, zeroinitializer + %37 = zext <8 x i1> %36 to <8 x i32> + %38 = add <8 x i32> %vec.phi33, %37 + %index.next35 = add nuw i64 %index32, 8 + %39 = icmp eq i64 %index.next35, %n.vec31 + br i1 %39, label %vec.epilog.middle.block, label %vec.epilog.vector.body, !llvm.loop !13 + +vec.epilog.middle.block: ; preds = %vec.epilog.vector.body + %40 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %38) + %cmp.n36 = icmp eq i64 %n.vec31, %wide.trip.count + br i1 %cmp.n36, label %for.cond.cleanup, label %for.body.preheader + +for.cond.cleanup: ; preds = %for.body, %middle.block, %vec.epilog.middle.block, %entry + %num_cols_needed.0.lcssa = phi i32 [ 0, %entry ], [ %33, %middle.block ], [ %40, %vec.epilog.middle.block ], [ %spec.select, %for.body ] + ret i32 %num_cols_needed.0.lcssa + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ] + %num_cols_needed.06 = phi i32 [ %spec.select, %for.body ], [ %num_cols_needed.06.ph, %for.body.preheader ] + %arrayidx = getelementptr inbounds nuw i16, ptr %colauths, i64 %indvars.iv + %41 = load i16, ptr %arrayidx, align 2, !tbaa !5 + 
%tobool.not = icmp ne i16 %41, 0 + %inc = zext i1 %tobool.not to i32 + %spec.select = add nuw nsw i32 %num_cols_needed.06, %inc + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !14 +} + +!5 = !{!6, !6, i64 0} +!6 = !{!"short", !7, i64 0} +!7 = !{!"omnipotent char", !8, i64 0} +!8 = !{!"Simple C/C++ TBAA"} +!9 = distinct !{!9, !10, !11, !12} +!10 = !{!"llvm.loop.mustprogress"} +!11 = !{!"llvm.loop.isvectorized", i32 1} +!12 = !{!"llvm.loop.unroll.runtime.disable"} +!13 = distinct !{!13, !10, !11, !12} +!14 = distinct !{!14, !10, !12, !11} diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir index 30e455f57737b..82cc6829838a0 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir @@ -624,6 +624,9 @@ # DEBUG-NEXT: G_RESET_FPMODE (opcode {{[0-9]+}}): 0 type indices, 0 imm indices # DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined # DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: G_GET_ROUNDING (opcode {{[0-9]+}}): 1 type index, 0 imm indices +# DEBUG-NEXT:.. type index coverage check SKIPPED: no rules defined +# DEBUG-NEXT:.. imm index coverage check SKIPPED: no rules defined # DEBUG-NEXT: G_PTR_ADD (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. the first uncovered type index: 2, OK # DEBUG-NEXT: .. 
the first uncovered imm index: 0, OK diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-addsub.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-addsub.ll index 22956f8fe3551..9d3fe3a90b463 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-addsub.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-addsub.ll @@ -47,9 +47,9 @@ define <4 x i32> @select_addsub_v4i32(<4 x i1> %cc, <4 x i32> %a, <4 x i32> %b) define <4 x i32> @select_addsub_v4i32_select_swapped(<4 x i1> %cc, <4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: select_addsub_v4i32_select_swapped: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; CHECK-NEXT: vmnot.m v0, v0 -; CHECK-NEXT: vrsub.vi v9, v9, 0, v0.t +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vrsub.vi v10, v9, 0 +; CHECK-NEXT: vmerge.vvm v9, v10, v9, v0 ; CHECK-NEXT: vadd.vv v8, v8, v9 ; CHECK-NEXT: ret %sub = sub <4 x i32> %a, %b @@ -74,9 +74,9 @@ define <4 x i32> @select_addsub_v4i32_add_swapped(<4 x i1> %cc, <4 x i32> %a, <4 define <4 x i32> @select_addsub_v4i32_both_swapped(<4 x i1> %cc, <4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: select_addsub_v4i32_both_swapped: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; CHECK-NEXT: vmnot.m v0, v0 -; CHECK-NEXT: vrsub.vi v9, v9, 0, v0.t +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vrsub.vi v10, v9, 0 +; CHECK-NEXT: vmerge.vvm v9, v10, v9, v0 ; CHECK-NEXT: vadd.vv v8, v8, v9 ; CHECK-NEXT: ret %sub = sub <4 x i32> %a, %b diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll index 0aa0cbceefc76..317ad0c124e73 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll @@ -1022,6 +1022,111 @@ define @vmadc_vv( %a, %b, ret %2 } +define @vmadc_vim( %a, %mask, %b, iXLen %vl) { +; NOVLOPT-LABEL: vmadc_vim: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vmadc.vim 
v11, v8, 5, v0 +; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; NOVLOPT-NEXT: vmand.mm v0, v11, v10 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vmadc_vim: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; VLOPT-NEXT: vmadc.vim v11, v8, 5, v0 +; VLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; VLOPT-NEXT: vmand.mm v0, v11, v10 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vmadc.carry.in.nxv4i32.i32( %a, i32 5, %mask, iXLen -1) + %2 = call @llvm.riscv.vmand.nxv4i1( %1, %b, iXLen %vl) + ret %2 +} + +define @vmadc_vxm( %a, %mask, %b, i32 %c, iXLen %vl) { +; NOVLOPT-LABEL: vmadc_vxm: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vmadc.vxm v11, v8, a0, v0 +; NOVLOPT-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; NOVLOPT-NEXT: vmand.mm v0, v11, v10 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vmadc_vxm: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; VLOPT-NEXT: vmadc.vxm v11, v8, a0, v0 +; VLOPT-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; VLOPT-NEXT: vmand.mm v0, v11, v10 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vmadc.carry.in.nxv4i32.i32( %a, i32 %c, %mask, iXLen -1) + %2 = call @llvm.riscv.vmand.nxv4i1( %1, %b, iXLen %vl) + ret %2 +} + +define @vmadc_vvm( %a, %mask, %b, %c, iXLen %vl) { +; NOVLOPT-LABEL: vmadc_vvm: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vmadc.vvm v11, v8, v12, v0 +; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; NOVLOPT-NEXT: vmand.mm v0, v11, v10 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vmadc_vvm: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; VLOPT-NEXT: vmadc.vvm v11, v8, v12, v0 +; VLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; VLOPT-NEXT: vmand.mm v0, v11, v10 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vmadc.carry.in.nxv4i32.nxv4i32( %a, %c, %mask, iXLen -1) + %2 = call @llvm.riscv.vmand.nxv4i1( %1, %b, iXLen %vl) + ret %2 +} + +define @vmsbc_vvm( %a, %mask, %b, %c, iXLen %vl) { +; 
NOVLOPT-LABEL: vmsbc_vvm: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vmsbc.vvm v11, v8, v12, v0 +; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; NOVLOPT-NEXT: vmand.mm v0, v11, v10 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vmsbc_vvm: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; VLOPT-NEXT: vmsbc.vvm v11, v8, v12, v0 +; VLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; VLOPT-NEXT: vmand.mm v0, v11, v10 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vmsbc.borrow.in.nxv4i32.nxv4i32( %a, %c, %mask, iXLen -1) + %2 = call @llvm.riscv.vmand.nxv4i1( %1, %b, iXLen %vl) + ret %2 +} + +define @vmsbc_vxm( %a, %mask, %b, i32 %c, iXLen %vl) { +; NOVLOPT-LABEL: vmsbc_vxm: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vmsbc.vxm v11, v8, a0, v0 +; NOVLOPT-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; NOVLOPT-NEXT: vmand.mm v0, v11, v10 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vmsbc_vxm: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; VLOPT-NEXT: vmsbc.vxm v11, v8, a0, v0 +; VLOPT-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; VLOPT-NEXT: vmand.mm v0, v11, v10 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vmsbc.borrow.in.nxv4i32.i32( %a, i32 %c, %mask, iXLen -1) + %2 = call @llvm.riscv.vmand.nxv4i1( %1, %b, iXLen %vl) + ret %2 +} + define @vmsbc_vx( %a, %b, i32 %c, iXLen %vl) { ; NOVLOPT-LABEL: vmsbc_vx: ; NOVLOPT: # %bb.0: @@ -5297,6 +5402,153 @@ define @vfwmaccbf16_vv( %a, %2 } +define @vsbc_vvm( %a, %mask, %b, %c, iXLen %vl) { +; NOVLOPT-LABEL: vsbc_vvm: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vsbc.vvm v8, v8, v10, v0 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v8, v12 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vsbc_vvm: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; VLOPT-NEXT: vsbc.vvm v8, v8, v10, v0 +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma 
+; VLOPT-NEXT: vadd.vv v8, v8, v12 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vsbc.nxv4i32.nxv4i32.nxv4i1( poison, %a, %b, %mask, iXLen -1) + %2 = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %1, %c, iXLen %vl) + ret %2 +} + +define @vsbc_vxm( %a, %mask, %b, i32 %c, iXLen %vl) { +; NOVLOPT-LABEL: vsbc_vxm: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vsbc.vxm v8, v8, a0, v0 +; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v8, v10 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vsbc_vxm: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; VLOPT-NEXT: vsbc.vxm v8, v8, a0, v0 +; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; VLOPT-NEXT: vadd.vv v8, v8, v10 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vsbc.nxv4i32.i32.nxv4i1( poison, %a, i32 %c, %mask, iXLen -1) + %2 = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %1, %b, iXLen %vl) + ret %2 +} + +define @vfclass_v( %a, %b, iXLen %vl) { +; NOVLOPT-LABEL: vfclass_v: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfclass.v v8, v8 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v8, v10 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfclass_v: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; VLOPT-NEXT: vfclass.v v8, v8 +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vadd.vv v8, v8, v10 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfclass.nxv4i32( poison, %a, iXLen -1) + %2 = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %1, %b, iXLen %vl) + ret %2 +} + +define @vrgather_vi( %a, %b, iXLen %vl) { +; NOVLOPT-LABEL: vrgather_vi: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vrgather.vi v12, v8, 5 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v12, v10 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vrgather_vi: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli a1, zero, e32, 
m2, ta, ma +; VLOPT-NEXT: vrgather.vi v12, v8, 5 +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vadd.vv v8, v12, v10 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vrgather.vx.nxv4i32.iXLen( poison, %a, iXLen 5, iXLen -1) + %2 = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %1, %b, iXLen %vl) + ret %2 +} + +define @vrgather_vv( %a, %idx, %b, iXLen %vl) { +; NOVLOPT-LABEL: vrgather_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vrgather.vv v12, v8, v10 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v12, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vrgather_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; VLOPT-NEXT: vrgather.vv v12, v8, v10 +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vadd.vv v8, v12, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vrgather.vv.nxv4i32( poison, %a, %idx, iXLen -1) + %2 = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %1, %a, iXLen %vl) + ret %2 +} + +define @vrgather_vx( %a, iXLen %idx, %b, iXLen %vl) { +; NOVLOPT-LABEL: vrgather_vx: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vrgather.vx v12, v8, a0 +; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v12, v10 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vrgather_vx: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; VLOPT-NEXT: vrgather.vx v12, v8, a0 +; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; VLOPT-NEXT: vadd.vv v8, v12, v10 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vrgather.vx.nxv4i32.iXLen( poison, %a, iXLen %idx, iXLen -1) + %2 = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %1, %b, iXLen %vl) + ret %2 +} + +define @vrgatherei16_vv( %a, %idx, %b, iXLen %vl) { +; NOVLOPT-LABEL: vrgatherei16_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vrgatherei16.vv v12, v8, v10 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, 
m2, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v12, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vrgatherei16_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; VLOPT-NEXT: vrgatherei16.vv v12, v8, v10 +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vadd.vv v8, v12, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vrgatherei16.vv.nxv4i32( poison, %a, %idx, iXLen -1) + %2 = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %1, %a, iXLen %vl) + ret %2 +} + define @vfwmaccbf16_vf( %a, bfloat %b, %c, %d, iXLen %vl) { ; NOVLOPT-LABEL: vfwmaccbf16_vf: ; NOVLOPT: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/vmerge-peephole.mir b/llvm/test/CodeGen/RISCV/rvv/vmerge-peephole.mir index 03204468dc14c..0b95e558d8236 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmerge-peephole.mir +++ b/llvm/test/CodeGen/RISCV/rvv/vmerge-peephole.mir @@ -55,3 +55,42 @@ body: | %mask:vmv0 = COPY $v0 %y:vrnov0 = PseudoVMERGE_VVM_M1 %passthru, %passthru, %x, %mask, %avl, 5 /* e32 */ ... +--- +name: vnclip_move_past_passthru +body: | + bb.0: + liveins: $x8, $v0, $v8 + ; CHECK-LABEL: name: vnclip_move_past_passthru + ; CHECK: liveins: $x8, $v0, $v8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %avl:gprnox0 = COPY $x8 + ; CHECK-NEXT: %passthru:vrnov0 = COPY $v8 + ; CHECK-NEXT: %mask:vmv0 = COPY $v0 + ; CHECK-NEXT: %y:vrnov0 = PseudoVNCLIPU_WV_MF2_MASK %passthru, $noreg, $noreg, %mask, 0, %avl, 5 /* e32 */, 0 /* tu, mu */, implicit-def $vxsat + %avl:gprnox0 = COPY $x8 + %x:vr = PseudoVNCLIPU_WV_MF2 $noreg, $noreg, $noreg, 0, -1, 5, 3, implicit-def $vxsat + %passthru:vrnov0 = COPY $v8 + %mask:vmv0 = COPY $v0 + %y:vrnov0 = PseudoVMERGE_VVM_M1 %passthru, %passthru, %x, %mask, %avl, 5 /* e32 */ +... 
+--- +name: vnclip_cant_move_past_passthru +body: | + bb.0: + liveins: $x8, $v0, $v8 + ; CHECK-LABEL: name: vnclip_cant_move_past_passthru + ; CHECK: liveins: $x8, $v0, $v8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %avl:gprnox0 = COPY $x8 + ; CHECK-NEXT: %x:vr = PseudoVNCLIPU_WV_MF2 $noreg, $noreg, $noreg, 0, -1, 5 /* e32 */, 3 /* ta, ma */, implicit-def $vxsat + ; CHECK-NEXT: %vxsat:gpr = COPY $vxsat + ; CHECK-NEXT: %passthru:vrnov0 = COPY $v8 + ; CHECK-NEXT: %mask:vmv0 = COPY $v0 + ; CHECK-NEXT: %y:vrnov0 = PseudoVMERGE_VVM_M1 %passthru, %passthru, %x, %mask, %avl, 5 /* e32 */ + %avl:gprnox0 = COPY $x8 + %x:vr = PseudoVNCLIPU_WV_MF2 $noreg, $noreg, $noreg, 0, -1, 5, 3, implicit-def $vxsat + %vxsat:gpr = COPY $vxsat + %passthru:vrnov0 = COPY $v8 + %mask:vmv0 = COPY $v0 + %y:vrnov0 = PseudoVMERGE_VVM_M1 %passthru, %passthru, %x, %mask, %avl, 5 /* e32 */ +... diff --git a/llvm/test/CodeGen/RISCV/select-cond.ll b/llvm/test/CodeGen/RISCV/select-cond.ll index d9f9ad379ee95..59f4d95f45acc 100644 --- a/llvm/test/CodeGen/RISCV/select-cond.ll +++ b/llvm/test/CodeGen/RISCV/select-cond.ll @@ -5,6 +5,8 @@ ; RUN: | FileCheck %s --check-prefixes=RV32-THEAD ; RUN: llc -mtriple=riscv32 -mattr=+experimental-xqcicm -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=RV32-XQCICM +; RUN: llc -mtriple=riscv32 -mattr=+experimental-xqcics -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=RV32-XQCICS ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=RV64 ; RUN: llc -mtriple=riscv64 -mattr=+xmipscmov -verify-machineinstrs < %s \ @@ -35,6 +37,12 @@ define signext i32 @select_i32_trunc(i32 signext %cond, i32 signext %x, i32 sign ; RV32-XQCICM-NEXT: mv a0, a1 ; RV32-XQCICM-NEXT: ret ; +; RV32-XQCICS-LABEL: select_i32_trunc: +; RV32-XQCICS: # %bb.0: +; RV32-XQCICS-NEXT: andi a0, a0, 1 +; RV32-XQCICS-NEXT: qc.selectnei a0, 0, a1, a2 +; RV32-XQCICS-NEXT: ret +; ; RV64-LABEL: select_i32_trunc: ; RV64: # %bb.0: ; 
RV64-NEXT: andi a3, a0, 1 @@ -80,6 +88,12 @@ define signext i32 @select_i32_param(i1 signext %cond, i32 signext %x, i32 signe ; RV32-XQCICM-NEXT: mv a0, a1 ; RV32-XQCICM-NEXT: ret ; +; RV32-XQCICS-LABEL: select_i32_param: +; RV32-XQCICS: # %bb.0: +; RV32-XQCICS-NEXT: andi a0, a0, 1 +; RV32-XQCICS-NEXT: qc.selectnei a0, 0, a1, a2 +; RV32-XQCICS-NEXT: ret +; ; RV64-LABEL: select_i32_param: ; RV64: # %bb.0: ; RV64-NEXT: andi a3, a0, 1 @@ -122,6 +136,13 @@ define signext i32 @select_i32_eq(i32 signext %a, i32 signext %b, i32 signext %x ; RV32-XQCICM-NEXT: mv a0, a3 ; RV32-XQCICM-NEXT: ret ; +; RV32-XQCICS-LABEL: select_i32_eq: +; RV32-XQCICS: # %bb.0: +; RV32-XQCICS-NEXT: xor a0, a0, a1 +; RV32-XQCICS-NEXT: seqz a0, a0 +; RV32-XQCICS-NEXT: qc.selectnei a0, 0, a2, a3 +; RV32-XQCICS-NEXT: ret +; ; RV64-LABEL: select_i32_eq: ; RV64: # %bb.0: ; RV64-NEXT: beq a0, a1, .LBB2_2 @@ -164,6 +185,13 @@ define signext i32 @select_i32_ne(i32 signext %a, i32 signext %b, i32 signext %x ; RV32-XQCICM-NEXT: mv a0, a3 ; RV32-XQCICM-NEXT: ret ; +; RV32-XQCICS-LABEL: select_i32_ne: +; RV32-XQCICS: # %bb.0: +; RV32-XQCICS-NEXT: xor a0, a0, a1 +; RV32-XQCICS-NEXT: snez a0, a0 +; RV32-XQCICS-NEXT: qc.selectnei a0, 0, a2, a3 +; RV32-XQCICS-NEXT: ret +; ; RV64-LABEL: select_i32_ne: ; RV64: # %bb.0: ; RV64-NEXT: bne a0, a1, .LBB3_2 @@ -206,6 +234,12 @@ define signext i32 @select_i32_ugt(i32 signext %a, i32 signext %b, i32 signext % ; RV32-XQCICM-NEXT: mv a0, a3 ; RV32-XQCICM-NEXT: ret ; +; RV32-XQCICS-LABEL: select_i32_ugt: +; RV32-XQCICS: # %bb.0: +; RV32-XQCICS-NEXT: sltu a0, a1, a0 +; RV32-XQCICS-NEXT: qc.selectnei a0, 0, a2, a3 +; RV32-XQCICS-NEXT: ret +; ; RV64-LABEL: select_i32_ugt: ; RV64: # %bb.0: ; RV64-NEXT: bltu a1, a0, .LBB4_2 @@ -248,6 +282,12 @@ define signext i32 @select_i32_uge(i32 signext %a, i32 signext %b, i32 signext % ; RV32-XQCICM-NEXT: mv a0, a2 ; RV32-XQCICM-NEXT: ret ; +; RV32-XQCICS-LABEL: select_i32_uge: +; RV32-XQCICS: # %bb.0: +; RV32-XQCICS-NEXT: sltu a0, a0, a1 +; 
RV32-XQCICS-NEXT: qc.selectnei a0, 0, a3, a2 +; RV32-XQCICS-NEXT: ret +; ; RV64-LABEL: select_i32_uge: ; RV64: # %bb.0: ; RV64-NEXT: bgeu a0, a1, .LBB5_2 @@ -290,6 +330,12 @@ define signext i32 @select_i32_ult(i32 signext %a, i32 signext %b, i32 signext % ; RV32-XQCICM-NEXT: mv a0, a3 ; RV32-XQCICM-NEXT: ret ; +; RV32-XQCICS-LABEL: select_i32_ult: +; RV32-XQCICS: # %bb.0: +; RV32-XQCICS-NEXT: sltu a0, a0, a1 +; RV32-XQCICS-NEXT: qc.selectnei a0, 0, a2, a3 +; RV32-XQCICS-NEXT: ret +; ; RV64-LABEL: select_i32_ult: ; RV64: # %bb.0: ; RV64-NEXT: bltu a0, a1, .LBB6_2 @@ -332,6 +378,12 @@ define signext i32 @select_i32_ule(i32 signext %a, i32 signext %b, i32 signext % ; RV32-XQCICM-NEXT: mv a0, a2 ; RV32-XQCICM-NEXT: ret ; +; RV32-XQCICS-LABEL: select_i32_ule: +; RV32-XQCICS: # %bb.0: +; RV32-XQCICS-NEXT: sltu a0, a1, a0 +; RV32-XQCICS-NEXT: qc.selectnei a0, 0, a3, a2 +; RV32-XQCICS-NEXT: ret +; ; RV64-LABEL: select_i32_ule: ; RV64: # %bb.0: ; RV64-NEXT: bgeu a1, a0, .LBB7_2 @@ -374,6 +426,12 @@ define signext i32 @select_i32_sgt(i32 signext %a, i32 signext %b, i32 signext % ; RV32-XQCICM-NEXT: mv a0, a3 ; RV32-XQCICM-NEXT: ret ; +; RV32-XQCICS-LABEL: select_i32_sgt: +; RV32-XQCICS: # %bb.0: +; RV32-XQCICS-NEXT: slt a0, a1, a0 +; RV32-XQCICS-NEXT: qc.selectnei a0, 0, a2, a3 +; RV32-XQCICS-NEXT: ret +; ; RV64-LABEL: select_i32_sgt: ; RV64: # %bb.0: ; RV64-NEXT: blt a1, a0, .LBB8_2 @@ -416,6 +474,12 @@ define signext i32 @select_i32_sge(i32 signext %a, i32 signext %b, i32 signext % ; RV32-XQCICM-NEXT: mv a0, a2 ; RV32-XQCICM-NEXT: ret ; +; RV32-XQCICS-LABEL: select_i32_sge: +; RV32-XQCICS: # %bb.0: +; RV32-XQCICS-NEXT: slt a0, a0, a1 +; RV32-XQCICS-NEXT: qc.selectnei a0, 0, a3, a2 +; RV32-XQCICS-NEXT: ret +; ; RV64-LABEL: select_i32_sge: ; RV64: # %bb.0: ; RV64-NEXT: bge a0, a1, .LBB9_2 @@ -458,6 +522,12 @@ define signext i32 @select_i32_slt(i32 signext %a, i32 signext %b, i32 signext % ; RV32-XQCICM-NEXT: mv a0, a3 ; RV32-XQCICM-NEXT: ret ; +; RV32-XQCICS-LABEL: 
select_i32_slt: +; RV32-XQCICS: # %bb.0: +; RV32-XQCICS-NEXT: slt a0, a0, a1 +; RV32-XQCICS-NEXT: qc.selectnei a0, 0, a2, a3 +; RV32-XQCICS-NEXT: ret +; ; RV64-LABEL: select_i32_slt: ; RV64: # %bb.0: ; RV64-NEXT: blt a0, a1, .LBB10_2 @@ -500,6 +570,12 @@ define signext i32 @select_i32_sle(i32 signext %a, i32 signext %b, i32 signext % ; RV32-XQCICM-NEXT: mv a0, a2 ; RV32-XQCICM-NEXT: ret ; +; RV32-XQCICS-LABEL: select_i32_sle: +; RV32-XQCICS: # %bb.0: +; RV32-XQCICS-NEXT: slt a0, a1, a0 +; RV32-XQCICS-NEXT: qc.selectnei a0, 0, a3, a2 +; RV32-XQCICS-NEXT: ret +; ; RV64-LABEL: select_i32_sle: ; RV64: # %bb.0: ; RV64-NEXT: bge a1, a0, .LBB11_2 @@ -550,6 +626,14 @@ define i64 @select_i64_trunc(i64 %cond, i64 %x, i64 %y) nounwind { ; RV32-XQCICM-NEXT: mv a0, a2 ; RV32-XQCICM-NEXT: ret ; +; RV32-XQCICS-LABEL: select_i64_trunc: +; RV32-XQCICS: # %bb.0: +; RV32-XQCICS-NEXT: andi a1, a0, 1 +; RV32-XQCICS-NEXT: mv a0, a1 +; RV32-XQCICS-NEXT: qc.selectnei a1, 0, a3, a5 +; RV32-XQCICS-NEXT: qc.selectnei a0, 0, a2, a4 +; RV32-XQCICS-NEXT: ret +; ; RV64-LABEL: select_i64_trunc: ; RV64: # %bb.0: ; RV64-NEXT: andi a3, a0, 1 @@ -601,6 +685,15 @@ define i64 @select_i64_param(i1 %cond, i64 %x, i64 %y) nounwind { ; RV32-XQCICM-NEXT: mv a1, a2 ; RV32-XQCICM-NEXT: ret ; +; RV32-XQCICS-LABEL: select_i64_param: +; RV32-XQCICS: # %bb.0: +; RV32-XQCICS-NEXT: andi a5, a0, 1 +; RV32-XQCICS-NEXT: mv a0, a5 +; RV32-XQCICS-NEXT: qc.selectnei a5, 0, a2, a4 +; RV32-XQCICS-NEXT: qc.selectnei a0, 0, a1, a3 +; RV32-XQCICS-NEXT: mv a1, a5 +; RV32-XQCICS-NEXT: ret +; ; RV64-LABEL: select_i64_param: ; RV64: # %bb.0: ; RV64-NEXT: andi a3, a0, 1 @@ -657,6 +750,16 @@ define i64 @select_i64_eq(i64 %a, i64 %b, i64 %x, i64 %y) nounwind { ; RV32-XQCICM-NEXT: mv a1, a7 ; RV32-XQCICM-NEXT: ret ; +; RV32-XQCICS-LABEL: select_i64_eq: +; RV32-XQCICS: # %bb.0: +; RV32-XQCICS-NEXT: xor a1, a1, a3 +; RV32-XQCICS-NEXT: xor a0, a0, a2 +; RV32-XQCICS-NEXT: or a1, a0, a1 +; RV32-XQCICS-NEXT: mv a0, a1 +; RV32-XQCICS-NEXT: 
qc.selecteqi a0, 0, a4, a6 +; RV32-XQCICS-NEXT: qc.selecteqi a1, 0, a5, a7 +; RV32-XQCICS-NEXT: ret +; ; RV64-LABEL: select_i64_eq: ; RV64: # %bb.0: ; RV64-NEXT: beq a0, a1, .LBB14_2 @@ -713,6 +816,16 @@ define i64 @select_i64_ne(i64 %a, i64 %b, i64 %x, i64 %y) nounwind { ; RV32-XQCICM-NEXT: mv a1, a7 ; RV32-XQCICM-NEXT: ret ; +; RV32-XQCICS-LABEL: select_i64_ne: +; RV32-XQCICS: # %bb.0: +; RV32-XQCICS-NEXT: xor a1, a1, a3 +; RV32-XQCICS-NEXT: xor a0, a0, a2 +; RV32-XQCICS-NEXT: or a1, a0, a1 +; RV32-XQCICS-NEXT: mv a0, a1 +; RV32-XQCICS-NEXT: qc.selectnei a0, 0, a4, a6 +; RV32-XQCICS-NEXT: qc.selectnei a1, 0, a5, a7 +; RV32-XQCICS-NEXT: ret +; ; RV64-LABEL: select_i64_ne: ; RV64: # %bb.0: ; RV64-NEXT: bne a0, a1, .LBB15_2 @@ -774,6 +887,18 @@ define i64 @select_i64_ugt(i64 %a, i64 %b, i64 %x, i64 %y) nounwind { ; RV32-XQCICM-NEXT: mv a1, a5 ; RV32-XQCICM-NEXT: ret ; +; RV32-XQCICS-LABEL: select_i64_ugt: +; RV32-XQCICS: # %bb.0: +; RV32-XQCICS-NEXT: sltu t0, a3, a1 +; RV32-XQCICS-NEXT: sltu a0, a2, a0 +; RV32-XQCICS-NEXT: xor a1, a1, a3 +; RV32-XQCICS-NEXT: seqz a1, a1 +; RV32-XQCICS-NEXT: qc.selectnei a1, 0, a0, t0 +; RV32-XQCICS-NEXT: mv a0, a1 +; RV32-XQCICS-NEXT: qc.selectnei a0, 0, a4, a6 +; RV32-XQCICS-NEXT: qc.selectnei a1, 0, a5, a7 +; RV32-XQCICS-NEXT: ret +; ; RV64-LABEL: select_i64_ugt: ; RV64: # %bb.0: ; RV64-NEXT: bltu a1, a0, .LBB16_2 @@ -835,6 +960,18 @@ define i64 @select_i64_uge(i64 %a, i64 %b, i64 %x, i64 %y) nounwind { ; RV32-XQCICM-NEXT: mv a1, a7 ; RV32-XQCICM-NEXT: ret ; +; RV32-XQCICS-LABEL: select_i64_uge: +; RV32-XQCICS: # %bb.0: +; RV32-XQCICS-NEXT: sltu t0, a1, a3 +; RV32-XQCICS-NEXT: sltu a0, a0, a2 +; RV32-XQCICS-NEXT: xor a1, a1, a3 +; RV32-XQCICS-NEXT: seqz a1, a1 +; RV32-XQCICS-NEXT: qc.selectnei a1, 0, a0, t0 +; RV32-XQCICS-NEXT: mv a0, a1 +; RV32-XQCICS-NEXT: qc.selectnei a0, 0, a6, a4 +; RV32-XQCICS-NEXT: qc.selectnei a1, 0, a7, a5 +; RV32-XQCICS-NEXT: ret +; ; RV64-LABEL: select_i64_uge: ; RV64: # %bb.0: ; RV64-NEXT: bgeu a0, a1, 
.LBB17_2 @@ -896,6 +1033,18 @@ define i64 @select_i64_ult(i64 %a, i64 %b, i64 %x, i64 %y) nounwind { ; RV32-XQCICM-NEXT: mv a1, a5 ; RV32-XQCICM-NEXT: ret ; +; RV32-XQCICS-LABEL: select_i64_ult: +; RV32-XQCICS: # %bb.0: +; RV32-XQCICS-NEXT: sltu t0, a1, a3 +; RV32-XQCICS-NEXT: sltu a0, a0, a2 +; RV32-XQCICS-NEXT: xor a1, a1, a3 +; RV32-XQCICS-NEXT: seqz a1, a1 +; RV32-XQCICS-NEXT: qc.selectnei a1, 0, a0, t0 +; RV32-XQCICS-NEXT: mv a0, a1 +; RV32-XQCICS-NEXT: qc.selectnei a0, 0, a4, a6 +; RV32-XQCICS-NEXT: qc.selectnei a1, 0, a5, a7 +; RV32-XQCICS-NEXT: ret +; ; RV64-LABEL: select_i64_ult: ; RV64: # %bb.0: ; RV64-NEXT: bltu a0, a1, .LBB18_2 @@ -957,6 +1106,18 @@ define i64 @select_i64_ule(i64 %a, i64 %b, i64 %x, i64 %y) nounwind { ; RV32-XQCICM-NEXT: mv a1, a7 ; RV32-XQCICM-NEXT: ret ; +; RV32-XQCICS-LABEL: select_i64_ule: +; RV32-XQCICS: # %bb.0: +; RV32-XQCICS-NEXT: sltu t0, a3, a1 +; RV32-XQCICS-NEXT: sltu a0, a2, a0 +; RV32-XQCICS-NEXT: xor a1, a1, a3 +; RV32-XQCICS-NEXT: seqz a1, a1 +; RV32-XQCICS-NEXT: qc.selectnei a1, 0, a0, t0 +; RV32-XQCICS-NEXT: mv a0, a1 +; RV32-XQCICS-NEXT: qc.selectnei a0, 0, a6, a4 +; RV32-XQCICS-NEXT: qc.selectnei a1, 0, a7, a5 +; RV32-XQCICS-NEXT: ret +; ; RV64-LABEL: select_i64_ule: ; RV64: # %bb.0: ; RV64-NEXT: bgeu a1, a0, .LBB19_2 @@ -1018,6 +1179,18 @@ define i64 @select_i64_sgt(i64 %a, i64 %b, i64 %x, i64 %y) nounwind { ; RV32-XQCICM-NEXT: mv a1, a5 ; RV32-XQCICM-NEXT: ret ; +; RV32-XQCICS-LABEL: select_i64_sgt: +; RV32-XQCICS: # %bb.0: +; RV32-XQCICS-NEXT: slt t0, a3, a1 +; RV32-XQCICS-NEXT: sltu a0, a2, a0 +; RV32-XQCICS-NEXT: xor a1, a1, a3 +; RV32-XQCICS-NEXT: seqz a1, a1 +; RV32-XQCICS-NEXT: qc.selectnei a1, 0, a0, t0 +; RV32-XQCICS-NEXT: mv a0, a1 +; RV32-XQCICS-NEXT: qc.selectnei a0, 0, a4, a6 +; RV32-XQCICS-NEXT: qc.selectnei a1, 0, a5, a7 +; RV32-XQCICS-NEXT: ret +; ; RV64-LABEL: select_i64_sgt: ; RV64: # %bb.0: ; RV64-NEXT: blt a1, a0, .LBB20_2 @@ -1079,6 +1252,18 @@ define i64 @select_i64_sge(i64 %a, i64 %b, i64 %x, 
i64 %y) nounwind { ; RV32-XQCICM-NEXT: mv a1, a7 ; RV32-XQCICM-NEXT: ret ; +; RV32-XQCICS-LABEL: select_i64_sge: +; RV32-XQCICS: # %bb.0: +; RV32-XQCICS-NEXT: slt t0, a1, a3 +; RV32-XQCICS-NEXT: sltu a0, a0, a2 +; RV32-XQCICS-NEXT: xor a1, a1, a3 +; RV32-XQCICS-NEXT: seqz a1, a1 +; RV32-XQCICS-NEXT: qc.selectnei a1, 0, a0, t0 +; RV32-XQCICS-NEXT: mv a0, a1 +; RV32-XQCICS-NEXT: qc.selectnei a0, 0, a6, a4 +; RV32-XQCICS-NEXT: qc.selectnei a1, 0, a7, a5 +; RV32-XQCICS-NEXT: ret +; ; RV64-LABEL: select_i64_sge: ; RV64: # %bb.0: ; RV64-NEXT: bge a0, a1, .LBB21_2 @@ -1140,6 +1325,18 @@ define i64 @select_i64_slt(i64 %a, i64 %b, i64 %x, i64 %y) nounwind { ; RV32-XQCICM-NEXT: mv a1, a5 ; RV32-XQCICM-NEXT: ret ; +; RV32-XQCICS-LABEL: select_i64_slt: +; RV32-XQCICS: # %bb.0: +; RV32-XQCICS-NEXT: slt t0, a1, a3 +; RV32-XQCICS-NEXT: sltu a0, a0, a2 +; RV32-XQCICS-NEXT: xor a1, a1, a3 +; RV32-XQCICS-NEXT: seqz a1, a1 +; RV32-XQCICS-NEXT: qc.selectnei a1, 0, a0, t0 +; RV32-XQCICS-NEXT: mv a0, a1 +; RV32-XQCICS-NEXT: qc.selectnei a0, 0, a4, a6 +; RV32-XQCICS-NEXT: qc.selectnei a1, 0, a5, a7 +; RV32-XQCICS-NEXT: ret +; ; RV64-LABEL: select_i64_slt: ; RV64: # %bb.0: ; RV64-NEXT: blt a0, a1, .LBB22_2 @@ -1201,6 +1398,18 @@ define i64 @select_i64_sle(i64 %a, i64 %b, i64 %x, i64 %y) nounwind { ; RV32-XQCICM-NEXT: mv a1, a7 ; RV32-XQCICM-NEXT: ret ; +; RV32-XQCICS-LABEL: select_i64_sle: +; RV32-XQCICS: # %bb.0: +; RV32-XQCICS-NEXT: slt t0, a3, a1 +; RV32-XQCICS-NEXT: sltu a0, a2, a0 +; RV32-XQCICS-NEXT: xor a1, a1, a3 +; RV32-XQCICS-NEXT: seqz a1, a1 +; RV32-XQCICS-NEXT: qc.selectnei a1, 0, a0, t0 +; RV32-XQCICS-NEXT: mv a0, a1 +; RV32-XQCICS-NEXT: qc.selectnei a0, 0, a6, a4 +; RV32-XQCICS-NEXT: qc.selectnei a1, 0, a7, a5 +; RV32-XQCICS-NEXT: ret +; ; RV64-LABEL: select_i64_sle: ; RV64: # %bb.0: ; RV64-NEXT: bge a1, a0, .LBB23_2 diff --git a/llvm/test/CodeGen/RISCV/sextw-removal.ll b/llvm/test/CodeGen/RISCV/sextw-removal.ll index 1a978d1a0fcac..9c8230572b926 100644 --- 
a/llvm/test/CodeGen/RISCV/sextw-removal.ll +++ b/llvm/test/CodeGen/RISCV/sextw-removal.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+f,+zknh,+v -target-abi=lp64f \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64I -; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zba,+zbb,+f,+zknh,+v -target-abi=lp64f \ +; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zba,+zbb,+zbkb,+f,+zknh,+v -target-abi=lp64f \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64ZBB -; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zba,+zbb,+f,+zknh,+v -target-abi=lp64f \ +; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zba,+zbb,+zbkb,+f,+zknh,+v -target-abi=lp64f \ ; RUN: -riscv-disable-sextw-removal | FileCheck %s --check-prefix=NOREMOVAL define void @test1(i32 signext %arg, i32 signext %arg1) nounwind { @@ -1499,3 +1499,186 @@ bb7: ; preds = %bb2 } declare i32 @llvm.riscv.vmv.x.s.nxv1i32( ) + +; Test that we can look through brev8 in hasAllNBitUsers. 
+define signext i32 @test21(i64 %arg1, i64 %arg2, i64 %arg3) { +; RV64I-LABEL: test21: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: addi a2, a2, -1 +; RV64I-NEXT: lui a3, 61681 +; RV64I-NEXT: lui a4, 209715 +; RV64I-NEXT: addi a3, a3, -241 +; RV64I-NEXT: addi a4, a4, 819 +; RV64I-NEXT: slli a5, a3, 32 +; RV64I-NEXT: add a3, a3, a5 +; RV64I-NEXT: slli a5, a4, 32 +; RV64I-NEXT: add a4, a4, a5 +; RV64I-NEXT: li a5, 256 +; RV64I-NEXT: .LBB25_1: # %bb2 +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: srli a6, a0, 4 +; RV64I-NEXT: and a0, a0, a3 +; RV64I-NEXT: and a6, a6, a3 +; RV64I-NEXT: slli a0, a0, 4 +; RV64I-NEXT: or a0, a6, a0 +; RV64I-NEXT: srli a6, a0, 2 +; RV64I-NEXT: and a0, a0, a4 +; RV64I-NEXT: and a6, a6, a4 +; RV64I-NEXT: slli a0, a0, 2 +; RV64I-NEXT: or a0, a6, a0 +; RV64I-NEXT: andi a6, a0, 65 +; RV64I-NEXT: srli a0, a0, 1 +; RV64I-NEXT: slli a6, a6, 1 +; RV64I-NEXT: andi a0, a0, 1104 +; RV64I-NEXT: or a0, a0, a6 +; RV64I-NEXT: addi a2, a2, 1 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: bltu a2, a5, .LBB25_1 +; RV64I-NEXT: # %bb.2: # %bb7 +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: test21: +; RV64ZBB: # %bb.0: # %entry +; RV64ZBB-NEXT: addi a2, a2, -1 +; RV64ZBB-NEXT: li a3, 256 +; RV64ZBB-NEXT: .LBB25_1: # %bb2 +; RV64ZBB-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64ZBB-NEXT: brev8 a0, a0 +; RV64ZBB-NEXT: andi a0, a0, 1234 +; RV64ZBB-NEXT: addi a2, a2, 1 +; RV64ZBB-NEXT: addw a0, a0, a1 +; RV64ZBB-NEXT: bltu a2, a3, .LBB25_1 +; RV64ZBB-NEXT: # %bb.2: # %bb7 +; RV64ZBB-NEXT: ret +; +; NOREMOVAL-LABEL: test21: +; NOREMOVAL: # %bb.0: # %entry +; NOREMOVAL-NEXT: addi a2, a2, -1 +; NOREMOVAL-NEXT: li a3, 256 +; NOREMOVAL-NEXT: .LBB25_1: # %bb2 +; NOREMOVAL-NEXT: # =>This Inner Loop Header: Depth=1 +; NOREMOVAL-NEXT: brev8 a0, a0 +; NOREMOVAL-NEXT: andi a0, a0, 1234 +; NOREMOVAL-NEXT: addi a2, a2, 1 +; NOREMOVAL-NEXT: add a0, a0, a1 +; NOREMOVAL-NEXT: bltu a2, a3, .LBB25_1 +; NOREMOVAL-NEXT: # %bb.2: # %bb7 +; 
NOREMOVAL-NEXT: sext.w a0, a0 +; NOREMOVAL-NEXT: ret +entry: + br label %bb2 + +bb2: ; preds = %bb2, %entry + %i1 = phi i64 [ %arg1, %entry ], [ %i5, %bb2 ] + %i2 = phi i64 [ %arg3, %entry ], [ %i3, %bb2 ] + %i3 = add i64 %i2, 1 + %bswap = call i64 @llvm.bswap.i64(i64 %i1) + %bitreverse = call i64 @llvm.bitreverse.i64(i64 %bswap) + %i4 = and i64 %bitreverse, 1234 + %i5 = add i64 %i4, %arg2 + %i6 = icmp ugt i64 %i2, 255 + br i1 %i6, label %bb7, label %bb2 + +bb7: ; preds = %bb2 + %i7 = trunc i64 %i5 to i32 + ret i32 %i7 +} + +; Negative test for looking through brev8. Make sure we consider that it works +; on bytes. +define signext i32 @test22(i64 %arg1, i64 %arg2, i64 %arg3) { +; RV64I-LABEL: test22: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: addi a2, a2, -1 +; RV64I-NEXT: lui a3, %hi(.LCPI26_0) +; RV64I-NEXT: lui a4, %hi(.LCPI26_1) +; RV64I-NEXT: lui a5, %hi(.LCPI26_2) +; RV64I-NEXT: lui a6, %hi(.LCPI26_3) +; RV64I-NEXT: li a7, 69 +; RV64I-NEXT: ld a3, %lo(.LCPI26_0)(a3) +; RV64I-NEXT: ld a4, %lo(.LCPI26_1)(a4) +; RV64I-NEXT: ld a5, %lo(.LCPI26_2)(a5) +; RV64I-NEXT: ld a6, %lo(.LCPI26_3)(a6) +; RV64I-NEXT: slli a7, a7, 32 +; RV64I-NEXT: li t0, 65 +; RV64I-NEXT: slli t0, t0, 28 +; RV64I-NEXT: li t1, 256 +; RV64I-NEXT: .LBB26_1: # %bb2 +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: slli t2, a0, 11 +; RV64I-NEXT: slli a0, a0, 3 +; RV64I-NEXT: and t2, t2, a3 +; RV64I-NEXT: and a0, a0, a4 +; RV64I-NEXT: or a0, a0, t2 +; RV64I-NEXT: srli t2, a0, 2 +; RV64I-NEXT: and a0, a0, a6 +; RV64I-NEXT: and t2, t2, a5 +; RV64I-NEXT: slli a0, a0, 2 +; RV64I-NEXT: or a0, t2, a0 +; RV64I-NEXT: srli t2, a0, 1 +; RV64I-NEXT: and a0, a0, t0 +; RV64I-NEXT: and t2, t2, a7 +; RV64I-NEXT: slli a0, a0, 1 +; RV64I-NEXT: or a0, t2, a0 +; RV64I-NEXT: srli a0, a0, 28 +; RV64I-NEXT: addi a2, a2, 1 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: bltu a2, t1, .LBB26_1 +; RV64I-NEXT: # %bb.2: # %bb7 +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: test22: +; 
RV64ZBB: # %bb.0: # %entry +; RV64ZBB-NEXT: addi a2, a2, -1 +; RV64ZBB-NEXT: li a3, 256 +; RV64ZBB-NEXT: .LBB26_1: # %bb2 +; RV64ZBB-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64ZBB-NEXT: slli a0, a0, 7 +; RV64ZBB-NEXT: brev8 a0, a0 +; RV64ZBB-NEXT: srli a0, a0, 28 +; RV64ZBB-NEXT: andi a0, a0, 1234 +; RV64ZBB-NEXT: addi a2, a2, 1 +; RV64ZBB-NEXT: add a0, a0, a1 +; RV64ZBB-NEXT: bltu a2, a3, .LBB26_1 +; RV64ZBB-NEXT: # %bb.2: # %bb7 +; RV64ZBB-NEXT: sext.w a0, a0 +; RV64ZBB-NEXT: ret +; +; NOREMOVAL-LABEL: test22: +; NOREMOVAL: # %bb.0: # %entry +; NOREMOVAL-NEXT: addi a2, a2, -1 +; NOREMOVAL-NEXT: li a3, 256 +; NOREMOVAL-NEXT: .LBB26_1: # %bb2 +; NOREMOVAL-NEXT: # =>This Inner Loop Header: Depth=1 +; NOREMOVAL-NEXT: slli a0, a0, 7 +; NOREMOVAL-NEXT: brev8 a0, a0 +; NOREMOVAL-NEXT: srli a0, a0, 28 +; NOREMOVAL-NEXT: andi a0, a0, 1234 +; NOREMOVAL-NEXT: addi a2, a2, 1 +; NOREMOVAL-NEXT: add a0, a0, a1 +; NOREMOVAL-NEXT: bltu a2, a3, .LBB26_1 +; NOREMOVAL-NEXT: # %bb.2: # %bb7 +; NOREMOVAL-NEXT: sext.w a0, a0 +; NOREMOVAL-NEXT: ret +entry: + br label %bb2 + +bb2: ; preds = %bb2, %entry + %i1 = phi i64 [ %arg1, %entry ], [ %i5, %bb2 ] + %i2 = phi i64 [ %arg3, %entry ], [ %i3, %bb2 ] + %i3 = add i64 %i2, 1 + %shl = shl i64 %i1, 7 + %bswap = call i64 @llvm.bswap.i64(i64 %shl) + %bitreverse = call i64 @llvm.bitreverse.i64(i64 %bswap) + %lshr = lshr i64 %bitreverse, 28 + %i4 = and i64 %lshr, 1234 + %i5 = add i64 %i4, %arg2 + %i6 = icmp ugt i64 %i2, 255 + br i1 %i6, label %bb7, label %bb2 + +bb7: ; preds = %bb2 + %i7 = trunc i64 %i5 to i32 + ret i32 %i7 +} diff --git a/llvm/test/CodeGen/RISCV/xqciac.ll b/llvm/test/CodeGen/RISCV/xqciac.ll new file mode 100644 index 0000000000000..4cee0910608f3 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/xqciac.ll @@ -0,0 +1,271 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=riscv32 -mattr=+m -verify-machineinstrs < %s \ +; RUN: | FileCheck %s 
-check-prefix=RV32IM +; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-xqciac -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32IMXQCIAC +; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-xqciac,+zba -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32IZBAMXQCIAC + +define dso_local i32 @mul(i32 %a, i32 %b) local_unnamed_addr #0 { +; RV32IM-LABEL: mul: +; RV32IM: # %bb.0: # %entry +; RV32IM-NEXT: slli a0, a1, 5 +; RV32IM-NEXT: add a0, a0, a1 +; RV32IM-NEXT: ret +; +; RV32IMXQCIAC-LABEL: mul: +; RV32IMXQCIAC: # %bb.0: # %entry +; RV32IMXQCIAC-NEXT: li a0, 33 +; RV32IMXQCIAC-NEXT: mul a0, a1, a0 +; RV32IMXQCIAC-NEXT: ret +; +; RV32IZBAMXQCIAC-LABEL: mul: +; RV32IZBAMXQCIAC: # %bb.0: # %entry +; RV32IZBAMXQCIAC-NEXT: li a0, 33 +; RV32IZBAMXQCIAC-NEXT: mul a0, a1, a0 +; RV32IZBAMXQCIAC-NEXT: ret +entry: + %mul = mul nsw i32 %b, 33 + ret i32 %mul +} + +define dso_local i32 @muliadd(i32 %a, i32 %b) local_unnamed_addr #0 { +; RV32IM-LABEL: muliadd: +; RV32IM: # %bb.0: # %entry +; RV32IM-NEXT: li a2, 165 +; RV32IM-NEXT: mul a1, a1, a2 +; RV32IM-NEXT: add a0, a1, a0 +; RV32IM-NEXT: ret +; +; RV32IMXQCIAC-LABEL: muliadd: +; RV32IMXQCIAC: # %bb.0: # %entry +; RV32IMXQCIAC-NEXT: qc.muliadd a0, a1, 165 +; RV32IMXQCIAC-NEXT: ret +; +; RV32IZBAMXQCIAC-LABEL: muliadd: +; RV32IZBAMXQCIAC: # %bb.0: # %entry +; RV32IZBAMXQCIAC-NEXT: qc.muliadd a0, a1, 165 +; RV32IZBAMXQCIAC-NEXT: ret +entry: + %mul = mul nsw i32 %b, 165 + %add = add nsw i32 %mul, %a + ret i32 %add +} + +define dso_local i32 @muliadd2(i32 %a, i32 %b) local_unnamed_addr #0 { +; RV32IM-LABEL: muliadd2: +; RV32IM: # %bb.0: # %entry +; RV32IM-NEXT: li a2, 1111 +; RV32IM-NEXT: mul a1, a1, a2 +; RV32IM-NEXT: add a0, a1, a0 +; RV32IM-NEXT: ret +; +; RV32IMXQCIAC-LABEL: muliadd2: +; RV32IMXQCIAC: # %bb.0: # %entry +; RV32IMXQCIAC-NEXT: qc.muliadd a0, a1, 1111 +; RV32IMXQCIAC-NEXT: ret +; +; RV32IZBAMXQCIAC-LABEL: muliadd2: +; RV32IZBAMXQCIAC: # %bb.0: # %entry +; 
RV32IZBAMXQCIAC-NEXT: qc.muliadd a0, a1, 1111 +; RV32IZBAMXQCIAC-NEXT: ret +entry: + %mul = mul nsw i32 %b, 1111 + %add = add nsw i32 %mul, %a + ret i32 %add +} + +define dso_local i32 @muliadd_neg(i32 %a, i32 %b) local_unnamed_addr #0 { +; RV32IM-LABEL: muliadd_neg: +; RV32IM: # %bb.0: # %entry +; RV32IM-NEXT: li a2, -165 +; RV32IM-NEXT: mul a1, a1, a2 +; RV32IM-NEXT: add a0, a1, a0 +; RV32IM-NEXT: ret +; +; RV32IMXQCIAC-LABEL: muliadd_neg: +; RV32IMXQCIAC: # %bb.0: # %entry +; RV32IMXQCIAC-NEXT: qc.muliadd a0, a1, -165 +; RV32IMXQCIAC-NEXT: ret +; +; RV32IZBAMXQCIAC-LABEL: muliadd_neg: +; RV32IZBAMXQCIAC: # %bb.0: # %entry +; RV32IZBAMXQCIAC-NEXT: qc.muliadd a0, a1, -165 +; RV32IZBAMXQCIAC-NEXT: ret +entry: + %mul = mul nsw i32 %b, -165 + %add = add nsw i32 %mul, %a + ret i32 %add +} + +define dso_local i32 @muliadd_neg2(i32 %a, i32 %b) local_unnamed_addr #0 { +; RV32IM-LABEL: muliadd_neg2: +; RV32IM: # %bb.0: # %entry +; RV32IM-NEXT: li a2, -2045 +; RV32IM-NEXT: mul a1, a1, a2 +; RV32IM-NEXT: add a0, a1, a0 +; RV32IM-NEXT: ret +; +; RV32IMXQCIAC-LABEL: muliadd_neg2: +; RV32IMXQCIAC: # %bb.0: # %entry +; RV32IMXQCIAC-NEXT: qc.muliadd a0, a1, -2045 +; RV32IMXQCIAC-NEXT: ret +; +; RV32IZBAMXQCIAC-LABEL: muliadd_neg2: +; RV32IZBAMXQCIAC: # %bb.0: # %entry +; RV32IZBAMXQCIAC-NEXT: qc.muliadd a0, a1, -2045 +; RV32IZBAMXQCIAC-NEXT: ret +entry: + %mul = mul nsw i32 %b, -2045 + %add = add nsw i32 %mul, %a + ret i32 %add +} + +define dso_local i32 @pow2immplus1(i32 %a, i32 %b) local_unnamed_addr #0 { +; RV32IM-LABEL: pow2immplus1: +; RV32IM: # %bb.0: # %entry +; RV32IM-NEXT: slli a2, a1, 5 +; RV32IM-NEXT: add a0, a1, a0 +; RV32IM-NEXT: add a0, a2, a0 +; RV32IM-NEXT: ret +; +; RV32IMXQCIAC-LABEL: pow2immplus1: +; RV32IMXQCIAC: # %bb.0: # %entry +; RV32IMXQCIAC-NEXT: qc.muliadd a0, a1, 33 +; RV32IMXQCIAC-NEXT: ret +; +; RV32IZBAMXQCIAC-LABEL: pow2immplus1: +; RV32IZBAMXQCIAC: # %bb.0: # %entry +; RV32IZBAMXQCIAC-NEXT: qc.muliadd a0, a1, 33 +; RV32IZBAMXQCIAC-NEXT: ret 
+entry: + %mul = mul nsw i32 %b, 33 + %add = add nsw i32 %mul, %a + ret i32 %add +} + +define dso_local i32 @pow2immminus2(i32 %a, i32 %b) local_unnamed_addr #0 { +; RV32IM-LABEL: pow2immminus2: +; RV32IM: # %bb.0: # %entry +; RV32IM-NEXT: slli a2, a1, 1 +; RV32IM-NEXT: slli a1, a1, 7 +; RV32IM-NEXT: sub a1, a1, a2 +; RV32IM-NEXT: add a0, a1, a0 +; RV32IM-NEXT: ret +; +; RV32IMXQCIAC-LABEL: pow2immminus2: +; RV32IMXQCIAC: # %bb.0: # %entry +; RV32IMXQCIAC-NEXT: qc.muliadd a0, a1, 126 +; RV32IMXQCIAC-NEXT: ret +; +; RV32IZBAMXQCIAC-LABEL: pow2immminus2: +; RV32IZBAMXQCIAC: # %bb.0: # %entry +; RV32IZBAMXQCIAC-NEXT: qc.muliadd a0, a1, 126 +; RV32IZBAMXQCIAC-NEXT: ret +entry: + %mul = mul nsw i32 %b, 126 + %add = add nsw i32 %mul, %a + ret i32 %add +} + +define dso_local i32 @pow2minuspow2(i32 %a, i32 %b) local_unnamed_addr #0 { +; RV32IM-LABEL: pow2minuspow2: +; RV32IM: # %bb.0: # %entry +; RV32IM-NEXT: slli a2, a1, 7 +; RV32IM-NEXT: slli a1, a1, 9 +; RV32IM-NEXT: sub a1, a1, a2 +; RV32IM-NEXT: add a0, a1, a0 +; RV32IM-NEXT: ret +; +; RV32IMXQCIAC-LABEL: pow2minuspow2: +; RV32IMXQCIAC: # %bb.0: # %entry +; RV32IMXQCIAC-NEXT: qc.muliadd a0, a1, 384 +; RV32IMXQCIAC-NEXT: ret +; +; RV32IZBAMXQCIAC-LABEL: pow2minuspow2: +; RV32IZBAMXQCIAC: # %bb.0: # %entry +; RV32IZBAMXQCIAC-NEXT: qc.muliadd a0, a1, 384 +; RV32IZBAMXQCIAC-NEXT: ret +entry: + %mul = mul nsw i32 %b, 384 + %add = add nsw i32 %mul, %a + ret i32 %add +} + +define dso_local i32 @gtsimm12(i32 %a, i32 %b) local_unnamed_addr #0 { +; RV32IM-LABEL: gtsimm12: +; RV32IM: # %bb.0: # %entry +; RV32IM-NEXT: lui a2, 1 +; RV32IM-NEXT: addi a2, a2, 477 +; RV32IM-NEXT: mul a1, a1, a2 +; RV32IM-NEXT: add a0, a1, a0 +; RV32IM-NEXT: ret +; +; RV32IMXQCIAC-LABEL: gtsimm12: +; RV32IMXQCIAC: # %bb.0: # %entry +; RV32IMXQCIAC-NEXT: lui a2, 1 +; RV32IMXQCIAC-NEXT: addi a2, a2, 477 +; RV32IMXQCIAC-NEXT: mul a1, a1, a2 +; RV32IMXQCIAC-NEXT: add a0, a0, a1 +; RV32IMXQCIAC-NEXT: ret +; +; RV32IZBAMXQCIAC-LABEL: gtsimm12: +; 
RV32IZBAMXQCIAC: # %bb.0: # %entry +; RV32IZBAMXQCIAC-NEXT: lui a2, 1 +; RV32IZBAMXQCIAC-NEXT: addi a2, a2, 477 +; RV32IZBAMXQCIAC-NEXT: mul a1, a1, a2 +; RV32IZBAMXQCIAC-NEXT: add a0, a0, a1 +; RV32IZBAMXQCIAC-NEXT: ret +entry: + %mul = mul nsw i32 %b, 4573 + %add = add nsw i32 %mul, %a + ret i32 %add +} + +; NOTE: This will become qc.shladd once support is added +define dso_local i32 @pow2(i32 %a, i32 %b) local_unnamed_addr #0 { +; RV32IM-LABEL: pow2: +; RV32IM: # %bb.0: # %entry +; RV32IM-NEXT: slli a1, a1, 5 +; RV32IM-NEXT: add a0, a1, a0 +; RV32IM-NEXT: ret +; +; RV32IMXQCIAC-LABEL: pow2: +; RV32IMXQCIAC: # %bb.0: # %entry +; RV32IMXQCIAC-NEXT: slli a1, a1, 5 +; RV32IMXQCIAC-NEXT: add a0, a0, a1 +; RV32IMXQCIAC-NEXT: ret +; +; RV32IZBAMXQCIAC-LABEL: pow2: +; RV32IZBAMXQCIAC: # %bb.0: # %entry +; RV32IZBAMXQCIAC-NEXT: slli a1, a1, 5 +; RV32IZBAMXQCIAC-NEXT: add a0, a0, a1 +; RV32IZBAMXQCIAC-NEXT: ret +entry: + %mul = mul nsw i32 %b, 32 + %add = add nsw i32 %mul, %a + ret i32 %add +} + +define dso_local i32 @shxadd(i32 %a, i32 %b) local_unnamed_addr #0 { +; RV32IM-LABEL: shxadd: +; RV32IM: # %bb.0: # %entry +; RV32IM-NEXT: slli a1, a1, 1 +; RV32IM-NEXT: add a0, a1, a0 +; RV32IM-NEXT: ret +; +; RV32IMXQCIAC-LABEL: shxadd: +; RV32IMXQCIAC: # %bb.0: # %entry +; RV32IMXQCIAC-NEXT: slli a1, a1, 1 +; RV32IMXQCIAC-NEXT: add a0, a0, a1 +; RV32IMXQCIAC-NEXT: ret +; +; RV32IZBAMXQCIAC-LABEL: shxadd: +; RV32IZBAMXQCIAC: # %bb.0: # %entry +; RV32IZBAMXQCIAC-NEXT: sh1add a0, a1, a0 +; RV32IZBAMXQCIAC-NEXT: ret +entry: + %mul = mul nsw i32 %b, 2 + %add = add nsw i32 %mul, %a + ret i32 %add +} diff --git a/llvm/test/CodeGen/RISCV/xqcics.ll b/llvm/test/CodeGen/RISCV/xqcics.ll new file mode 100644 index 0000000000000..0e90b1fda0ea2 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/xqcics.ll @@ -0,0 +1,452 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; Test that we are able to generate the Xqcics instructions +; RUN: llc 
-mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=RV32I +; RUN: llc -mtriple=riscv32 -mattr=+experimental-xqcics -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=RV32IXQCICS +; RUN: llc -mtriple=riscv32 -mattr=+experimental-xqcics,+experimental-xqcicm -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=RV32IXQCICS + +define i32 @select_cc_example_eq_s1(i32 %a, i32 %b, i32 %x, i32 %y) { +; RV32I-LABEL: select_cc_example_eq_s1: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: andi a1, a0, 1 +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: bnez a1, .LBB0_2 +; RV32I-NEXT: # %bb.1: # %entry +; RV32I-NEXT: li a0, 12 +; RV32I-NEXT: .LBB0_2: # %entry +; RV32I-NEXT: ret +; +; RV32IXQCICS-LABEL: select_cc_example_eq_s1: +; RV32IXQCICS: # %bb.0: # %entry +; RV32IXQCICS-NEXT: andi a0, a0, 1 +; RV32IXQCICS-NEXT: qc.selectinei a0, 0, a2, 12 +; RV32IXQCICS-NEXT: ret +entry: + %cond_trunc = trunc i32 %a to i1 + %sel = select i1 %cond_trunc, i32 %x, i32 12 + ret i32 %sel +} + +define i32 @select_cc_example_eq_s2(i32 %a, i32 %b, i32 %x, i32 %y) { +; RV32I-LABEL: select_cc_example_eq_s2: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: andi a1, a0, 1 +; RV32I-NEXT: bnez a1, .LBB1_2 +; RV32I-NEXT: # %bb.1: # %entry +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: ret +; RV32I-NEXT: .LBB1_2: +; RV32I-NEXT: li a0, 12 +; RV32I-NEXT: ret +; +; RV32IXQCICS-LABEL: select_cc_example_eq_s2: +; RV32IXQCICS: # %bb.0: # %entry +; RV32IXQCICS-NEXT: andi a0, a0, 1 +; RV32IXQCICS-NEXT: qc.selectieqi a0, 0, a2, 12 +; RV32IXQCICS-NEXT: ret +entry: + %cond_trunc = trunc i32 %a to i1 + %sel = select i1 %cond_trunc, i32 12, i32 %x + ret i32 %sel +} + +define i32 @select_cc_example_eq_s3(i32 %a, i32 %b, i32 %x, i32 %y) { +; RV32I-LABEL: select_cc_example_eq_s3: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: andi a0, a0, 1 +; RV32I-NEXT: bnez a0, .LBB2_2 +; RV32I-NEXT: # %bb.1: # %entry +; RV32I-NEXT: li a0, 25 +; RV32I-NEXT: ret +; RV32I-NEXT: .LBB2_2: +; 
RV32I-NEXT: li a0, 12 +; RV32I-NEXT: ret +; +; RV32IXQCICS-LABEL: select_cc_example_eq_s3: +; RV32IXQCICS: # %bb.0: # %entry +; RV32IXQCICS-NEXT: andi a0, a0, 1 +; RV32IXQCICS-NEXT: li a1, 25 +; RV32IXQCICS-NEXT: qc.selectieqi a0, 0, a1, 12 +; RV32IXQCICS-NEXT: ret +entry: + %cond_trunc = trunc i32 %a to i1 + %sel = select i1 %cond_trunc, i32 12, i32 25 + ret i32 %sel +} + +define i32 @select_cc_example_eq(i32 %a, i32 %b, i32 %x, i32 %y) { +; RV32I-LABEL: select_cc_example_eq: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: li a1, 11 +; RV32I-NEXT: beq a0, a1, .LBB3_2 +; RV32I-NEXT: # %bb.1: # %entry +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: .LBB3_2: # %entry +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: ret +; +; RV32IXQCICS-LABEL: select_cc_example_eq: +; RV32IXQCICS: # %bb.0: # %entry +; RV32IXQCICS-NEXT: qc.selecteqi a0, 11, a2, a3 +; RV32IXQCICS-NEXT: ret +entry: + %cmp = icmp eq i32 %a, 11 + %sel = select i1 %cmp, i32 %x, i32 %y + ret i32 %sel +} + +define i32 @select_cc_example_eq_c(i32 %a, i32 %b, i32 %x, i32 %y) { +; RV32I-LABEL: select_cc_example_eq_c: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: li a1, 11 +; RV32I-NEXT: beq a0, a1, .LBB4_2 +; RV32I-NEXT: # %bb.1: # %entry +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: .LBB4_2: # %entry +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: ret +; +; RV32IXQCICS-LABEL: select_cc_example_eq_c: +; RV32IXQCICS: # %bb.0: # %entry +; RV32IXQCICS-NEXT: qc.selecteqi a0, 11, a2, a3 +; RV32IXQCICS-NEXT: ret +entry: + %cmp = icmp eq i32 11, %a + %sel = select i1 %cmp, i32 %x, i32 %y + ret i32 %sel +} + +define i32 @select_cc_example_ne(i32 %a, i32 %b, i32 %x, i32 %y) { +; RV32I-LABEL: select_cc_example_ne: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: li a1, 11 +; RV32I-NEXT: bne a0, a1, .LBB5_2 +; RV32I-NEXT: # %bb.1: # %entry +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: .LBB5_2: # %entry +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: ret +; +; RV32IXQCICS-LABEL: select_cc_example_ne: +; RV32IXQCICS: # %bb.0: # %entry +; RV32IXQCICS-NEXT: qc.selectnei a0, 11, a2, 
a3 +; RV32IXQCICS-NEXT: ret +entry: + %cmp = icmp ne i32 %a, 11 + %sel = select i1 %cmp, i32 %x, i32 %y + ret i32 %sel +} + +define i32 @select_cc_example_ne_c(i32 %a, i32 %b, i32 %x, i32 %y) { +; RV32I-LABEL: select_cc_example_ne_c: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: li a1, 11 +; RV32I-NEXT: bne a0, a1, .LBB6_2 +; RV32I-NEXT: # %bb.1: # %entry +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: .LBB6_2: # %entry +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: ret +; +; RV32IXQCICS-LABEL: select_cc_example_ne_c: +; RV32IXQCICS: # %bb.0: # %entry +; RV32IXQCICS-NEXT: qc.selectnei a0, 11, a2, a3 +; RV32IXQCICS-NEXT: ret +entry: + %cmp = icmp ne i32 11, %a + %sel = select i1 %cmp, i32 %x, i32 %y + ret i32 %sel +} + +define i32 @select_cc_example_eqi(i32 %a, i32 %b, i32 %x, i32 %y) { +; RV32I-LABEL: select_cc_example_eqi: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: beq a0, a1, .LBB7_2 +; RV32I-NEXT: # %bb.1: # %entry +; RV32I-NEXT: li a2, 11 +; RV32I-NEXT: .LBB7_2: # %entry +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: ret +; +; RV32IXQCICS-LABEL: select_cc_example_eqi: +; RV32IXQCICS: # %bb.0: # %entry +; RV32IXQCICS-NEXT: qc.selectieq a0, a1, a2, 11 +; RV32IXQCICS-NEXT: ret +entry: + %cmp = icmp eq i32 %a, %b + %sel = select i1 %cmp, i32 %x, i32 11 + ret i32 %sel +} + +define i32 @select_cc_example_eqi_c(i32 %a, i32 %b, i32 %x, i32 %y) { +; RV32I-LABEL: select_cc_example_eqi_c: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: bne a0, a1, .LBB8_2 +; RV32I-NEXT: # %bb.1: # %entry +; RV32I-NEXT: li a2, 11 +; RV32I-NEXT: .LBB8_2: # %entry +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: ret +; +; RV32IXQCICS-LABEL: select_cc_example_eqi_c: +; RV32IXQCICS: # %bb.0: # %entry +; RV32IXQCICS-NEXT: qc.selectine a0, a1, a2, 11 +; RV32IXQCICS-NEXT: ret +entry: + %cmp = icmp eq i32 %a, %b + %sel = select i1 %cmp, i32 11, i32 %x + ret i32 %sel +} + +define i32 @select_cc_example_nei(i32 %a, i32 %b, i32 %x, i32 %y) { +; RV32I-LABEL: select_cc_example_nei: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: bne a0, a1, 
.LBB9_2 +; RV32I-NEXT: # %bb.1: # %entry +; RV32I-NEXT: li a2, 11 +; RV32I-NEXT: .LBB9_2: # %entry +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: ret +; +; RV32IXQCICS-LABEL: select_cc_example_nei: +; RV32IXQCICS: # %bb.0: # %entry +; RV32IXQCICS-NEXT: qc.selectine a0, a1, a2, 11 +; RV32IXQCICS-NEXT: ret +entry: + %cmp = icmp ne i32 %a, %b + %sel = select i1 %cmp, i32 %x, i32 11 + ret i32 %sel +} + +define i32 @select_cc_example_nei_c(i32 %a, i32 %b, i32 %x, i32 %y) { +; RV32I-LABEL: select_cc_example_nei_c: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: beq a0, a1, .LBB10_2 +; RV32I-NEXT: # %bb.1: # %entry +; RV32I-NEXT: li a2, 11 +; RV32I-NEXT: .LBB10_2: # %entry +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: ret +; +; RV32IXQCICS-LABEL: select_cc_example_nei_c: +; RV32IXQCICS: # %bb.0: # %entry +; RV32IXQCICS-NEXT: qc.selectieq a0, a1, a2, 11 +; RV32IXQCICS-NEXT: ret +entry: + %cmp = icmp ne i32 %a, %b + %sel = select i1 %cmp, i32 11, i32 %x + ret i32 %sel +} + +define i32 @select_cc_example_ieqi(i32 %a, i32 %b, i32 %x, i32 %y) { +; RV32I-LABEL: select_cc_example_ieqi: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: li a1, 12 +; RV32I-NEXT: beq a0, a1, .LBB11_2 +; RV32I-NEXT: # %bb.1: # %entry +; RV32I-NEXT: li a2, 11 +; RV32I-NEXT: .LBB11_2: # %entry +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: ret +; +; RV32IXQCICS-LABEL: select_cc_example_ieqi: +; RV32IXQCICS: # %bb.0: # %entry +; RV32IXQCICS-NEXT: qc.selectieqi a0, 12, a2, 11 +; RV32IXQCICS-NEXT: ret +entry: + %cmp = icmp eq i32 %a, 12 + %sel = select i1 %cmp, i32 %x, i32 11 + ret i32 %sel +} + +define i32 @select_cc_example_ieqi_c1(i32 %a, i32 %b, i32 %x, i32 %y) { +; RV32I-LABEL: select_cc_example_ieqi_c1: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: li a1, 12 +; RV32I-NEXT: beq a0, a1, .LBB12_2 +; RV32I-NEXT: # %bb.1: # %entry +; RV32I-NEXT: li a2, 11 +; RV32I-NEXT: .LBB12_2: # %entry +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: ret +; +; RV32IXQCICS-LABEL: select_cc_example_ieqi_c1: +; RV32IXQCICS: # %bb.0: # %entry +; RV32IXQCICS-NEXT: 
qc.selectieqi a0, 12, a2, 11 +; RV32IXQCICS-NEXT: ret +entry: + %cmp = icmp eq i32 12, %a + %sel = select i1 %cmp, i32 %x, i32 11 + ret i32 %sel +} + +define i32 @select_cc_example_ieqi_c2(i32 %a, i32 %b, i32 %x, i32 %y) { +; RV32I-LABEL: select_cc_example_ieqi_c2: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: li a1, 12 +; RV32I-NEXT: bne a0, a1, .LBB13_2 +; RV32I-NEXT: # %bb.1: # %entry +; RV32I-NEXT: li a2, 11 +; RV32I-NEXT: .LBB13_2: # %entry +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: ret +; +; RV32IXQCICS-LABEL: select_cc_example_ieqi_c2: +; RV32IXQCICS: # %bb.0: # %entry +; RV32IXQCICS-NEXT: qc.selectinei a0, 12, a2, 11 +; RV32IXQCICS-NEXT: ret +entry: + %cmp = icmp eq i32 %a, 12 + %sel = select i1 %cmp, i32 11, i32 %x + ret i32 %sel +} + +define i32 @select_cc_example_ieqi_c3(i32 %a, i32 %b, i32 %x, i32 %y) { +; RV32I-LABEL: select_cc_example_ieqi_c3: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: li a1, 12 +; RV32I-NEXT: bne a0, a1, .LBB14_2 +; RV32I-NEXT: # %bb.1: # %entry +; RV32I-NEXT: li a2, 11 +; RV32I-NEXT: .LBB14_2: # %entry +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: ret +; +; RV32IXQCICS-LABEL: select_cc_example_ieqi_c3: +; RV32IXQCICS: # %bb.0: # %entry +; RV32IXQCICS-NEXT: qc.selectinei a0, 12, a2, 11 +; RV32IXQCICS-NEXT: ret +entry: + %cmp = icmp eq i32 12, %a + %sel = select i1 %cmp, i32 11, i32 %x + ret i32 %sel +} + +define i32 @select_cc_example_inei(i32 %a, i32 %b, i32 %x, i32 %y) { +; RV32I-LABEL: select_cc_example_inei: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: li a1, 12 +; RV32I-NEXT: bne a0, a1, .LBB15_2 +; RV32I-NEXT: # %bb.1: # %entry +; RV32I-NEXT: li a2, 11 +; RV32I-NEXT: .LBB15_2: # %entry +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: ret +; +; RV32IXQCICS-LABEL: select_cc_example_inei: +; RV32IXQCICS: # %bb.0: # %entry +; RV32IXQCICS-NEXT: qc.selectinei a0, 12, a2, 11 +; RV32IXQCICS-NEXT: ret +entry: + %cmp = icmp ne i32 %a, 12 + %sel = select i1 %cmp, i32 %x, i32 11 + ret i32 %sel +} + +define i32 @select_cc_example_inei_c1(i32 %a, i32 %b, i32 %x, 
i32 %y) { +; RV32I-LABEL: select_cc_example_inei_c1: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: li a1, 12 +; RV32I-NEXT: bne a0, a1, .LBB16_2 +; RV32I-NEXT: # %bb.1: # %entry +; RV32I-NEXT: li a2, 11 +; RV32I-NEXT: .LBB16_2: # %entry +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: ret +; +; RV32IXQCICS-LABEL: select_cc_example_inei_c1: +; RV32IXQCICS: # %bb.0: # %entry +; RV32IXQCICS-NEXT: qc.selectinei a0, 12, a2, 11 +; RV32IXQCICS-NEXT: ret +entry: + %cmp = icmp ne i32 12, %a + %sel = select i1 %cmp, i32 %x, i32 11 + ret i32 %sel +} + +define i32 @select_cc_example_inei_c2(i32 %a, i32 %b, i32 %x, i32 %y) { +; RV32I-LABEL: select_cc_example_inei_c2: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: li a1, 12 +; RV32I-NEXT: beq a0, a1, .LBB17_2 +; RV32I-NEXT: # %bb.1: # %entry +; RV32I-NEXT: li a2, 11 +; RV32I-NEXT: .LBB17_2: # %entry +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: ret +; +; RV32IXQCICS-LABEL: select_cc_example_inei_c2: +; RV32IXQCICS: # %bb.0: # %entry +; RV32IXQCICS-NEXT: qc.selectieqi a0, 12, a2, 11 +; RV32IXQCICS-NEXT: ret +entry: + %cmp = icmp ne i32 %a, 12 + %sel = select i1 %cmp, i32 11, i32 %x + ret i32 %sel +} + +define i32 @select_cc_example_inei_c3(i32 %a, i32 %b, i32 %x, i32 %y) { +; RV32I-LABEL: select_cc_example_inei_c3: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: li a1, 12 +; RV32I-NEXT: beq a0, a1, .LBB18_2 +; RV32I-NEXT: # %bb.1: # %entry +; RV32I-NEXT: li a2, 11 +; RV32I-NEXT: .LBB18_2: # %entry +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: ret +; +; RV32IXQCICS-LABEL: select_cc_example_inei_c3: +; RV32IXQCICS: # %bb.0: # %entry +; RV32IXQCICS-NEXT: qc.selectieqi a0, 12, a2, 11 +; RV32IXQCICS-NEXT: ret +entry: + %cmp = icmp ne i32 12, %a + %sel = select i1 %cmp, i32 11, i32 %x + ret i32 %sel +} + +define i32 @select_cc_example_eqii(i32 %a, i32 %b, i32 %x, i32 %y) { +; RV32I-LABEL: select_cc_example_eqii: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: beq a0, a1, .LBB19_2 +; RV32I-NEXT: # %bb.1: # %entry +; RV32I-NEXT: li a0, 11 +; RV32I-NEXT: ret +; RV32I-NEXT: 
.LBB19_2: +; RV32I-NEXT: li a0, 13 +; RV32I-NEXT: ret +; +; RV32IXQCICS-LABEL: select_cc_example_eqii: +; RV32IXQCICS: # %bb.0: # %entry +; RV32IXQCICS-NEXT: qc.selectiieq a0, a1, 13, 11 +; RV32IXQCICS-NEXT: ret +entry: + %cmp = icmp eq i32 %a, %b + %sel = select i1 %cmp, i32 13, i32 11 + ret i32 %sel +} + +define i32 @select_cc_example_neii(i32 %a, i32 %b, i32 %x, i32 %y) { +; RV32I-LABEL: select_cc_example_neii: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: bne a0, a1, .LBB20_2 +; RV32I-NEXT: # %bb.1: # %entry +; RV32I-NEXT: li a0, 11 +; RV32I-NEXT: ret +; RV32I-NEXT: .LBB20_2: +; RV32I-NEXT: li a0, 13 +; RV32I-NEXT: ret +; +; RV32IXQCICS-LABEL: select_cc_example_neii: +; RV32IXQCICS: # %bb.0: # %entry +; RV32IXQCICS-NEXT: qc.selectiine a0, a1, 13, 11 +; RV32IXQCICS-NEXT: ret +entry: + %cmp = icmp ne i32 %a, %b + %sel = select i1 %cmp, i32 13, i32 11 + ret i32 %sel +} + diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout-unknown-lanes.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout-unknown-lanes.ll index 9194d7842a6d3..9772c8311bfbc 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout-unknown-lanes.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout-unknown-lanes.ll @@ -6,8 +6,7 @@ define arm_aapcs_vfpcc <4 x float> @arm_max_no_idx_f32_mve(ptr %pSrc, i32 %block ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: subs r2, r1, #4 -; CHECK-NEXT: movw r3, #0 +; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: movt r3, #65408 ; CHECK-NEXT: vdup.32 q0, r3 ; CHECK-NEXT: dlstp.32 lr, r1 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredload.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredload.ll index 6b5b6b2b1b677..573a9420b5278 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredload.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredload.ll @@ -5,7 +5,6 @@ define void @arm_cmplx_mag_squared_q15_mve(ptr %pSrc, ptr %pDst, i32 
%blockSize) ; CHECK-LABEL: arm_cmplx_mag_squared_q15_mve: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: subs.w r3, r2, #8 ; CHECK-NEXT: dlstp.16 lr, r2 ; CHECK-NEXT: .LBB0_1: @ %do.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll index 1feb5feb7a9ee..7190e162eb010 100644 --- a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll @@ -107,11 +107,9 @@ define <2 x i32> @ustest_f64i32(<2 x double> %x) { ; CHECK-NEXT: v128.bitselect ; CHECK-NEXT: local.tee 0 ; CHECK-NEXT: v128.const 0, 0 -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: local.get 0 -; CHECK-NEXT: local.get 1 ; CHECK-NEXT: i64x2.gt_s -; CHECK-NEXT: v128.bitselect +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.and ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 0, 1, 2, 3 ; CHECK-NEXT: # fallthrough-return @@ -1558,11 +1556,9 @@ define <2 x i32> @ustest_f64i32_mm(<2 x double> %x) { ; CHECK-NEXT: v128.bitselect ; CHECK-NEXT: local.tee 0 ; CHECK-NEXT: v128.const 0, 0 -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: local.get 0 -; CHECK-NEXT: local.get 1 ; CHECK-NEXT: i64x2.gt_s -; CHECK-NEXT: v128.bitselect +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.and ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 0, 1, 2, 3 ; CHECK-NEXT: # fallthrough-return diff --git a/llvm/test/CodeGen/WebAssembly/simd-select.ll b/llvm/test/CodeGen/WebAssembly/simd-select.ll index 715e73e6c18f2..bb06445b2dcf0 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-select.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-select.ll @@ -552,3 +552,73 @@ define <2 x double> @select_eq_v2f64(i32 %i, <2 x double> %x, <2 x double> %y) { %res = select i1 %c, <2 x double> %x, <2 x double> %y ret <2 x double> %res } + +define <4 x i32> @select_splat_first_zero_and_icmp(<4 x i32> %x) { +; 
CHECK-LABEL: select_splat_first_zero_and_icmp: +; CHECK: .functype select_splat_first_zero_and_icmp (v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.const 2139095040, 2139095040, 2139095040, 2139095040 +; CHECK-NEXT: v128.and +; CHECK-NEXT: v128.const 0, 0, 0, 0 +; CHECK-NEXT: i32x4.ne +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.and +; CHECK-NEXT: # fallthrough-return + %a = and <4 x i32> %x, splat (i32 2139095040) + %c = icmp eq <4 x i32> %a, zeroinitializer + %res = select <4 x i1> %c, <4 x i32> zeroinitializer, <4 x i32> %x + ret <4 x i32> %res +} + +define <4 x i32> @select_splat_second_zero_and_icmp(<4 x i32> %x) { +; CHECK-LABEL: select_splat_second_zero_and_icmp: +; CHECK: .functype select_splat_second_zero_and_icmp (v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.const 2139095040, 2139095040, 2139095040, 2139095040 +; CHECK-NEXT: v128.and +; CHECK-NEXT: v128.const 0, 0, 0, 0 +; CHECK-NEXT: i32x4.eq +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.and +; CHECK-NEXT: # fallthrough-return + %a = and <4 x i32> %x, splat (i32 2139095040) + %c = icmp eq <4 x i32> %a, zeroinitializer + %res = select <4 x i1> %c, <4 x i32> %x, <4 x i32> zeroinitializer + ret <4 x i32> %res +} + +define <4 x i32> @select_splat_first_zero_cond_input(<4 x i1> %c, <4 x i32> %x) { +; CHECK-LABEL: select_splat_first_zero_cond_input: +; CHECK: .functype select_splat_first_zero_cond_input (v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: v128.const 0, 0, 0, 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 31 +; CHECK-NEXT: i32x4.shl +; CHECK-NEXT: i32.const 31 +; CHECK-NEXT: i32x4.shr_s +; CHECK-NEXT: v128.bitselect +; CHECK-NEXT: # fallthrough-return + %res = select <4 x i1> %c, <4 x i32> zeroinitializer, <4 x i32> %x + ret <4 x i32> %res +} + +define <4 x i32> @select_splat_second_zero_cond_input(<4 x i1> %c, <4 x i32> %x) { +; CHECK-LABEL: 
select_splat_second_zero_cond_input: +; CHECK: .functype select_splat_second_zero_cond_input (v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 31 +; CHECK-NEXT: i32x4.shl +; CHECK-NEXT: i32.const 31 +; CHECK-NEXT: i32x4.shr_s +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: v128.and +; CHECK-NEXT: # fallthrough-return + %res = select <4 x i1> %c, <4 x i32> %x, <4 x i32> zeroinitializer + ret <4 x i32> %res +} + diff --git a/llvm/test/CodeGen/X86/2007-09-27-LDIntrinsics.ll b/llvm/test/CodeGen/X86/2007-09-27-LDIntrinsics.ll index 2cf09a936f592..3ad6492978438 100644 --- a/llvm/test/CodeGen/X86/2007-09-27-LDIntrinsics.ll +++ b/llvm/test/CodeGen/X86/2007-09-27-LDIntrinsics.ll @@ -8,9 +8,8 @@ entry: ret x86_fp80 %tmp2 ; CHECK-LABEL: foo: -; CHECK: fldt 16(%esp) +; CHECK: fldt 4(%esp) ; CHECK-NEXT: fsqrt -; CHECK-NEXT: addl $12, %esp ; CHECK-NEXT: ret } @@ -21,11 +20,10 @@ entry: %tmp2 = call x86_fp80 @llvm.powi.f80.i32( x86_fp80 %x, i32 3 ) ret x86_fp80 %tmp2 ; CHECK-LABEL: bar: -; CHECK: fldt 16(%esp) +; CHECK: fldt 4(%esp) ; CHECK-NEXT: fld %st(0) ; CHECK-NEXT: fmul %st(1) ; CHECK-NEXT: fmulp -; CHECK-NEXT: addl $12, %esp ; CHECK-NEXT: ret } diff --git a/llvm/test/CodeGen/X86/coalesce-commutative-implicit-def.mir b/llvm/test/CodeGen/X86/coalesce-commutative-implicit-def.mir index fe1235fe94f85..1f38430f631cc 100644 --- a/llvm/test/CodeGen/X86/coalesce-commutative-implicit-def.mir +++ b/llvm/test/CodeGen/X86/coalesce-commutative-implicit-def.mir @@ -35,3 +35,24 @@ body: | %0:gr64_with_sub_8bit = COPY %1:gr64_with_sub_8bit RET 0, implicit %0 ... +# Commuting instruction with 3 ops is handled correctly. 
+--- +name: commuting_3_ops +tracksRegLiveness: true +body: | + bb.0: + liveins: $ymm0, $ymm1 + + ; CHECK-LABEL: name: commuting_3_ops + ; CHECK: liveins: $ymm0, $ymm1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vr256 = COPY $ymm1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr256 = COPY $ymm0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr256 = contract nofpexcept VFMADD213PSYr [[COPY1]], [[COPY]], [[COPY]], implicit $mxcsr + ; CHECK-NEXT: RET 0, implicit [[COPY1]] + %0:vr256 = COPY $ymm1 + %1:vr256 = COPY $ymm0 + %0:vr256 = contract nofpexcept VFMADD231PSYr %0:vr256, %0:vr256, %1:vr256, implicit $mxcsr + %1:vr256 = COPY %0:vr256 + RET 0, implicit %1 +... diff --git a/llvm/test/CodeGen/X86/flt-rounds.ll b/llvm/test/CodeGen/X86/flt-rounds.ll index a5908978a5438..1d7a8d8456c27 100644 --- a/llvm/test/CodeGen/X86/flt-rounds.ll +++ b/llvm/test/CodeGen/X86/flt-rounds.ll @@ -1,7 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-sse -verify-machineinstrs < %s | FileCheck %s --check-prefix=X86 -; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-sse2 -verify-machineinstrs < %s | FileCheck %s --check-prefix=X86 -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=X64 +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-sse -verify-machineinstrs < %s | FileCheck %s --check-prefixes=X86,SDAG-X86 +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-sse2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=X86,SDAG-X86 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefixes=X64,SDAG-X64 +; RUN: llc -mtriple=i686-unknown-linux-gnu -global-isel=1 -global-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=X86,GISEL-X86 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -global-isel=1 -global-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=X64,GISEL-X64 declare i32 
@llvm.get.rounding() @@ -37,139 +39,309 @@ define i32 @test_flt_rounds() nounwind { ; Make sure we preserve order with fesetround. define i32 @multiple_flt_rounds() nounwind { -; X86-LABEL: multiple_flt_rounds: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %esi -; X86-NEXT: subl $20, %esp -; X86-NEXT: movl $1024, (%esp) # imm = 0x400 -; X86-NEXT: calll fesetround -; X86-NEXT: fnstcw {{[0-9]+}}(%esp) -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: shrl $9, %ecx -; X86-NEXT: andb $6, %cl -; X86-NEXT: movl $45, %esi -; X86-NEXT: movl $45, %eax -; X86-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-NEXT: shrl %cl, %eax -; X86-NEXT: andl $3, %eax -; X86-NEXT: xorl %ebx, %ebx -; X86-NEXT: cmpl $3, %eax -; X86-NEXT: setne %bl -; X86-NEXT: movl $0, (%esp) -; X86-NEXT: calll fesetround -; X86-NEXT: fnstcw {{[0-9]+}}(%esp) -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: shrl $9, %ecx -; X86-NEXT: andb $6, %cl -; X86-NEXT: movl $45, %eax -; X86-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-NEXT: shrl %cl, %eax -; X86-NEXT: andl $3, %eax -; X86-NEXT: cmpl $1, %eax -; X86-NEXT: je .LBB1_2 -; X86-NEXT: # %bb.1: # %entry -; X86-NEXT: incl %ebx -; X86-NEXT: .LBB1_2: # %entry -; X86-NEXT: movl $3072, (%esp) # imm = 0xC00 -; X86-NEXT: calll fesetround -; X86-NEXT: fnstcw {{[0-9]+}}(%esp) -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: shrl $9, %ecx -; X86-NEXT: andb $6, %cl -; X86-NEXT: movl $45, %eax -; X86-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-NEXT: shrl %cl, %eax -; X86-NEXT: andl $3, %eax -; X86-NEXT: cmpl $1, %eax -; X86-NEXT: sbbl $-1, %ebx -; X86-NEXT: movl $2048, (%esp) # imm = 0x800 -; X86-NEXT: calll fesetround -; X86-NEXT: fnstcw {{[0-9]+}}(%esp) -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: shrl $9, %ecx -; X86-NEXT: andb $6, %cl -; X86-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-NEXT: shrl %cl, %esi -; X86-NEXT: andl $3, %esi -; X86-NEXT: xorl %ecx, %ecx -; X86-NEXT: cmpl $2, %esi 
-; X86-NEXT: setne %cl -; X86-NEXT: negl %ecx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %ecx, %ebx -; X86-NEXT: setne %al -; X86-NEXT: addl $20, %esp -; X86-NEXT: popl %esi -; X86-NEXT: popl %ebx -; X86-NEXT: retl +; SDAG-X86-LABEL: multiple_flt_rounds: +; SDAG-X86: # %bb.0: # %entry +; SDAG-X86-NEXT: pushl %ebx +; SDAG-X86-NEXT: pushl %esi +; SDAG-X86-NEXT: subl $20, %esp +; SDAG-X86-NEXT: movl $1024, (%esp) # imm = 0x400 +; SDAG-X86-NEXT: calll fesetround +; SDAG-X86-NEXT: fnstcw {{[0-9]+}}(%esp) +; SDAG-X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; SDAG-X86-NEXT: shrl $9, %ecx +; SDAG-X86-NEXT: andb $6, %cl +; SDAG-X86-NEXT: movl $45, %esi +; SDAG-X86-NEXT: movl $45, %eax +; SDAG-X86-NEXT: # kill: def $cl killed $cl killed $ecx +; SDAG-X86-NEXT: shrl %cl, %eax +; SDAG-X86-NEXT: andl $3, %eax +; SDAG-X86-NEXT: xorl %ebx, %ebx +; SDAG-X86-NEXT: cmpl $3, %eax +; SDAG-X86-NEXT: setne %bl +; SDAG-X86-NEXT: movl $0, (%esp) +; SDAG-X86-NEXT: calll fesetround +; SDAG-X86-NEXT: fnstcw {{[0-9]+}}(%esp) +; SDAG-X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; SDAG-X86-NEXT: shrl $9, %ecx +; SDAG-X86-NEXT: andb $6, %cl +; SDAG-X86-NEXT: movl $45, %eax +; SDAG-X86-NEXT: # kill: def $cl killed $cl killed $ecx +; SDAG-X86-NEXT: shrl %cl, %eax +; SDAG-X86-NEXT: andl $3, %eax +; SDAG-X86-NEXT: cmpl $1, %eax +; SDAG-X86-NEXT: je .LBB1_2 +; SDAG-X86-NEXT: # %bb.1: # %entry +; SDAG-X86-NEXT: incl %ebx +; SDAG-X86-NEXT: .LBB1_2: # %entry +; SDAG-X86-NEXT: movl $3072, (%esp) # imm = 0xC00 +; SDAG-X86-NEXT: calll fesetround +; SDAG-X86-NEXT: fnstcw {{[0-9]+}}(%esp) +; SDAG-X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; SDAG-X86-NEXT: shrl $9, %ecx +; SDAG-X86-NEXT: andb $6, %cl +; SDAG-X86-NEXT: movl $45, %eax +; SDAG-X86-NEXT: # kill: def $cl killed $cl killed $ecx +; SDAG-X86-NEXT: shrl %cl, %eax +; SDAG-X86-NEXT: andl $3, %eax +; SDAG-X86-NEXT: cmpl $1, %eax +; SDAG-X86-NEXT: sbbl $-1, %ebx +; SDAG-X86-NEXT: movl $2048, (%esp) # imm = 0x800 +; SDAG-X86-NEXT: calll fesetround +; 
SDAG-X86-NEXT: fnstcw {{[0-9]+}}(%esp) +; SDAG-X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; SDAG-X86-NEXT: shrl $9, %ecx +; SDAG-X86-NEXT: andb $6, %cl +; SDAG-X86-NEXT: # kill: def $cl killed $cl killed $ecx +; SDAG-X86-NEXT: shrl %cl, %esi +; SDAG-X86-NEXT: andl $3, %esi +; SDAG-X86-NEXT: xorl %ecx, %ecx +; SDAG-X86-NEXT: cmpl $2, %esi +; SDAG-X86-NEXT: setne %cl +; SDAG-X86-NEXT: negl %ecx +; SDAG-X86-NEXT: xorl %eax, %eax +; SDAG-X86-NEXT: cmpl %ecx, %ebx +; SDAG-X86-NEXT: setne %al +; SDAG-X86-NEXT: addl $20, %esp +; SDAG-X86-NEXT: popl %esi +; SDAG-X86-NEXT: popl %ebx +; SDAG-X86-NEXT: retl ; -; X64-LABEL: multiple_flt_rounds: -; X64: # %bb.0: # %entry -; X64-NEXT: pushq %rbp -; X64-NEXT: pushq %r14 -; X64-NEXT: pushq %rbx -; X64-NEXT: subq $16, %rsp -; X64-NEXT: movl $1024, %edi # imm = 0x400 -; X64-NEXT: callq fesetround -; X64-NEXT: fnstcw {{[0-9]+}}(%rsp) -; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx -; X64-NEXT: shrl $9, %ecx -; X64-NEXT: andb $6, %cl -; X64-NEXT: movl $45, %ebx -; X64-NEXT: movl $45, %eax -; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: shrl %cl, %eax -; X64-NEXT: andl $3, %eax -; X64-NEXT: xorl %r14d, %r14d -; X64-NEXT: cmpl $3, %eax -; X64-NEXT: setne %r14b -; X64-NEXT: xorl %edi, %edi -; X64-NEXT: callq fesetround -; X64-NEXT: fnstcw {{[0-9]+}}(%rsp) -; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx -; X64-NEXT: shrl $9, %ecx -; X64-NEXT: andb $6, %cl -; X64-NEXT: movl $45, %eax -; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: shrl %cl, %eax -; X64-NEXT: andl $3, %eax -; X64-NEXT: leal 1(%r14), %ebp -; X64-NEXT: cmpl $1, %eax -; X64-NEXT: cmovel %r14d, %ebp -; X64-NEXT: movl $3072, %edi # imm = 0xC00 -; X64-NEXT: callq fesetround -; X64-NEXT: fnstcw {{[0-9]+}}(%rsp) -; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx -; X64-NEXT: shrl $9, %ecx -; X64-NEXT: andb $6, %cl -; X64-NEXT: movl $45, %eax -; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: shrl %cl, %eax -; X64-NEXT: andl $3, %eax -; X64-NEXT: cmpl $1, 
%eax -; X64-NEXT: sbbl $-1, %ebp -; X64-NEXT: movl $2048, %edi # imm = 0x800 -; X64-NEXT: callq fesetround -; X64-NEXT: fnstcw {{[0-9]+}}(%rsp) -; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx -; X64-NEXT: shrl $9, %ecx -; X64-NEXT: andb $6, %cl -; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: shrl %cl, %ebx -; X64-NEXT: andl $3, %ebx -; X64-NEXT: xorl %ecx, %ecx -; X64-NEXT: cmpl $2, %ebx -; X64-NEXT: setne %cl -; X64-NEXT: negl %ecx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl %ecx, %ebp -; X64-NEXT: setne %al -; X64-NEXT: addq $16, %rsp -; X64-NEXT: popq %rbx -; X64-NEXT: popq %r14 -; X64-NEXT: popq %rbp -; X64-NEXT: retq +; SDAG-X64-LABEL: multiple_flt_rounds: +; SDAG-X64: # %bb.0: # %entry +; SDAG-X64-NEXT: pushq %rbp +; SDAG-X64-NEXT: pushq %r14 +; SDAG-X64-NEXT: pushq %rbx +; SDAG-X64-NEXT: subq $16, %rsp +; SDAG-X64-NEXT: movl $1024, %edi # imm = 0x400 +; SDAG-X64-NEXT: callq fesetround +; SDAG-X64-NEXT: fnstcw {{[0-9]+}}(%rsp) +; SDAG-X64-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx +; SDAG-X64-NEXT: shrl $9, %ecx +; SDAG-X64-NEXT: andb $6, %cl +; SDAG-X64-NEXT: movl $45, %ebx +; SDAG-X64-NEXT: movl $45, %eax +; SDAG-X64-NEXT: # kill: def $cl killed $cl killed $ecx +; SDAG-X64-NEXT: shrl %cl, %eax +; SDAG-X64-NEXT: andl $3, %eax +; SDAG-X64-NEXT: xorl %r14d, %r14d +; SDAG-X64-NEXT: cmpl $3, %eax +; SDAG-X64-NEXT: setne %r14b +; SDAG-X64-NEXT: xorl %edi, %edi +; SDAG-X64-NEXT: callq fesetround +; SDAG-X64-NEXT: fnstcw {{[0-9]+}}(%rsp) +; SDAG-X64-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx +; SDAG-X64-NEXT: shrl $9, %ecx +; SDAG-X64-NEXT: andb $6, %cl +; SDAG-X64-NEXT: movl $45, %eax +; SDAG-X64-NEXT: # kill: def $cl killed $cl killed $ecx +; SDAG-X64-NEXT: shrl %cl, %eax +; SDAG-X64-NEXT: andl $3, %eax +; SDAG-X64-NEXT: leal 1(%r14), %ebp +; SDAG-X64-NEXT: cmpl $1, %eax +; SDAG-X64-NEXT: cmovel %r14d, %ebp +; SDAG-X64-NEXT: movl $3072, %edi # imm = 0xC00 +; SDAG-X64-NEXT: callq fesetround +; SDAG-X64-NEXT: fnstcw {{[0-9]+}}(%rsp) +; SDAG-X64-NEXT: movzwl 
{{[0-9]+}}(%rsp), %ecx +; SDAG-X64-NEXT: shrl $9, %ecx +; SDAG-X64-NEXT: andb $6, %cl +; SDAG-X64-NEXT: movl $45, %eax +; SDAG-X64-NEXT: # kill: def $cl killed $cl killed $ecx +; SDAG-X64-NEXT: shrl %cl, %eax +; SDAG-X64-NEXT: andl $3, %eax +; SDAG-X64-NEXT: cmpl $1, %eax +; SDAG-X64-NEXT: sbbl $-1, %ebp +; SDAG-X64-NEXT: movl $2048, %edi # imm = 0x800 +; SDAG-X64-NEXT: callq fesetround +; SDAG-X64-NEXT: fnstcw {{[0-9]+}}(%rsp) +; SDAG-X64-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx +; SDAG-X64-NEXT: shrl $9, %ecx +; SDAG-X64-NEXT: andb $6, %cl +; SDAG-X64-NEXT: # kill: def $cl killed $cl killed $ecx +; SDAG-X64-NEXT: shrl %cl, %ebx +; SDAG-X64-NEXT: andl $3, %ebx +; SDAG-X64-NEXT: xorl %ecx, %ecx +; SDAG-X64-NEXT: cmpl $2, %ebx +; SDAG-X64-NEXT: setne %cl +; SDAG-X64-NEXT: negl %ecx +; SDAG-X64-NEXT: xorl %eax, %eax +; SDAG-X64-NEXT: cmpl %ecx, %ebp +; SDAG-X64-NEXT: setne %al +; SDAG-X64-NEXT: addq $16, %rsp +; SDAG-X64-NEXT: popq %rbx +; SDAG-X64-NEXT: popq %r14 +; SDAG-X64-NEXT: popq %rbp +; SDAG-X64-NEXT: retq +; +; GISEL-X86-LABEL: multiple_flt_rounds: +; GISEL-X86: # %bb.0: # %entry +; GISEL-X86-NEXT: pushl %ebp +; GISEL-X86-NEXT: pushl %ebx +; GISEL-X86-NEXT: pushl %edi +; GISEL-X86-NEXT: pushl %esi +; GISEL-X86-NEXT: subl $12, %esp +; GISEL-X86-NEXT: movl $1, %ebp +; GISEL-X86-NEXT: movl $1024, (%esp) # imm = 0x400 +; GISEL-X86-NEXT: calll fesetround +; GISEL-X86-NEXT: fnstcw {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; GISEL-X86-NEXT: shrl $9, %ecx +; GISEL-X86-NEXT: andb $6, %cl +; GISEL-X86-NEXT: movl $45, %edi +; GISEL-X86-NEXT: movl $45, %eax +; GISEL-X86-NEXT: # kill: def $cl killed $cl killed $ecx +; GISEL-X86-NEXT: shrl %cl, %eax +; GISEL-X86-NEXT: andl $3, %eax +; GISEL-X86-NEXT: xorl %ebx, %ebx +; GISEL-X86-NEXT: cmpl $3, %eax +; GISEL-X86-NEXT: setne %bl +; GISEL-X86-NEXT: andl $1, %ebx +; GISEL-X86-NEXT: movl $0, (%esp) +; GISEL-X86-NEXT: calll fesetround +; GISEL-X86-NEXT: fnstcw {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: movzwl 
{{[0-9]+}}(%esp), %ecx +; GISEL-X86-NEXT: shrl $9, %ecx +; GISEL-X86-NEXT: andb $6, %cl +; GISEL-X86-NEXT: movl $45, %edx +; GISEL-X86-NEXT: # kill: def $cl killed $cl killed $ecx +; GISEL-X86-NEXT: shrl %cl, %edx +; GISEL-X86-NEXT: andl $3, %edx +; GISEL-X86-NEXT: xorl %eax, %eax +; GISEL-X86-NEXT: cmpl $1, %edx +; GISEL-X86-NEXT: sete %cl +; GISEL-X86-NEXT: testl %ebx, %ebx +; GISEL-X86-NEXT: je .LBB1_2 +; GISEL-X86-NEXT: # %bb.1: # %entry +; GISEL-X86-NEXT: movl $2, %ebp +; GISEL-X86-NEXT: .LBB1_2: # %entry +; GISEL-X86-NEXT: xorl %esi, %esi +; GISEL-X86-NEXT: movb %cl, %al +; GISEL-X86-NEXT: andl $1, %eax +; GISEL-X86-NEXT: je .LBB1_4 +; GISEL-X86-NEXT: # %bb.3: # %entry +; GISEL-X86-NEXT: movl %ebx, %ebp +; GISEL-X86-NEXT: .LBB1_4: # %entry +; GISEL-X86-NEXT: movl $3072, (%esp) # imm = 0xC00 +; GISEL-X86-NEXT: calll fesetround +; GISEL-X86-NEXT: fnstcw {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; GISEL-X86-NEXT: shrl $9, %ecx +; GISEL-X86-NEXT: andb $6, %cl +; GISEL-X86-NEXT: movl $45, %eax +; GISEL-X86-NEXT: # kill: def $cl killed $cl killed $ecx +; GISEL-X86-NEXT: shrl %cl, %eax +; GISEL-X86-NEXT: andl $3, %eax +; GISEL-X86-NEXT: xorl %ebx, %ebx +; GISEL-X86-NEXT: cmpl %esi, %eax +; GISEL-X86-NEXT: setne %bl +; GISEL-X86-NEXT: andl $1, %ebx +; GISEL-X86-NEXT: addl %ebp, %ebx +; GISEL-X86-NEXT: movl $2048, (%esp) # imm = 0x800 +; GISEL-X86-NEXT: calll fesetround +; GISEL-X86-NEXT: fnstcw {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; GISEL-X86-NEXT: shrl $9, %ecx +; GISEL-X86-NEXT: andb $6, %cl +; GISEL-X86-NEXT: # kill: def $cl killed $cl killed $ecx +; GISEL-X86-NEXT: shrl %cl, %edi +; GISEL-X86-NEXT: andl $3, %edi +; GISEL-X86-NEXT: xorl %ecx, %ecx +; GISEL-X86-NEXT: movl $2, %eax +; GISEL-X86-NEXT: cmpl %eax, %edi +; GISEL-X86-NEXT: setne %cl +; GISEL-X86-NEXT: shll $31, %ecx +; GISEL-X86-NEXT: sarl $31, %ecx +; GISEL-X86-NEXT: xorl %eax, %eax +; GISEL-X86-NEXT: cmpl %ecx, %ebx +; GISEL-X86-NEXT: setne %al +; 
GISEL-X86-NEXT: andl $1, %eax +; GISEL-X86-NEXT: addl $12, %esp +; GISEL-X86-NEXT: popl %esi +; GISEL-X86-NEXT: popl %edi +; GISEL-X86-NEXT: popl %ebx +; GISEL-X86-NEXT: popl %ebp +; GISEL-X86-NEXT: retl +; +; GISEL-X64-LABEL: multiple_flt_rounds: +; GISEL-X64: # %bb.0: # %entry +; GISEL-X64-NEXT: pushq %rbp +; GISEL-X64-NEXT: pushq %r15 +; GISEL-X64-NEXT: pushq %r14 +; GISEL-X64-NEXT: pushq %rbx +; GISEL-X64-NEXT: pushq %rax +; GISEL-X64-NEXT: movl $1, %r14d +; GISEL-X64-NEXT: movl $2, %ebp +; GISEL-X64-NEXT: movl $1024, %edi # imm = 0x400 +; GISEL-X64-NEXT: callq fesetround +; GISEL-X64-NEXT: fnstcw (%rsp) +; GISEL-X64-NEXT: movzwl (%rsp), %ecx +; GISEL-X64-NEXT: shrl $9, %ecx +; GISEL-X64-NEXT: andb $6, %cl +; GISEL-X64-NEXT: movl $45, %ebx +; GISEL-X64-NEXT: movl $45, %eax +; GISEL-X64-NEXT: # kill: def $cl killed $cl killed $ecx +; GISEL-X64-NEXT: shrl %cl, %eax +; GISEL-X64-NEXT: andl $3, %eax +; GISEL-X64-NEXT: xorl %r15d, %r15d +; GISEL-X64-NEXT: cmpl $3, %eax +; GISEL-X64-NEXT: setne %r15b +; GISEL-X64-NEXT: andl $1, %r15d +; GISEL-X64-NEXT: xorl %edi, %edi +; GISEL-X64-NEXT: callq fesetround +; GISEL-X64-NEXT: fnstcw {{[0-9]+}}(%rsp) +; GISEL-X64-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx +; GISEL-X64-NEXT: shrl $9, %ecx +; GISEL-X64-NEXT: andb $6, %cl +; GISEL-X64-NEXT: movl $45, %eax +; GISEL-X64-NEXT: # kill: def $cl killed $cl killed $ecx +; GISEL-X64-NEXT: shrl %cl, %eax +; GISEL-X64-NEXT: andl $3, %eax +; GISEL-X64-NEXT: xorl %ecx, %ecx +; GISEL-X64-NEXT: cmpl $1, %eax +; GISEL-X64-NEXT: sete %cl +; GISEL-X64-NEXT: testl %r15d, %r15d +; GISEL-X64-NEXT: cmovel %r14d, %ebp +; GISEL-X64-NEXT: andl $1, %ecx +; GISEL-X64-NEXT: cmovnel %r15d, %ebp +; GISEL-X64-NEXT: movl $3072, %edi # imm = 0xC00 +; GISEL-X64-NEXT: callq fesetround +; GISEL-X64-NEXT: fnstcw {{[0-9]+}}(%rsp) +; GISEL-X64-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx +; GISEL-X64-NEXT: shrl $9, %ecx +; GISEL-X64-NEXT: andb $6, %cl +; GISEL-X64-NEXT: movl $45, %eax +; GISEL-X64-NEXT: # kill: def $cl killed 
$cl killed $ecx +; GISEL-X64-NEXT: shrl %cl, %eax +; GISEL-X64-NEXT: andl $3, %eax +; GISEL-X64-NEXT: xorl %r14d, %r14d +; GISEL-X64-NEXT: cmpl $0, %eax +; GISEL-X64-NEXT: setne %r14b +; GISEL-X64-NEXT: andl $1, %r14d +; GISEL-X64-NEXT: addl %ebp, %r14d +; GISEL-X64-NEXT: movl $2048, %edi # imm = 0x800 +; GISEL-X64-NEXT: callq fesetround +; GISEL-X64-NEXT: fnstcw {{[0-9]+}}(%rsp) +; GISEL-X64-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx +; GISEL-X64-NEXT: shrl $9, %ecx +; GISEL-X64-NEXT: andb $6, %cl +; GISEL-X64-NEXT: # kill: def $cl killed $cl killed $ecx +; GISEL-X64-NEXT: shrl %cl, %ebx +; GISEL-X64-NEXT: andl $3, %ebx +; GISEL-X64-NEXT: xorl %ecx, %ecx +; GISEL-X64-NEXT: cmpl $2, %ebx +; GISEL-X64-NEXT: setne %cl +; GISEL-X64-NEXT: shll $31, %ecx +; GISEL-X64-NEXT: sarl $31, %ecx +; GISEL-X64-NEXT: xorl %eax, %eax +; GISEL-X64-NEXT: cmpl %ecx, %r14d +; GISEL-X64-NEXT: setne %al +; GISEL-X64-NEXT: andl $1, %eax +; GISEL-X64-NEXT: addq $8, %rsp +; GISEL-X64-NEXT: popq %rbx +; GISEL-X64-NEXT: popq %r14 +; GISEL-X64-NEXT: popq %r15 +; GISEL-X64-NEXT: popq %rbp +; GISEL-X64-NEXT: retq entry: %call = tail call i32 @fesetround(i32 1024) %0 = tail call i32 @llvm.get.rounding() diff --git a/llvm/test/CodeGen/X86/fp128-abi.ll b/llvm/test/CodeGen/X86/fp128-abi.ll deleted file mode 100644 index 526ed7c72f73f..0000000000000 --- a/llvm/test/CodeGen/X86/fp128-abi.ll +++ /dev/null @@ -1,659 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py - -; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-X64 -; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-X86 -; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-MSVC64 -; RUN: llc < %s -mtriple=i686-pc-windows-msvc -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-MSVC32 -; RUN: llc < %s -mtriple=x86_64-pc-windows-gnu 
-verify-machineinstrs | FileCheck %s --check-prefix=CHECK-MINGW - -define fp128 @return(ptr %p) { -; CHECK-X64-LABEL: return: -; CHECK-X64: # %bb.0: -; CHECK-X64-NEXT: movaps (%rdi), %xmm0 -; CHECK-X64-NEXT: retq -; -; CHECK-X86-LABEL: return: -; CHECK-X86: # %bb.0: -; CHECK-X86-NEXT: pushl %edi -; CHECK-X86-NEXT: .cfi_def_cfa_offset 8 -; CHECK-X86-NEXT: pushl %esi -; CHECK-X86-NEXT: .cfi_def_cfa_offset 12 -; CHECK-X86-NEXT: .cfi_offset %esi, -12 -; CHECK-X86-NEXT: .cfi_offset %edi, -8 -; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-X86-NEXT: movl (%ecx), %edx -; CHECK-X86-NEXT: movl 4(%ecx), %esi -; CHECK-X86-NEXT: movl 8(%ecx), %edi -; CHECK-X86-NEXT: movl 12(%ecx), %ecx -; CHECK-X86-NEXT: movl %ecx, 12(%eax) -; CHECK-X86-NEXT: movl %edi, 8(%eax) -; CHECK-X86-NEXT: movl %esi, 4(%eax) -; CHECK-X86-NEXT: movl %edx, (%eax) -; CHECK-X86-NEXT: popl %esi -; CHECK-X86-NEXT: .cfi_def_cfa_offset 8 -; CHECK-X86-NEXT: popl %edi -; CHECK-X86-NEXT: .cfi_def_cfa_offset 4 -; CHECK-X86-NEXT: retl $4 -; -; CHECK-MSVC64-LABEL: return: -; CHECK-MSVC64: # %bb.0: -; CHECK-MSVC64-NEXT: movaps (%rcx), %xmm0 -; CHECK-MSVC64-NEXT: retq -; -; CHECK-MSVC32-LABEL: return: -; CHECK-MSVC32: # %bb.0: -; CHECK-MSVC32-NEXT: pushl %edi -; CHECK-MSVC32-NEXT: pushl %esi -; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-MSVC32-NEXT: movl (%ecx), %edx -; CHECK-MSVC32-NEXT: movl 4(%ecx), %esi -; CHECK-MSVC32-NEXT: movl 8(%ecx), %edi -; CHECK-MSVC32-NEXT: movl 12(%ecx), %ecx -; CHECK-MSVC32-NEXT: movl %ecx, 12(%eax) -; CHECK-MSVC32-NEXT: movl %edi, 8(%eax) -; CHECK-MSVC32-NEXT: movl %esi, 4(%eax) -; CHECK-MSVC32-NEXT: movl %edx, (%eax) -; CHECK-MSVC32-NEXT: popl %esi -; CHECK-MSVC32-NEXT: popl %edi -; CHECK-MSVC32-NEXT: retl -; -; CHECK-MINGW-LABEL: return: -; CHECK-MINGW: # %bb.0: -; CHECK-MINGW-NEXT: movaps (%rcx), %xmm0 -; CHECK-MINGW-NEXT: retq - %r = load fp128, ptr %p, align 16 - ret 
fp128 %r -} - -define fp128 @first_arg(fp128 %x) { -; CHECK-X64-LABEL: first_arg: -; CHECK-X64: # %bb.0: -; CHECK-X64-NEXT: retq -; -; CHECK-X86-LABEL: first_arg: -; CHECK-X86: # %bb.0: -; CHECK-X86-NEXT: pushl %edi -; CHECK-X86-NEXT: .cfi_def_cfa_offset 8 -; CHECK-X86-NEXT: pushl %esi -; CHECK-X86-NEXT: .cfi_def_cfa_offset 12 -; CHECK-X86-NEXT: .cfi_offset %esi, -12 -; CHECK-X86-NEXT: .cfi_offset %edi, -8 -; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; CHECK-X86-NEXT: movl %edi, 12(%eax) -; CHECK-X86-NEXT: movl %esi, 8(%eax) -; CHECK-X86-NEXT: movl %edx, 4(%eax) -; CHECK-X86-NEXT: movl %ecx, (%eax) -; CHECK-X86-NEXT: popl %esi -; CHECK-X86-NEXT: .cfi_def_cfa_offset 8 -; CHECK-X86-NEXT: popl %edi -; CHECK-X86-NEXT: .cfi_def_cfa_offset 4 -; CHECK-X86-NEXT: retl $4 -; -; CHECK-MSVC64-LABEL: first_arg: -; CHECK-MSVC64: # %bb.0: -; CHECK-MSVC64-NEXT: movaps (%rcx), %xmm0 -; CHECK-MSVC64-NEXT: retq -; -; CHECK-MSVC32-LABEL: first_arg: -; CHECK-MSVC32: # %bb.0: -; CHECK-MSVC32-NEXT: pushl %edi -; CHECK-MSVC32-NEXT: pushl %esi -; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %edx -; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %esi -; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %edi -; CHECK-MSVC32-NEXT: movl %edi, 12(%eax) -; CHECK-MSVC32-NEXT: movl %esi, 8(%eax) -; CHECK-MSVC32-NEXT: movl %edx, 4(%eax) -; CHECK-MSVC32-NEXT: movl %ecx, (%eax) -; CHECK-MSVC32-NEXT: popl %esi -; CHECK-MSVC32-NEXT: popl %edi -; CHECK-MSVC32-NEXT: retl -; -; CHECK-MINGW-LABEL: first_arg: -; CHECK-MINGW: # %bb.0: -; CHECK-MINGW-NEXT: movaps (%rcx), %xmm0 -; CHECK-MINGW-NEXT: retq - ret fp128 %x -} - -define fp128 @leading_args(i64 %_0, i64 %_1, i64 %_2, i64 %_3, fp128 %x) { -; CHECK-X64-LABEL: leading_args: -; 
CHECK-X64: # %bb.0: -; CHECK-X64-NEXT: retq -; -; CHECK-X86-LABEL: leading_args: -; CHECK-X86: # %bb.0: -; CHECK-X86-NEXT: pushl %edi -; CHECK-X86-NEXT: .cfi_def_cfa_offset 8 -; CHECK-X86-NEXT: pushl %esi -; CHECK-X86-NEXT: .cfi_def_cfa_offset 12 -; CHECK-X86-NEXT: .cfi_offset %esi, -12 -; CHECK-X86-NEXT: .cfi_offset %edi, -8 -; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; CHECK-X86-NEXT: movl %edi, 12(%eax) -; CHECK-X86-NEXT: movl %esi, 8(%eax) -; CHECK-X86-NEXT: movl %edx, 4(%eax) -; CHECK-X86-NEXT: movl %ecx, (%eax) -; CHECK-X86-NEXT: popl %esi -; CHECK-X86-NEXT: .cfi_def_cfa_offset 8 -; CHECK-X86-NEXT: popl %edi -; CHECK-X86-NEXT: .cfi_def_cfa_offset 4 -; CHECK-X86-NEXT: retl $4 -; -; CHECK-MSVC64-LABEL: leading_args: -; CHECK-MSVC64: # %bb.0: -; CHECK-MSVC64-NEXT: movq {{[0-9]+}}(%rsp), %rax -; CHECK-MSVC64-NEXT: movaps (%rax), %xmm0 -; CHECK-MSVC64-NEXT: retq -; -; CHECK-MSVC32-LABEL: leading_args: -; CHECK-MSVC32: # %bb.0: -; CHECK-MSVC32-NEXT: pushl %edi -; CHECK-MSVC32-NEXT: pushl %esi -; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %edx -; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %esi -; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %edi -; CHECK-MSVC32-NEXT: movl %edi, 12(%eax) -; CHECK-MSVC32-NEXT: movl %esi, 8(%eax) -; CHECK-MSVC32-NEXT: movl %edx, 4(%eax) -; CHECK-MSVC32-NEXT: movl %ecx, (%eax) -; CHECK-MSVC32-NEXT: popl %esi -; CHECK-MSVC32-NEXT: popl %edi -; CHECK-MSVC32-NEXT: retl -; -; CHECK-MINGW-LABEL: leading_args: -; CHECK-MINGW: # %bb.0: -; CHECK-MINGW-NEXT: movq {{[0-9]+}}(%rsp), %rax -; CHECK-MINGW-NEXT: movaps (%rax), %xmm0 -; CHECK-MINGW-NEXT: retq - ret fp128 %x -} - -define fp128 @many_leading_args(i64 %_0, i64 %_1, i64 %_2, i64 %_3, i64 %_4, fp128 %_5, 
fp128 %x) { -; CHECK-X64-LABEL: many_leading_args: -; CHECK-X64: # %bb.0: -; CHECK-X64-NEXT: movaps %xmm1, %xmm0 -; CHECK-X64-NEXT: retq -; -; CHECK-X86-LABEL: many_leading_args: -; CHECK-X86: # %bb.0: -; CHECK-X86-NEXT: pushl %edi -; CHECK-X86-NEXT: .cfi_def_cfa_offset 8 -; CHECK-X86-NEXT: pushl %esi -; CHECK-X86-NEXT: .cfi_def_cfa_offset 12 -; CHECK-X86-NEXT: .cfi_offset %esi, -12 -; CHECK-X86-NEXT: .cfi_offset %edi, -8 -; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; CHECK-X86-NEXT: movl %edi, 12(%eax) -; CHECK-X86-NEXT: movl %esi, 8(%eax) -; CHECK-X86-NEXT: movl %edx, 4(%eax) -; CHECK-X86-NEXT: movl %ecx, (%eax) -; CHECK-X86-NEXT: popl %esi -; CHECK-X86-NEXT: .cfi_def_cfa_offset 8 -; CHECK-X86-NEXT: popl %edi -; CHECK-X86-NEXT: .cfi_def_cfa_offset 4 -; CHECK-X86-NEXT: retl $4 -; -; CHECK-MSVC64-LABEL: many_leading_args: -; CHECK-MSVC64: # %bb.0: -; CHECK-MSVC64-NEXT: movq {{[0-9]+}}(%rsp), %rax -; CHECK-MSVC64-NEXT: movaps (%rax), %xmm0 -; CHECK-MSVC64-NEXT: retq -; -; CHECK-MSVC32-LABEL: many_leading_args: -; CHECK-MSVC32: # %bb.0: -; CHECK-MSVC32-NEXT: pushl %edi -; CHECK-MSVC32-NEXT: pushl %esi -; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %edx -; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %esi -; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %edi -; CHECK-MSVC32-NEXT: movl %edi, 12(%eax) -; CHECK-MSVC32-NEXT: movl %esi, 8(%eax) -; CHECK-MSVC32-NEXT: movl %edx, 4(%eax) -; CHECK-MSVC32-NEXT: movl %ecx, (%eax) -; CHECK-MSVC32-NEXT: popl %esi -; CHECK-MSVC32-NEXT: popl %edi -; CHECK-MSVC32-NEXT: retl -; -; CHECK-MINGW-LABEL: many_leading_args: -; CHECK-MINGW: # %bb.0: -; CHECK-MINGW-NEXT: movq {{[0-9]+}}(%rsp), %rax -; CHECK-MINGW-NEXT: movaps (%rax), %xmm0 -; CHECK-MINGW-NEXT: retq 
- ret fp128 %x -} - -define fp128 @trailing_arg(i64 %_0, i64 %_1, i64 %_2, i64 %_3, i64 %_4, fp128 %x, i64 %_5) { -; CHECK-X64-LABEL: trailing_arg: -; CHECK-X64: # %bb.0: -; CHECK-X64-NEXT: retq -; -; CHECK-X86-LABEL: trailing_arg: -; CHECK-X86: # %bb.0: -; CHECK-X86-NEXT: pushl %edi -; CHECK-X86-NEXT: .cfi_def_cfa_offset 8 -; CHECK-X86-NEXT: pushl %esi -; CHECK-X86-NEXT: .cfi_def_cfa_offset 12 -; CHECK-X86-NEXT: .cfi_offset %esi, -12 -; CHECK-X86-NEXT: .cfi_offset %edi, -8 -; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; CHECK-X86-NEXT: movl %edi, 12(%eax) -; CHECK-X86-NEXT: movl %esi, 8(%eax) -; CHECK-X86-NEXT: movl %edx, 4(%eax) -; CHECK-X86-NEXT: movl %ecx, (%eax) -; CHECK-X86-NEXT: popl %esi -; CHECK-X86-NEXT: .cfi_def_cfa_offset 8 -; CHECK-X86-NEXT: popl %edi -; CHECK-X86-NEXT: .cfi_def_cfa_offset 4 -; CHECK-X86-NEXT: retl $4 -; -; CHECK-MSVC64-LABEL: trailing_arg: -; CHECK-MSVC64: # %bb.0: -; CHECK-MSVC64-NEXT: movq {{[0-9]+}}(%rsp), %rax -; CHECK-MSVC64-NEXT: movaps (%rax), %xmm0 -; CHECK-MSVC64-NEXT: retq -; -; CHECK-MSVC32-LABEL: trailing_arg: -; CHECK-MSVC32: # %bb.0: -; CHECK-MSVC32-NEXT: pushl %edi -; CHECK-MSVC32-NEXT: pushl %esi -; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %edx -; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %esi -; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %edi -; CHECK-MSVC32-NEXT: movl %edi, 12(%eax) -; CHECK-MSVC32-NEXT: movl %esi, 8(%eax) -; CHECK-MSVC32-NEXT: movl %edx, 4(%eax) -; CHECK-MSVC32-NEXT: movl %ecx, (%eax) -; CHECK-MSVC32-NEXT: popl %esi -; CHECK-MSVC32-NEXT: popl %edi -; CHECK-MSVC32-NEXT: retl -; -; CHECK-MINGW-LABEL: trailing_arg: -; CHECK-MINGW: # %bb.0: -; CHECK-MINGW-NEXT: movq {{[0-9]+}}(%rsp), %rax -; CHECK-MINGW-NEXT: movaps 
(%rax), %xmm0 -; CHECK-MINGW-NEXT: retq - ret fp128 %x -} - -define void @call_first_arg(fp128 %x) nounwind { -; CHECK-X64-LABEL: call_first_arg: -; CHECK-X64: # %bb.0: -; CHECK-X64-NEXT: pushq %rax -; CHECK-X64-NEXT: callq first_arg@PLT -; CHECK-X64-NEXT: popq %rax -; CHECK-X64-NEXT: retq -; -; CHECK-X86-LABEL: call_first_arg: -; CHECK-X86: # %bb.0: -; CHECK-X86-NEXT: subl $40, %esp -; CHECK-X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp) -; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp) -; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp) -; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp) -; CHECK-X86-NEXT: pushl %eax -; CHECK-X86-NEXT: calll first_arg@PLT -; CHECK-X86-NEXT: addl $56, %esp -; CHECK-X86-NEXT: retl -; -; CHECK-MSVC64-LABEL: call_first_arg: -; CHECK-MSVC64: # %bb.0: -; CHECK-MSVC64-NEXT: subq $56, %rsp -; CHECK-MSVC64-NEXT: movaps (%rcx), %xmm0 -; CHECK-MSVC64-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; CHECK-MSVC64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx -; CHECK-MSVC64-NEXT: callq first_arg -; CHECK-MSVC64-NEXT: addq $56, %rsp -; CHECK-MSVC64-NEXT: retq -; -; CHECK-MSVC32-LABEL: call_first_arg: -; CHECK-MSVC32: # %bb.0: -; CHECK-MSVC32-NEXT: pushl %ebp -; CHECK-MSVC32-NEXT: movl %esp, %ebp -; CHECK-MSVC32-NEXT: andl $-16, %esp -; CHECK-MSVC32-NEXT: subl $32, %esp -; CHECK-MSVC32-NEXT: movl %esp, %eax -; CHECK-MSVC32-NEXT: pushl 20(%ebp) -; CHECK-MSVC32-NEXT: pushl 16(%ebp) -; CHECK-MSVC32-NEXT: pushl 12(%ebp) -; CHECK-MSVC32-NEXT: pushl 8(%ebp) -; CHECK-MSVC32-NEXT: pushl %eax -; CHECK-MSVC32-NEXT: calll _first_arg -; CHECK-MSVC32-NEXT: addl $20, %esp -; CHECK-MSVC32-NEXT: movl %ebp, %esp -; CHECK-MSVC32-NEXT: popl %ebp -; CHECK-MSVC32-NEXT: retl -; -; CHECK-MINGW-LABEL: call_first_arg: -; CHECK-MINGW: # %bb.0: -; CHECK-MINGW-NEXT: subq $56, %rsp -; CHECK-MINGW-NEXT: movaps (%rcx), %xmm0 -; CHECK-MINGW-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; CHECK-MINGW-NEXT: leaq {{[0-9]+}}(%rsp), %rcx -; CHECK-MINGW-NEXT: callq first_arg -; CHECK-MINGW-NEXT: addq $56, 
%rsp -; CHECK-MINGW-NEXT: retq - call i128 @first_arg(fp128 %x) - ret void -} - -define void @call_leading_args(fp128 %x) nounwind { -; CHECK-X64-LABEL: call_leading_args: -; CHECK-X64: # %bb.0: -; CHECK-X64-NEXT: pushq %rax -; CHECK-X64-NEXT: xorl %edi, %edi -; CHECK-X64-NEXT: xorl %esi, %esi -; CHECK-X64-NEXT: xorl %edx, %edx -; CHECK-X64-NEXT: xorl %ecx, %ecx -; CHECK-X64-NEXT: callq leading_args@PLT -; CHECK-X64-NEXT: popq %rax -; CHECK-X64-NEXT: retq -; -; CHECK-X86-LABEL: call_leading_args: -; CHECK-X86: # %bb.0: -; CHECK-X86-NEXT: subl $40, %esp -; CHECK-X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp) -; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp) -; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp) -; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp) -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl %eax -; CHECK-X86-NEXT: calll leading_args@PLT -; CHECK-X86-NEXT: addl $88, %esp -; CHECK-X86-NEXT: retl -; -; CHECK-MSVC64-LABEL: call_leading_args: -; CHECK-MSVC64: # %bb.0: -; CHECK-MSVC64-NEXT: subq $72, %rsp -; CHECK-MSVC64-NEXT: movaps (%rcx), %xmm0 -; CHECK-MSVC64-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; CHECK-MSVC64-NEXT: leaq {{[0-9]+}}(%rsp), %rax -; CHECK-MSVC64-NEXT: movq %rax, {{[0-9]+}}(%rsp) -; CHECK-MSVC64-NEXT: xorl %ecx, %ecx -; CHECK-MSVC64-NEXT: xorl %edx, %edx -; CHECK-MSVC64-NEXT: xorl %r8d, %r8d -; CHECK-MSVC64-NEXT: xorl %r9d, %r9d -; CHECK-MSVC64-NEXT: callq leading_args -; CHECK-MSVC64-NEXT: addq $72, %rsp -; CHECK-MSVC64-NEXT: retq -; -; CHECK-MSVC32-LABEL: call_leading_args: -; CHECK-MSVC32: # %bb.0: -; CHECK-MSVC32-NEXT: pushl %ebp -; CHECK-MSVC32-NEXT: movl %esp, %ebp -; CHECK-MSVC32-NEXT: andl $-16, %esp -; CHECK-MSVC32-NEXT: subl $32, %esp -; CHECK-MSVC32-NEXT: movl %esp, %eax -; CHECK-MSVC32-NEXT: pushl 20(%ebp) -; 
CHECK-MSVC32-NEXT: pushl 16(%ebp) -; CHECK-MSVC32-NEXT: pushl 12(%ebp) -; CHECK-MSVC32-NEXT: pushl 8(%ebp) -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl %eax -; CHECK-MSVC32-NEXT: calll _leading_args -; CHECK-MSVC32-NEXT: addl $52, %esp -; CHECK-MSVC32-NEXT: movl %ebp, %esp -; CHECK-MSVC32-NEXT: popl %ebp -; CHECK-MSVC32-NEXT: retl -; -; CHECK-MINGW-LABEL: call_leading_args: -; CHECK-MINGW: # %bb.0: -; CHECK-MINGW-NEXT: subq $72, %rsp -; CHECK-MINGW-NEXT: movaps (%rcx), %xmm0 -; CHECK-MINGW-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; CHECK-MINGW-NEXT: leaq {{[0-9]+}}(%rsp), %rax -; CHECK-MINGW-NEXT: movq %rax, {{[0-9]+}}(%rsp) -; CHECK-MINGW-NEXT: xorl %ecx, %ecx -; CHECK-MINGW-NEXT: xorl %edx, %edx -; CHECK-MINGW-NEXT: xorl %r8d, %r8d -; CHECK-MINGW-NEXT: xorl %r9d, %r9d -; CHECK-MINGW-NEXT: callq leading_args -; CHECK-MINGW-NEXT: addq $72, %rsp -; CHECK-MINGW-NEXT: retq - call i128 @leading_args(i64 0, i64 0, i64 0, i64 0, fp128 %x) - ret void -} - -define void @call_many_leading_args(fp128 %x) nounwind { -; CHECK-X64-LABEL: call_many_leading_args: -; CHECK-X64: # %bb.0: -; CHECK-X64-NEXT: pushq %rax -; CHECK-X64-NEXT: movaps %xmm0, %xmm1 -; CHECK-X64-NEXT: xorps %xmm0, %xmm0 -; CHECK-X64-NEXT: xorl %edi, %edi -; CHECK-X64-NEXT: xorl %esi, %esi -; CHECK-X64-NEXT: xorl %edx, %edx -; CHECK-X64-NEXT: xorl %ecx, %ecx -; CHECK-X64-NEXT: callq many_leading_args@PLT -; CHECK-X64-NEXT: popq %rax -; CHECK-X64-NEXT: retq -; -; CHECK-X86-LABEL: call_many_leading_args: -; CHECK-X86: # %bb.0: -; CHECK-X86-NEXT: subl $40, %esp -; CHECK-X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp) -; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp) -; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp) -; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp) -; 
CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl %eax -; CHECK-X86-NEXT: calll many_leading_args@PLT -; CHECK-X86-NEXT: addl $104, %esp -; CHECK-X86-NEXT: retl -; -; CHECK-MSVC64-LABEL: call_many_leading_args: -; CHECK-MSVC64: # %bb.0: -; CHECK-MSVC64-NEXT: subq $88, %rsp -; CHECK-MSVC64-NEXT: movaps (%rcx), %xmm0 -; CHECK-MSVC64-NEXT: xorps %xmm1, %xmm1 -; CHECK-MSVC64-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) -; CHECK-MSVC64-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; CHECK-MSVC64-NEXT: leaq {{[0-9]+}}(%rsp), %rax -; CHECK-MSVC64-NEXT: movq %rax, {{[0-9]+}}(%rsp) -; CHECK-MSVC64-NEXT: leaq {{[0-9]+}}(%rsp), %rax -; CHECK-MSVC64-NEXT: movq %rax, {{[0-9]+}}(%rsp) -; CHECK-MSVC64-NEXT: xorl %ecx, %ecx -; CHECK-MSVC64-NEXT: xorl %edx, %edx -; CHECK-MSVC64-NEXT: xorl %r8d, %r8d -; CHECK-MSVC64-NEXT: xorl %r9d, %r9d -; CHECK-MSVC64-NEXT: callq many_leading_args -; CHECK-MSVC64-NEXT: addq $88, %rsp -; CHECK-MSVC64-NEXT: retq -; -; CHECK-MSVC32-LABEL: call_many_leading_args: -; CHECK-MSVC32: # %bb.0: -; CHECK-MSVC32-NEXT: pushl %ebp -; CHECK-MSVC32-NEXT: movl %esp, %ebp -; CHECK-MSVC32-NEXT: andl $-16, %esp -; CHECK-MSVC32-NEXT: subl $32, %esp -; CHECK-MSVC32-NEXT: movl %esp, %eax -; CHECK-MSVC32-NEXT: pushl 20(%ebp) -; CHECK-MSVC32-NEXT: pushl 16(%ebp) -; CHECK-MSVC32-NEXT: pushl 12(%ebp) -; CHECK-MSVC32-NEXT: pushl 8(%ebp) -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl $0 -; 
CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl %eax -; CHECK-MSVC32-NEXT: calll _many_leading_args -; CHECK-MSVC32-NEXT: addl $68, %esp -; CHECK-MSVC32-NEXT: movl %ebp, %esp -; CHECK-MSVC32-NEXT: popl %ebp -; CHECK-MSVC32-NEXT: retl -; -; CHECK-MINGW-LABEL: call_many_leading_args: -; CHECK-MINGW: # %bb.0: -; CHECK-MINGW-NEXT: subq $88, %rsp -; CHECK-MINGW-NEXT: movaps (%rcx), %xmm0 -; CHECK-MINGW-NEXT: xorps %xmm1, %xmm1 -; CHECK-MINGW-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) -; CHECK-MINGW-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; CHECK-MINGW-NEXT: leaq {{[0-9]+}}(%rsp), %rax -; CHECK-MINGW-NEXT: movq %rax, {{[0-9]+}}(%rsp) -; CHECK-MINGW-NEXT: leaq {{[0-9]+}}(%rsp), %rax -; CHECK-MINGW-NEXT: movq %rax, {{[0-9]+}}(%rsp) -; CHECK-MINGW-NEXT: xorl %ecx, %ecx -; CHECK-MINGW-NEXT: xorl %edx, %edx -; CHECK-MINGW-NEXT: xorl %r8d, %r8d -; CHECK-MINGW-NEXT: xorl %r9d, %r9d -; CHECK-MINGW-NEXT: callq many_leading_args -; CHECK-MINGW-NEXT: addq $88, %rsp -; CHECK-MINGW-NEXT: retq - call i128 @many_leading_args(i64 0, i64 0, i64 0, i64 0, fp128 0xL0, fp128 %x) - ret void -} - -define void @call_trailing_arg(fp128 %x) nounwind { -; CHECK-X64-LABEL: call_trailing_arg: -; CHECK-X64: # %bb.0: -; CHECK-X64-NEXT: pushq %rax -; CHECK-X64-NEXT: xorl %edi, %edi -; CHECK-X64-NEXT: xorl %esi, %esi -; CHECK-X64-NEXT: xorl %edx, %edx -; CHECK-X64-NEXT: xorl %ecx, %ecx -; CHECK-X64-NEXT: callq trailing_arg@PLT -; CHECK-X64-NEXT: popq %rax -; CHECK-X64-NEXT: retq -; -; CHECK-X86-LABEL: call_trailing_arg: -; CHECK-X86: # %bb.0: -; CHECK-X86-NEXT: subl $40, %esp -; CHECK-X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp) -; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp) -; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp) -; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp) -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: pushl $0 -; CHECK-X86-NEXT: 
pushl $0 -; CHECK-X86-NEXT: pushl %eax -; CHECK-X86-NEXT: calll trailing_arg@PLT -; CHECK-X86-NEXT: addl $88, %esp -; CHECK-X86-NEXT: retl -; -; CHECK-MSVC64-LABEL: call_trailing_arg: -; CHECK-MSVC64: # %bb.0: -; CHECK-MSVC64-NEXT: subq $72, %rsp -; CHECK-MSVC64-NEXT: movaps (%rcx), %xmm0 -; CHECK-MSVC64-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; CHECK-MSVC64-NEXT: leaq {{[0-9]+}}(%rsp), %rax -; CHECK-MSVC64-NEXT: movq %rax, {{[0-9]+}}(%rsp) -; CHECK-MSVC64-NEXT: xorl %ecx, %ecx -; CHECK-MSVC64-NEXT: xorl %edx, %edx -; CHECK-MSVC64-NEXT: xorl %r8d, %r8d -; CHECK-MSVC64-NEXT: xorl %r9d, %r9d -; CHECK-MSVC64-NEXT: callq trailing_arg -; CHECK-MSVC64-NEXT: addq $72, %rsp -; CHECK-MSVC64-NEXT: retq -; -; CHECK-MSVC32-LABEL: call_trailing_arg: -; CHECK-MSVC32: # %bb.0: -; CHECK-MSVC32-NEXT: pushl %ebp -; CHECK-MSVC32-NEXT: movl %esp, %ebp -; CHECK-MSVC32-NEXT: andl $-16, %esp -; CHECK-MSVC32-NEXT: subl $32, %esp -; CHECK-MSVC32-NEXT: movl %esp, %eax -; CHECK-MSVC32-NEXT: pushl 20(%ebp) -; CHECK-MSVC32-NEXT: pushl 16(%ebp) -; CHECK-MSVC32-NEXT: pushl 12(%ebp) -; CHECK-MSVC32-NEXT: pushl 8(%ebp) -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl $0 -; CHECK-MSVC32-NEXT: pushl %eax -; CHECK-MSVC32-NEXT: calll _trailing_arg -; CHECK-MSVC32-NEXT: addl $52, %esp -; CHECK-MSVC32-NEXT: movl %ebp, %esp -; CHECK-MSVC32-NEXT: popl %ebp -; CHECK-MSVC32-NEXT: retl -; -; CHECK-MINGW-LABEL: call_trailing_arg: -; CHECK-MINGW: # %bb.0: -; CHECK-MINGW-NEXT: subq $72, %rsp -; CHECK-MINGW-NEXT: movaps (%rcx), %xmm0 -; CHECK-MINGW-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; CHECK-MINGW-NEXT: leaq {{[0-9]+}}(%rsp), %rax -; CHECK-MINGW-NEXT: movq %rax, {{[0-9]+}}(%rsp) -; CHECK-MINGW-NEXT: xorl %ecx, %ecx -; CHECK-MINGW-NEXT: xorl %edx, %edx -; CHECK-MINGW-NEXT: xorl %r8d, %r8d -; CHECK-MINGW-NEXT: xorl %r9d, 
%r9d -; CHECK-MINGW-NEXT: callq trailing_arg -; CHECK-MINGW-NEXT: addq $72, %rsp -; CHECK-MINGW-NEXT: retq - call i128 @trailing_arg(i64 0, i64 0, i64 0, i64 0, fp128 %x) - ret void -} diff --git a/llvm/test/CodeGen/X86/i128-abi.ll b/llvm/test/CodeGen/X86/i128-abi.ll deleted file mode 100644 index 264c546b4cae2..0000000000000 --- a/llvm/test/CodeGen/X86/i128-abi.ll +++ /dev/null @@ -1,97 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_sp --version 2 -; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s - -define i128 @in_reg(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i128 %a4) { -; CHECK-LABEL: in_reg: -; CHECK: # %bb.0: -; CHECK-NEXT: movq %r9, %rdx -; CHECK-NEXT: movq %r8, %rax -; CHECK-NEXT: retq - ret i128 %a4 -} - -define i128 @on_stack(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i128 %a5) { -; CHECK-LABEL: on_stack: -; CHECK: # %bb.0: -; CHECK-NEXT: movq 8(%rsp), %rax -; CHECK-NEXT: movq 16(%rsp), %rdx -; CHECK-NEXT: retq - ret i128 %a5 -} - -define i128 @on_stack2(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i128 %a5, i128 %a6) { -; CHECK-LABEL: on_stack2: -; CHECK: # %bb.0: -; CHECK-NEXT: movq 24(%rsp), %rax -; CHECK-NEXT: movq 32(%rsp), %rdx -; CHECK-NEXT: retq - ret i128 %a6 -} - -define i64 @trailing_arg_on_stack(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i128 %a5, i64 %a6) { -; CHECK-LABEL: trailing_arg_on_stack: -; CHECK: # %bb.0: -; CHECK-NEXT: movq %r9, %rax -; CHECK-NEXT: retq - ret i64 %a6 -} - -define void @call_in_reg(i128 %x) nounwind { -; CHECK-LABEL: call_in_reg: -; CHECK: # %bb.0: -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: movq %rsi, %r9 -; CHECK-NEXT: movq %rdi, %r8 -; CHECK-NEXT: movl $1, %esi -; CHECK-NEXT: movl $2, %edx -; CHECK-NEXT: movl $3, %ecx -; CHECK-NEXT: xorl %edi, %edi -; CHECK-NEXT: callq in_reg@PLT -; CHECK-NEXT: popq %rax -; CHECK-NEXT: retq - call i128 @in_reg(i64 0, i64 1, i64 2, i64 3, i128 %x) - ret void -} - -define void @call_on_stack(i128 %x) 
nounwind { -; CHECK-LABEL: call_on_stack: -; CHECK: # %bb.0: -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: movq %rsi, %rax -; CHECK-NEXT: movq %rdi, %r9 -; CHECK-NEXT: movl $1, %esi -; CHECK-NEXT: movl $2, %edx -; CHECK-NEXT: movl $3, %ecx -; CHECK-NEXT: movl $4, %r8d -; CHECK-NEXT: xorl %edi, %edi -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: pushq %r9 -; CHECK-NEXT: callq on_stack@PLT -; CHECK-NEXT: addq $16, %rsp -; CHECK-NEXT: popq %rax -; CHECK-NEXT: retq - call i128 @on_stack(i64 0, i64 1, i64 2, i64 3, i64 4, i128 %x) - ret void -} - -define void @call_trailing_arg_on_stack(i128 %x, i64 %y) nounwind { -; CHECK-LABEL: call_trailing_arg_on_stack: -; CHECK: # %bb.0: -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: movq %rdx, %r9 -; CHECK-NEXT: movq %rsi, %rax -; CHECK-NEXT: movq %rdi, %r10 -; CHECK-NEXT: movl $1, %esi -; CHECK-NEXT: movl $2, %edx -; CHECK-NEXT: movl $3, %ecx -; CHECK-NEXT: movl $4, %r8d -; CHECK-NEXT: xorl %edi, %edi -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: pushq %r10 -; CHECK-NEXT: callq trailing_arg_on_stack@PLT -; CHECK-NEXT: addq $16, %rsp -; CHECK-NEXT: popq %rax -; CHECK-NEXT: retq - call i128 @trailing_arg_on_stack(i64 0, i64 1, i64 2, i64 3, i64 4, i128 %x, i64 %y) - ret void -} diff --git a/llvm/test/CodeGen/X86/i128-fp128-abi.ll b/llvm/test/CodeGen/X86/i128-fp128-abi.ll new file mode 100644 index 0000000000000..be8f7923b8f98 --- /dev/null +++ b/llvm/test/CodeGen/X86/i128-fp128-abi.ll @@ -0,0 +1,873 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py + +; Combined ABI tests for fp128 and i128 + +; RUN: sed 's/PrimTy/fp128/g' %s | sed 's/Prim0/0xL0/g' | llc -mtriple=x86_64-unknown-linux-gnu -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-X64-F128 +; RUN: sed 's/PrimTy/i128/g' %s | sed 's/Prim0/0/g' | llc -mtriple=x86_64-unknown-linux-gnu -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-X64-I128 +; RUN: sed 's/PrimTy/fp128/g' %s | sed 's/Prim0/0xL0/g' | llc -mtriple=x86_64-pc-windows-msvc 
-verify-machineinstrs | FileCheck %s --check-prefix=CHECK-MSVC64-F128 +; RUN: sed 's/PrimTy/i128/g' %s | sed 's/Prim0/0/g' | llc -mtriple=x86_64-pc-windows-msvc -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-MSVC64-I128 +; RUN: sed 's/PrimTy/fp128/g' %s | sed 's/Prim0/0xL0/g' | llc -mtriple=x86_64-pc-windows-gnu -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-MINGW-F128 +; RUN: sed 's/PrimTy/i128/g' %s | sed 's/Prim0/0/g' | llc -mtriple=x86_64-pc-windows-gnu -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-MINGW-I128 +; +; Use the same directive for i128 and fp128 on x86-32 since both are passed and returned on the stack. +; RUN: sed 's/PrimTy/fp128/g' %s | sed 's/Prim0/0xL0/g' | llc -mtriple=i686-unknown-linux-gnu -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-X86 +; RUN: sed 's/PrimTy/i128/g' %s | sed 's/Prim0/0/g' | llc -mtriple=i686-unknown-linux-gnu -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-X86 +; RUN: sed 's/PrimTy/fp128/g' %s | sed 's/Prim0/0xL0/g' | llc -mtriple=i686-pc-windows-msvc -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-MSVC32 +; RUN: sed 's/PrimTy/i128/g' %s | sed 's/Prim0/0/g' | llc -mtriple=i686-pc-windows-msvc -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-MSVC32 + +define PrimTy @return(ptr %p) nounwind { +; CHECK-X64-F128-LABEL: return: +; CHECK-X64-F128: # %bb.0: +; CHECK-X64-F128-NEXT: movaps (%rdi), %xmm0 +; CHECK-X64-F128-NEXT: retq +; +; CHECK-X64-I128-LABEL: return: +; CHECK-X64-I128: # %bb.0: +; CHECK-X64-I128-NEXT: movq (%rdi), %rax +; CHECK-X64-I128-NEXT: movq 8(%rdi), %rdx +; CHECK-X64-I128-NEXT: retq +; +; CHECK-MSVC64-F128-LABEL: return: +; CHECK-MSVC64-F128: # %bb.0: +; CHECK-MSVC64-F128-NEXT: movaps (%rcx), %xmm0 +; CHECK-MSVC64-F128-NEXT: retq +; +; CHECK-MSVC64-I128-LABEL: return: +; CHECK-MSVC64-I128: # %bb.0: +; CHECK-MSVC64-I128-NEXT: movq (%rcx), %rax +; CHECK-MSVC64-I128-NEXT: movq 8(%rcx), %rdx +; CHECK-MSVC64-I128-NEXT: retq +; +; 
CHECK-MINGW-F128-LABEL: return: +; CHECK-MINGW-F128: # %bb.0: +; CHECK-MINGW-F128-NEXT: movaps (%rcx), %xmm0 +; CHECK-MINGW-F128-NEXT: retq +; +; CHECK-MINGW-I128-LABEL: return: +; CHECK-MINGW-I128: # %bb.0: +; CHECK-MINGW-I128-NEXT: movq (%rcx), %rax +; CHECK-MINGW-I128-NEXT: movq 8(%rcx), %rdx +; CHECK-MINGW-I128-NEXT: retq +; +; CHECK-X86-LABEL: return: +; CHECK-X86: # %bb.0: +; CHECK-X86-NEXT: pushl %edi +; CHECK-X86-NEXT: pushl %esi +; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-X86-NEXT: movl (%ecx), %edx +; CHECK-X86-NEXT: movl 4(%ecx), %esi +; CHECK-X86-NEXT: movl 8(%ecx), %edi +; CHECK-X86-NEXT: movl 12(%ecx), %ecx +; CHECK-X86-NEXT: movl %ecx, 12(%eax) +; CHECK-X86-NEXT: movl %edi, 8(%eax) +; CHECK-X86-NEXT: movl %esi, 4(%eax) +; CHECK-X86-NEXT: movl %edx, (%eax) +; CHECK-X86-NEXT: popl %esi +; CHECK-X86-NEXT: popl %edi +; CHECK-X86-NEXT: retl $4 +; +; CHECK-MSVC32-LABEL: return: +; CHECK-MSVC32: # %bb.0: +; CHECK-MSVC32-NEXT: pushl %edi +; CHECK-MSVC32-NEXT: pushl %esi +; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-MSVC32-NEXT: movl (%ecx), %edx +; CHECK-MSVC32-NEXT: movl 4(%ecx), %esi +; CHECK-MSVC32-NEXT: movl 8(%ecx), %edi +; CHECK-MSVC32-NEXT: movl 12(%ecx), %ecx +; CHECK-MSVC32-NEXT: movl %ecx, 12(%eax) +; CHECK-MSVC32-NEXT: movl %edi, 8(%eax) +; CHECK-MSVC32-NEXT: movl %esi, 4(%eax) +; CHECK-MSVC32-NEXT: movl %edx, (%eax) +; CHECK-MSVC32-NEXT: popl %esi +; CHECK-MSVC32-NEXT: popl %edi +; CHECK-MSVC32-NEXT: retl + %r = load PrimTy, ptr %p, align 16 + ret PrimTy %r +} + +define PrimTy @first_arg(PrimTy %x) nounwind { +; CHECK-X64-F128-LABEL: first_arg: +; CHECK-X64-F128: # %bb.0: +; CHECK-X64-F128-NEXT: retq +; +; CHECK-X64-I128-LABEL: first_arg: +; CHECK-X64-I128: # %bb.0: +; CHECK-X64-I128-NEXT: movq %rsi, %rdx +; CHECK-X64-I128-NEXT: movq %rdi, %rax +; CHECK-X64-I128-NEXT: retq +; +; CHECK-MSVC64-F128-LABEL: first_arg: +; 
CHECK-MSVC64-F128: # %bb.0: +; CHECK-MSVC64-F128-NEXT: movaps (%rcx), %xmm0 +; CHECK-MSVC64-F128-NEXT: retq +; +; CHECK-MSVC64-I128-LABEL: first_arg: +; CHECK-MSVC64-I128: # %bb.0: +; CHECK-MSVC64-I128-NEXT: movq %rcx, %rax +; CHECK-MSVC64-I128-NEXT: retq +; +; CHECK-MINGW-F128-LABEL: first_arg: +; CHECK-MINGW-F128: # %bb.0: +; CHECK-MINGW-F128-NEXT: movaps (%rcx), %xmm0 +; CHECK-MINGW-F128-NEXT: retq +; +; CHECK-MINGW-I128-LABEL: first_arg: +; CHECK-MINGW-I128: # %bb.0: +; CHECK-MINGW-I128-NEXT: movq %rcx, %rax +; CHECK-MINGW-I128-NEXT: retq +; +; CHECK-X86-LABEL: first_arg: +; CHECK-X86: # %bb.0: +; CHECK-X86-NEXT: pushl %edi +; CHECK-X86-NEXT: pushl %esi +; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-X86-NEXT: movl %edi, 12(%eax) +; CHECK-X86-NEXT: movl %esi, 8(%eax) +; CHECK-X86-NEXT: movl %edx, 4(%eax) +; CHECK-X86-NEXT: movl %ecx, (%eax) +; CHECK-X86-NEXT: popl %esi +; CHECK-X86-NEXT: popl %edi +; CHECK-X86-NEXT: retl $4 +; +; CHECK-MSVC32-LABEL: first_arg: +; CHECK-MSVC32: # %bb.0: +; CHECK-MSVC32-NEXT: pushl %edi +; CHECK-MSVC32-NEXT: pushl %esi +; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-MSVC32-NEXT: movl %edi, 12(%eax) +; CHECK-MSVC32-NEXT: movl %esi, 8(%eax) +; CHECK-MSVC32-NEXT: movl %edx, 4(%eax) +; CHECK-MSVC32-NEXT: movl %ecx, (%eax) +; CHECK-MSVC32-NEXT: popl %esi +; CHECK-MSVC32-NEXT: popl %edi +; CHECK-MSVC32-NEXT: retl + ret PrimTy %x +} + +define PrimTy @leading_args(i64 %_0, i64 %_1, i64 %_2, i64 %_3, PrimTy %x) nounwind { +; CHECK-X64-F128-LABEL: leading_args: +; CHECK-X64-F128: # %bb.0: +; CHECK-X64-F128-NEXT: retq +; +; 
CHECK-X64-I128-LABEL: leading_args: +; CHECK-X64-I128: # %bb.0: +; CHECK-X64-I128-NEXT: movq %r9, %rdx +; CHECK-X64-I128-NEXT: movq %r8, %rax +; CHECK-X64-I128-NEXT: retq +; +; CHECK-MSVC64-F128-LABEL: leading_args: +; CHECK-MSVC64-F128: # %bb.0: +; CHECK-MSVC64-F128-NEXT: movq {{[0-9]+}}(%rsp), %rax +; CHECK-MSVC64-F128-NEXT: movaps (%rax), %xmm0 +; CHECK-MSVC64-F128-NEXT: retq +; +; CHECK-MSVC64-I128-LABEL: leading_args: +; CHECK-MSVC64-I128: # %bb.0: +; CHECK-MSVC64-I128-NEXT: movq {{[0-9]+}}(%rsp), %rax +; CHECK-MSVC64-I128-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; CHECK-MSVC64-I128-NEXT: retq +; +; CHECK-MINGW-F128-LABEL: leading_args: +; CHECK-MINGW-F128: # %bb.0: +; CHECK-MINGW-F128-NEXT: movq {{[0-9]+}}(%rsp), %rax +; CHECK-MINGW-F128-NEXT: movaps (%rax), %xmm0 +; CHECK-MINGW-F128-NEXT: retq +; +; CHECK-MINGW-I128-LABEL: leading_args: +; CHECK-MINGW-I128: # %bb.0: +; CHECK-MINGW-I128-NEXT: movq {{[0-9]+}}(%rsp), %rax +; CHECK-MINGW-I128-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; CHECK-MINGW-I128-NEXT: retq +; +; CHECK-X86-LABEL: leading_args: +; CHECK-X86: # %bb.0: +; CHECK-X86-NEXT: pushl %edi +; CHECK-X86-NEXT: pushl %esi +; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-X86-NEXT: movl %edi, 12(%eax) +; CHECK-X86-NEXT: movl %esi, 8(%eax) +; CHECK-X86-NEXT: movl %edx, 4(%eax) +; CHECK-X86-NEXT: movl %ecx, (%eax) +; CHECK-X86-NEXT: popl %esi +; CHECK-X86-NEXT: popl %edi +; CHECK-X86-NEXT: retl $4 +; +; CHECK-MSVC32-LABEL: leading_args: +; CHECK-MSVC32: # %bb.0: +; CHECK-MSVC32-NEXT: pushl %edi +; CHECK-MSVC32-NEXT: pushl %esi +; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %edi +; 
CHECK-MSVC32-NEXT: movl %edi, 12(%eax) +; CHECK-MSVC32-NEXT: movl %esi, 8(%eax) +; CHECK-MSVC32-NEXT: movl %edx, 4(%eax) +; CHECK-MSVC32-NEXT: movl %ecx, (%eax) +; CHECK-MSVC32-NEXT: popl %esi +; CHECK-MSVC32-NEXT: popl %edi +; CHECK-MSVC32-NEXT: retl + ret PrimTy %x +} + +define PrimTy @many_leading_args(i64 %_0, i64 %_1, i64 %_2, i64 %_3, i64 %_4, PrimTy %_5, PrimTy %x) nounwind { +; CHECK-X64-F128-LABEL: many_leading_args: +; CHECK-X64-F128: # %bb.0: +; CHECK-X64-F128-NEXT: movaps %xmm1, %xmm0 +; CHECK-X64-F128-NEXT: retq +; +; CHECK-X64-I128-LABEL: many_leading_args: +; CHECK-X64-I128: # %bb.0: +; CHECK-X64-I128-NEXT: movq {{[0-9]+}}(%rsp), %rax +; CHECK-X64-I128-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; CHECK-X64-I128-NEXT: retq +; +; CHECK-MSVC64-F128-LABEL: many_leading_args: +; CHECK-MSVC64-F128: # %bb.0: +; CHECK-MSVC64-F128-NEXT: movq {{[0-9]+}}(%rsp), %rax +; CHECK-MSVC64-F128-NEXT: movaps (%rax), %xmm0 +; CHECK-MSVC64-F128-NEXT: retq +; +; CHECK-MSVC64-I128-LABEL: many_leading_args: +; CHECK-MSVC64-I128: # %bb.0: +; CHECK-MSVC64-I128-NEXT: movq {{[0-9]+}}(%rsp), %rax +; CHECK-MSVC64-I128-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; CHECK-MSVC64-I128-NEXT: retq +; +; CHECK-MINGW-F128-LABEL: many_leading_args: +; CHECK-MINGW-F128: # %bb.0: +; CHECK-MINGW-F128-NEXT: movq {{[0-9]+}}(%rsp), %rax +; CHECK-MINGW-F128-NEXT: movaps (%rax), %xmm0 +; CHECK-MINGW-F128-NEXT: retq +; +; CHECK-MINGW-I128-LABEL: many_leading_args: +; CHECK-MINGW-I128: # %bb.0: +; CHECK-MINGW-I128-NEXT: movq {{[0-9]+}}(%rsp), %rax +; CHECK-MINGW-I128-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; CHECK-MINGW-I128-NEXT: retq +; +; CHECK-X86-LABEL: many_leading_args: +; CHECK-X86: # %bb.0: +; CHECK-X86-NEXT: pushl %edi +; CHECK-X86-NEXT: pushl %esi +; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-X86-NEXT: movl %edi, 
12(%eax) +; CHECK-X86-NEXT: movl %esi, 8(%eax) +; CHECK-X86-NEXT: movl %edx, 4(%eax) +; CHECK-X86-NEXT: movl %ecx, (%eax) +; CHECK-X86-NEXT: popl %esi +; CHECK-X86-NEXT: popl %edi +; CHECK-X86-NEXT: retl $4 +; +; CHECK-MSVC32-LABEL: many_leading_args: +; CHECK-MSVC32: # %bb.0: +; CHECK-MSVC32-NEXT: pushl %edi +; CHECK-MSVC32-NEXT: pushl %esi +; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-MSVC32-NEXT: movl %edi, 12(%eax) +; CHECK-MSVC32-NEXT: movl %esi, 8(%eax) +; CHECK-MSVC32-NEXT: movl %edx, 4(%eax) +; CHECK-MSVC32-NEXT: movl %ecx, (%eax) +; CHECK-MSVC32-NEXT: popl %esi +; CHECK-MSVC32-NEXT: popl %edi +; CHECK-MSVC32-NEXT: retl + ret PrimTy %x +} + +define PrimTy @trailing_arg(i64 %_0, i64 %_1, i64 %_2, i64 %_3, i64 %_4, PrimTy %x, i64 %_5) nounwind { +; CHECK-X64-F128-LABEL: trailing_arg: +; CHECK-X64-F128: # %bb.0: +; CHECK-X64-F128-NEXT: retq +; +; CHECK-X64-I128-LABEL: trailing_arg: +; CHECK-X64-I128: # %bb.0: +; CHECK-X64-I128-NEXT: movq {{[0-9]+}}(%rsp), %rax +; CHECK-X64-I128-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; CHECK-X64-I128-NEXT: retq +; +; CHECK-MSVC64-F128-LABEL: trailing_arg: +; CHECK-MSVC64-F128: # %bb.0: +; CHECK-MSVC64-F128-NEXT: movq {{[0-9]+}}(%rsp), %rax +; CHECK-MSVC64-F128-NEXT: movaps (%rax), %xmm0 +; CHECK-MSVC64-F128-NEXT: retq +; +; CHECK-MSVC64-I128-LABEL: trailing_arg: +; CHECK-MSVC64-I128: # %bb.0: +; CHECK-MSVC64-I128-NEXT: movq {{[0-9]+}}(%rsp), %rax +; CHECK-MSVC64-I128-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; CHECK-MSVC64-I128-NEXT: retq +; +; CHECK-MINGW-F128-LABEL: trailing_arg: +; CHECK-MINGW-F128: # %bb.0: +; CHECK-MINGW-F128-NEXT: movq {{[0-9]+}}(%rsp), %rax +; CHECK-MINGW-F128-NEXT: movaps (%rax), %xmm0 +; CHECK-MINGW-F128-NEXT: retq +; +; CHECK-MINGW-I128-LABEL: trailing_arg: +; CHECK-MINGW-I128: # %bb.0: +; 
CHECK-MINGW-I128-NEXT: movq {{[0-9]+}}(%rsp), %rax +; CHECK-MINGW-I128-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; CHECK-MINGW-I128-NEXT: retq +; +; CHECK-X86-LABEL: trailing_arg: +; CHECK-X86: # %bb.0: +; CHECK-X86-NEXT: pushl %edi +; CHECK-X86-NEXT: pushl %esi +; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-X86-NEXT: movl %edi, 12(%eax) +; CHECK-X86-NEXT: movl %esi, 8(%eax) +; CHECK-X86-NEXT: movl %edx, 4(%eax) +; CHECK-X86-NEXT: movl %ecx, (%eax) +; CHECK-X86-NEXT: popl %esi +; CHECK-X86-NEXT: popl %edi +; CHECK-X86-NEXT: retl $4 +; +; CHECK-MSVC32-LABEL: trailing_arg: +; CHECK-MSVC32: # %bb.0: +; CHECK-MSVC32-NEXT: pushl %edi +; CHECK-MSVC32-NEXT: pushl %esi +; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-MSVC32-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-MSVC32-NEXT: movl %edi, 12(%eax) +; CHECK-MSVC32-NEXT: movl %esi, 8(%eax) +; CHECK-MSVC32-NEXT: movl %edx, 4(%eax) +; CHECK-MSVC32-NEXT: movl %ecx, (%eax) +; CHECK-MSVC32-NEXT: popl %esi +; CHECK-MSVC32-NEXT: popl %edi +; CHECK-MSVC32-NEXT: retl + ret PrimTy %x +} + +define void @call_first_arg(PrimTy %x) nounwind { +; CHECK-X64-F128-LABEL: call_first_arg: +; CHECK-X64-F128: # %bb.0: +; CHECK-X64-F128-NEXT: pushq %rax +; CHECK-X64-F128-NEXT: callq first_arg@PLT +; CHECK-X64-F128-NEXT: popq %rax +; CHECK-X64-F128-NEXT: retq +; +; CHECK-X64-I128-LABEL: call_first_arg: +; CHECK-X64-I128: # %bb.0: +; CHECK-X64-I128-NEXT: pushq %rax +; CHECK-X64-I128-NEXT: callq first_arg@PLT +; CHECK-X64-I128-NEXT: popq %rax +; CHECK-X64-I128-NEXT: retq +; +; CHECK-MSVC64-F128-LABEL: call_first_arg: +; CHECK-MSVC64-F128: # %bb.0: +; CHECK-MSVC64-F128-NEXT: subq $56, %rsp +; 
CHECK-MSVC64-F128-NEXT: movaps (%rcx), %xmm0 +; CHECK-MSVC64-F128-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) +; CHECK-MSVC64-F128-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; CHECK-MSVC64-F128-NEXT: callq first_arg +; CHECK-MSVC64-F128-NEXT: addq $56, %rsp +; CHECK-MSVC64-F128-NEXT: retq +; +; CHECK-MSVC64-I128-LABEL: call_first_arg: +; CHECK-MSVC64-I128: # %bb.0: +; CHECK-MSVC64-I128-NEXT: subq $40, %rsp +; CHECK-MSVC64-I128-NEXT: callq first_arg +; CHECK-MSVC64-I128-NEXT: addq $40, %rsp +; CHECK-MSVC64-I128-NEXT: retq +; +; CHECK-MINGW-F128-LABEL: call_first_arg: +; CHECK-MINGW-F128: # %bb.0: +; CHECK-MINGW-F128-NEXT: subq $56, %rsp +; CHECK-MINGW-F128-NEXT: movaps (%rcx), %xmm0 +; CHECK-MINGW-F128-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) +; CHECK-MINGW-F128-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; CHECK-MINGW-F128-NEXT: callq first_arg +; CHECK-MINGW-F128-NEXT: addq $56, %rsp +; CHECK-MINGW-F128-NEXT: retq +; +; CHECK-MINGW-I128-LABEL: call_first_arg: +; CHECK-MINGW-I128: # %bb.0: +; CHECK-MINGW-I128-NEXT: subq $40, %rsp +; CHECK-MINGW-I128-NEXT: callq first_arg +; CHECK-MINGW-I128-NEXT: addq $40, %rsp +; CHECK-MINGW-I128-NEXT: retq +; +; CHECK-X86-LABEL: call_first_arg: +; CHECK-X86: # %bb.0: +; CHECK-X86-NEXT: subl $40, %esp +; CHECK-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK-X86-NEXT: pushl %eax +; CHECK-X86-NEXT: calll first_arg@PLT +; CHECK-X86-NEXT: addl $56, %esp +; CHECK-X86-NEXT: retl +; +; CHECK-MSVC32-LABEL: call_first_arg: +; CHECK-MSVC32: # %bb.0: +; CHECK-MSVC32-NEXT: pushl %ebp +; CHECK-MSVC32-NEXT: movl %esp, %ebp +; CHECK-MSVC32-NEXT: andl $-16, %esp +; CHECK-MSVC32-NEXT: subl $32, %esp +; CHECK-MSVC32-NEXT: movl %esp, %eax +; CHECK-MSVC32-NEXT: pushl 20(%ebp) +; CHECK-MSVC32-NEXT: pushl 16(%ebp) +; CHECK-MSVC32-NEXT: pushl 12(%ebp) +; CHECK-MSVC32-NEXT: pushl 8(%ebp) +; CHECK-MSVC32-NEXT: pushl 
%eax +; CHECK-MSVC32-NEXT: calll _first_arg +; CHECK-MSVC32-NEXT: addl $20, %esp +; CHECK-MSVC32-NEXT: movl %ebp, %esp +; CHECK-MSVC32-NEXT: popl %ebp +; CHECK-MSVC32-NEXT: retl + call PrimTy @first_arg(PrimTy %x) + ret void +} + +define void @call_leading_args(PrimTy %x) nounwind { +; CHECK-X64-F128-LABEL: call_leading_args: +; CHECK-X64-F128: # %bb.0: +; CHECK-X64-F128-NEXT: pushq %rax +; CHECK-X64-F128-NEXT: xorl %edi, %edi +; CHECK-X64-F128-NEXT: xorl %esi, %esi +; CHECK-X64-F128-NEXT: xorl %edx, %edx +; CHECK-X64-F128-NEXT: xorl %ecx, %ecx +; CHECK-X64-F128-NEXT: callq leading_args@PLT +; CHECK-X64-F128-NEXT: popq %rax +; CHECK-X64-F128-NEXT: retq +; +; CHECK-X64-I128-LABEL: call_leading_args: +; CHECK-X64-I128: # %bb.0: +; CHECK-X64-I128-NEXT: pushq %rax +; CHECK-X64-I128-NEXT: movq %rsi, %r9 +; CHECK-X64-I128-NEXT: movq %rdi, %r8 +; CHECK-X64-I128-NEXT: xorl %edi, %edi +; CHECK-X64-I128-NEXT: xorl %esi, %esi +; CHECK-X64-I128-NEXT: xorl %edx, %edx +; CHECK-X64-I128-NEXT: xorl %ecx, %ecx +; CHECK-X64-I128-NEXT: callq leading_args@PLT +; CHECK-X64-I128-NEXT: popq %rax +; CHECK-X64-I128-NEXT: retq +; +; CHECK-MSVC64-F128-LABEL: call_leading_args: +; CHECK-MSVC64-F128: # %bb.0: +; CHECK-MSVC64-F128-NEXT: subq $72, %rsp +; CHECK-MSVC64-F128-NEXT: movaps (%rcx), %xmm0 +; CHECK-MSVC64-F128-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) +; CHECK-MSVC64-F128-NEXT: leaq {{[0-9]+}}(%rsp), %rax +; CHECK-MSVC64-F128-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; CHECK-MSVC64-F128-NEXT: xorl %ecx, %ecx +; CHECK-MSVC64-F128-NEXT: xorl %edx, %edx +; CHECK-MSVC64-F128-NEXT: xorl %r8d, %r8d +; CHECK-MSVC64-F128-NEXT: xorl %r9d, %r9d +; CHECK-MSVC64-F128-NEXT: callq leading_args +; CHECK-MSVC64-F128-NEXT: addq $72, %rsp +; CHECK-MSVC64-F128-NEXT: retq +; +; CHECK-MSVC64-I128-LABEL: call_leading_args: +; CHECK-MSVC64-I128: # %bb.0: +; CHECK-MSVC64-I128-NEXT: subq $56, %rsp +; CHECK-MSVC64-I128-NEXT: movq %rdx, {{[0-9]+}}(%rsp) +; CHECK-MSVC64-I128-NEXT: movq %rcx, {{[0-9]+}}(%rsp) +; 
CHECK-MSVC64-I128-NEXT: xorl %ecx, %ecx +; CHECK-MSVC64-I128-NEXT: xorl %edx, %edx +; CHECK-MSVC64-I128-NEXT: xorl %r8d, %r8d +; CHECK-MSVC64-I128-NEXT: xorl %r9d, %r9d +; CHECK-MSVC64-I128-NEXT: callq leading_args +; CHECK-MSVC64-I128-NEXT: addq $56, %rsp +; CHECK-MSVC64-I128-NEXT: retq +; +; CHECK-MINGW-F128-LABEL: call_leading_args: +; CHECK-MINGW-F128: # %bb.0: +; CHECK-MINGW-F128-NEXT: subq $72, %rsp +; CHECK-MINGW-F128-NEXT: movaps (%rcx), %xmm0 +; CHECK-MINGW-F128-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) +; CHECK-MINGW-F128-NEXT: leaq {{[0-9]+}}(%rsp), %rax +; CHECK-MINGW-F128-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; CHECK-MINGW-F128-NEXT: xorl %ecx, %ecx +; CHECK-MINGW-F128-NEXT: xorl %edx, %edx +; CHECK-MINGW-F128-NEXT: xorl %r8d, %r8d +; CHECK-MINGW-F128-NEXT: xorl %r9d, %r9d +; CHECK-MINGW-F128-NEXT: callq leading_args +; CHECK-MINGW-F128-NEXT: addq $72, %rsp +; CHECK-MINGW-F128-NEXT: retq +; +; CHECK-MINGW-I128-LABEL: call_leading_args: +; CHECK-MINGW-I128: # %bb.0: +; CHECK-MINGW-I128-NEXT: subq $56, %rsp +; CHECK-MINGW-I128-NEXT: movq %rdx, {{[0-9]+}}(%rsp) +; CHECK-MINGW-I128-NEXT: movq %rcx, {{[0-9]+}}(%rsp) +; CHECK-MINGW-I128-NEXT: xorl %ecx, %ecx +; CHECK-MINGW-I128-NEXT: xorl %edx, %edx +; CHECK-MINGW-I128-NEXT: xorl %r8d, %r8d +; CHECK-MINGW-I128-NEXT: xorl %r9d, %r9d +; CHECK-MINGW-I128-NEXT: callq leading_args +; CHECK-MINGW-I128-NEXT: addq $56, %rsp +; CHECK-MINGW-I128-NEXT: retq +; +; CHECK-X86-LABEL: call_leading_args: +; CHECK-X86: # %bb.0: +; CHECK-X86-NEXT: subl $40, %esp +; CHECK-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK-X86-NEXT: pushl $0 +; CHECK-X86-NEXT: pushl $0 +; CHECK-X86-NEXT: pushl $0 +; CHECK-X86-NEXT: pushl $0 +; CHECK-X86-NEXT: pushl $0 +; CHECK-X86-NEXT: pushl $0 +; CHECK-X86-NEXT: pushl $0 +; CHECK-X86-NEXT: pushl $0 +; CHECK-X86-NEXT: pushl %eax +; 
CHECK-X86-NEXT: calll leading_args@PLT +; CHECK-X86-NEXT: addl $88, %esp +; CHECK-X86-NEXT: retl +; +; CHECK-MSVC32-LABEL: call_leading_args: +; CHECK-MSVC32: # %bb.0: +; CHECK-MSVC32-NEXT: pushl %ebp +; CHECK-MSVC32-NEXT: movl %esp, %ebp +; CHECK-MSVC32-NEXT: andl $-16, %esp +; CHECK-MSVC32-NEXT: subl $32, %esp +; CHECK-MSVC32-NEXT: movl %esp, %eax +; CHECK-MSVC32-NEXT: pushl 20(%ebp) +; CHECK-MSVC32-NEXT: pushl 16(%ebp) +; CHECK-MSVC32-NEXT: pushl 12(%ebp) +; CHECK-MSVC32-NEXT: pushl 8(%ebp) +; CHECK-MSVC32-NEXT: pushl $0 +; CHECK-MSVC32-NEXT: pushl $0 +; CHECK-MSVC32-NEXT: pushl $0 +; CHECK-MSVC32-NEXT: pushl $0 +; CHECK-MSVC32-NEXT: pushl $0 +; CHECK-MSVC32-NEXT: pushl $0 +; CHECK-MSVC32-NEXT: pushl $0 +; CHECK-MSVC32-NEXT: pushl $0 +; CHECK-MSVC32-NEXT: pushl %eax +; CHECK-MSVC32-NEXT: calll _leading_args +; CHECK-MSVC32-NEXT: addl $52, %esp +; CHECK-MSVC32-NEXT: movl %ebp, %esp +; CHECK-MSVC32-NEXT: popl %ebp +; CHECK-MSVC32-NEXT: retl + call PrimTy @leading_args(i64 0, i64 0, i64 0, i64 0, PrimTy %x) + ret void +} + +define void @call_many_leading_args(PrimTy %x) nounwind { +; CHECK-X64-F128-LABEL: call_many_leading_args: +; CHECK-X64-F128: # %bb.0: +; CHECK-X64-F128-NEXT: pushq %rax +; CHECK-X64-F128-NEXT: movaps %xmm0, %xmm1 +; CHECK-X64-F128-NEXT: xorps %xmm0, %xmm0 +; CHECK-X64-F128-NEXT: xorl %edi, %edi +; CHECK-X64-F128-NEXT: xorl %esi, %esi +; CHECK-X64-F128-NEXT: xorl %edx, %edx +; CHECK-X64-F128-NEXT: xorl %ecx, %ecx +; CHECK-X64-F128-NEXT: callq many_leading_args@PLT +; CHECK-X64-F128-NEXT: popq %rax +; CHECK-X64-F128-NEXT: retq +; +; CHECK-X64-I128-LABEL: call_many_leading_args: +; CHECK-X64-I128: # %bb.0: +; CHECK-X64-I128-NEXT: pushq %rax +; CHECK-X64-I128-NEXT: movq %rsi, %rax +; CHECK-X64-I128-NEXT: movq %rdi, %r10 +; CHECK-X64-I128-NEXT: xorl %edi, %edi +; CHECK-X64-I128-NEXT: xorl %esi, %esi +; CHECK-X64-I128-NEXT: xorl %edx, %edx +; CHECK-X64-I128-NEXT: xorl %ecx, %ecx +; CHECK-X64-I128-NEXT: xorl %r8d, %r8d +; CHECK-X64-I128-NEXT: xorl 
%r9d, %r9d +; CHECK-X64-I128-NEXT: pushq %rax +; CHECK-X64-I128-NEXT: pushq %r10 +; CHECK-X64-I128-NEXT: callq many_leading_args@PLT +; CHECK-X64-I128-NEXT: addq $16, %rsp +; CHECK-X64-I128-NEXT: popq %rax +; CHECK-X64-I128-NEXT: retq +; +; CHECK-MSVC64-F128-LABEL: call_many_leading_args: +; CHECK-MSVC64-F128: # %bb.0: +; CHECK-MSVC64-F128-NEXT: subq $88, %rsp +; CHECK-MSVC64-F128-NEXT: movaps (%rcx), %xmm0 +; CHECK-MSVC64-F128-NEXT: xorps %xmm1, %xmm1 +; CHECK-MSVC64-F128-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) +; CHECK-MSVC64-F128-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) +; CHECK-MSVC64-F128-NEXT: leaq {{[0-9]+}}(%rsp), %rax +; CHECK-MSVC64-F128-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; CHECK-MSVC64-F128-NEXT: leaq {{[0-9]+}}(%rsp), %rax +; CHECK-MSVC64-F128-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; CHECK-MSVC64-F128-NEXT: xorl %ecx, %ecx +; CHECK-MSVC64-F128-NEXT: xorl %edx, %edx +; CHECK-MSVC64-F128-NEXT: xorl %r8d, %r8d +; CHECK-MSVC64-F128-NEXT: xorl %r9d, %r9d +; CHECK-MSVC64-F128-NEXT: callq many_leading_args +; CHECK-MSVC64-F128-NEXT: addq $88, %rsp +; CHECK-MSVC64-F128-NEXT: retq +; +; CHECK-MSVC64-I128-LABEL: call_many_leading_args: +; CHECK-MSVC64-I128: # %bb.0: +; CHECK-MSVC64-I128-NEXT: subq $72, %rsp +; CHECK-MSVC64-I128-NEXT: movq %rdx, {{[0-9]+}}(%rsp) +; CHECK-MSVC64-I128-NEXT: movq %rcx, {{[0-9]+}}(%rsp) +; CHECK-MSVC64-I128-NEXT: movq $0, {{[0-9]+}}(%rsp) +; CHECK-MSVC64-I128-NEXT: movq $0, {{[0-9]+}}(%rsp) +; CHECK-MSVC64-I128-NEXT: xorl %ecx, %ecx +; CHECK-MSVC64-I128-NEXT: xorl %edx, %edx +; CHECK-MSVC64-I128-NEXT: xorl %r8d, %r8d +; CHECK-MSVC64-I128-NEXT: xorl %r9d, %r9d +; CHECK-MSVC64-I128-NEXT: callq many_leading_args +; CHECK-MSVC64-I128-NEXT: addq $72, %rsp +; CHECK-MSVC64-I128-NEXT: retq +; +; CHECK-MINGW-F128-LABEL: call_many_leading_args: +; CHECK-MINGW-F128: # %bb.0: +; CHECK-MINGW-F128-NEXT: subq $88, %rsp +; CHECK-MINGW-F128-NEXT: movaps (%rcx), %xmm0 +; CHECK-MINGW-F128-NEXT: xorps %xmm1, %xmm1 +; CHECK-MINGW-F128-NEXT: movaps %xmm1, 
{{[0-9]+}}(%rsp) +; CHECK-MINGW-F128-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) +; CHECK-MINGW-F128-NEXT: leaq {{[0-9]+}}(%rsp), %rax +; CHECK-MINGW-F128-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; CHECK-MINGW-F128-NEXT: leaq {{[0-9]+}}(%rsp), %rax +; CHECK-MINGW-F128-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; CHECK-MINGW-F128-NEXT: xorl %ecx, %ecx +; CHECK-MINGW-F128-NEXT: xorl %edx, %edx +; CHECK-MINGW-F128-NEXT: xorl %r8d, %r8d +; CHECK-MINGW-F128-NEXT: xorl %r9d, %r9d +; CHECK-MINGW-F128-NEXT: callq many_leading_args +; CHECK-MINGW-F128-NEXT: addq $88, %rsp +; CHECK-MINGW-F128-NEXT: retq +; +; CHECK-MINGW-I128-LABEL: call_many_leading_args: +; CHECK-MINGW-I128: # %bb.0: +; CHECK-MINGW-I128-NEXT: subq $72, %rsp +; CHECK-MINGW-I128-NEXT: movq %rdx, {{[0-9]+}}(%rsp) +; CHECK-MINGW-I128-NEXT: movq %rcx, {{[0-9]+}}(%rsp) +; CHECK-MINGW-I128-NEXT: movq $0, {{[0-9]+}}(%rsp) +; CHECK-MINGW-I128-NEXT: movq $0, {{[0-9]+}}(%rsp) +; CHECK-MINGW-I128-NEXT: xorl %ecx, %ecx +; CHECK-MINGW-I128-NEXT: xorl %edx, %edx +; CHECK-MINGW-I128-NEXT: xorl %r8d, %r8d +; CHECK-MINGW-I128-NEXT: xorl %r9d, %r9d +; CHECK-MINGW-I128-NEXT: callq many_leading_args +; CHECK-MINGW-I128-NEXT: addq $72, %rsp +; CHECK-MINGW-I128-NEXT: retq +; +; CHECK-X86-LABEL: call_many_leading_args: +; CHECK-X86: # %bb.0: +; CHECK-X86-NEXT: subl $40, %esp +; CHECK-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK-X86-NEXT: pushl $0 +; CHECK-X86-NEXT: pushl $0 +; CHECK-X86-NEXT: pushl $0 +; CHECK-X86-NEXT: pushl $0 +; CHECK-X86-NEXT: pushl $0 +; CHECK-X86-NEXT: pushl $0 +; CHECK-X86-NEXT: pushl $0 +; CHECK-X86-NEXT: pushl $0 +; CHECK-X86-NEXT: pushl $0 +; CHECK-X86-NEXT: pushl $0 +; CHECK-X86-NEXT: pushl $0 +; CHECK-X86-NEXT: pushl $0 +; CHECK-X86-NEXT: pushl %eax +; CHECK-X86-NEXT: calll many_leading_args@PLT +; CHECK-X86-NEXT: addl $104, %esp +; CHECK-X86-NEXT: retl 
+; +; CHECK-MSVC32-LABEL: call_many_leading_args: +; CHECK-MSVC32: # %bb.0: +; CHECK-MSVC32-NEXT: pushl %ebp +; CHECK-MSVC32-NEXT: movl %esp, %ebp +; CHECK-MSVC32-NEXT: andl $-16, %esp +; CHECK-MSVC32-NEXT: subl $32, %esp +; CHECK-MSVC32-NEXT: movl %esp, %eax +; CHECK-MSVC32-NEXT: pushl 20(%ebp) +; CHECK-MSVC32-NEXT: pushl 16(%ebp) +; CHECK-MSVC32-NEXT: pushl 12(%ebp) +; CHECK-MSVC32-NEXT: pushl 8(%ebp) +; CHECK-MSVC32-NEXT: pushl $0 +; CHECK-MSVC32-NEXT: pushl $0 +; CHECK-MSVC32-NEXT: pushl $0 +; CHECK-MSVC32-NEXT: pushl $0 +; CHECK-MSVC32-NEXT: pushl $0 +; CHECK-MSVC32-NEXT: pushl $0 +; CHECK-MSVC32-NEXT: pushl $0 +; CHECK-MSVC32-NEXT: pushl $0 +; CHECK-MSVC32-NEXT: pushl $0 +; CHECK-MSVC32-NEXT: pushl $0 +; CHECK-MSVC32-NEXT: pushl $0 +; CHECK-MSVC32-NEXT: pushl $0 +; CHECK-MSVC32-NEXT: pushl %eax +; CHECK-MSVC32-NEXT: calll _many_leading_args +; CHECK-MSVC32-NEXT: addl $68, %esp +; CHECK-MSVC32-NEXT: movl %ebp, %esp +; CHECK-MSVC32-NEXT: popl %ebp +; CHECK-MSVC32-NEXT: retl + call PrimTy @many_leading_args(i64 0, i64 0, i64 0, i64 0, PrimTy Prim0, PrimTy %x) + ret void +} + +define void @call_trailing_arg(PrimTy %x) nounwind { +; CHECK-X64-F128-LABEL: call_trailing_arg: +; CHECK-X64-F128: # %bb.0: +; CHECK-X64-F128-NEXT: pushq %rax +; CHECK-X64-F128-NEXT: xorl %edi, %edi +; CHECK-X64-F128-NEXT: xorl %esi, %esi +; CHECK-X64-F128-NEXT: xorl %edx, %edx +; CHECK-X64-F128-NEXT: xorl %ecx, %ecx +; CHECK-X64-F128-NEXT: callq trailing_arg@PLT +; CHECK-X64-F128-NEXT: popq %rax +; CHECK-X64-F128-NEXT: retq +; +; CHECK-X64-I128-LABEL: call_trailing_arg: +; CHECK-X64-I128: # %bb.0: +; CHECK-X64-I128-NEXT: pushq %rax +; CHECK-X64-I128-NEXT: movq %rsi, %r9 +; CHECK-X64-I128-NEXT: movq %rdi, %r8 +; CHECK-X64-I128-NEXT: xorl %edi, %edi +; CHECK-X64-I128-NEXT: xorl %esi, %esi +; CHECK-X64-I128-NEXT: xorl %edx, %edx +; CHECK-X64-I128-NEXT: xorl %ecx, %ecx +; CHECK-X64-I128-NEXT: callq trailing_arg@PLT +; CHECK-X64-I128-NEXT: popq %rax +; CHECK-X64-I128-NEXT: retq +; +; 
CHECK-MSVC64-F128-LABEL: call_trailing_arg: +; CHECK-MSVC64-F128: # %bb.0: +; CHECK-MSVC64-F128-NEXT: subq $72, %rsp +; CHECK-MSVC64-F128-NEXT: movaps (%rcx), %xmm0 +; CHECK-MSVC64-F128-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) +; CHECK-MSVC64-F128-NEXT: leaq {{[0-9]+}}(%rsp), %rax +; CHECK-MSVC64-F128-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; CHECK-MSVC64-F128-NEXT: xorl %ecx, %ecx +; CHECK-MSVC64-F128-NEXT: xorl %edx, %edx +; CHECK-MSVC64-F128-NEXT: xorl %r8d, %r8d +; CHECK-MSVC64-F128-NEXT: xorl %r9d, %r9d +; CHECK-MSVC64-F128-NEXT: callq trailing_arg +; CHECK-MSVC64-F128-NEXT: addq $72, %rsp +; CHECK-MSVC64-F128-NEXT: retq +; +; CHECK-MSVC64-I128-LABEL: call_trailing_arg: +; CHECK-MSVC64-I128: # %bb.0: +; CHECK-MSVC64-I128-NEXT: subq $56, %rsp +; CHECK-MSVC64-I128-NEXT: movq %rdx, {{[0-9]+}}(%rsp) +; CHECK-MSVC64-I128-NEXT: movq %rcx, {{[0-9]+}}(%rsp) +; CHECK-MSVC64-I128-NEXT: xorl %ecx, %ecx +; CHECK-MSVC64-I128-NEXT: xorl %edx, %edx +; CHECK-MSVC64-I128-NEXT: xorl %r8d, %r8d +; CHECK-MSVC64-I128-NEXT: xorl %r9d, %r9d +; CHECK-MSVC64-I128-NEXT: callq trailing_arg +; CHECK-MSVC64-I128-NEXT: addq $56, %rsp +; CHECK-MSVC64-I128-NEXT: retq +; +; CHECK-MINGW-F128-LABEL: call_trailing_arg: +; CHECK-MINGW-F128: # %bb.0: +; CHECK-MINGW-F128-NEXT: subq $72, %rsp +; CHECK-MINGW-F128-NEXT: movaps (%rcx), %xmm0 +; CHECK-MINGW-F128-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) +; CHECK-MINGW-F128-NEXT: leaq {{[0-9]+}}(%rsp), %rax +; CHECK-MINGW-F128-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; CHECK-MINGW-F128-NEXT: xorl %ecx, %ecx +; CHECK-MINGW-F128-NEXT: xorl %edx, %edx +; CHECK-MINGW-F128-NEXT: xorl %r8d, %r8d +; CHECK-MINGW-F128-NEXT: xorl %r9d, %r9d +; CHECK-MINGW-F128-NEXT: callq trailing_arg +; CHECK-MINGW-F128-NEXT: addq $72, %rsp +; CHECK-MINGW-F128-NEXT: retq +; +; CHECK-MINGW-I128-LABEL: call_trailing_arg: +; CHECK-MINGW-I128: # %bb.0: +; CHECK-MINGW-I128-NEXT: subq $56, %rsp +; CHECK-MINGW-I128-NEXT: movq %rdx, {{[0-9]+}}(%rsp) +; CHECK-MINGW-I128-NEXT: movq %rcx, {{[0-9]+}}(%rsp) +; 
CHECK-MINGW-I128-NEXT: xorl %ecx, %ecx +; CHECK-MINGW-I128-NEXT: xorl %edx, %edx +; CHECK-MINGW-I128-NEXT: xorl %r8d, %r8d +; CHECK-MINGW-I128-NEXT: xorl %r9d, %r9d +; CHECK-MINGW-I128-NEXT: callq trailing_arg +; CHECK-MINGW-I128-NEXT: addq $56, %rsp +; CHECK-MINGW-I128-NEXT: retq +; +; CHECK-X86-LABEL: call_trailing_arg: +; CHECK-X86: # %bb.0: +; CHECK-X86-NEXT: subl $40, %esp +; CHECK-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK-X86-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK-X86-NEXT: pushl $0 +; CHECK-X86-NEXT: pushl $0 +; CHECK-X86-NEXT: pushl $0 +; CHECK-X86-NEXT: pushl $0 +; CHECK-X86-NEXT: pushl $0 +; CHECK-X86-NEXT: pushl $0 +; CHECK-X86-NEXT: pushl $0 +; CHECK-X86-NEXT: pushl $0 +; CHECK-X86-NEXT: pushl %eax +; CHECK-X86-NEXT: calll trailing_arg@PLT +; CHECK-X86-NEXT: addl $88, %esp +; CHECK-X86-NEXT: retl +; +; CHECK-MSVC32-LABEL: call_trailing_arg: +; CHECK-MSVC32: # %bb.0: +; CHECK-MSVC32-NEXT: pushl %ebp +; CHECK-MSVC32-NEXT: movl %esp, %ebp +; CHECK-MSVC32-NEXT: andl $-16, %esp +; CHECK-MSVC32-NEXT: subl $32, %esp +; CHECK-MSVC32-NEXT: movl %esp, %eax +; CHECK-MSVC32-NEXT: pushl 20(%ebp) +; CHECK-MSVC32-NEXT: pushl 16(%ebp) +; CHECK-MSVC32-NEXT: pushl 12(%ebp) +; CHECK-MSVC32-NEXT: pushl 8(%ebp) +; CHECK-MSVC32-NEXT: pushl $0 +; CHECK-MSVC32-NEXT: pushl $0 +; CHECK-MSVC32-NEXT: pushl $0 +; CHECK-MSVC32-NEXT: pushl $0 +; CHECK-MSVC32-NEXT: pushl $0 +; CHECK-MSVC32-NEXT: pushl $0 +; CHECK-MSVC32-NEXT: pushl $0 +; CHECK-MSVC32-NEXT: pushl $0 +; CHECK-MSVC32-NEXT: pushl %eax +; CHECK-MSVC32-NEXT: calll _trailing_arg +; CHECK-MSVC32-NEXT: addl $52, %esp +; CHECK-MSVC32-NEXT: movl %ebp, %esp +; CHECK-MSVC32-NEXT: popl %ebp +; CHECK-MSVC32-NEXT: retl + call PrimTy @trailing_arg(i64 0, i64 0, i64 0, i64 0, PrimTy %x) + ret void +} diff --git a/llvm/test/CodeGen/X86/inline-asm-fpstack.ll b/llvm/test/CodeGen/X86/inline-asm-fpstack.ll index 
2d8ad6d645bc0..af188ef3a2cf8 100644 --- a/llvm/test/CodeGen/X86/inline-asm-fpstack.ll +++ b/llvm/test/CodeGen/X86/inline-asm-fpstack.ll @@ -29,12 +29,10 @@ define double @test2() nounwind { define void @test3(x86_fp80 %X) nounwind { ; CHECK-LABEL: test3: ; CHECK: ## %bb.0: -; CHECK-NEXT: subl $12, %esp ; CHECK-NEXT: fldt {{[0-9]+}}(%esp) ; CHECK-NEXT: ## InlineAsm Start ; CHECK-NEXT: frob ; CHECK-NEXT: ## InlineAsm End -; CHECK-NEXT: addl $12, %esp ; CHECK-NEXT: retl call void asm sideeffect "frob ", "{st(0)},~{st},~{dirflag},~{fpsr},~{flags}"( x86_fp80 %X) ret void @@ -248,14 +246,12 @@ entry: define void @fist1(x86_fp80 %x, ptr %p) nounwind ssp { ; CHECK-LABEL: fist1: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: subl $12, %esp ; CHECK-NEXT: fldt {{[0-9]+}}(%esp) ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: ## InlineAsm Start ; CHECK-NEXT: fistl (%eax) ; CHECK-NEXT: ## InlineAsm End ; CHECK-NEXT: fstp %st(0) -; CHECK-NEXT: addl $12, %esp ; CHECK-NEXT: retl entry: tail call void asm sideeffect "fistl $1", "{st},*m,~{memory},~{dirflag},~{fpsr},~{flags}"(x86_fp80 %x, ptr elementtype(i32) %p) nounwind @@ -273,13 +269,11 @@ entry: define x86_fp80 @fist2(x86_fp80 %x, ptr %p) nounwind ssp { ; CHECK-LABEL: fist2: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: subl $12, %esp ; CHECK-NEXT: fldt {{[0-9]+}}(%esp) ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: ## InlineAsm Start ; CHECK-NEXT: fistl (%eax) ; CHECK-NEXT: ## InlineAsm End -; CHECK-NEXT: addl $12, %esp ; CHECK-NEXT: retl entry: %0 = tail call x86_fp80 asm "fistl $2", "=&{st},0,*m,~{memory},~{dirflag},~{fpsr},~{flags}"(x86_fp80 %x, ptr elementtype(i32) %p) nounwind @@ -294,7 +288,6 @@ entry: define void @fucomp1(x86_fp80 %x, x86_fp80 %y) nounwind ssp { ; CHECK-LABEL: fucomp1: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: subl $12, %esp ; CHECK-NEXT: fldt {{[0-9]+}}(%esp) ; CHECK-NEXT: fldt {{[0-9]+}}(%esp) ; CHECK-NEXT: fxch %st(1) @@ -302,7 +295,6 @@ define void @fucomp1(x86_fp80 %x, x86_fp80 %y) 
nounwind ssp { ; CHECK-NEXT: fucomp %st(1) ; CHECK-NEXT: ## InlineAsm End ; CHECK-NEXT: fstp %st(0) -; CHECK-NEXT: addl $12, %esp ; CHECK-NEXT: retl entry: tail call void asm sideeffect "fucomp $1", "{st},f,~{st},~{dirflag},~{fpsr},~{flags}"(x86_fp80 %x, x86_fp80 %y) nounwind @@ -322,7 +314,6 @@ entry: define void @fucomp2(x86_fp80 %x, x86_fp80 %y) nounwind ssp { ; CHECK-LABEL: fucomp2: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: subl $12, %esp ; CHECK-NEXT: fldt {{[0-9]+}}(%esp) ; CHECK-NEXT: fldt {{[0-9]+}}(%esp) ; CHECK-NEXT: fxch %st(1) @@ -330,7 +321,6 @@ define void @fucomp2(x86_fp80 %x, x86_fp80 %y) nounwind ssp { ; CHECK-NEXT: fucomp %st(1) ; CHECK-NEXT: ## InlineAsm End ; CHECK-NEXT: fstp %st(0) -; CHECK-NEXT: addl $12, %esp ; CHECK-NEXT: retl entry: tail call void asm sideeffect "fucomp $1", "{st},{st(1)},~{st},~{dirflag},~{fpsr},~{flags}"(x86_fp80 %x, x86_fp80 %y) nounwind @@ -340,14 +330,12 @@ entry: define void @fucomp3(x86_fp80 %x, x86_fp80 %y) nounwind ssp { ; CHECK-LABEL: fucomp3: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: subl $12, %esp ; CHECK-NEXT: fldt {{[0-9]+}}(%esp) ; CHECK-NEXT: fldt {{[0-9]+}}(%esp) ; CHECK-NEXT: fxch %st(1) ; CHECK-NEXT: ## InlineAsm Start ; CHECK-NEXT: fucompp %st(1) ; CHECK-NEXT: ## InlineAsm End -; CHECK-NEXT: addl $12, %esp ; CHECK-NEXT: retl entry: tail call void asm sideeffect "fucompp $1", "{st},{st(1)},~{st},~{st(1)},~{dirflag},~{fpsr},~{flags}"(x86_fp80 %x, x86_fp80 %y) nounwind diff --git a/llvm/test/CodeGen/X86/isel-fcmp-x87.ll b/llvm/test/CodeGen/X86/isel-fcmp-x87.ll index 8c2a53082649a..84c9750bc326d 100644 --- a/llvm/test/CodeGen/X86/isel-fcmp-x87.ll +++ b/llvm/test/CodeGen/X86/isel-fcmp-x87.ll @@ -33,7 +33,6 @@ ; ; X86-LABEL: fcmp_x86_fp80_oeq: ; X86: ## %bb.0: -; X86-NEXT: subl $12, %esp ; X86-NEXT: fldt {{[0-9]+}}(%esp) ; X86-NEXT: fldt {{[0-9]+}}(%esp) ; X86-NEXT: fucompp @@ -43,12 +42,10 @@ ; X86-NEXT: setnp %cl ; X86-NEXT: sete %al ; X86-NEXT: andb %cl, %al -; X86-NEXT: addl $12, %esp ; X86-NEXT: retl 
; ; GISEL-X86-LABEL: fcmp_x86_fp80_oeq: ; GISEL-X86: ## %bb.0: -; GISEL-X86-NEXT: subl $12, %esp ; GISEL-X86-NEXT: fldt {{[0-9]+}}(%esp) ; GISEL-X86-NEXT: fldt {{[0-9]+}}(%esp) ; GISEL-X86-NEXT: fxch %st(1) @@ -57,7 +54,6 @@ ; GISEL-X86-NEXT: sete %cl ; GISEL-X86-NEXT: setnp %al ; GISEL-X86-NEXT: andb %cl, %al -; GISEL-X86-NEXT: addl $12, %esp ; GISEL-X86-NEXT: retl %1 = fcmp oeq x86_fp80 %x, %y ret i1 %1 @@ -85,7 +81,6 @@ ; ; X86-LABEL: fcmp_x86_fp80_ogt: ; X86: ## %bb.0: -; X86-NEXT: subl $12, %esp ; X86-NEXT: fldt {{[0-9]+}}(%esp) ; X86-NEXT: fldt {{[0-9]+}}(%esp) ; X86-NEXT: fucompp @@ -93,19 +88,16 @@ ; X86-NEXT: ## kill: def $ah killed $ah killed $ax ; X86-NEXT: sahf ; X86-NEXT: seta %al -; X86-NEXT: addl $12, %esp ; X86-NEXT: retl ; ; GISEL-X86-LABEL: fcmp_x86_fp80_ogt: ; GISEL-X86: ## %bb.0: -; GISEL-X86-NEXT: subl $12, %esp ; GISEL-X86-NEXT: fldt {{[0-9]+}}(%esp) ; GISEL-X86-NEXT: fldt {{[0-9]+}}(%esp) ; GISEL-X86-NEXT: fxch %st(1) ; GISEL-X86-NEXT: fucompi %st(1), %st ; GISEL-X86-NEXT: fstp %st(0) ; GISEL-X86-NEXT: seta %al -; GISEL-X86-NEXT: addl $12, %esp ; GISEL-X86-NEXT: retl %1 = fcmp ogt x86_fp80 %x, %y ret i1 %1 @@ -133,7 +125,6 @@ ; ; X86-LABEL: fcmp_x86_fp80_oge: ; X86: ## %bb.0: -; X86-NEXT: subl $12, %esp ; X86-NEXT: fldt {{[0-9]+}}(%esp) ; X86-NEXT: fldt {{[0-9]+}}(%esp) ; X86-NEXT: fucompp @@ -141,19 +132,16 @@ ; X86-NEXT: ## kill: def $ah killed $ah killed $ax ; X86-NEXT: sahf ; X86-NEXT: setae %al -; X86-NEXT: addl $12, %esp ; X86-NEXT: retl ; ; GISEL-X86-LABEL: fcmp_x86_fp80_oge: ; GISEL-X86: ## %bb.0: -; GISEL-X86-NEXT: subl $12, %esp ; GISEL-X86-NEXT: fldt {{[0-9]+}}(%esp) ; GISEL-X86-NEXT: fldt {{[0-9]+}}(%esp) ; GISEL-X86-NEXT: fxch %st(1) ; GISEL-X86-NEXT: fucompi %st(1), %st ; GISEL-X86-NEXT: fstp %st(0) ; GISEL-X86-NEXT: setae %al -; GISEL-X86-NEXT: addl $12, %esp ; GISEL-X86-NEXT: retl %1 = fcmp oge x86_fp80 %x, %y ret i1 %1 @@ -190,7 +178,6 @@ ; ; SDAG-X86-LABEL: fcmp_x86_fp80_olt: ; SDAG-X86: ## %bb.0: -; SDAG-X86-NEXT: subl $12, 
%esp ; SDAG-X86-NEXT: fldt {{[0-9]+}}(%esp) ; SDAG-X86-NEXT: fldt {{[0-9]+}}(%esp) ; SDAG-X86-NEXT: fucompp @@ -198,12 +185,10 @@ ; SDAG-X86-NEXT: ## kill: def $ah killed $ah killed $ax ; SDAG-X86-NEXT: sahf ; SDAG-X86-NEXT: seta %al -; SDAG-X86-NEXT: addl $12, %esp ; SDAG-X86-NEXT: retl ; ; FAST-X86-LABEL: fcmp_x86_fp80_olt: ; FAST-X86: ## %bb.0: -; FAST-X86-NEXT: subl $12, %esp ; FAST-X86-NEXT: fldt {{[0-9]+}}(%esp) ; FAST-X86-NEXT: fldt {{[0-9]+}}(%esp) ; FAST-X86-NEXT: fxch %st(1) @@ -212,18 +197,15 @@ ; FAST-X86-NEXT: ## kill: def $ah killed $ah killed $ax ; FAST-X86-NEXT: sahf ; FAST-X86-NEXT: seta %al -; FAST-X86-NEXT: addl $12, %esp ; FAST-X86-NEXT: retl ; ; GISEL-X86-LABEL: fcmp_x86_fp80_olt: ; GISEL-X86: ## %bb.0: -; GISEL-X86-NEXT: subl $12, %esp ; GISEL-X86-NEXT: fldt {{[0-9]+}}(%esp) ; GISEL-X86-NEXT: fldt {{[0-9]+}}(%esp) ; GISEL-X86-NEXT: fucompi %st(1), %st ; GISEL-X86-NEXT: fstp %st(0) ; GISEL-X86-NEXT: seta %al -; GISEL-X86-NEXT: addl $12, %esp ; GISEL-X86-NEXT: retl %1 = fcmp olt x86_fp80 %x, %y ret i1 %1 @@ -260,7 +242,6 @@ ; ; SDAG-X86-LABEL: fcmp_x86_fp80_ole: ; SDAG-X86: ## %bb.0: -; SDAG-X86-NEXT: subl $12, %esp ; SDAG-X86-NEXT: fldt {{[0-9]+}}(%esp) ; SDAG-X86-NEXT: fldt {{[0-9]+}}(%esp) ; SDAG-X86-NEXT: fucompp @@ -268,12 +249,10 @@ ; SDAG-X86-NEXT: ## kill: def $ah killed $ah killed $ax ; SDAG-X86-NEXT: sahf ; SDAG-X86-NEXT: setae %al -; SDAG-X86-NEXT: addl $12, %esp ; SDAG-X86-NEXT: retl ; ; FAST-X86-LABEL: fcmp_x86_fp80_ole: ; FAST-X86: ## %bb.0: -; FAST-X86-NEXT: subl $12, %esp ; FAST-X86-NEXT: fldt {{[0-9]+}}(%esp) ; FAST-X86-NEXT: fldt {{[0-9]+}}(%esp) ; FAST-X86-NEXT: fxch %st(1) @@ -282,18 +261,15 @@ ; FAST-X86-NEXT: ## kill: def $ah killed $ah killed $ax ; FAST-X86-NEXT: sahf ; FAST-X86-NEXT: setae %al -; FAST-X86-NEXT: addl $12, %esp ; FAST-X86-NEXT: retl ; ; GISEL-X86-LABEL: fcmp_x86_fp80_ole: ; GISEL-X86: ## %bb.0: -; GISEL-X86-NEXT: subl $12, %esp ; GISEL-X86-NEXT: fldt {{[0-9]+}}(%esp) ; GISEL-X86-NEXT: fldt {{[0-9]+}}(%esp) 
; GISEL-X86-NEXT: fucompi %st(1), %st ; GISEL-X86-NEXT: fstp %st(0) ; GISEL-X86-NEXT: setae %al -; GISEL-X86-NEXT: addl $12, %esp ; GISEL-X86-NEXT: retl %1 = fcmp ole x86_fp80 %x, %y ret i1 %1 @@ -321,7 +297,6 @@ ; ; X86-LABEL: fcmp_x86_fp80_one: ; X86: ## %bb.0: -; X86-NEXT: subl $12, %esp ; X86-NEXT: fldt {{[0-9]+}}(%esp) ; X86-NEXT: fldt {{[0-9]+}}(%esp) ; X86-NEXT: fucompp @@ -329,19 +304,16 @@ ; X86-NEXT: ## kill: def $ah killed $ah killed $ax ; X86-NEXT: sahf ; X86-NEXT: setne %al -; X86-NEXT: addl $12, %esp ; X86-NEXT: retl ; ; GISEL-X86-LABEL: fcmp_x86_fp80_one: ; GISEL-X86: ## %bb.0: -; GISEL-X86-NEXT: subl $12, %esp ; GISEL-X86-NEXT: fldt {{[0-9]+}}(%esp) ; GISEL-X86-NEXT: fldt {{[0-9]+}}(%esp) ; GISEL-X86-NEXT: fxch %st(1) ; GISEL-X86-NEXT: fucompi %st(1), %st ; GISEL-X86-NEXT: fstp %st(0) ; GISEL-X86-NEXT: setne %al -; GISEL-X86-NEXT: addl $12, %esp ; GISEL-X86-NEXT: retl %1 = fcmp one x86_fp80 %x, %y ret i1 %1 @@ -369,7 +341,6 @@ ; ; X86-LABEL: fcmp_x86_fp80_ord: ; X86: ## %bb.0: -; X86-NEXT: subl $12, %esp ; X86-NEXT: fldt {{[0-9]+}}(%esp) ; X86-NEXT: fldt {{[0-9]+}}(%esp) ; X86-NEXT: fucompp @@ -377,19 +348,16 @@ ; X86-NEXT: ## kill: def $ah killed $ah killed $ax ; X86-NEXT: sahf ; X86-NEXT: setnp %al -; X86-NEXT: addl $12, %esp ; X86-NEXT: retl ; ; GISEL-X86-LABEL: fcmp_x86_fp80_ord: ; GISEL-X86: ## %bb.0: -; GISEL-X86-NEXT: subl $12, %esp ; GISEL-X86-NEXT: fldt {{[0-9]+}}(%esp) ; GISEL-X86-NEXT: fldt {{[0-9]+}}(%esp) ; GISEL-X86-NEXT: fxch %st(1) ; GISEL-X86-NEXT: fucompi %st(1), %st ; GISEL-X86-NEXT: fstp %st(0) ; GISEL-X86-NEXT: setnp %al -; GISEL-X86-NEXT: addl $12, %esp ; GISEL-X86-NEXT: retl %1 = fcmp ord x86_fp80 %x, %y ret i1 %1 @@ -417,7 +385,6 @@ ; ; X86-LABEL: fcmp_x86_fp80_uno: ; X86: ## %bb.0: -; X86-NEXT: subl $12, %esp ; X86-NEXT: fldt {{[0-9]+}}(%esp) ; X86-NEXT: fldt {{[0-9]+}}(%esp) ; X86-NEXT: fucompp @@ -425,19 +392,16 @@ ; X86-NEXT: ## kill: def $ah killed $ah killed $ax ; X86-NEXT: sahf ; X86-NEXT: setp %al -; X86-NEXT: addl 
$12, %esp ; X86-NEXT: retl ; ; GISEL-X86-LABEL: fcmp_x86_fp80_uno: ; GISEL-X86: ## %bb.0: -; GISEL-X86-NEXT: subl $12, %esp ; GISEL-X86-NEXT: fldt {{[0-9]+}}(%esp) ; GISEL-X86-NEXT: fldt {{[0-9]+}}(%esp) ; GISEL-X86-NEXT: fxch %st(1) ; GISEL-X86-NEXT: fucompi %st(1), %st ; GISEL-X86-NEXT: fstp %st(0) ; GISEL-X86-NEXT: setp %al -; GISEL-X86-NEXT: addl $12, %esp ; GISEL-X86-NEXT: retl %1 = fcmp uno x86_fp80 %x, %y ret i1 %1 @@ -465,7 +429,6 @@ ; ; X86-LABEL: fcmp_x86_fp80_ueq: ; X86: ## %bb.0: -; X86-NEXT: subl $12, %esp ; X86-NEXT: fldt {{[0-9]+}}(%esp) ; X86-NEXT: fldt {{[0-9]+}}(%esp) ; X86-NEXT: fucompp @@ -473,19 +436,16 @@ ; X86-NEXT: ## kill: def $ah killed $ah killed $ax ; X86-NEXT: sahf ; X86-NEXT: sete %al -; X86-NEXT: addl $12, %esp ; X86-NEXT: retl ; ; GISEL-X86-LABEL: fcmp_x86_fp80_ueq: ; GISEL-X86: ## %bb.0: -; GISEL-X86-NEXT: subl $12, %esp ; GISEL-X86-NEXT: fldt {{[0-9]+}}(%esp) ; GISEL-X86-NEXT: fldt {{[0-9]+}}(%esp) ; GISEL-X86-NEXT: fxch %st(1) ; GISEL-X86-NEXT: fucompi %st(1), %st ; GISEL-X86-NEXT: fstp %st(0) ; GISEL-X86-NEXT: sete %al -; GISEL-X86-NEXT: addl $12, %esp ; GISEL-X86-NEXT: retl %1 = fcmp ueq x86_fp80 %x, %y ret i1 %1 @@ -522,7 +482,6 @@ ; ; SDAG-X86-LABEL: fcmp_x86_fp80_ugt: ; SDAG-X86: ## %bb.0: -; SDAG-X86-NEXT: subl $12, %esp ; SDAG-X86-NEXT: fldt {{[0-9]+}}(%esp) ; SDAG-X86-NEXT: fldt {{[0-9]+}}(%esp) ; SDAG-X86-NEXT: fucompp @@ -530,12 +489,10 @@ ; SDAG-X86-NEXT: ## kill: def $ah killed $ah killed $ax ; SDAG-X86-NEXT: sahf ; SDAG-X86-NEXT: setb %al -; SDAG-X86-NEXT: addl $12, %esp ; SDAG-X86-NEXT: retl ; ; FAST-X86-LABEL: fcmp_x86_fp80_ugt: ; FAST-X86: ## %bb.0: -; FAST-X86-NEXT: subl $12, %esp ; FAST-X86-NEXT: fldt {{[0-9]+}}(%esp) ; FAST-X86-NEXT: fldt {{[0-9]+}}(%esp) ; FAST-X86-NEXT: fxch %st(1) @@ -544,18 +501,15 @@ ; FAST-X86-NEXT: ## kill: def $ah killed $ah killed $ax ; FAST-X86-NEXT: sahf ; FAST-X86-NEXT: setb %al -; FAST-X86-NEXT: addl $12, %esp ; FAST-X86-NEXT: retl ; ; GISEL-X86-LABEL: fcmp_x86_fp80_ugt: ; 
GISEL-X86: ## %bb.0: -; GISEL-X86-NEXT: subl $12, %esp ; GISEL-X86-NEXT: fldt {{[0-9]+}}(%esp) ; GISEL-X86-NEXT: fldt {{[0-9]+}}(%esp) ; GISEL-X86-NEXT: fucompi %st(1), %st ; GISEL-X86-NEXT: fstp %st(0) ; GISEL-X86-NEXT: setb %al -; GISEL-X86-NEXT: addl $12, %esp ; GISEL-X86-NEXT: retl %1 = fcmp ugt x86_fp80 %x, %y ret i1 %1 @@ -592,7 +546,6 @@ ; ; SDAG-X86-LABEL: fcmp_x86_fp80_uge: ; SDAG-X86: ## %bb.0: -; SDAG-X86-NEXT: subl $12, %esp ; SDAG-X86-NEXT: fldt {{[0-9]+}}(%esp) ; SDAG-X86-NEXT: fldt {{[0-9]+}}(%esp) ; SDAG-X86-NEXT: fucompp @@ -600,12 +553,10 @@ ; SDAG-X86-NEXT: ## kill: def $ah killed $ah killed $ax ; SDAG-X86-NEXT: sahf ; SDAG-X86-NEXT: setbe %al -; SDAG-X86-NEXT: addl $12, %esp ; SDAG-X86-NEXT: retl ; ; FAST-X86-LABEL: fcmp_x86_fp80_uge: ; FAST-X86: ## %bb.0: -; FAST-X86-NEXT: subl $12, %esp ; FAST-X86-NEXT: fldt {{[0-9]+}}(%esp) ; FAST-X86-NEXT: fldt {{[0-9]+}}(%esp) ; FAST-X86-NEXT: fxch %st(1) @@ -614,18 +565,15 @@ ; FAST-X86-NEXT: ## kill: def $ah killed $ah killed $ax ; FAST-X86-NEXT: sahf ; FAST-X86-NEXT: setbe %al -; FAST-X86-NEXT: addl $12, %esp ; FAST-X86-NEXT: retl ; ; GISEL-X86-LABEL: fcmp_x86_fp80_uge: ; GISEL-X86: ## %bb.0: -; GISEL-X86-NEXT: subl $12, %esp ; GISEL-X86-NEXT: fldt {{[0-9]+}}(%esp) ; GISEL-X86-NEXT: fldt {{[0-9]+}}(%esp) ; GISEL-X86-NEXT: fucompi %st(1), %st ; GISEL-X86-NEXT: fstp %st(0) ; GISEL-X86-NEXT: setbe %al -; GISEL-X86-NEXT: addl $12, %esp ; GISEL-X86-NEXT: retl %1 = fcmp uge x86_fp80 %x, %y ret i1 %1 @@ -653,7 +601,6 @@ ; ; X86-LABEL: fcmp_x86_fp80_ult: ; X86: ## %bb.0: -; X86-NEXT: subl $12, %esp ; X86-NEXT: fldt {{[0-9]+}}(%esp) ; X86-NEXT: fldt {{[0-9]+}}(%esp) ; X86-NEXT: fucompp @@ -661,19 +608,16 @@ ; X86-NEXT: ## kill: def $ah killed $ah killed $ax ; X86-NEXT: sahf ; X86-NEXT: setb %al -; X86-NEXT: addl $12, %esp ; X86-NEXT: retl ; ; GISEL-X86-LABEL: fcmp_x86_fp80_ult: ; GISEL-X86: ## %bb.0: -; GISEL-X86-NEXT: subl $12, %esp ; GISEL-X86-NEXT: fldt {{[0-9]+}}(%esp) ; GISEL-X86-NEXT: fldt {{[0-9]+}}(%esp) 
; GISEL-X86-NEXT: fxch %st(1) ; GISEL-X86-NEXT: fucompi %st(1), %st ; GISEL-X86-NEXT: fstp %st(0) ; GISEL-X86-NEXT: setb %al -; GISEL-X86-NEXT: addl $12, %esp ; GISEL-X86-NEXT: retl %1 = fcmp ult x86_fp80 %x, %y ret i1 %1 @@ -701,7 +645,6 @@ ; ; X86-LABEL: fcmp_x86_fp80_ule: ; X86: ## %bb.0: -; X86-NEXT: subl $12, %esp ; X86-NEXT: fldt {{[0-9]+}}(%esp) ; X86-NEXT: fldt {{[0-9]+}}(%esp) ; X86-NEXT: fucompp @@ -709,19 +652,16 @@ ; X86-NEXT: ## kill: def $ah killed $ah killed $ax ; X86-NEXT: sahf ; X86-NEXT: setbe %al -; X86-NEXT: addl $12, %esp ; X86-NEXT: retl ; ; GISEL-X86-LABEL: fcmp_x86_fp80_ule: ; GISEL-X86: ## %bb.0: -; GISEL-X86-NEXT: subl $12, %esp ; GISEL-X86-NEXT: fldt {{[0-9]+}}(%esp) ; GISEL-X86-NEXT: fldt {{[0-9]+}}(%esp) ; GISEL-X86-NEXT: fxch %st(1) ; GISEL-X86-NEXT: fucompi %st(1), %st ; GISEL-X86-NEXT: fstp %st(0) ; GISEL-X86-NEXT: setbe %al -; GISEL-X86-NEXT: addl $12, %esp ; GISEL-X86-NEXT: retl %1 = fcmp ule x86_fp80 %x, %y ret i1 %1 @@ -753,7 +693,6 @@ ; ; X86-LABEL: fcmp_x86_fp80_une: ; X86: ## %bb.0: -; X86-NEXT: subl $12, %esp ; X86-NEXT: fldt {{[0-9]+}}(%esp) ; X86-NEXT: fldt {{[0-9]+}}(%esp) ; X86-NEXT: fucompp @@ -763,12 +702,10 @@ ; X86-NEXT: setp %cl ; X86-NEXT: setne %al ; X86-NEXT: orb %cl, %al -; X86-NEXT: addl $12, %esp ; X86-NEXT: retl ; ; GISEL-X86-LABEL: fcmp_x86_fp80_une: ; GISEL-X86: ## %bb.0: -; GISEL-X86-NEXT: subl $12, %esp ; GISEL-X86-NEXT: fldt {{[0-9]+}}(%esp) ; GISEL-X86-NEXT: fldt {{[0-9]+}}(%esp) ; GISEL-X86-NEXT: fxch %st(1) @@ -777,7 +714,6 @@ ; GISEL-X86-NEXT: setne %cl ; GISEL-X86-NEXT: setp %al ; GISEL-X86-NEXT: orb %cl, %al -; GISEL-X86-NEXT: addl $12, %esp ; GISEL-X86-NEXT: retl %1 = fcmp une x86_fp80 %x, %y ret i1 %1 diff --git a/llvm/test/CodeGen/X86/kcfi-arity.ll b/llvm/test/CodeGen/X86/kcfi-arity.ll index 009fa7d2dc0a4..5a19bcd7835ea 100644 --- a/llvm/test/CodeGen/X86/kcfi-arity.ll +++ b/llvm/test/CodeGen/X86/kcfi-arity.ll @@ -1,4 +1,5 @@ ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -verify-machineinstrs < %s 
| FileCheck %s --check-prefix=ASM +; RUN: llc -mtriple=x86_64-unknown-none -verify-machineinstrs < %s | FileCheck %s --check-prefix=ASM ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -verify-machineinstrs -stop-after=finalize-isel < %s | FileCheck %s --check-prefixes=MIR,ISEL ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -verify-machineinstrs -stop-after=kcfi < %s | FileCheck %s --check-prefixes=MIR,KCFI diff --git a/llvm/test/CodeGen/X86/long-double-abi-align.ll b/llvm/test/CodeGen/X86/long-double-abi-align.ll index fcb88714f8b82..02d68ada9a8d4 100644 --- a/llvm/test/CodeGen/X86/long-double-abi-align.ll +++ b/llvm/test/CodeGen/X86/long-double-abi-align.ll @@ -73,7 +73,7 @@ define void @foo(i32 %0, x86_fp80 %1, i32 %2) nounwind { ; DARWIN-LABEL: foo: ; DARWIN: ## %bb.0: ; DARWIN-NEXT: subl $44, %esp -; DARWIN-NEXT: fldt 64(%esp) +; DARWIN-NEXT: fldt 52(%esp) ; DARWIN-NEXT: fstpt 16(%esp) ; DARWIN-NEXT: leal 48(%esp), %eax ; DARWIN-NEXT: movl %eax, (%esp) @@ -81,7 +81,7 @@ define void @foo(i32 %0, x86_fp80 %1, i32 %2) nounwind { ; DARWIN-NEXT: leal 16(%esp), %eax ; DARWIN-NEXT: movl %eax, (%esp) ; DARWIN-NEXT: calll _escape -; DARWIN-NEXT: leal 80(%esp), %eax +; DARWIN-NEXT: leal 68(%esp), %eax ; DARWIN-NEXT: movl %eax, (%esp) ; DARWIN-NEXT: calll _escape ; DARWIN-NEXT: addl $44, %esp diff --git a/llvm/test/CodeGen/X86/pr78897.ll b/llvm/test/CodeGen/X86/pr78897.ll index 0caa569107c0c..4613c2bcdcaf4 100644 --- a/llvm/test/CodeGen/X86/pr78897.ll +++ b/llvm/test/CodeGen/X86/pr78897.ll @@ -225,9 +225,9 @@ define <16 x i8> @produceShuffleVectorForByte(i8 zeroext %0) nounwind { ; X86-AVX512-NEXT: pushl %esi ; X86-AVX512-NEXT: vpbroadcastb {{[0-9]+}}(%esp), %xmm0 ; X86-AVX512-NEXT: vmovd %xmm0, %eax -; X86-AVX512-NEXT: kmovd %eax, %k0 -; X86-AVX512-NEXT: knotw %k0, %k1 -; X86-AVX512-NEXT: vmovdqu8 {{.*#+}} xmm0 {%k1} {z} = [17,17,17,17,17,17,17,17,u,u,u,u,u,u,u,u] +; X86-AVX512-NEXT: kmovd %eax, %k1 +; X86-AVX512-NEXT: knotw %k1, %k2 +; X86-AVX512-NEXT: vmovdqu8 {{.*#+}} xmm0 
{%k2} {z} = [17,17,17,17,17,17,17,17,u,u,u,u,u,u,u,u] ; X86-AVX512-NEXT: vpextrd $1, %xmm0, %eax ; X86-AVX512-NEXT: vmovd %xmm0, %edx ; X86-AVX512-NEXT: movl $286331152, %ecx # imm = 0x11111110 @@ -247,9 +247,9 @@ define <16 x i8> @produceShuffleVectorForByte(i8 zeroext %0) nounwind { ; X86-AVX512-NEXT: addl %edx, %eax ; X86-AVX512-NEXT: vmovd %esi, %xmm1 ; X86-AVX512-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1 -; X86-AVX512-NEXT: vmovdqu8 %xmm0, %xmm1 {%k1} -; X86-AVX512-NEXT: vpsrlw $4, %xmm1, %xmm0 -; X86-AVX512-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; X86-AVX512-NEXT: vmovdqu8 %xmm1, %xmm0 {%k1} +; X86-AVX512-NEXT: vpsrlw $4, %xmm0, %xmm1 +; X86-AVX512-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] ; X86-AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0 ; X86-AVX512-NEXT: popl %esi ; X86-AVX512-NEXT: popl %edi @@ -258,9 +258,9 @@ define <16 x i8> @produceShuffleVectorForByte(i8 zeroext %0) nounwind { ; ; X64-AVX512-LABEL: produceShuffleVectorForByte: ; X64-AVX512: # %bb.0: # %entry -; X64-AVX512-NEXT: kmovd %edi, %k0 -; X64-AVX512-NEXT: knotw %k0, %k1 -; X64-AVX512-NEXT: vmovdqu8 {{.*#+}} xmm0 {%k1} {z} = [17,17,17,17,17,17,17,17,u,u,u,u,u,u,u,u] +; X64-AVX512-NEXT: kmovd %edi, %k1 +; X64-AVX512-NEXT: knotw %k1, %k2 +; X64-AVX512-NEXT: vmovdqu8 {{.*#+}} xmm0 {%k2} {z} = [17,17,17,17,17,17,17,17,u,u,u,u,u,u,u,u] ; X64-AVX512-NEXT: vmovq %xmm0, %rax ; X64-AVX512-NEXT: movabsq $1229782938247303440, %rcx # imm = 0x1111111111111110 ; X64-AVX512-NEXT: movabsq $76861433640456465, %rdx # imm = 0x111111111111111 @@ -269,9 +269,9 @@ define <16 x i8> @produceShuffleVectorForByte(i8 zeroext %0) nounwind { ; X64-AVX512-NEXT: vmovq %rax, %xmm0 ; X64-AVX512-NEXT: imulq %rcx, %rdx ; X64-AVX512-NEXT: vmovq %rdx, %xmm1 -; X64-AVX512-NEXT: 
vmovdqu8 %xmm0, %xmm1 {%k1} -; X64-AVX512-NEXT: vpsrlw $4, %xmm1, %xmm0 -; X64-AVX512-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; X64-AVX512-NEXT: vmovdqu8 %xmm1, %xmm0 {%k1} +; X64-AVX512-NEXT: vpsrlw $4, %xmm0, %xmm1 +; X64-AVX512-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] ; X64-AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 ; X64-AVX512-NEXT: retq entry: diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics-upgrade.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics-upgrade.ll index 0421d525890e7..b292a8a9b1d66 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics-upgrade.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics-upgrade.ll @@ -11436,8 +11436,11 @@ define <16 x i32>@test_int_x86_avx512_pabs_d_512(<16 x i32> %x0, <16 x i32> %x1) ; CHECK-LABEL: @test_int_x86_avx512_pabs_d_512( ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i32> @llvm.abs.v16i32(<16 x i32> [[X0:%.*]], i1 false) -; CHECK-NEXT: store <16 x i32> [[TMP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <16 x i32> [[X0:%.*]], splat (i32 -2147483648) +; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> splat (i32 -1), <16 x i32> [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = select i1 false, <16 x i32> [[TMP3]], <16 x i32> [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i32> @llvm.abs.v16i32(<16 x i32> [[X0]], i1 false) +; CHECK-NEXT: store <16 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i32> [[TMP2]] ; %res = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> 
%x1, i16 -1) @@ -11451,12 +11454,15 @@ define <16 x i32>@test_int_x86_avx512_mask_pabs_d_512(<16 x i32> %x0, <16 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.abs.v16i32(<16 x i32> [[X0:%.*]], i1 false) +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq <16 x i32> [[X0:%.*]], splat (i32 -2147483648) +; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> splat (i32 -1), <16 x i32> [[TMP1]] +; CHECK-NEXT: [[TMP14:%.*]] = select i1 false, <16 x i32> [[TMP13]], <16 x i32> [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.abs.v16i32(<16 x i32> [[X0]], i1 false) ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> ; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[X2:%.*]] to <16 x i1> -; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP1]], <16 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP14]], <16 x i32> [[TMP3]] ; CHECK-NEXT: [[TMP8:%.*]] = xor <16 x i32> [[TMP4]], [[X1:%.*]] -; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[TMP1]] +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[TMP14]] ; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[TMP3]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP10]], <16 x i32> [[TMP7]] ; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP4]], <16 x i32> [[X1]] @@ -11473,8 +11479,11 @@ define <8 x i64>@test_int_x86_avx512_pabs_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 ; CHECK-LABEL: @test_int_x86_avx512_pabs_q_512( ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = call <8 x 
i64> @llvm.abs.v8i64(<8 x i64> [[X0:%.*]], i1 false) -; CHECK-NEXT: store <8 x i64> [[TMP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <8 x i64> [[X0:%.*]], splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> splat (i64 -1), <8 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = select i1 false, <8 x i64> [[TMP3]], <8 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i64> @llvm.abs.v8i64(<8 x i64> [[X0]], i1 false) +; CHECK-NEXT: store <8 x i64> [[TMP4]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i64> [[TMP2]] ; %res = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 -1) @@ -11488,12 +11497,15 @@ define <8 x i64>@test_int_x86_avx512_mask_pabs_q_512(<8 x i64> %x0, <8 x i64> %x ; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.abs.v8i64(<8 x i64> [[X0:%.*]], i1 false) +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq <8 x i64> [[X0:%.*]], splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> splat (i64 -1), <8 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP14:%.*]] = select i1 false, <8 x i64> [[TMP13]], <8 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.abs.v8i64(<8 x i64> [[X0]], i1 false) ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> ; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[X2:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP1]], <8 x i64> [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP14]], <8 x i64> [[TMP3]] ; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i64> [[TMP4]], [[X1:%.*]] -; CHECK-NEXT: [[TMP9:%.*]] = or <8 
x i64> [[TMP8]], [[TMP1]] +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[TMP14]] ; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[TMP3]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP10]], <8 x i64> [[TMP7]] ; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP4]], <8 x i64> [[X1]] diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics-upgrade.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics-upgrade.ll index a41f26a0e3c1c..18441b2d7e253 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics-upgrade.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics-upgrade.ll @@ -4443,8 +4443,11 @@ define <32 x i16> @test_int_x86_avx512_pabs_w_512(<32 x i16> %x0, <32 x i16> %x1 ; CHECK-LABEL: @test_int_x86_avx512_pabs_w_512( ; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = call <32 x i16> @llvm.abs.v32i16(<32 x i16> [[X0:%.*]], i1 false) -; CHECK-NEXT: store <32 x i16> [[TMP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <32 x i16> [[X0:%.*]], splat (i16 -32768) +; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP5]], <32 x i16> splat (i16 -1), <32 x i16> [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = select i1 false, <32 x i16> [[TMP3]], <32 x i16> [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = call <32 x i16> @llvm.abs.v32i16(<32 x i16> [[X0]], i1 false) +; CHECK-NEXT: store <32 x i16> [[TMP4]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <32 x i16> [[TMP2]] ; %res = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 -1) @@ -4457,12 +4460,15 @@ define <32 x i16> @test_int_x86_avx512_mask_pabs_w_512(<32 x i16> %x0, <32 x i16 ; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: 
[[TMP3:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = call <32 x i16> @llvm.abs.v32i16(<32 x i16> [[X0:%.*]], i1 false) +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq <32 x i16> [[X0:%.*]], splat (i16 -32768) +; CHECK-NEXT: [[TMP13:%.*]] = select <32 x i1> [[TMP12]], <32 x i16> splat (i16 -1), <32 x i16> [[TMP1]] +; CHECK-NEXT: [[TMP14:%.*]] = select i1 false, <32 x i16> [[TMP13]], <32 x i16> [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = call <32 x i16> @llvm.abs.v32i16(<32 x i16> [[X0]], i1 false) ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP2]] to <32 x i1> ; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[X2:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP7:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP1]], <32 x i16> [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP14]], <32 x i16> [[TMP3]] ; CHECK-NEXT: [[TMP8:%.*]] = xor <32 x i16> [[TMP4]], [[X1:%.*]] -; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i16> [[TMP8]], [[TMP1]] +; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i16> [[TMP8]], [[TMP14]] ; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], [[TMP3]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP5]], <32 x i16> [[TMP10]], <32 x i16> [[TMP7]] ; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP4]], <32 x i16> [[X1]] @@ -4479,8 +4485,11 @@ define <64 x i8> @test_int_x86_avx512_pabs_b_512(<64 x i8> %x0, <64 x i8> %x1) n ; CHECK-LABEL: @test_int_x86_avx512_pabs_b_512( ; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = call <64 x i8> @llvm.abs.v64i8(<64 x i8> [[X0:%.*]], i1 false) -; CHECK-NEXT: store <64 x i8> [[TMP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <64 x i8> [[X0:%.*]], splat (i8 -128) +; CHECK-NEXT: [[TMP3:%.*]] = select <64 x i1> [[TMP5]], <64 x i8> 
splat (i8 -1), <64 x i8> [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = select i1 false, <64 x i8> [[TMP3]], <64 x i8> [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = call <64 x i8> @llvm.abs.v64i8(<64 x i8> [[X0]], i1 false) +; CHECK-NEXT: store <64 x i8> [[TMP4]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <64 x i8> [[TMP2]] ; %res = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 -1) @@ -4493,12 +4502,15 @@ define <64 x i8> @test_int_x86_avx512_mask_pabs_b_512(<64 x i8> %x0, <64 x i8> % ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = call <64 x i8> @llvm.abs.v64i8(<64 x i8> [[X0:%.*]], i1 false) +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq <64 x i8> [[X0:%.*]], splat (i8 -128) +; CHECK-NEXT: [[TMP13:%.*]] = select <64 x i1> [[TMP12]], <64 x i8> splat (i8 -1), <64 x i8> [[TMP1]] +; CHECK-NEXT: [[TMP14:%.*]] = select i1 false, <64 x i8> [[TMP13]], <64 x i8> [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = call <64 x i8> @llvm.abs.v64i8(<64 x i8> [[X0]], i1 false) ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64 [[TMP2]] to <64 x i1> ; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[X2:%.*]] to <64 x i1> -; CHECK-NEXT: [[TMP7:%.*]] = select <64 x i1> [[TMP6]], <64 x i8> [[TMP1]], <64 x i8> [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = select <64 x i1> [[TMP6]], <64 x i8> [[TMP14]], <64 x i8> [[TMP3]] ; CHECK-NEXT: [[TMP8:%.*]] = xor <64 x i8> [[TMP4]], [[X1:%.*]] -; CHECK-NEXT: [[TMP9:%.*]] = or <64 x i8> [[TMP8]], [[TMP1]] +; CHECK-NEXT: [[TMP9:%.*]] = or <64 x i8> [[TMP8]], [[TMP14]] ; CHECK-NEXT: [[TMP10:%.*]] = or <64 x i8> [[TMP9]], [[TMP3]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP5]], <64 x i8> [[TMP10]], <64 x i8> [[TMP7]] ; CHECK-NEXT: [[TMP11:%.*]] = select 
<64 x i1> [[TMP6]], <64 x i8> [[TMP4]], <64 x i8> [[X1]] diff --git a/llvm/test/Instrumentation/MemorySanitizer/abs-vector.ll b/llvm/test/Instrumentation/MemorySanitizer/abs-vector.ll index fd16dfe54e675..11af676eed7b4 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/abs-vector.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/abs-vector.ll @@ -6,19 +6,22 @@ target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16 target triple = "x86_64-unknown-linux-gnu" define <4 x i64> @test_mm256_abs_epi8(<4 x i64> %a) local_unnamed_addr #0 { -; CHECK-LABEL: @test_mm256_abs_epi8( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 -; ORIGIN-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 -; CHECK: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP0]] to <32 x i8> -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[A:%.*]] to <32 x i8> -; CHECK-NEXT: [[TMP4:%.*]] = tail call <32 x i8> @llvm.abs.v32i8(<32 x i8> [[TMP3]], i1 false) -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i8> [[TMP2]] to <4 x i64> -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <32 x i8> [[TMP4]] to <4 x i64> -; CHECK-NEXT: store <4 x i64> [[TMP5]], ptr @__msan_retval_tls, align 8 -; ORIGIN-NEXT: store i32 [[TMP1]], ptr @__msan_retval_origin_tls, align 4 -; CHECK: ret <4 x i64> [[TMP6]] +; ORIGIN-LABEL: @test_mm256_abs_epi8( +; ORIGIN-NEXT: entry: +; ORIGIN-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 +; ORIGIN-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 +; ORIGIN-NEXT: call void @llvm.donothing() +; ORIGIN-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP0]] to <32 x i8> +; ORIGIN-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[A:%.*]] to <32 x i8> +; ORIGIN-NEXT: [[TMP4:%.*]] = icmp eq <32 x i8> [[TMP3]], splat (i8 -128) +; ORIGIN-NEXT: [[TMP5:%.*]] = select <32 x i1> [[TMP4]], <32 x i8> splat (i8 -1), <32 x i8> [[TMP2]] +; ORIGIN-NEXT: [[TMP6:%.*]] = 
select i1 false, <32 x i8> [[TMP5]], <32 x i8> [[TMP2]] +; ORIGIN-NEXT: [[TMP7:%.*]] = tail call <32 x i8> @llvm.abs.v32i8(<32 x i8> [[TMP3]], i1 false) +; ORIGIN-NEXT: [[TMP8:%.*]] = bitcast <32 x i8> [[TMP6]] to <4 x i64> +; ORIGIN-NEXT: [[TMP9:%.*]] = bitcast <32 x i8> [[TMP7]] to <4 x i64> +; ORIGIN-NEXT: store <4 x i64> [[TMP8]], ptr @__msan_retval_tls, align 8 +; ORIGIN-NEXT: store i32 [[TMP1]], ptr @__msan_retval_origin_tls, align 4 +; ORIGIN-NEXT: ret <4 x i64> [[TMP9]] ; entry: %0 = bitcast <4 x i64> %a to <32 x i8> @@ -28,19 +31,22 @@ entry: } define <4 x i64> @test_mm256_abs_epi16(<4 x i64> %a) local_unnamed_addr #0 { -; CHECK-LABEL: @test_mm256_abs_epi16( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 -; ORIGIN-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 -; CHECK: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP0]] to <16 x i16> -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[A:%.*]] to <16 x i16> -; CHECK-NEXT: [[TMP4:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP3]], i1 false) -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i16> [[TMP2]] to <4 x i64> -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i16> [[TMP4]] to <4 x i64> -; CHECK-NEXT: store <4 x i64> [[TMP5]], ptr @__msan_retval_tls, align 8 -; ORIGIN-NEXT: store i32 [[TMP1]], ptr @__msan_retval_origin_tls, align 4 -; CHECK: ret <4 x i64> [[TMP6]] +; ORIGIN-LABEL: @test_mm256_abs_epi16( +; ORIGIN-NEXT: entry: +; ORIGIN-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 +; ORIGIN-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 +; ORIGIN-NEXT: call void @llvm.donothing() +; ORIGIN-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP0]] to <16 x i16> +; ORIGIN-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[A:%.*]] to <16 x i16> +; ORIGIN-NEXT: [[TMP4:%.*]] = icmp eq <16 x i16> [[TMP3]], splat (i16 -32768) +; ORIGIN-NEXT: [[TMP5:%.*]] = select <16 x i1> 
[[TMP4]], <16 x i16> splat (i16 -1), <16 x i16> [[TMP2]] +; ORIGIN-NEXT: [[TMP6:%.*]] = select i1 false, <16 x i16> [[TMP5]], <16 x i16> [[TMP2]] +; ORIGIN-NEXT: [[TMP7:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP3]], i1 false) +; ORIGIN-NEXT: [[TMP8:%.*]] = bitcast <16 x i16> [[TMP6]] to <4 x i64> +; ORIGIN-NEXT: [[TMP9:%.*]] = bitcast <16 x i16> [[TMP7]] to <4 x i64> +; ORIGIN-NEXT: store <4 x i64> [[TMP8]], ptr @__msan_retval_tls, align 8 +; ORIGIN-NEXT: store i32 [[TMP1]], ptr @__msan_retval_origin_tls, align 4 +; ORIGIN-NEXT: ret <4 x i64> [[TMP9]] ; entry: %0 = bitcast <4 x i64> %a to <16 x i16> @@ -50,19 +56,22 @@ entry: } define <4 x i64> @test_mm256_abs_epi32(<4 x i64> %a) local_unnamed_addr #0 { -; CHECK-LABEL: @test_mm256_abs_epi32( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 -; ORIGIN-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 -; CHECK: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP0]] to <8 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[A:%.*]] to <8 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = tail call <8 x i32> @llvm.abs.v8i32(<8 x i32> [[TMP3]], i1 false) -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP2]] to <4 x i64> -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i32> [[TMP4]] to <4 x i64> -; CHECK-NEXT: store <4 x i64> [[TMP5]], ptr @__msan_retval_tls, align 8 -; ORIGIN-NEXT: store i32 [[TMP1]], ptr @__msan_retval_origin_tls, align 4 -; CHECK: ret <4 x i64> [[TMP6]] +; ORIGIN-LABEL: @test_mm256_abs_epi32( +; ORIGIN-NEXT: entry: +; ORIGIN-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 +; ORIGIN-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 +; ORIGIN-NEXT: call void @llvm.donothing() +; ORIGIN-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP0]] to <8 x i32> +; ORIGIN-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[A:%.*]] to <8 x i32> +; ORIGIN-NEXT: [[TMP4:%.*]] = icmp eq <8 x 
i32> [[TMP3]], splat (i32 -2147483648) +; ORIGIN-NEXT: [[TMP5:%.*]] = select <8 x i1> [[TMP4]], <8 x i32> splat (i32 -1), <8 x i32> [[TMP2]] +; ORIGIN-NEXT: [[TMP6:%.*]] = select i1 false, <8 x i32> [[TMP5]], <8 x i32> [[TMP2]] +; ORIGIN-NEXT: [[TMP7:%.*]] = tail call <8 x i32> @llvm.abs.v8i32(<8 x i32> [[TMP3]], i1 false) +; ORIGIN-NEXT: [[TMP8:%.*]] = bitcast <8 x i32> [[TMP6]] to <4 x i64> +; ORIGIN-NEXT: [[TMP9:%.*]] = bitcast <8 x i32> [[TMP7]] to <4 x i64> +; ORIGIN-NEXT: store <4 x i64> [[TMP8]], ptr @__msan_retval_tls, align 8 +; ORIGIN-NEXT: store i32 [[TMP1]], ptr @__msan_retval_origin_tls, align 4 +; ORIGIN-NEXT: ret <4 x i64> [[TMP9]] ; entry: %0 = bitcast <4 x i64> %a to <8 x i32> diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_sop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_sop1.s index 95a9268112920..56c8d7ec07496 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_sop1.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_sop1.s @@ -28,3 +28,15 @@ s_rfe_i64 s[2:3] s_rfe_b64 s[2:3] // GFX1250: s_rfe_i64 s[2:3] ; encoding: [0x02,0x4a,0x80,0xbe] + +s_barrier_signal -3 +// GFX1250: s_barrier_signal -3 ; encoding: [0xc3,0x4e,0x80,0xbe] + +s_get_barrier_state s3, -3 +// GFX1250: s_get_barrier_state s3, -3 ; encoding: [0xc3,0x50,0x83,0xbe] + +s_get_barrier_state s3, -4 +// GFX1250: s_get_barrier_state s3, -4 ; encoding: [0xc4,0x50,0x83,0xbe] + +s_get_barrier_state s3, m0 +// GFX1250: s_get_barrier_state s3, m0 ; encoding: [0x7d,0x50,0x83,0xbe] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_sopp.s b/llvm/test/MC/AMDGPU/gfx1250_asm_sopp.s index 6ebc17468eed6..234c2ed0de793 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_sopp.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_sopp.s @@ -1,6 +1,26 @@ // RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1250 %s | FileCheck --check-prefix=GFX1250 %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX12-ERR --implicit-check-not=error: -strict-whitespace %s +s_wait_asynccnt 0x1234 +// GFX1250: 
[0x34,0x12,0xca,0xbf] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +s_wait_asynccnt 0xc1d1 +// GFX1250: [0xd1,0xc1,0xca,0xbf] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +s_wait_tensorcnt 0x0 +// GFX1250: [0x00,0x00,0xcb,0xbf] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +s_wait_tensorcnt 0x1 +// GFX1250: [0x01,0x00,0xcb,0xbf] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +s_wait_tensorcnt 0x3 +// GFX1250: [0x03,0x00,0xcb,0xbf] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + s_wait_xcnt 0x0 // GFX1250: [0x00,0x00,0xc5,0xbf] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop2.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop2.s new file mode 100644 index 0000000000000..cc14e4caf851e --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop2.s @@ -0,0 +1,163 @@ +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck --check-prefixes=GFX1250 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 %s 2>&1 | FileCheck --check-prefix=GFX1200-ERR --implicit-check-not=error: %s + +v_fmamk_f64 v[6:7], v[4:5], 0x405ec000, v[2:3] +// GFX1250: v_fmamk_f64 v[6:7], v[4:5], 0x405ec000, v[2:3] ; encoding: [0x04,0x05,0x0c,0x46,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmamk_f64 v[6:7], v[254:255], 0x405ec00000000000, v[2:3] +// GFX1250: v_fmamk_f64 v[6:7], v[254:255], 0x405ec000, v[2:3] ; encoding: [0xfe,0x05,0x0c,0x46,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmamk_f64 v[6:7], s[2:3], 0x405ec00012345678, v[2:3] +// GFX1250: 
v_fmamk_f64 v[6:7], s[2:3], lit64(0x405ec00012345678), v[2:3] ; encoding: [0x02,0x04,0x0c,0x46,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmamk_f64 v[6:7], vcc, 0x405ec000, v[2:3] +// GFX1250: v_fmamk_f64 v[6:7], vcc, 0x405ec000, v[2:3] ; encoding: [0x6a,0x04,0x0c,0x46,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmamk_f64 v[6:7], exec, 0x405ec000, v[2:3] +// GFX1250: v_fmamk_f64 v[6:7], exec, 0x405ec000, v[2:3] ; encoding: [0x7e,0x04,0x0c,0x46,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmamk_f64 v[6:7], null, 0x405ec000, v[2:3] +// GFX1250: v_fmamk_f64 v[6:7], null, 0x405ec000, v[2:3] ; encoding: [0x7c,0x04,0x0c,0x46,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmamk_f64 v[6:7], -1, 0x405ec000, v[2:3] +// GFX1250: v_fmamk_f64 v[6:7], -1, 0x405ec000, v[2:3] ; encoding: [0xc1,0x04,0x0c,0x46,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmamk_f64 v[6:7], 0.5, 0x405ec000, v[2:3] +// GFX1250: v_fmamk_f64 v[6:7], 0.5, 0x405ec000, v[2:3] ; encoding: [0xf0,0x04,0x0c,0x46,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmamk_f64 v[6:7], src_scc, 0x405ec000, v[2:3] +// GFX1250: v_fmamk_f64 v[6:7], src_scc, 0x405ec000, v[2:3] ; encoding: [0xfd,0x04,0x0c,0x46,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmamk_f64 v[254:255], 0x405ec000, 0x405ec000, v[254:255] +// GFX1250: v_fmamk_f64 v[254:255], 0x405ec000, 0x405ec000, v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x47,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] +// GFX1200-ERR: 
:[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmamk_f64 v[254:255], 0x405ec00012345678, 0x405ec00012345678, v[254:255] +// GFX1250: v_fmamk_f64 v[254:255], lit64(0x405ec00012345678), lit64(0x405ec00012345678), v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x47,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmamk_f64 v[254:255], 123.0, 0x405ec000, v[2:3] +// GFX1250: v_fmamk_f64 v[254:255], 0x405ec000, 0x405ec000, v[2:3] ; encoding: [0xfe,0x04,0xfc,0x47,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmamk_f64 v[4:5], v[2:3], 123.1, v[6:7] +// GFX1250: v_fmamk_f64 v[4:5], v[2:3], lit64(0x405ec66666666666), v[6:7] ; encoding: [0x02,0x0d,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmamk_f64 v[4:5], 0x405ec66666666666, 123.1, v[6:7] +// GFX1250: v_fmamk_f64 v[4:5], lit64(0x405ec66666666666), lit64(0x405ec66666666666), v[6:7] ; encoding: [0xfe,0x0c,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmamk_f64 v[4:5], 123.1, 123.1, v[8:9] +// GFX1250: v_fmamk_f64 v[4:5], lit64(0x405ec66666666666), lit64(0x405ec66666666666), v[8:9] ; encoding: [0xfe,0x10,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmamk_f64 v[4:5], 1.0, 1.0, v[6:7] +// GFX1250: v_fmamk_f64 v[4:5], 1.0, 0x3ff00000, v[6:7] ; encoding: [0xf2,0x0c,0x08,0x46,0x00,0x00,0x00,0x00,0x00,0x00,0xf0,0x3f] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmamk_f64 v[4:5], 1e-320, 1e-320, v[6:7] +// GFX1250: v_fmamk_f64 v[4:5], lit64(0x7e8), lit64(0x7e8), v[6:7] ; encoding: [0xfe,0x0c,0x08,0x46,0xe8,0x07,0x00,0x00,0x00,0x00,0x00,0x00] +// GFX1200-ERR: 
:[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmamk_f64 v[4:5], lit64(0x7e8), 1e-320, v[8:9] +// GFX1250: v_fmamk_f64 v[4:5], lit64(0x7e8), lit64(0x7e8), v[8:9] ; encoding: [0xfe,0x10,0x08,0x46,0xe8,0x07,0x00,0x00,0x00,0x00,0x00,0x00] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmamk_f64 v[4:5], lit64(0x7e8), lit64(0x7e8), v[8:9] +// GFX1250: v_fmamk_f64 v[4:5], lit64(0x7e8), lit64(0x7e8), v[8:9] ; encoding: [0xfe,0x10,0x08,0x46,0xe8,0x07,0x00,0x00,0x00,0x00,0x00,0x00] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmaak_f64 v[6:7], v[4:5], v[8:9], 0x405ec000 +// GFX1250: v_fmaak_f64 v[6:7], v[4:5], v[8:9], 0x405ec000 ; encoding: [0x04,0x11,0x0c,0x48,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmaak_f64 v[6:7], v[254:255], v[8:9], 0x405ec00000000000 +// GFX1250: v_fmaak_f64 v[6:7], v[254:255], v[8:9], 0x405ec000 ; encoding: [0xfe,0x11,0x0c,0x48,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmaak_f64 v[6:7], s[2:3], v[8:9], 0x405ec00012345678 +// GFX1250: v_fmaak_f64 v[6:7], s[2:3], v[8:9], lit64(0x405ec00012345678) ; encoding: [0x02,0x10,0x0c,0x48,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmaak_f64 v[6:7], vcc, v[8:9], 0x405ec000 +// GFX1250: v_fmaak_f64 v[6:7], vcc, v[8:9], 0x405ec000 ; encoding: [0x6a,0x10,0x0c,0x48,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmaak_f64 v[6:7], exec, v[8:9], 0x405ec000 +// GFX1250: v_fmaak_f64 v[6:7], exec, v[8:9], 0x405ec000 ; encoding: [0x7e,0x10,0x0c,0x48,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmaak_f64 v[6:7], null, v[8:9], 0x405ec000 
+// GFX1250: v_fmaak_f64 v[6:7], null, v[8:9], 0x405ec000 ; encoding: [0x7c,0x10,0x0c,0x48,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmaak_f64 v[6:7], -1, v[8:9], 0x405ec000 +// GFX1250: v_fmaak_f64 v[6:7], -1, v[8:9], 0x405ec000 ; encoding: [0xc1,0x10,0x0c,0x48,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmaak_f64 v[6:7], 0.5, v[8:9], 0x405ec000 +// GFX1250: v_fmaak_f64 v[6:7], 0.5, v[8:9], 0x405ec000 ; encoding: [0xf0,0x10,0x0c,0x48,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmaak_f64 v[6:7], src_scc, v[8:9], 0x405ec000 +// GFX1250: v_fmaak_f64 v[6:7], src_scc, v[8:9], 0x405ec000 ; encoding: [0xfd,0x10,0x0c,0x48,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmaak_f64 v[254:255], 0x405ec000, v[254:255], 0x405ec000 +// GFX1250: v_fmaak_f64 v[254:255], 0x405ec000, v[254:255], 0x405ec000 ; encoding: [0xfe,0xfc,0xfd,0x49,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmaak_f64 v[254:255], 0x405ec00000000000, v[254:255], 0x405ec00000000000 +// GFX1250: v_fmaak_f64 v[254:255], 0x405ec000, v[254:255], 0x405ec000 ; encoding: [0xfe,0xfc,0xfd,0x49,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmaak_f64 v[254:255], 0x405ec00012345678, v[254:255], 0x405ec00012345678 +// GFX1250: v_fmaak_f64 v[254:255], lit64(0x405ec00012345678), v[254:255], lit64(0x405ec00012345678) ; encoding: [0xfe,0xfc,0xfd,0x49,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmaak_f64 v[254:255], lit64(0x405ec00012345678), v[254:255], lit(0x405ec00012345678) +// 
GFX1250: v_fmaak_f64 v[254:255], lit64(0x405ec00012345678), v[254:255], lit64(0x405ec00012345678) ; encoding: [0xfe,0xfc,0xfd,0x49,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmaak_f64 v[254:255], 123.0, v[2:3], 0x405ec000 +// GFX1250: v_fmaak_f64 v[254:255], 0x405ec000, v[2:3], 0x405ec000 ; encoding: [0xfe,0x04,0xfc,0x49,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmaak_f64 v[4:5], v[2:3], v[2:3], 123.1 +// GFX1250: v_fmaak_f64 v[4:5], v[2:3], v[2:3], lit64(0x405ec66666666666) ; encoding: [0x02,0x05,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmaak_f64 v[4:5], 0x405ec66666666666, v[6:7], 123.1 +// GFX1250: v_fmaak_f64 v[4:5], lit64(0x405ec66666666666), v[6:7], lit64(0x405ec66666666666) ; encoding: [0xfe,0x0c,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmaak_f64 v[4:5], 123.1, v[8:9], 123.1 +// GFX1250: v_fmaak_f64 v[4:5], lit64(0x405ec66666666666), v[8:9], lit64(0x405ec66666666666) ; encoding: [0xfe,0x10,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmaak_f64 v[4:5], 1.0, v[8:9], 1.0 +// GFX1250: v_fmaak_f64 v[4:5], 1.0, v[8:9], 0x3ff00000 ; encoding: [0xf2,0x10,0x08,0x48,0x00,0x00,0x00,0x00,0x00,0x00,0xf0,0x3f] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmaak_f64 v[4:5], 1e-320, v[6:7], 1e-320 +// GFX1250: v_fmaak_f64 v[4:5], lit64(0x7e8), v[6:7], lit64(0x7e8) ; encoding: [0xfe,0x0c,0x08,0x48,0xe8,0x07,0x00,0x00,0x00,0x00,0x00,0x00] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmaak_f64 v[4:5], lit64(0x7e8), v[8:9], 1e-320 +// GFX1250: v_fmaak_f64 v[4:5], 
lit64(0x7e8), v[8:9], lit64(0x7e8) ; encoding: [0xfe,0x10,0x08,0x48,0xe8,0x07,0x00,0x00,0x00,0x00,0x00,0x00] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_fmaak_f64 v[4:5], lit64(0x7e8), v[8:9], lit64(0x7e8) +// GFX1250: v_fmaak_f64 v[4:5], lit64(0x7e8), v[8:9], lit64(0x7e8) ; encoding: [0xfe,0x10,0x08,0x48,0xe8,0x07,0x00,0x00,0x00,0x00,0x00,0x00] +// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop2_err.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop2_err.s new file mode 100644 index 0000000000000..b68306d60cf8c --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop2_err.s @@ -0,0 +1,21 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX1250-ERR --implicit-check-not=error: --strict-whitespace %s + +v_fmaak_f32 v4, v2, v6, 3 row_share:1 +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_fmaak_f32 v4, v2, v6, 3 row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_fmamk_f32 v4, v2, 3, v6 row_share:1 +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_fmamk_f32 v4, v2, 3, v6 row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_fmaak_f16 v4, v2, v6, 3 row_share:1 +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_fmaak_f16 v4, v2, v6, 3 row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_fmamk_f16 v4, v2, 3, v6 row_share:1 +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. 
+// GFX1250-ERR-NEXT:{{^}}v_fmamk_f16 v4, v2, 3, v6 row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vopd.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vopd.s new file mode 100644 index 0000000000000..9c4c57602ecd3 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vopd.s @@ -0,0 +1,16276 @@ +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefixes=W64-ERR --implicit-check-not=error: %s + +v_dual_add_f32 v255, v4, v2 :: v_dual_add_f32 v6, v1, v3 +// GFX12: v_dual_add_f32 v255, v4, v2 :: v_dual_add_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x08,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v1, v2 :: v_dual_add_f32 v6, v255, v3 +// GFX12: v_dual_add_f32 v255, v1, v2 :: v_dual_add_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x08,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v255, v2 :: v_dual_add_f32 v6, v2, v3 +// GFX12: v_dual_add_f32 v255, v255, v2 :: v_dual_add_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x08,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v2, v2 :: v_dual_add_f32 v6, v3, v3 +// GFX12: v_dual_add_f32 v255, v2, v2 :: v_dual_add_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x08,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v3, v2 :: v_dual_add_f32 v6, v4, v3 +// GFX12: v_dual_add_f32 v255, v3, v2 :: v_dual_add_f32 v6, v4, v3 ; encoding: 
[0x03,0x05,0x08,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s105, v2 :: v_dual_add_f32 v6, s1, v3 +// GFX12: v_dual_add_f32 v255, s105, v2 :: v_dual_add_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x08,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s1, v2 :: v_dual_add_f32 v6, s105, v3 +// GFX12: v_dual_add_f32 v255, s1, v2 :: v_dual_add_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x08,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, ttmp15, v2 :: v_dual_add_f32 v6, vcc_lo, v3 +// GFX12: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_add_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x08,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_hi, v2 :: v_dual_add_f32 v6, vcc_hi, v3 +// GFX12: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_add_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x08,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_lo, v2 :: v_dual_add_f32 v6, ttmp15, v3 +// GFX12: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_add_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x08,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, m0, v2 :: v_dual_add_f32 v6, m0, v3 +// GFX12: v_dual_add_f32 v255, m0, v2 :: v_dual_add_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x08,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_add_f32 v6, exec_lo, v3 +// GFX12: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_add_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x08,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_add_f32 
v6, exec_hi, v3 +// GFX12: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_add_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x08,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_add_f32 v6, null, v3 +// GFX12: v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_add_f32 v6, null, v3 ; encoding: [0xff,0x04,0x08,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, src_scc, v2 :: v_dual_add_f32 v6, -1, v3 +// GFX12: v_dual_add_f32 v255, src_scc, v2 :: v_dual_add_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x08,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0.5, v3 :: v_dual_add_f32 v6, 0.5, v2 +// GFX12: v_dual_add_f32 v255, 0.5, v3 :: v_dual_add_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x08,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, -1, v4 :: v_dual_add_f32 v6, src_scc, v5 +// GFX12: v_dual_add_f32 v255, -1, v4 :: v_dual_add_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x08,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v6, null, v5 :: v_dual_add_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_add_f32 v6, null, v5 :: v_dual_add_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x08,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v4, v2 :: v_dual_add_nc_u32 v6, v1, v3 +// GFX12: v_dual_add_f32 v255, v4, v2 :: v_dual_add_nc_u32 v6, v1, v3 ; encoding: [0x04,0x05,0x20,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v1, v2 :: v_dual_add_nc_u32 v6, v255, v3 +// GFX12: v_dual_add_f32 v255, v1, v2 :: v_dual_add_nc_u32 v6, v255, v3 ; encoding: 
[0x01,0x05,0x20,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v255, v2 :: v_dual_add_nc_u32 v6, v2, v3 +// GFX12: v_dual_add_f32 v255, v255, v2 :: v_dual_add_nc_u32 v6, v2, v3 ; encoding: [0xff,0x05,0x20,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v2, v2 :: v_dual_add_nc_u32 v6, v3, v3 +// GFX12: v_dual_add_f32 v255, v2, v2 :: v_dual_add_nc_u32 v6, v3, v3 ; encoding: [0x02,0x05,0x20,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v3, v2 :: v_dual_add_nc_u32 v6, v4, v3 +// GFX12: v_dual_add_f32 v255, v3, v2 :: v_dual_add_nc_u32 v6, v4, v3 ; encoding: [0x03,0x05,0x20,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s105, v2 :: v_dual_add_nc_u32 v6, s1, v3 +// GFX12: v_dual_add_f32 v255, s105, v2 :: v_dual_add_nc_u32 v6, s1, v3 ; encoding: [0x69,0x04,0x20,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s1, v2 :: v_dual_add_nc_u32 v6, s105, v3 +// GFX12: v_dual_add_f32 v255, s1, v2 :: v_dual_add_nc_u32 v6, s105, v3 ; encoding: [0x01,0x04,0x20,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v6, vcc_lo, v3 +// GFX12: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x20,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v6, vcc_hi, v3 +// GFX12: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x20,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_lo, v2 :: 
v_dual_add_nc_u32 v6, ttmp15, v3 +// GFX12: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x20,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, m0, v2 :: v_dual_add_nc_u32 v6, m0, v3 +// GFX12: v_dual_add_f32 v255, m0, v2 :: v_dual_add_nc_u32 v6, m0, v3 ; encoding: [0x7d,0x04,0x20,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v6, exec_lo, v3 +// GFX12: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x20,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v6, exec_hi, v3 +// GFX12: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x20,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_add_nc_u32 v6, null, v3 +// GFX12: v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_add_nc_u32 v6, null, v3 ; encoding: [0xff,0x04,0x20,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, src_scc, v2 :: v_dual_add_nc_u32 v6, -1, v3 +// GFX12: v_dual_add_f32 v255, src_scc, v2 :: v_dual_add_nc_u32 v6, -1, v3 ; encoding: [0xfd,0x04,0x20,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0.5, v3 :: v_dual_add_nc_u32 v6, 0.5, v2 +// GFX12: v_dual_add_f32 v255, 0.5, v3 :: v_dual_add_nc_u32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x20,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, -1, v4 :: v_dual_add_nc_u32 v6, src_scc, v5 +// GFX12: v_dual_add_f32 v255, -1, v4 :: v_dual_add_nc_u32 v6, 
src_scc, v5 ; encoding: [0xc1,0x08,0x20,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v6, null, v5 :: v_dual_add_nc_u32 v255, 0xaf123456, v4 +// GFX12: v_dual_add_f32 v6, null, v5 :: v_dual_add_nc_u32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x20,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v4, v2 :: v_dual_cndmask_b32 v6, v1, v3 +// GFX12: v_dual_add_f32 v255, v4, v2 :: v_dual_cndmask_b32 v6, v1, v3 ; encoding: [0x04,0x05,0x12,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v1, v2 :: v_dual_cndmask_b32 v6, v255, v3 +// GFX12: v_dual_add_f32 v255, v1, v2 :: v_dual_cndmask_b32 v6, v255, v3 ; encoding: [0x01,0x05,0x12,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v255, v2 :: v_dual_cndmask_b32 v6, v2, v3 +// GFX12: v_dual_add_f32 v255, v255, v2 :: v_dual_cndmask_b32 v6, v2, v3 ; encoding: [0xff,0x05,0x12,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v2, v2 :: v_dual_cndmask_b32 v6, v3, v3 +// GFX12: v_dual_add_f32 v255, v2, v2 :: v_dual_cndmask_b32 v6, v3, v3 ; encoding: [0x02,0x05,0x12,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v3, v2 :: v_dual_cndmask_b32 v6, v4, v3 +// GFX12: v_dual_add_f32 v255, v3, v2 :: v_dual_cndmask_b32 v6, v4, v3 ; encoding: [0x03,0x05,0x12,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s105, v2 :: v_dual_cndmask_b32 v6, s105, v3 +// GFX12: v_dual_add_f32 v255, s105, v2 :: v_dual_cndmask_b32 v6, s105, v3 ; encoding: [0x69,0x04,0x12,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_add_f32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s1, v3 +// GFX12: v_dual_add_f32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s1, v3 ; encoding: [0x01,0x04,0x12,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v6, ttmp15, v3 +// GFX12: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x12,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v6, exec_hi, v3 +// GFX12: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x12,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v6, exec_lo, v3 +// GFX12: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x12,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, m0, v2 :: v_dual_cndmask_b32 v6, m0, v3 +// GFX12: v_dual_add_f32 v255, m0, v2 :: v_dual_cndmask_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0x12,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v6, vcc_hi, v3 +// GFX12: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x12,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v6, vcc_lo, v3 +// GFX12: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x12,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_cndmask_b32 v6, null, v3 +// GFX12: 
v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_cndmask_b32 v6, null, v3 ; encoding: [0xff,0x04,0x12,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, src_scc, v2 :: v_dual_cndmask_b32 v6, -1, v3 +// GFX12: v_dual_add_f32 v255, src_scc, v2 :: v_dual_cndmask_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0x12,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0.5, v3 :: v_dual_cndmask_b32 v6, 0.5, v2 +// GFX12: v_dual_add_f32 v255, 0.5, v3 :: v_dual_cndmask_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x12,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, -1, v4 :: v_dual_cndmask_b32 v6, src_scc, v5 +// GFX12: v_dual_add_f32 v255, -1, v4 :: v_dual_cndmask_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x12,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v6, null, v5 :: v_dual_cndmask_b32 v255, 0xaf123456, v4 +// GFX12: v_dual_add_f32 v6, null, v5 :: v_dual_cndmask_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x12,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v4, v2 :: v_dual_fmaak_f32 v6, v1, v3, 0xaf123456 +// GFX12: v_dual_add_f32 v255, v4, v2 :: v_dual_fmaak_f32 v6, v1, v3, 0xaf123456 ; encoding: [0x04,0x05,0x02,0xc9,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v1, v2 :: v_dual_fmaak_f32 v6, v255, v3, 0xaf123456 +// GFX12: v_dual_add_f32 v255, v1, v2 :: v_dual_fmaak_f32 v6, v255, v3, 0xaf123456 ; encoding: [0x01,0x05,0x02,0xc9,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v255, v2 :: v_dual_fmaak_f32 v6, v2, v3, 0xaf123456 +// 
GFX12: v_dual_add_f32 v255, v255, v2 :: v_dual_fmaak_f32 v6, v2, v3, 0xaf123456 ; encoding: [0xff,0x05,0x02,0xc9,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v2, v2 :: v_dual_fmaak_f32 v6, v3, v3, 0xaf123456 +// GFX12: v_dual_add_f32 v255, v2, v2 :: v_dual_fmaak_f32 v6, v3, v3, 0xaf123456 ; encoding: [0x02,0x05,0x02,0xc9,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v3, v2 :: v_dual_fmaak_f32 v6, v4, v3, 0xaf123456 +// GFX12: v_dual_add_f32 v255, v3, v2 :: v_dual_fmaak_f32 v6, v4, v3, 0xaf123456 ; encoding: [0x03,0x05,0x02,0xc9,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s105, v2 :: v_dual_fmaak_f32 v6, s105, v3, 0xaf123456 +// GFX12: v_dual_add_f32 v255, s105, v2 :: v_dual_fmaak_f32 v6, s105, v3, 0xaf123456 ; encoding: [0x69,0x04,0x02,0xc9,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s1, v2 :: v_dual_fmaak_f32 v6, s1, v3, 0xaf123456 +// GFX12: v_dual_add_f32 v255, s1, v2 :: v_dual_fmaak_f32 v6, s1, v3, 0xaf123456 ; encoding: [0x01,0x04,0x02,0xc9,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, ttmp15, v2 :: v_dual_fmaak_f32 v6, ttmp15, v3, 0xaf123456 +// GFX12: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_fmaak_f32 v6, ttmp15, v3, 0xaf123456 ; encoding: [0x7b,0x04,0x02,0xc9,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_hi, v2 :: v_dual_fmaak_f32 v6, exec_hi, v3, 0xaf123456 +// GFX12: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_fmaak_f32 v6, exec_hi, v3, 0xaf123456 ; encoding: [0x7f,0x04,0x02,0xc9,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// 
W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_lo, v2 :: v_dual_fmaak_f32 v6, exec_lo, v3, 0xaf123456 +// GFX12: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_fmaak_f32 v6, exec_lo, v3, 0xaf123456 ; encoding: [0x7e,0x04,0x02,0xc9,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, m0, v2 :: v_dual_fmaak_f32 v6, m0, v3, 0xaf123456 +// GFX12: v_dual_add_f32 v255, m0, v2 :: v_dual_fmaak_f32 v6, m0, v3, 0xaf123456 ; encoding: [0x7d,0x04,0x02,0xc9,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_fmaak_f32 v6, vcc_hi, v3, 0xaf123456 +// GFX12: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_fmaak_f32 v6, vcc_hi, v3, 0xaf123456 ; encoding: [0x6b,0x04,0x02,0xc9,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_fmaak_f32 v6, vcc_lo, v3, 0xaf123456 +// GFX12: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_fmaak_f32 v6, vcc_lo, v3, 0xaf123456 ; encoding: [0x6a,0x04,0x02,0xc9,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_fmaak_f32 v6, null, v3, 0xaf123456 +// GFX12: v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_fmaak_f32 v6, null, v3, 0xaf123456 ; encoding: [0xff,0x04,0x02,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, src_scc, v2 :: v_dual_fmaak_f32 v6, -1, v3, 0xaf123456 +// GFX12: v_dual_add_f32 v255, src_scc, v2 :: v_dual_fmaak_f32 v6, -1, v3, 0xaf123456 ; encoding: [0xfd,0x04,0x02,0xc9,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0.5, v3 :: v_dual_fmaak_f32 v6, 
0.5, v2, 0xaf123456 +// GFX12: v_dual_add_f32 v255, 0.5, v3 :: v_dual_fmaak_f32 v6, 0.5, v2, 0xaf123456 ; encoding: [0xf0,0x06,0x02,0xc9,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, -1, v4 :: v_dual_fmaak_f32 v6, src_scc, v5, 0xaf123456 +// GFX12: v_dual_add_f32 v255, -1, v4 :: v_dual_fmaak_f32 v6, src_scc, v5, 0xaf123456 ; encoding: [0xc1,0x08,0x02,0xc9,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v6, null, v5 :: v_dual_fmaak_f32 v255, 0xaf123456, v4, 0xaf123456 +// GFX12: v_dual_add_f32 v6, null, v5 :: v_dual_fmaak_f32 v255, 0xaf123456, v4, 0xaf123456 ; encoding: [0x7c,0x0a,0x02,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v4, v2 :: v_dual_fmac_f32 v6, v1, v3 +// GFX12: v_dual_add_f32 v255, v4, v2 :: v_dual_fmac_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x00,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v1, v2 :: v_dual_fmac_f32 v6, v255, v3 +// GFX12: v_dual_add_f32 v255, v1, v2 :: v_dual_fmac_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x00,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v255, v2 :: v_dual_fmac_f32 v6, v2, v3 +// GFX12: v_dual_add_f32 v255, v255, v2 :: v_dual_fmac_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x00,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v2, v2 :: v_dual_fmac_f32 v6, v3, v3 +// GFX12: v_dual_add_f32 v255, v2, v2 :: v_dual_fmac_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x00,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v3, v2 :: v_dual_fmac_f32 v6, v4, v3 +// GFX12: v_dual_add_f32 v255, v3, v2 :: 
v_dual_fmac_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x00,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s105, v2 :: v_dual_fmac_f32 v6, s1, v3 +// GFX12: v_dual_add_f32 v255, s105, v2 :: v_dual_fmac_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x00,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s1, v2 :: v_dual_fmac_f32 v6, s105, v3 +// GFX12: v_dual_add_f32 v255, s1, v2 :: v_dual_fmac_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x00,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, ttmp15, v2 :: v_dual_fmac_f32 v6, vcc_lo, v3 +// GFX12: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_fmac_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x00,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_hi, v2 :: v_dual_fmac_f32 v6, vcc_hi, v3 +// GFX12: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_fmac_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x00,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_lo, v2 :: v_dual_fmac_f32 v6, ttmp15, v3 +// GFX12: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_fmac_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x00,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, m0, v2 :: v_dual_fmac_f32 v6, m0, v3 +// GFX12: v_dual_add_f32 v255, m0, v2 :: v_dual_fmac_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x00,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v6, exec_lo, v3 +// GFX12: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x00,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 
+ +v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v6, exec_hi, v3 +// GFX12: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x00,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_fmac_f32 v6, null, v3 +// GFX12: v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_fmac_f32 v6, null, v3 ; encoding: [0xff,0x04,0x00,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, src_scc, v2 :: v_dual_fmac_f32 v6, -1, v3 +// GFX12: v_dual_add_f32 v255, src_scc, v2 :: v_dual_fmac_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x00,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0.5, v3 :: v_dual_fmac_f32 v6, 0.5, v2 +// GFX12: v_dual_add_f32 v255, 0.5, v3 :: v_dual_fmac_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x00,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, -1, v4 :: v_dual_fmac_f32 v6, src_scc, v5 +// GFX12: v_dual_add_f32 v255, -1, v4 :: v_dual_fmac_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x00,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v6, null, v5 :: v_dual_fmac_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_add_f32 v6, null, v5 :: v_dual_fmac_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x00,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v4, v255 :: v_dual_fmamk_f32 v6, v1, 0xaf123456, v255 +// GFX12: v_dual_add_f32 v255, v4, v255 :: v_dual_fmamk_f32 v6, v1, 0xaf123456, v255 ; encoding: [0x04,0xff,0x05,0xc9,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v1, v255 :: v_dual_fmamk_f32 
v6, v255, 0xaf123456, v255 +// GFX12: v_dual_add_f32 v255, v1, v255 :: v_dual_fmamk_f32 v6, v255, 0xaf123456, v255 ; encoding: [0x01,0xff,0x05,0xc9,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v255, v255 :: v_dual_fmamk_f32 v6, v2, 0xaf123456, v255 +// GFX12: v_dual_add_f32 v255, v255, v255 :: v_dual_fmamk_f32 v6, v2, 0xaf123456, v255 ; encoding: [0xff,0xff,0x05,0xc9,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v2, v255 :: v_dual_fmamk_f32 v6, v3, 0xaf123456, v255 +// GFX12: v_dual_add_f32 v255, v2, v255 :: v_dual_fmamk_f32 v6, v3, 0xaf123456, v255 ; encoding: [0x02,0xff,0x05,0xc9,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v3, v255 :: v_dual_fmamk_f32 v6, v4, 0xaf123456, v255 +// GFX12: v_dual_add_f32 v255, v3, v255 :: v_dual_fmamk_f32 v6, v4, 0xaf123456, v255 ; encoding: [0x03,0xff,0x05,0xc9,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s105, v255 :: v_dual_fmamk_f32 v6, s105, 0xaf123456, v255 +// GFX12: v_dual_add_f32 v255, s105, v255 :: v_dual_fmamk_f32 v6, s105, 0xaf123456, v255 ; encoding: [0x69,0xfe,0x05,0xc9,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s1, v255 :: v_dual_fmamk_f32 v6, s1, 0xaf123456, v255 +// GFX12: v_dual_add_f32 v255, s1, v255 :: v_dual_fmamk_f32 v6, s1, 0xaf123456, v255 ; encoding: [0x01,0xfe,0x05,0xc9,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, ttmp15, v255 :: v_dual_fmamk_f32 v6, ttmp15, 0xaf123456, v255 +// GFX12: v_dual_add_f32 v255, ttmp15, v255 :: v_dual_fmamk_f32 v6, ttmp15, 0xaf123456, v255 ; encoding: 
[0x7b,0xfe,0x05,0xc9,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_hi, v255 :: v_dual_fmamk_f32 v6, exec_hi, 0xaf123456, v255 +// GFX12: v_dual_add_f32 v255, exec_hi, v255 :: v_dual_fmamk_f32 v6, exec_hi, 0xaf123456, v255 ; encoding: [0x7f,0xfe,0x05,0xc9,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_lo, v255 :: v_dual_fmamk_f32 v6, exec_lo, 0xaf123456, v255 +// GFX12: v_dual_add_f32 v255, exec_lo, v255 :: v_dual_fmamk_f32 v6, exec_lo, 0xaf123456, v255 ; encoding: [0x7e,0xfe,0x05,0xc9,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, m0, v255 :: v_dual_fmamk_f32 v6, m0, 0xaf123456, v255 +// GFX12: v_dual_add_f32 v255, m0, v255 :: v_dual_fmamk_f32 v6, m0, 0xaf123456, v255 ; encoding: [0x7d,0xfe,0x05,0xc9,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_hi, v255 :: v_dual_fmamk_f32 v6, vcc_hi, 0xaf123456, v255 +// GFX12: v_dual_add_f32 v255, vcc_hi, v255 :: v_dual_fmamk_f32 v6, vcc_hi, 0xaf123456, v255 ; encoding: [0x6b,0xfe,0x05,0xc9,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_lo, v255 :: v_dual_fmamk_f32 v6, vcc_lo, 0xaf123456, v255 +// GFX12: v_dual_add_f32 v255, vcc_lo, v255 :: v_dual_fmamk_f32 v6, vcc_lo, 0xaf123456, v255 ; encoding: [0x6a,0xfe,0x05,0xc9,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, null, 0xaf123456, v255 +// GFX12: v_dual_add_f32 v255, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, null, 0xaf123456, v255 ; encoding: [0xff,0xfe,0x05,0xc9,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// 
W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, src_scc, v255 :: v_dual_fmamk_f32 v6, -1, 0xaf123456, v255 +// GFX12: v_dual_add_f32 v255, src_scc, v255 :: v_dual_fmamk_f32 v6, -1, 0xaf123456, v255 ; encoding: [0xfd,0xfe,0x05,0xc9,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0.5, v3 :: v_dual_fmamk_f32 v6, 0.5, 0xaf123456, v255 +// GFX12: v_dual_add_f32 v255, 0.5, v3 :: v_dual_fmamk_f32 v6, 0.5, 0xaf123456, v255 ; encoding: [0xf0,0x06,0x04,0xc9,0xf0,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, -1, v4 :: v_dual_fmamk_f32 v6, src_scc, 0xaf123456, v255 +// GFX12: v_dual_add_f32 v255, -1, v4 :: v_dual_fmamk_f32 v6, src_scc, 0xaf123456, v255 ; encoding: [0xc1,0x08,0x04,0xc9,0xfd,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v6, null, v5 :: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v4 +// GFX12: v_dual_add_f32 v6, null, v5 :: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x04,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v4, v2 :: v_dual_lshlrev_b32 v6, v1, v3 +// GFX12: v_dual_add_f32 v255, v4, v2 :: v_dual_lshlrev_b32 v6, v1, v3 ; encoding: [0x04,0x05,0x22,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v1, v2 :: v_dual_lshlrev_b32 v6, v255, v3 +// GFX12: v_dual_add_f32 v255, v1, v2 :: v_dual_lshlrev_b32 v6, v255, v3 ; encoding: [0x01,0x05,0x22,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v255, v2 :: v_dual_lshlrev_b32 v6, v2, v3 +// GFX12: v_dual_add_f32 v255, v255, v2 :: v_dual_lshlrev_b32 v6, v2, v3 ; encoding: 
[0xff,0x05,0x22,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v2, v2 :: v_dual_lshlrev_b32 v6, v3, v3 +// GFX12: v_dual_add_f32 v255, v2, v2 :: v_dual_lshlrev_b32 v6, v3, v3 ; encoding: [0x02,0x05,0x22,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v3, v2 :: v_dual_lshlrev_b32 v6, v4, v3 +// GFX12: v_dual_add_f32 v255, v3, v2 :: v_dual_lshlrev_b32 v6, v4, v3 ; encoding: [0x03,0x05,0x22,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s105, v2 :: v_dual_lshlrev_b32 v6, s1, v3 +// GFX12: v_dual_add_f32 v255, s105, v2 :: v_dual_lshlrev_b32 v6, s1, v3 ; encoding: [0x69,0x04,0x22,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s1, v2 :: v_dual_lshlrev_b32 v6, s105, v3 +// GFX12: v_dual_add_f32 v255, s1, v2 :: v_dual_lshlrev_b32 v6, s105, v3 ; encoding: [0x01,0x04,0x22,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v6, vcc_lo, v3 +// GFX12: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x22,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v6, vcc_hi, v3 +// GFX12: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x22,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v6, ttmp15, v3 +// GFX12: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x22,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_add_f32 v255, m0, v2 :: v_dual_lshlrev_b32 v6, m0, v3 +// GFX12: v_dual_add_f32 v255, m0, v2 :: v_dual_lshlrev_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0x22,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v6, exec_lo, v3 +// GFX12: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x22,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v6, exec_hi, v3 +// GFX12: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x22,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_lshlrev_b32 v6, null, v3 +// GFX12: v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_lshlrev_b32 v6, null, v3 ; encoding: [0xff,0x04,0x22,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v6, -1, v3 +// GFX12: v_dual_add_f32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0x22,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v6, 0.5, v2 +// GFX12: v_dual_add_f32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x22,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, -1, v4 :: v_dual_lshlrev_b32 v6, src_scc, v5 +// GFX12: v_dual_add_f32 v255, -1, v4 :: v_dual_lshlrev_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x22,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v6, null, v5 :: v_dual_lshlrev_b32 v255, 0xaf123456, v4 +// GFX12: 
v_dual_add_f32 v6, null, v5 :: v_dual_lshlrev_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x22,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v4, v2 :: v_dual_max_num_f32 v6, v1, v3 +// GFX12: v_dual_add_f32 v255, v4, v2 :: v_dual_max_num_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x14,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v1, v2 :: v_dual_max_num_f32 v6, v255, v3 +// GFX12: v_dual_add_f32 v255, v1, v2 :: v_dual_max_num_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x14,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v255, v2 :: v_dual_max_num_f32 v6, v2, v3 +// GFX12: v_dual_add_f32 v255, v255, v2 :: v_dual_max_num_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x14,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v2, v2 :: v_dual_max_num_f32 v6, v3, v3 +// GFX12: v_dual_add_f32 v255, v2, v2 :: v_dual_max_num_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x14,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v3, v2 :: v_dual_max_num_f32 v6, v4, v3 +// GFX12: v_dual_add_f32 v255, v3, v2 :: v_dual_max_num_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x14,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s105, v2 :: v_dual_max_num_f32 v6, s1, v3 +// GFX12: v_dual_add_f32 v255, s105, v2 :: v_dual_max_num_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x14,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s1, v2 :: v_dual_max_num_f32 v6, s105, v3 +// GFX12: v_dual_add_f32 v255, s1, v2 :: v_dual_max_num_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x14,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, ttmp15, v2 :: v_dual_max_num_f32 v6, vcc_lo, v3 +// GFX12: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_max_num_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x14,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_hi, v2 :: v_dual_max_num_f32 v6, vcc_hi, v3 +// GFX12: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_max_num_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x14,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_lo, v2 :: v_dual_max_num_f32 v6, ttmp15, v3 +// GFX12: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_max_num_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x14,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, m0, v2 :: v_dual_max_num_f32 v6, m0, v3 +// GFX12: v_dual_add_f32 v255, m0, v2 :: v_dual_max_num_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x14,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v6, exec_lo, v3 +// GFX12: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x14,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v6, exec_hi, v3 +// GFX12: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x14,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_max_num_f32 v6, null, v3 +// GFX12: v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_max_num_f32 v6, null, v3 ; encoding: [0xff,0x04,0x14,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_add_f32 v255, src_scc, v2 :: v_dual_max_num_f32 v6, -1, v3 +// GFX12: v_dual_add_f32 v255, src_scc, v2 :: v_dual_max_num_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x14,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0.5, v3 :: v_dual_max_num_f32 v6, 0.5, v2 +// GFX12: v_dual_add_f32 v255, 0.5, v3 :: v_dual_max_num_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x14,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, -1, v4 :: v_dual_max_num_f32 v6, src_scc, v5 +// GFX12: v_dual_add_f32 v255, -1, v4 :: v_dual_max_num_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x14,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v6, null, v5 :: v_dual_max_num_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_add_f32 v6, null, v5 :: v_dual_max_num_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x14,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v4, v2 :: v_dual_min_num_f32 v6, v1, v3 +// GFX12: v_dual_add_f32 v255, v4, v2 :: v_dual_min_num_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x16,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v1, v2 :: v_dual_min_num_f32 v6, v255, v3 +// GFX12: v_dual_add_f32 v255, v1, v2 :: v_dual_min_num_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x16,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v255, v2 :: v_dual_min_num_f32 v6, v2, v3 +// GFX12: v_dual_add_f32 v255, v255, v2 :: v_dual_min_num_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x16,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v2, v2 :: v_dual_min_num_f32 v6, v3, v3 +// GFX12: v_dual_add_f32 v255, v2, v2 :: 
v_dual_min_num_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x16,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v3, v2 :: v_dual_min_num_f32 v6, v4, v3 +// GFX12: v_dual_add_f32 v255, v3, v2 :: v_dual_min_num_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x16,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s105, v2 :: v_dual_min_num_f32 v6, s1, v3 +// GFX12: v_dual_add_f32 v255, s105, v2 :: v_dual_min_num_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x16,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s1, v2 :: v_dual_min_num_f32 v6, s105, v3 +// GFX12: v_dual_add_f32 v255, s1, v2 :: v_dual_min_num_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x16,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, ttmp15, v2 :: v_dual_min_num_f32 v6, vcc_lo, v3 +// GFX12: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_min_num_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x16,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_hi, v2 :: v_dual_min_num_f32 v6, vcc_hi, v3 +// GFX12: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_min_num_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x16,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_lo, v2 :: v_dual_min_num_f32 v6, ttmp15, v3 +// GFX12: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_min_num_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x16,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, m0, v2 :: v_dual_min_num_f32 v6, m0, v3 +// GFX12: v_dual_add_f32 v255, m0, v2 :: v_dual_min_num_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x16,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v6, exec_lo, v3 +// GFX12: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x16,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v6, exec_hi, v3 +// GFX12: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x16,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_min_num_f32 v6, null, v3 +// GFX12: v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_min_num_f32 v6, null, v3 ; encoding: [0xff,0x04,0x16,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, src_scc, v2 :: v_dual_min_num_f32 v6, -1, v3 +// GFX12: v_dual_add_f32 v255, src_scc, v2 :: v_dual_min_num_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x16,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0.5, v3 :: v_dual_min_num_f32 v6, 0.5, v2 +// GFX12: v_dual_add_f32 v255, 0.5, v3 :: v_dual_min_num_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x16,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, -1, v4 :: v_dual_min_num_f32 v6, src_scc, v5 +// GFX12: v_dual_add_f32 v255, -1, v4 :: v_dual_min_num_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x16,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v6, null, v5 :: v_dual_min_num_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_add_f32 v6, null, v5 :: v_dual_min_num_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x16,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_add_f32 v255, v4, v255 :: v_dual_mov_b32 v6, v1 +// GFX12: v_dual_add_f32 v255, v4, v255 :: v_dual_mov_b32 v6, v1 ; encoding: [0x04,0xff,0x11,0xc9,0x01,0x01,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v1, v255 :: v_dual_mov_b32 v6, v255 +// GFX12: v_dual_add_f32 v255, v1, v255 :: v_dual_mov_b32 v6, v255 ; encoding: [0x01,0xff,0x11,0xc9,0xff,0x01,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v255, v255 :: v_dual_mov_b32 v6, v2 +// GFX12: v_dual_add_f32 v255, v255, v255 :: v_dual_mov_b32 v6, v2 ; encoding: [0xff,0xff,0x11,0xc9,0x02,0x01,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v2, v255 :: v_dual_mov_b32 v6, v3 +// GFX12: v_dual_add_f32 v255, v2, v255 :: v_dual_mov_b32 v6, v3 ; encoding: [0x02,0xff,0x11,0xc9,0x03,0x01,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v3, v255 :: v_dual_mov_b32 v6, v4 +// GFX12: v_dual_add_f32 v255, v3, v255 :: v_dual_mov_b32 v6, v4 ; encoding: [0x03,0xff,0x11,0xc9,0x04,0x01,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s105, v255 :: v_dual_mov_b32 v6, s1 +// GFX12: v_dual_add_f32 v255, s105, v255 :: v_dual_mov_b32 v6, s1 ; encoding: [0x69,0xfe,0x11,0xc9,0x01,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s1, v255 :: v_dual_mov_b32 v6, s105 +// GFX12: v_dual_add_f32 v255, s1, v255 :: v_dual_mov_b32 v6, s105 ; encoding: [0x01,0xfe,0x11,0xc9,0x69,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, ttmp15, v255 :: v_dual_mov_b32 v6, vcc_lo +// GFX12: v_dual_add_f32 v255, ttmp15, v255 :: v_dual_mov_b32 v6, vcc_lo ; encoding: [0x7b,0xfe,0x11,0xc9,0x6a,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_add_f32 v255, exec_hi, v255 :: v_dual_mov_b32 v6, vcc_hi +// GFX12: v_dual_add_f32 v255, exec_hi, v255 :: v_dual_mov_b32 v6, vcc_hi ; encoding: [0x7f,0xfe,0x11,0xc9,0x6b,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_lo, v255 :: v_dual_mov_b32 v6, ttmp15 +// GFX12: v_dual_add_f32 v255, exec_lo, v255 :: v_dual_mov_b32 v6, ttmp15 ; encoding: [0x7e,0xfe,0x11,0xc9,0x7b,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, m0, v255 :: v_dual_mov_b32 v6, m0 +// GFX12: v_dual_add_f32 v255, m0, v255 :: v_dual_mov_b32 v6, m0 ; encoding: [0x7d,0xfe,0x11,0xc9,0x7d,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_hi, v255 :: v_dual_mov_b32 v6, exec_lo +// GFX12: v_dual_add_f32 v255, vcc_hi, v255 :: v_dual_mov_b32 v6, exec_lo ; encoding: [0x6b,0xfe,0x11,0xc9,0x7e,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_lo, v255 :: v_dual_mov_b32 v6, exec_hi +// GFX12: v_dual_add_f32 v255, vcc_lo, v255 :: v_dual_mov_b32 v6, exec_hi ; encoding: [0x6a,0xfe,0x11,0xc9,0x7f,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0xaf123456, v255 :: v_dual_mov_b32 v6, null +// GFX12: v_dual_add_f32 v255, 0xaf123456, v255 :: v_dual_mov_b32 v6, null ; encoding: [0xff,0xfe,0x11,0xc9,0x7c,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, src_scc, v255 :: v_dual_mov_b32 v6, -1 +// GFX12: v_dual_add_f32 v255, src_scc, v255 :: v_dual_mov_b32 v6, -1 ; encoding: [0xfd,0xfe,0x11,0xc9,0xc1,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0.5, v3 :: v_dual_mov_b32 v6, 0.5 +// GFX12: v_dual_add_f32 v255, 0.5, v3 :: v_dual_mov_b32 v6, 0.5 ; encoding: 
[0xf0,0x06,0x10,0xc9,0xf0,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, -1, v4 :: v_dual_mov_b32 v6, src_scc +// GFX12: v_dual_add_f32 v255, -1, v4 :: v_dual_mov_b32 v6, src_scc ; encoding: [0xc1,0x08,0x10,0xc9,0xfd,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v6, null, v5 :: v_dual_mov_b32 v255, 0xaf123456 +// GFX12: v_dual_add_f32 v6, null, v5 :: v_dual_mov_b32 v255, 0xaf123456 ; encoding: [0x7c,0x0a,0x10,0xc9,0xff,0x00,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v6, v1, v3 +// GFX12: v_dual_add_f32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x0e,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v6, v255, v3 +// GFX12: v_dual_add_f32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x0e,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v6, v2, v3 +// GFX12: v_dual_add_f32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x0e,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v6, v3, v3 +// GFX12: v_dual_add_f32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x0e,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v6, v4, v3 +// GFX12: v_dual_add_f32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x0e,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 
+ +v_dual_add_f32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v6, s1, v3 +// GFX12: v_dual_add_f32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x0e,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v6, s105, v3 +// GFX12: v_dual_add_f32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x0e,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_lo, v3 +// GFX12: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x0e,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_hi, v3 +// GFX12: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x0e,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, ttmp15, v3 +// GFX12: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x0e,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v6, m0, v3 +// GFX12: v_dual_add_f32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x0e,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_lo, v3 +// GFX12: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x0e,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_lo, v2 :: 
v_dual_mul_dx9_zero_f32 v6, exec_hi, v3 +// GFX12: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x0e,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_mul_dx9_zero_f32 v6, null, v3 +// GFX12: v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_mul_dx9_zero_f32 v6, null, v3 ; encoding: [0xff,0x04,0x0e,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v6, -1, v3 +// GFX12: v_dual_add_f32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x0e,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v6, 0.5, v2 +// GFX12: v_dual_add_f32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x0e,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v6, src_scc, v5 +// GFX12: v_dual_add_f32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x0e,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v6, null, v5 :: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_add_f32 v6, null, v5 :: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x0e,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v4, v2 :: v_dual_mul_f32 v6, v1, v3 +// GFX12: v_dual_add_f32 v255, v4, v2 :: v_dual_mul_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x06,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v1, v2 :: v_dual_mul_f32 
v6, v255, v3 +// GFX12: v_dual_add_f32 v255, v1, v2 :: v_dual_mul_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x06,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v255, v2 :: v_dual_mul_f32 v6, v2, v3 +// GFX12: v_dual_add_f32 v255, v255, v2 :: v_dual_mul_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x06,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v2, v2 :: v_dual_mul_f32 v6, v3, v3 +// GFX12: v_dual_add_f32 v255, v2, v2 :: v_dual_mul_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x06,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v3, v2 :: v_dual_mul_f32 v6, v4, v3 +// GFX12: v_dual_add_f32 v255, v3, v2 :: v_dual_mul_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x06,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s105, v2 :: v_dual_mul_f32 v6, s1, v3 +// GFX12: v_dual_add_f32 v255, s105, v2 :: v_dual_mul_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x06,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s1, v2 :: v_dual_mul_f32 v6, s105, v3 +// GFX12: v_dual_add_f32 v255, s1, v2 :: v_dual_mul_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x06,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, ttmp15, v2 :: v_dual_mul_f32 v6, vcc_lo, v3 +// GFX12: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_mul_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x06,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_hi, v2 :: v_dual_mul_f32 v6, vcc_hi, v3 +// GFX12: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_mul_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x06,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_add_f32 v255, exec_lo, v2 :: v_dual_mul_f32 v6, ttmp15, v3 +// GFX12: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_mul_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x06,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, m0, v2 :: v_dual_mul_f32 v6, m0, v3 +// GFX12: v_dual_add_f32 v255, m0, v2 :: v_dual_mul_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x06,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_mul_f32 v6, exec_lo, v3 +// GFX12: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_mul_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x06,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_mul_f32 v6, exec_hi, v3 +// GFX12: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_mul_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x06,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_mul_f32 v6, null, v3 +// GFX12: v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_mul_f32 v6, null, v3 ; encoding: [0xff,0x04,0x06,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, src_scc, v2 :: v_dual_mul_f32 v6, -1, v3 +// GFX12: v_dual_add_f32 v255, src_scc, v2 :: v_dual_mul_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x06,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0.5, v3 :: v_dual_mul_f32 v6, 0.5, v2 +// GFX12: v_dual_add_f32 v255, 0.5, v3 :: v_dual_mul_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x06,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, -1, v4 :: v_dual_mul_f32 v6, src_scc, v5 +// GFX12: v_dual_add_f32 v255, -1, v4 :: v_dual_mul_f32 
v6, src_scc, v5 ; encoding: [0xc1,0x08,0x06,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v6, null, v5 :: v_dual_mul_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_add_f32 v6, null, v5 :: v_dual_mul_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x06,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v4, v2 :: v_dual_sub_f32 v6, v1, v3 +// GFX12: v_dual_add_f32 v255, v4, v2 :: v_dual_sub_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x0a,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v1, v2 :: v_dual_sub_f32 v6, v255, v3 +// GFX12: v_dual_add_f32 v255, v1, v2 :: v_dual_sub_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x0a,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v255, v2 :: v_dual_sub_f32 v6, v2, v3 +// GFX12: v_dual_add_f32 v255, v255, v2 :: v_dual_sub_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x0a,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v2, v2 :: v_dual_sub_f32 v6, v3, v3 +// GFX12: v_dual_add_f32 v255, v2, v2 :: v_dual_sub_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x0a,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v3, v2 :: v_dual_sub_f32 v6, v4, v3 +// GFX12: v_dual_add_f32 v255, v3, v2 :: v_dual_sub_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x0a,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s105, v2 :: v_dual_sub_f32 v6, s1, v3 +// GFX12: v_dual_add_f32 v255, s105, v2 :: v_dual_sub_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x0a,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s1, v2 :: v_dual_sub_f32 
v6, s105, v3 +// GFX12: v_dual_add_f32 v255, s1, v2 :: v_dual_sub_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x0a,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, ttmp15, v2 :: v_dual_sub_f32 v6, vcc_lo, v3 +// GFX12: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_sub_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x0a,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_hi, v2 :: v_dual_sub_f32 v6, vcc_hi, v3 +// GFX12: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_sub_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x0a,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_lo, v2 :: v_dual_sub_f32 v6, ttmp15, v3 +// GFX12: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_sub_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x0a,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, m0, v2 :: v_dual_sub_f32 v6, m0, v3 +// GFX12: v_dual_add_f32 v255, m0, v2 :: v_dual_sub_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x0a,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_sub_f32 v6, exec_lo, v3 +// GFX12: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_sub_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x0a,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_sub_f32 v6, exec_hi, v3 +// GFX12: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_sub_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x0a,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_sub_f32 v6, null, v3 +// GFX12: v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_sub_f32 v6, null, v3 ; encoding: 
[0xff,0x04,0x0a,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, src_scc, v2 :: v_dual_sub_f32 v6, -1, v3 +// GFX12: v_dual_add_f32 v255, src_scc, v2 :: v_dual_sub_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x0a,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0.5, v3 :: v_dual_sub_f32 v6, 0.5, v2 +// GFX12: v_dual_add_f32 v255, 0.5, v3 :: v_dual_sub_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x0a,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, -1, v4 :: v_dual_sub_f32 v6, src_scc, v5 +// GFX12: v_dual_add_f32 v255, -1, v4 :: v_dual_sub_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x0a,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v6, null, v5 :: v_dual_sub_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_add_f32 v6, null, v5 :: v_dual_sub_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x0a,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v4, v2 :: v_dual_subrev_f32 v6, v1, v3 +// GFX12: v_dual_add_f32 v255, v4, v2 :: v_dual_subrev_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x0c,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v1, v2 :: v_dual_subrev_f32 v6, v255, v3 +// GFX12: v_dual_add_f32 v255, v1, v2 :: v_dual_subrev_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x0c,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v255, v2 :: v_dual_subrev_f32 v6, v2, v3 +// GFX12: v_dual_add_f32 v255, v255, v2 :: v_dual_subrev_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x0c,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 
v255, v2, v2 :: v_dual_subrev_f32 v6, v3, v3 +// GFX12: v_dual_add_f32 v255, v2, v2 :: v_dual_subrev_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x0c,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v3, v2 :: v_dual_subrev_f32 v6, v4, v3 +// GFX12: v_dual_add_f32 v255, v3, v2 :: v_dual_subrev_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x0c,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s105, v2 :: v_dual_subrev_f32 v6, s1, v3 +// GFX12: v_dual_add_f32 v255, s105, v2 :: v_dual_subrev_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x0c,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s1, v2 :: v_dual_subrev_f32 v6, s105, v3 +// GFX12: v_dual_add_f32 v255, s1, v2 :: v_dual_subrev_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x0c,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, ttmp15, v2 :: v_dual_subrev_f32 v6, vcc_lo, v3 +// GFX12: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_subrev_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x0c,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_hi, v2 :: v_dual_subrev_f32 v6, vcc_hi, v3 +// GFX12: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_subrev_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x0c,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_lo, v2 :: v_dual_subrev_f32 v6, ttmp15, v3 +// GFX12: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_subrev_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x0c,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, m0, v2 :: v_dual_subrev_f32 v6, m0, v3 +// GFX12: v_dual_add_f32 v255, m0, v2 :: v_dual_subrev_f32 v6, m0, v3 ; encoding: 
[0x7d,0x04,0x0c,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v6, exec_lo, v3 +// GFX12: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x0c,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v6, exec_hi, v3 +// GFX12: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x0c,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_subrev_f32 v6, null, v3 +// GFX12: v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_subrev_f32 v6, null, v3 ; encoding: [0xff,0x04,0x0c,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, src_scc, v2 :: v_dual_subrev_f32 v6, -1, v3 +// GFX12: v_dual_add_f32 v255, src_scc, v2 :: v_dual_subrev_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x0c,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0.5, v3 :: v_dual_subrev_f32 v6, 0.5, v2 +// GFX12: v_dual_add_f32 v255, 0.5, v3 :: v_dual_subrev_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x0c,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, -1, v4 :: v_dual_subrev_f32 v6, src_scc, v5 +// GFX12: v_dual_add_f32 v255, -1, v4 :: v_dual_subrev_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x0c,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v6, null, v5 :: v_dual_subrev_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_add_f32 v6, null, v5 :: v_dual_subrev_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x0c,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2 :: v_dual_add_f32 v6, v1, v3 +// GFX12: v_dual_cndmask_b32 v255, v4, v2 :: v_dual_add_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x48,0xca,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v1, v2 :: v_dual_add_f32 v6, v255, v3 +// GFX12: v_dual_cndmask_b32 v255, v1, v2 :: v_dual_add_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x48,0xca,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v255, v2 :: v_dual_add_f32 v6, v2, v3 +// GFX12: v_dual_cndmask_b32 v255, v255, v2 :: v_dual_add_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x48,0xca,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v2, v2 :: v_dual_add_f32 v6, v3, v3 +// GFX12: v_dual_cndmask_b32 v255, v2, v2 :: v_dual_add_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x48,0xca,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v3, v2 :: v_dual_add_f32 v6, v4, v3 +// GFX12: v_dual_cndmask_b32 v255, v3, v2 :: v_dual_add_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x48,0xca,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s105, v2 :: v_dual_add_f32 v6, s105, v3 +// GFX12: v_dual_cndmask_b32 v255, s105, v2 :: v_dual_add_f32 v6, s105, v3 ; encoding: [0x69,0x04,0x48,0xca,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s1, v2 :: v_dual_add_f32 v6, s1, v3 +// GFX12: v_dual_cndmask_b32 v255, s1, v2 :: v_dual_add_f32 v6, s1, v3 ; encoding: [0x01,0x04,0x48,0xca,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, ttmp15, v2 :: v_dual_add_f32 v6, ttmp15, v3 +// GFX12: v_dual_cndmask_b32 v255, 
ttmp15, v2 :: v_dual_add_f32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x48,0xca,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_hi, v2 :: v_dual_add_f32 v6, exec_hi, v3 +// GFX12: v_dual_cndmask_b32 v255, exec_hi, v2 :: v_dual_add_f32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x48,0xca,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_lo, v2 :: v_dual_add_f32 v6, exec_lo, v3 +// GFX12: v_dual_cndmask_b32 v255, exec_lo, v2 :: v_dual_add_f32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x48,0xca,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, m0, v2 :: v_dual_add_f32 v6, m0, v3 +// GFX12: v_dual_cndmask_b32 v255, m0, v2 :: v_dual_add_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x48,0xca,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_hi, v2 :: v_dual_add_f32 v6, vcc_hi, v3 +// GFX12: v_dual_cndmask_b32 v255, vcc_hi, v2 :: v_dual_add_f32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x48,0xca,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_lo, v2 :: v_dual_add_f32 v6, vcc_lo, v3 +// GFX12: v_dual_cndmask_b32 v255, vcc_lo, v2 :: v_dual_add_f32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x48,0xca,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_add_f32 v6, null, v3 +// GFX12: v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_add_f32 v6, null, v3 ; encoding: [0xff,0x04,0x48,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, src_scc, v2 :: v_dual_add_f32 v6, -1, v3 +// GFX12: v_dual_cndmask_b32 v255, src_scc, v2 :: v_dual_add_f32 v6, -1, v3 ; encoding: 
[0xfd,0x04,0x48,0xca,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_add_f32 v6, 0.5, v2 +// GFX12: v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_add_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x48,0xca,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, -1, v4 :: v_dual_add_f32 v6, src_scc, v5 +// GFX12: v_dual_cndmask_b32 v255, -1, v4 :: v_dual_add_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x48,0xca,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v6, null, v5 :: v_dual_add_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_cndmask_b32 v6, null, v5 :: v_dual_add_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x48,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2 :: v_dual_add_nc_u32 v6, v1, v3 +// GFX12: v_dual_cndmask_b32 v255, v4, v2 :: v_dual_add_nc_u32 v6, v1, v3 ; encoding: [0x04,0x05,0x60,0xca,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v1, v2 :: v_dual_add_nc_u32 v6, v255, v3 +// GFX12: v_dual_cndmask_b32 v255, v1, v2 :: v_dual_add_nc_u32 v6, v255, v3 ; encoding: [0x01,0x05,0x60,0xca,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v255, v2 :: v_dual_add_nc_u32 v6, v2, v3 +// GFX12: v_dual_cndmask_b32 v255, v255, v2 :: v_dual_add_nc_u32 v6, v2, v3 ; encoding: [0xff,0x05,0x60,0xca,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v2, v2 :: v_dual_add_nc_u32 v6, v3, v3 +// GFX12: v_dual_cndmask_b32 v255, v2, v2 :: v_dual_add_nc_u32 v6, v3, v3 ; encoding: [0x02,0x05,0x60,0xca,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_cndmask_b32 v255, v3, v2 :: v_dual_add_nc_u32 v6, v4, v3 +// GFX12: v_dual_cndmask_b32 v255, v3, v2 :: v_dual_add_nc_u32 v6, v4, v3 ; encoding: [0x03,0x05,0x60,0xca,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s105, v2 :: v_dual_add_nc_u32 v6, s105, v3 +// GFX12: v_dual_cndmask_b32 v255, s105, v2 :: v_dual_add_nc_u32 v6, s105, v3 ; encoding: [0x69,0x04,0x60,0xca,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s1, v2 :: v_dual_add_nc_u32 v6, s1, v3 +// GFX12: v_dual_cndmask_b32 v255, s1, v2 :: v_dual_add_nc_u32 v6, s1, v3 ; encoding: [0x01,0x04,0x60,0xca,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v6, ttmp15, v3 +// GFX12: v_dual_cndmask_b32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x60,0xca,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v6, exec_hi, v3 +// GFX12: v_dual_cndmask_b32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x60,0xca,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v6, exec_lo, v3 +// GFX12: v_dual_cndmask_b32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x60,0xca,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, m0, v2 :: v_dual_add_nc_u32 v6, m0, v3 +// GFX12: v_dual_cndmask_b32 v255, m0, v2 :: v_dual_add_nc_u32 v6, m0, v3 ; encoding: [0x7d,0x04,0x60,0xca,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v6, 
vcc_hi, v3 +// GFX12: v_dual_cndmask_b32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x60,0xca,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v6, vcc_lo, v3 +// GFX12: v_dual_cndmask_b32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x60,0xca,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_add_nc_u32 v6, null, v3 +// GFX12: v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_add_nc_u32 v6, null, v3 ; encoding: [0xff,0x04,0x60,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, src_scc, v2 :: v_dual_add_nc_u32 v6, -1, v3 +// GFX12: v_dual_cndmask_b32 v255, src_scc, v2 :: v_dual_add_nc_u32 v6, -1, v3 ; encoding: [0xfd,0x04,0x60,0xca,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_add_nc_u32 v6, 0.5, v2 +// GFX12: v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_add_nc_u32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x60,0xca,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, -1, v4 :: v_dual_add_nc_u32 v6, src_scc, v5 +// GFX12: v_dual_cndmask_b32 v255, -1, v4 :: v_dual_add_nc_u32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x60,0xca,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v6, null, v5 :: v_dual_add_nc_u32 v255, 0xaf123456, v4 +// GFX12: v_dual_cndmask_b32 v6, null, v5 :: v_dual_add_nc_u32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x60,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2 :: v_dual_cndmask_b32 v6, v1, v3 +// 
GFX12: v_dual_cndmask_b32 v255, v4, v2 :: v_dual_cndmask_b32 v6, v1, v3 ; encoding: [0x04,0x05,0x52,0xca,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v1, v2 :: v_dual_cndmask_b32 v6, v255, v3 +// GFX12: v_dual_cndmask_b32 v255, v1, v2 :: v_dual_cndmask_b32 v6, v255, v3 ; encoding: [0x01,0x05,0x52,0xca,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v255, v2 :: v_dual_cndmask_b32 v6, v2, v3 +// GFX12: v_dual_cndmask_b32 v255, v255, v2 :: v_dual_cndmask_b32 v6, v2, v3 ; encoding: [0xff,0x05,0x52,0xca,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v2, v2 :: v_dual_cndmask_b32 v6, v3, v3 +// GFX12: v_dual_cndmask_b32 v255, v2, v2 :: v_dual_cndmask_b32 v6, v3, v3 ; encoding: [0x02,0x05,0x52,0xca,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v3, v2 :: v_dual_cndmask_b32 v6, v4, v3 +// GFX12: v_dual_cndmask_b32 v255, v3, v2 :: v_dual_cndmask_b32 v6, v4, v3 ; encoding: [0x03,0x05,0x52,0xca,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s105, v2 :: v_dual_cndmask_b32 v6, s105, v3 +// GFX12: v_dual_cndmask_b32 v255, s105, v2 :: v_dual_cndmask_b32 v6, s105, v3 ; encoding: [0x69,0x04,0x52,0xca,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s1, v3 +// GFX12: v_dual_cndmask_b32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s1, v3 ; encoding: [0x01,0x04,0x52,0xca,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v6, ttmp15, v3 +// GFX12: v_dual_cndmask_b32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v6, ttmp15, v3 ; encoding: 
[0x7b,0x04,0x52,0xca,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v6, exec_hi, v3 +// GFX12: v_dual_cndmask_b32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x52,0xca,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v6, exec_lo, v3 +// GFX12: v_dual_cndmask_b32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x52,0xca,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, m0, v2 :: v_dual_cndmask_b32 v6, m0, v3 +// GFX12: v_dual_cndmask_b32 v255, m0, v2 :: v_dual_cndmask_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0x52,0xca,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v6, vcc_hi, v3 +// GFX12: v_dual_cndmask_b32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x52,0xca,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v6, vcc_lo, v3 +// GFX12: v_dual_cndmask_b32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x52,0xca,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_cndmask_b32 v6, null, v3 +// GFX12: v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_cndmask_b32 v6, null, v3 ; encoding: [0xff,0x04,0x52,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, src_scc, v2 :: v_dual_cndmask_b32 v6, -1, v3 +// GFX12: v_dual_cndmask_b32 v255, src_scc, v2 :: v_dual_cndmask_b32 v6, -1, v3 ; encoding: 
[0xfd,0x04,0x52,0xca,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_cndmask_b32 v6, 0.5, v2 +// GFX12: v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_cndmask_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x52,0xca,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, -1, v4 :: v_dual_cndmask_b32 v6, src_scc, v5 +// GFX12: v_dual_cndmask_b32 v255, -1, v4 :: v_dual_cndmask_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x52,0xca,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v6, null, v5 :: v_dual_cndmask_b32 v255, 0xaf123456, v4 +// GFX12: v_dual_cndmask_b32 v6, null, v5 :: v_dual_cndmask_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x52,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2 :: v_dual_fmaak_f32 v6, v1, v3, 0xaf123456 +// GFX12: v_dual_cndmask_b32 v255, v4, v2 :: v_dual_fmaak_f32 v6, v1, v3, 0xaf123456 ; encoding: [0x04,0x05,0x42,0xca,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v1, v2 :: v_dual_fmaak_f32 v6, v255, v3, 0xaf123456 +// GFX12: v_dual_cndmask_b32 v255, v1, v2 :: v_dual_fmaak_f32 v6, v255, v3, 0xaf123456 ; encoding: [0x01,0x05,0x42,0xca,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v255, v2 :: v_dual_fmaak_f32 v6, v2, v3, 0xaf123456 +// GFX12: v_dual_cndmask_b32 v255, v255, v2 :: v_dual_fmaak_f32 v6, v2, v3, 0xaf123456 ; encoding: [0xff,0x05,0x42,0xca,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v2, v2 :: v_dual_fmaak_f32 v6, v3, v3, 0xaf123456 +// GFX12: 
v_dual_cndmask_b32 v255, v2, v2 :: v_dual_fmaak_f32 v6, v3, v3, 0xaf123456 ; encoding: [0x02,0x05,0x42,0xca,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v3, v2 :: v_dual_fmaak_f32 v6, v4, v3, 0xaf123456 +// GFX12: v_dual_cndmask_b32 v255, v3, v2 :: v_dual_fmaak_f32 v6, v4, v3, 0xaf123456 ; encoding: [0x03,0x05,0x42,0xca,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_fmaak_f32 v6, null, v3, 0xaf123456 +// GFX12: v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_fmaak_f32 v6, null, v3, 0xaf123456 ; encoding: [0xff,0x04,0x42,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_fmaak_f32 v6, -1, v2, 0xaf123456 +// GFX12: v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_fmaak_f32 v6, -1, v2, 0xaf123456 ; encoding: [0xf0,0x06,0x42,0xca,0xc1,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, -1, v4 :: v_dual_fmaak_f32 v6, 0.5, v5, 0xaf123456 +// GFX12: v_dual_cndmask_b32 v255, -1, v4 :: v_dual_fmaak_f32 v6, 0.5, v5, 0xaf123456 ; encoding: [0xc1,0x08,0x42,0xca,0xf0,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v6, null, v5 :: v_dual_fmaak_f32 v255, 0xaf123456, v4, 0xaf123456 +// GFX12: v_dual_cndmask_b32 v6, null, v5 :: v_dual_fmaak_f32 v255, 0xaf123456, v4, 0xaf123456 ; encoding: [0x7c,0x0a,0x42,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2 :: v_dual_fmac_f32 v6, v1, v3 +// GFX12: v_dual_cndmask_b32 v255, v4, v2 :: v_dual_fmac_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x40,0xca,0x01,0x07,0x06,0xff] +// 
W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v1, v2 :: v_dual_fmac_f32 v6, v255, v3 +// GFX12: v_dual_cndmask_b32 v255, v1, v2 :: v_dual_fmac_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x40,0xca,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v255, v2 :: v_dual_fmac_f32 v6, v2, v3 +// GFX12: v_dual_cndmask_b32 v255, v255, v2 :: v_dual_fmac_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x40,0xca,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v2, v2 :: v_dual_fmac_f32 v6, v3, v3 +// GFX12: v_dual_cndmask_b32 v255, v2, v2 :: v_dual_fmac_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x40,0xca,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v3, v2 :: v_dual_fmac_f32 v6, v4, v3 +// GFX12: v_dual_cndmask_b32 v255, v3, v2 :: v_dual_fmac_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x40,0xca,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s105, v2 :: v_dual_fmac_f32 v6, s105, v3 +// GFX12: v_dual_cndmask_b32 v255, s105, v2 :: v_dual_fmac_f32 v6, s105, v3 ; encoding: [0x69,0x04,0x40,0xca,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s1, v2 :: v_dual_fmac_f32 v6, s1, v3 +// GFX12: v_dual_cndmask_b32 v255, s1, v2 :: v_dual_fmac_f32 v6, s1, v3 ; encoding: [0x01,0x04,0x40,0xca,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, ttmp15, v2 :: v_dual_fmac_f32 v6, ttmp15, v3 +// GFX12: v_dual_cndmask_b32 v255, ttmp15, v2 :: v_dual_fmac_f32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x40,0xca,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_hi, v2 :: v_dual_fmac_f32 v6, exec_hi, v3 
+// GFX12: v_dual_cndmask_b32 v255, exec_hi, v2 :: v_dual_fmac_f32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x40,0xca,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_lo, v2 :: v_dual_fmac_f32 v6, exec_lo, v3 +// GFX12: v_dual_cndmask_b32 v255, exec_lo, v2 :: v_dual_fmac_f32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x40,0xca,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, m0, v2 :: v_dual_fmac_f32 v6, m0, v3 +// GFX12: v_dual_cndmask_b32 v255, m0, v2 :: v_dual_fmac_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x40,0xca,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v6, vcc_hi, v3 +// GFX12: v_dual_cndmask_b32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x40,0xca,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v6, vcc_lo, v3 +// GFX12: v_dual_cndmask_b32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x40,0xca,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_fmac_f32 v6, null, v3 +// GFX12: v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_fmac_f32 v6, null, v3 ; encoding: [0xff,0x04,0x40,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, src_scc, v2 :: v_dual_fmac_f32 v6, -1, v3 +// GFX12: v_dual_cndmask_b32 v255, src_scc, v2 :: v_dual_fmac_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x40,0xca,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_fmac_f32 v6, 0.5, v2 +// GFX12: v_dual_cndmask_b32 v255, 0.5, v3 :: 
v_dual_fmac_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x40,0xca,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, -1, v4 :: v_dual_fmac_f32 v6, src_scc, v5 +// GFX12: v_dual_cndmask_b32 v255, -1, v4 :: v_dual_fmac_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x40,0xca,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v6, null, v5 :: v_dual_fmac_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_cndmask_b32 v6, null, v5 :: v_dual_fmac_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x40,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v255 :: v_dual_fmamk_f32 v6, v1, 0xaf123456, v255 +// GFX12: v_dual_cndmask_b32 v255, v4, v255 :: v_dual_fmamk_f32 v6, v1, 0xaf123456, v255 ; encoding: [0x04,0xff,0x45,0xca,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v1, v255 :: v_dual_fmamk_f32 v6, v255, 0xaf123456, v255 +// GFX12: v_dual_cndmask_b32 v255, v1, v255 :: v_dual_fmamk_f32 v6, v255, 0xaf123456, v255 ; encoding: [0x01,0xff,0x45,0xca,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v255, v255 :: v_dual_fmamk_f32 v6, v2, 0xaf123456, v255 +// GFX12: v_dual_cndmask_b32 v255, v255, v255 :: v_dual_fmamk_f32 v6, v2, 0xaf123456, v255 ; encoding: [0xff,0xff,0x45,0xca,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v2, v255 :: v_dual_fmamk_f32 v6, v3, 0xaf123456, v255 +// GFX12: v_dual_cndmask_b32 v255, v2, v255 :: v_dual_fmamk_f32 v6, v3, 0xaf123456, v255 ; encoding: [0x02,0xff,0x45,0xca,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_cndmask_b32 v255, v3, v255 :: v_dual_fmamk_f32 v6, v4, 0xaf123456, v255 +// GFX12: v_dual_cndmask_b32 v255, v3, v255 :: v_dual_fmamk_f32 v6, v4, 0xaf123456, v255 ; encoding: [0x03,0xff,0x45,0xca,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, null, 0xaf123456, v255 +// GFX12: v_dual_cndmask_b32 v255, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, null, 0xaf123456, v255 ; encoding: [0xff,0xfe,0x45,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_fmamk_f32 v6, -1, 0xaf123456, v255 +// GFX12: v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_fmamk_f32 v6, -1, 0xaf123456, v255 ; encoding: [0xf0,0x06,0x44,0xca,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, -1, v4 :: v_dual_fmamk_f32 v6, 0.5, 0xaf123456, v255 +// GFX12: v_dual_cndmask_b32 v255, -1, v4 :: v_dual_fmamk_f32 v6, 0.5, 0xaf123456, v255 ; encoding: [0xc1,0x08,0x44,0xca,0xf0,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v6, null, v5 :: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v4 +// GFX12: v_dual_cndmask_b32 v6, null, v5 :: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x44,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2 :: v_dual_lshlrev_b32 v6, v1, v3 +// GFX12: v_dual_cndmask_b32 v255, v4, v2 :: v_dual_lshlrev_b32 v6, v1, v3 ; encoding: [0x04,0x05,0x62,0xca,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v1, v2 :: v_dual_lshlrev_b32 v6, v255, v3 +// GFX12: v_dual_cndmask_b32 v255, v1, v2 :: 
v_dual_lshlrev_b32 v6, v255, v3 ; encoding: [0x01,0x05,0x62,0xca,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v255, v2 :: v_dual_lshlrev_b32 v6, v2, v3 +// GFX12: v_dual_cndmask_b32 v255, v255, v2 :: v_dual_lshlrev_b32 v6, v2, v3 ; encoding: [0xff,0x05,0x62,0xca,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v2, v2 :: v_dual_lshlrev_b32 v6, v3, v3 +// GFX12: v_dual_cndmask_b32 v255, v2, v2 :: v_dual_lshlrev_b32 v6, v3, v3 ; encoding: [0x02,0x05,0x62,0xca,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v3, v2 :: v_dual_lshlrev_b32 v6, v4, v3 +// GFX12: v_dual_cndmask_b32 v255, v3, v2 :: v_dual_lshlrev_b32 v6, v4, v3 ; encoding: [0x03,0x05,0x62,0xca,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s105, v2 :: v_dual_lshlrev_b32 v6, s105, v3 +// GFX12: v_dual_cndmask_b32 v255, s105, v2 :: v_dual_lshlrev_b32 v6, s105, v3 ; encoding: [0x69,0x04,0x62,0xca,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s1, v2 :: v_dual_lshlrev_b32 v6, s1, v3 +// GFX12: v_dual_cndmask_b32 v255, s1, v2 :: v_dual_lshlrev_b32 v6, s1, v3 ; encoding: [0x01,0x04,0x62,0xca,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v6, ttmp15, v3 +// GFX12: v_dual_cndmask_b32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x62,0xca,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v6, exec_hi, v3 +// GFX12: v_dual_cndmask_b32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v6, exec_hi, v3 ; encoding: 
[0x7f,0x04,0x62,0xca,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v6, exec_lo, v3 +// GFX12: v_dual_cndmask_b32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x62,0xca,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, m0, v2 :: v_dual_lshlrev_b32 v6, m0, v3 +// GFX12: v_dual_cndmask_b32 v255, m0, v2 :: v_dual_lshlrev_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0x62,0xca,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v6, vcc_hi, v3 +// GFX12: v_dual_cndmask_b32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x62,0xca,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v6, vcc_lo, v3 +// GFX12: v_dual_cndmask_b32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x62,0xca,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_lshlrev_b32 v6, null, v3 +// GFX12: v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_lshlrev_b32 v6, null, v3 ; encoding: [0xff,0x04,0x62,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v6, -1, v3 +// GFX12: v_dual_cndmask_b32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0x62,0xca,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v6, 0.5, v2 +// GFX12: v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v6, 0.5, v2 ; encoding: 
[0xf0,0x06,0x62,0xca,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, -1, v4 :: v_dual_lshlrev_b32 v6, src_scc, v5 +// GFX12: v_dual_cndmask_b32 v255, -1, v4 :: v_dual_lshlrev_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x62,0xca,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v6, null, v5 :: v_dual_lshlrev_b32 v255, 0xaf123456, v4 +// GFX12: v_dual_cndmask_b32 v6, null, v5 :: v_dual_lshlrev_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x62,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2 :: v_dual_max_num_f32 v6, v1, v3 +// GFX12: v_dual_cndmask_b32 v255, v4, v2 :: v_dual_max_num_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x54,0xca,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v1, v2 :: v_dual_max_num_f32 v6, v255, v3 +// GFX12: v_dual_cndmask_b32 v255, v1, v2 :: v_dual_max_num_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x54,0xca,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v255, v2 :: v_dual_max_num_f32 v6, v2, v3 +// GFX12: v_dual_cndmask_b32 v255, v255, v2 :: v_dual_max_num_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x54,0xca,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v2, v2 :: v_dual_max_num_f32 v6, v3, v3 +// GFX12: v_dual_cndmask_b32 v255, v2, v2 :: v_dual_max_num_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x54,0xca,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v3, v2 :: v_dual_max_num_f32 v6, v4, v3 +// GFX12: v_dual_cndmask_b32 v255, v3, v2 :: v_dual_max_num_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x54,0xca,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: 
error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s105, v2 :: v_dual_max_num_f32 v6, s105, v3 +// GFX12: v_dual_cndmask_b32 v255, s105, v2 :: v_dual_max_num_f32 v6, s105, v3 ; encoding: [0x69,0x04,0x54,0xca,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s1, v2 :: v_dual_max_num_f32 v6, s1, v3 +// GFX12: v_dual_cndmask_b32 v255, s1, v2 :: v_dual_max_num_f32 v6, s1, v3 ; encoding: [0x01,0x04,0x54,0xca,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, ttmp15, v2 :: v_dual_max_num_f32 v6, ttmp15, v3 +// GFX12: v_dual_cndmask_b32 v255, ttmp15, v2 :: v_dual_max_num_f32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x54,0xca,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_hi, v2 :: v_dual_max_num_f32 v6, exec_hi, v3 +// GFX12: v_dual_cndmask_b32 v255, exec_hi, v2 :: v_dual_max_num_f32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x54,0xca,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_lo, v2 :: v_dual_max_num_f32 v6, exec_lo, v3 +// GFX12: v_dual_cndmask_b32 v255, exec_lo, v2 :: v_dual_max_num_f32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x54,0xca,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, m0, v2 :: v_dual_max_num_f32 v6, m0, v3 +// GFX12: v_dual_cndmask_b32 v255, m0, v2 :: v_dual_max_num_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x54,0xca,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v6, vcc_hi, v3 +// GFX12: v_dual_cndmask_b32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x54,0xca,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_cndmask_b32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v6, vcc_lo, v3 +// GFX12: v_dual_cndmask_b32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x54,0xca,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_max_num_f32 v6, null, v3 +// GFX12: v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_max_num_f32 v6, null, v3 ; encoding: [0xff,0x04,0x54,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, src_scc, v2 :: v_dual_max_num_f32 v6, -1, v3 +// GFX12: v_dual_cndmask_b32 v255, src_scc, v2 :: v_dual_max_num_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x54,0xca,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_max_num_f32 v6, 0.5, v2 +// GFX12: v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_max_num_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x54,0xca,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, -1, v4 :: v_dual_max_num_f32 v6, src_scc, v5 +// GFX12: v_dual_cndmask_b32 v255, -1, v4 :: v_dual_max_num_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x54,0xca,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v6, null, v5 :: v_dual_max_num_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_cndmask_b32 v6, null, v5 :: v_dual_max_num_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x54,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2 :: v_dual_min_num_f32 v6, v1, v3 +// GFX12: v_dual_cndmask_b32 v255, v4, v2 :: v_dual_min_num_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x56,0xca,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_cndmask_b32 v255, v1, v2 :: v_dual_min_num_f32 v6, v255, v3 +// GFX12: v_dual_cndmask_b32 v255, v1, v2 :: v_dual_min_num_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x56,0xca,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v255, v2 :: v_dual_min_num_f32 v6, v2, v3 +// GFX12: v_dual_cndmask_b32 v255, v255, v2 :: v_dual_min_num_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x56,0xca,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v2, v2 :: v_dual_min_num_f32 v6, v3, v3 +// GFX12: v_dual_cndmask_b32 v255, v2, v2 :: v_dual_min_num_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x56,0xca,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v3, v2 :: v_dual_min_num_f32 v6, v4, v3 +// GFX12: v_dual_cndmask_b32 v255, v3, v2 :: v_dual_min_num_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x56,0xca,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s105, v2 :: v_dual_min_num_f32 v6, s105, v3 +// GFX12: v_dual_cndmask_b32 v255, s105, v2 :: v_dual_min_num_f32 v6, s105, v3 ; encoding: [0x69,0x04,0x56,0xca,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s1, v2 :: v_dual_min_num_f32 v6, s1, v3 +// GFX12: v_dual_cndmask_b32 v255, s1, v2 :: v_dual_min_num_f32 v6, s1, v3 ; encoding: [0x01,0x04,0x56,0xca,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, ttmp15, v2 :: v_dual_min_num_f32 v6, ttmp15, v3 +// GFX12: v_dual_cndmask_b32 v255, ttmp15, v2 :: v_dual_min_num_f32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x56,0xca,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_hi, v2 :: v_dual_min_num_f32 v6, exec_hi, v3 +// GFX12: 
v_dual_cndmask_b32 v255, exec_hi, v2 :: v_dual_min_num_f32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x56,0xca,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_lo, v2 :: v_dual_min_num_f32 v6, exec_lo, v3 +// GFX12: v_dual_cndmask_b32 v255, exec_lo, v2 :: v_dual_min_num_f32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x56,0xca,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, m0, v2 :: v_dual_min_num_f32 v6, m0, v3 +// GFX12: v_dual_cndmask_b32 v255, m0, v2 :: v_dual_min_num_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x56,0xca,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v6, vcc_hi, v3 +// GFX12: v_dual_cndmask_b32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x56,0xca,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v6, vcc_lo, v3 +// GFX12: v_dual_cndmask_b32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x56,0xca,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_min_num_f32 v6, null, v3 +// GFX12: v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_min_num_f32 v6, null, v3 ; encoding: [0xff,0x04,0x56,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, src_scc, v2 :: v_dual_min_num_f32 v6, -1, v3 +// GFX12: v_dual_cndmask_b32 v255, src_scc, v2 :: v_dual_min_num_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x56,0xca,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_min_num_f32 v6, 0.5, v2 +// GFX12: 
v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_min_num_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x56,0xca,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, -1, v4 :: v_dual_min_num_f32 v6, src_scc, v5 +// GFX12: v_dual_cndmask_b32 v255, -1, v4 :: v_dual_min_num_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x56,0xca,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v6, null, v5 :: v_dual_min_num_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_cndmask_b32 v6, null, v5 :: v_dual_min_num_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x56,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v255 :: v_dual_mov_b32 v6, v1 +// GFX12: v_dual_cndmask_b32 v255, v4, v255 :: v_dual_mov_b32 v6, v1 ; encoding: [0x04,0xff,0x51,0xca,0x01,0x01,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v1, v255 :: v_dual_mov_b32 v6, v255 +// GFX12: v_dual_cndmask_b32 v255, v1, v255 :: v_dual_mov_b32 v6, v255 ; encoding: [0x01,0xff,0x51,0xca,0xff,0x01,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v255, v255 :: v_dual_mov_b32 v6, v2 +// GFX12: v_dual_cndmask_b32 v255, v255, v255 :: v_dual_mov_b32 v6, v2 ; encoding: [0xff,0xff,0x51,0xca,0x02,0x01,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v2, v255 :: v_dual_mov_b32 v6, v3 +// GFX12: v_dual_cndmask_b32 v255, v2, v255 :: v_dual_mov_b32 v6, v3 ; encoding: [0x02,0xff,0x51,0xca,0x03,0x01,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v3, v255 :: v_dual_mov_b32 v6, v4 +// GFX12: v_dual_cndmask_b32 v255, v3, v255 :: v_dual_mov_b32 v6, v4 ; encoding: [0x03,0xff,0x51,0xca,0x04,0x01,0x06,0xff] +// 
W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s105, v255 :: v_dual_mov_b32 v6, s105 +// GFX12: v_dual_cndmask_b32 v255, s105, v255 :: v_dual_mov_b32 v6, s105 ; encoding: [0x69,0xfe,0x51,0xca,0x69,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s1, v255 :: v_dual_mov_b32 v6, s1 +// GFX12: v_dual_cndmask_b32 v255, s1, v255 :: v_dual_mov_b32 v6, s1 ; encoding: [0x01,0xfe,0x51,0xca,0x01,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, ttmp15, v255 :: v_dual_mov_b32 v6, ttmp15 +// GFX12: v_dual_cndmask_b32 v255, ttmp15, v255 :: v_dual_mov_b32 v6, ttmp15 ; encoding: [0x7b,0xfe,0x51,0xca,0x7b,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_hi, v255 :: v_dual_mov_b32 v6, exec_hi +// GFX12: v_dual_cndmask_b32 v255, exec_hi, v255 :: v_dual_mov_b32 v6, exec_hi ; encoding: [0x7f,0xfe,0x51,0xca,0x7f,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_lo, v255 :: v_dual_mov_b32 v6, exec_lo +// GFX12: v_dual_cndmask_b32 v255, exec_lo, v255 :: v_dual_mov_b32 v6, exec_lo ; encoding: [0x7e,0xfe,0x51,0xca,0x7e,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, m0, v255 :: v_dual_mov_b32 v6, m0 +// GFX12: v_dual_cndmask_b32 v255, m0, v255 :: v_dual_mov_b32 v6, m0 ; encoding: [0x7d,0xfe,0x51,0xca,0x7d,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_hi, v255 :: v_dual_mov_b32 v6, vcc_hi +// GFX12: v_dual_cndmask_b32 v255, vcc_hi, v255 :: v_dual_mov_b32 v6, vcc_hi ; encoding: [0x6b,0xfe,0x51,0xca,0x6b,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_lo, v255 :: v_dual_mov_b32 v6, vcc_lo 
+// GFX12: v_dual_cndmask_b32 v255, vcc_lo, v255 :: v_dual_mov_b32 v6, vcc_lo ; encoding: [0x6a,0xfe,0x51,0xca,0x6a,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0xaf123456, v255 :: v_dual_mov_b32 v6, null +// GFX12: v_dual_cndmask_b32 v255, 0xaf123456, v255 :: v_dual_mov_b32 v6, null ; encoding: [0xff,0xfe,0x51,0xca,0x7c,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, src_scc, v255 :: v_dual_mov_b32 v6, -1 +// GFX12: v_dual_cndmask_b32 v255, src_scc, v255 :: v_dual_mov_b32 v6, -1 ; encoding: [0xfd,0xfe,0x51,0xca,0xc1,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_mov_b32 v6, 0.5 +// GFX12: v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_mov_b32 v6, 0.5 ; encoding: [0xf0,0x06,0x50,0xca,0xf0,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, -1, v4 :: v_dual_mov_b32 v6, src_scc +// GFX12: v_dual_cndmask_b32 v255, -1, v4 :: v_dual_mov_b32 v6, src_scc ; encoding: [0xc1,0x08,0x50,0xca,0xfd,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v6, null, v5 :: v_dual_mov_b32 v255, 0xaf123456 +// GFX12: v_dual_cndmask_b32 v6, null, v5 :: v_dual_mov_b32 v255, 0xaf123456 ; encoding: [0x7c,0x0a,0x50,0xca,0xff,0x00,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v6, v1, v3 +// GFX12: v_dual_cndmask_b32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x4e,0xca,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v6, v255, v3 +// GFX12: v_dual_cndmask_b32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v6, v255, 
v3 ; encoding: [0x01,0x05,0x4e,0xca,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v6, v2, v3 +// GFX12: v_dual_cndmask_b32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x4e,0xca,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v6, v3, v3 +// GFX12: v_dual_cndmask_b32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x4e,0xca,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v6, v4, v3 +// GFX12: v_dual_cndmask_b32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x4e,0xca,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v6, s105, v3 +// GFX12: v_dual_cndmask_b32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v6, s105, v3 ; encoding: [0x69,0x04,0x4e,0xca,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v6, s1, v3 +// GFX12: v_dual_cndmask_b32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v6, s1, v3 ; encoding: [0x01,0x04,0x4e,0xca,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v6, ttmp15, v3 +// GFX12: v_dual_cndmask_b32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x4e,0xca,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_hi, v3 +// GFX12: v_dual_cndmask_b32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_hi, v3 ; 
encoding: [0x7f,0x04,0x4e,0xca,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_lo, v3 +// GFX12: v_dual_cndmask_b32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x4e,0xca,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v6, m0, v3 +// GFX12: v_dual_cndmask_b32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x4e,0xca,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_hi, v3 +// GFX12: v_dual_cndmask_b32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x4e,0xca,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_lo, v3 +// GFX12: v_dual_cndmask_b32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x4e,0xca,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_mul_dx9_zero_f32 v6, null, v3 +// GFX12: v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_mul_dx9_zero_f32 v6, null, v3 ; encoding: [0xff,0x04,0x4e,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v6, -1, v3 +// GFX12: v_dual_cndmask_b32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x4e,0xca,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v6, 0.5, v2 +// GFX12: v_dual_cndmask_b32 v255, 
0.5, v3 :: v_dual_mul_dx9_zero_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x4e,0xca,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v6, src_scc, v5 +// GFX12: v_dual_cndmask_b32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x4e,0xca,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v6, null, v5 :: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_cndmask_b32 v6, null, v5 :: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x4e,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2 :: v_dual_mul_f32 v6, v1, v3 +// GFX12: v_dual_cndmask_b32 v255, v4, v2 :: v_dual_mul_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x46,0xca,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v1, v2 :: v_dual_mul_f32 v6, v255, v3 +// GFX12: v_dual_cndmask_b32 v255, v1, v2 :: v_dual_mul_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x46,0xca,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v255, v2 :: v_dual_mul_f32 v6, v2, v3 +// GFX12: v_dual_cndmask_b32 v255, v255, v2 :: v_dual_mul_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x46,0xca,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v2, v2 :: v_dual_mul_f32 v6, v3, v3 +// GFX12: v_dual_cndmask_b32 v255, v2, v2 :: v_dual_mul_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x46,0xca,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v3, v2 :: v_dual_mul_f32 v6, v4, v3 +// GFX12: v_dual_cndmask_b32 v255, v3, v2 :: v_dual_mul_f32 v6, v4, v3 ; encoding: 
[0x03,0x05,0x46,0xca,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s105, v2 :: v_dual_mul_f32 v6, s105, v3 +// GFX12: v_dual_cndmask_b32 v255, s105, v2 :: v_dual_mul_f32 v6, s105, v3 ; encoding: [0x69,0x04,0x46,0xca,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s1, v2 :: v_dual_mul_f32 v6, s1, v3 +// GFX12: v_dual_cndmask_b32 v255, s1, v2 :: v_dual_mul_f32 v6, s1, v3 ; encoding: [0x01,0x04,0x46,0xca,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, ttmp15, v2 :: v_dual_mul_f32 v6, ttmp15, v3 +// GFX12: v_dual_cndmask_b32 v255, ttmp15, v2 :: v_dual_mul_f32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x46,0xca,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_hi, v2 :: v_dual_mul_f32 v6, exec_hi, v3 +// GFX12: v_dual_cndmask_b32 v255, exec_hi, v2 :: v_dual_mul_f32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x46,0xca,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_lo, v2 :: v_dual_mul_f32 v6, exec_lo, v3 +// GFX12: v_dual_cndmask_b32 v255, exec_lo, v2 :: v_dual_mul_f32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x46,0xca,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, m0, v2 :: v_dual_mul_f32 v6, m0, v3 +// GFX12: v_dual_cndmask_b32 v255, m0, v2 :: v_dual_mul_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x46,0xca,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_hi, v2 :: v_dual_mul_f32 v6, vcc_hi, v3 +// GFX12: v_dual_cndmask_b32 v255, vcc_hi, v2 :: v_dual_mul_f32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x46,0xca,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_cndmask_b32 v255, vcc_lo, v2 :: v_dual_mul_f32 v6, vcc_lo, v3 +// GFX12: v_dual_cndmask_b32 v255, vcc_lo, v2 :: v_dual_mul_f32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x46,0xca,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_mul_f32 v6, null, v3 +// GFX12: v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_mul_f32 v6, null, v3 ; encoding: [0xff,0x04,0x46,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, src_scc, v2 :: v_dual_mul_f32 v6, -1, v3 +// GFX12: v_dual_cndmask_b32 v255, src_scc, v2 :: v_dual_mul_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x46,0xca,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_mul_f32 v6, 0.5, v2 +// GFX12: v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_mul_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x46,0xca,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, -1, v4 :: v_dual_mul_f32 v6, src_scc, v5 +// GFX12: v_dual_cndmask_b32 v255, -1, v4 :: v_dual_mul_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x46,0xca,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v6, null, v5 :: v_dual_mul_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_cndmask_b32 v6, null, v5 :: v_dual_mul_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x46,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2 :: v_dual_sub_f32 v6, v1, v3 +// GFX12: v_dual_cndmask_b32 v255, v4, v2 :: v_dual_sub_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x4a,0xca,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v1, v2 :: v_dual_sub_f32 
v6, v255, v3 +// GFX12: v_dual_cndmask_b32 v255, v1, v2 :: v_dual_sub_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x4a,0xca,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v255, v2 :: v_dual_sub_f32 v6, v2, v3 +// GFX12: v_dual_cndmask_b32 v255, v255, v2 :: v_dual_sub_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x4a,0xca,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v2, v2 :: v_dual_sub_f32 v6, v3, v3 +// GFX12: v_dual_cndmask_b32 v255, v2, v2 :: v_dual_sub_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x4a,0xca,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v3, v2 :: v_dual_sub_f32 v6, v4, v3 +// GFX12: v_dual_cndmask_b32 v255, v3, v2 :: v_dual_sub_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x4a,0xca,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s105, v2 :: v_dual_sub_f32 v6, s105, v3 +// GFX12: v_dual_cndmask_b32 v255, s105, v2 :: v_dual_sub_f32 v6, s105, v3 ; encoding: [0x69,0x04,0x4a,0xca,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s1, v2 :: v_dual_sub_f32 v6, s1, v3 +// GFX12: v_dual_cndmask_b32 v255, s1, v2 :: v_dual_sub_f32 v6, s1, v3 ; encoding: [0x01,0x04,0x4a,0xca,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, ttmp15, v2 :: v_dual_sub_f32 v6, ttmp15, v3 +// GFX12: v_dual_cndmask_b32 v255, ttmp15, v2 :: v_dual_sub_f32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x4a,0xca,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_hi, v2 :: v_dual_sub_f32 v6, exec_hi, v3 +// GFX12: v_dual_cndmask_b32 v255, exec_hi, v2 :: v_dual_sub_f32 v6, exec_hi, v3 ; encoding: 
[0x7f,0x04,0x4a,0xca,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_lo, v2 :: v_dual_sub_f32 v6, exec_lo, v3 +// GFX12: v_dual_cndmask_b32 v255, exec_lo, v2 :: v_dual_sub_f32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x4a,0xca,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, m0, v2 :: v_dual_sub_f32 v6, m0, v3 +// GFX12: v_dual_cndmask_b32 v255, m0, v2 :: v_dual_sub_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x4a,0xca,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_hi, v2 :: v_dual_sub_f32 v6, vcc_hi, v3 +// GFX12: v_dual_cndmask_b32 v255, vcc_hi, v2 :: v_dual_sub_f32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x4a,0xca,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_lo, v2 :: v_dual_sub_f32 v6, vcc_lo, v3 +// GFX12: v_dual_cndmask_b32 v255, vcc_lo, v2 :: v_dual_sub_f32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x4a,0xca,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_sub_f32 v6, null, v3 +// GFX12: v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_sub_f32 v6, null, v3 ; encoding: [0xff,0x04,0x4a,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, src_scc, v2 :: v_dual_sub_f32 v6, -1, v3 +// GFX12: v_dual_cndmask_b32 v255, src_scc, v2 :: v_dual_sub_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x4a,0xca,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_sub_f32 v6, 0.5, v2 +// GFX12: v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_sub_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x4a,0xca,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, -1, v4 :: v_dual_sub_f32 v6, src_scc, v5 +// GFX12: v_dual_cndmask_b32 v255, -1, v4 :: v_dual_sub_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x4a,0xca,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v6, null, v5 :: v_dual_sub_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_cndmask_b32 v6, null, v5 :: v_dual_sub_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x4a,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2 :: v_dual_subrev_f32 v6, v1, v3 +// GFX12: v_dual_cndmask_b32 v255, v4, v2 :: v_dual_subrev_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x4c,0xca,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v1, v2 :: v_dual_subrev_f32 v6, v255, v3 +// GFX12: v_dual_cndmask_b32 v255, v1, v2 :: v_dual_subrev_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x4c,0xca,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v255, v2 :: v_dual_subrev_f32 v6, v2, v3 +// GFX12: v_dual_cndmask_b32 v255, v255, v2 :: v_dual_subrev_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x4c,0xca,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v2, v2 :: v_dual_subrev_f32 v6, v3, v3 +// GFX12: v_dual_cndmask_b32 v255, v2, v2 :: v_dual_subrev_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x4c,0xca,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v3, v2 :: v_dual_subrev_f32 v6, v4, v3 +// GFX12: v_dual_cndmask_b32 v255, v3, v2 :: v_dual_subrev_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x4c,0xca,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s105, v2 :: v_dual_subrev_f32 v6, 
s105, v3 +// GFX12: v_dual_cndmask_b32 v255, s105, v2 :: v_dual_subrev_f32 v6, s105, v3 ; encoding: [0x69,0x04,0x4c,0xca,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s1, v2 :: v_dual_subrev_f32 v6, s1, v3 +// GFX12: v_dual_cndmask_b32 v255, s1, v2 :: v_dual_subrev_f32 v6, s1, v3 ; encoding: [0x01,0x04,0x4c,0xca,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, ttmp15, v2 :: v_dual_subrev_f32 v6, ttmp15, v3 +// GFX12: v_dual_cndmask_b32 v255, ttmp15, v2 :: v_dual_subrev_f32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x4c,0xca,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_hi, v2 :: v_dual_subrev_f32 v6, exec_hi, v3 +// GFX12: v_dual_cndmask_b32 v255, exec_hi, v2 :: v_dual_subrev_f32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x4c,0xca,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_lo, v2 :: v_dual_subrev_f32 v6, exec_lo, v3 +// GFX12: v_dual_cndmask_b32 v255, exec_lo, v2 :: v_dual_subrev_f32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x4c,0xca,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, m0, v2 :: v_dual_subrev_f32 v6, m0, v3 +// GFX12: v_dual_cndmask_b32 v255, m0, v2 :: v_dual_subrev_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x4c,0xca,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v6, vcc_hi, v3 +// GFX12: v_dual_cndmask_b32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x4c,0xca,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v6, vcc_lo, v3 +// GFX12: v_dual_cndmask_b32 v255, vcc_lo, v2 :: 
v_dual_subrev_f32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x4c,0xca,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_subrev_f32 v6, null, v3 +// GFX12: v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_subrev_f32 v6, null, v3 ; encoding: [0xff,0x04,0x4c,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, src_scc, v2 :: v_dual_subrev_f32 v6, -1, v3 +// GFX12: v_dual_cndmask_b32 v255, src_scc, v2 :: v_dual_subrev_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x4c,0xca,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_subrev_f32 v6, 0.5, v2 +// GFX12: v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_subrev_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x4c,0xca,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, -1, v4 :: v_dual_subrev_f32 v6, src_scc, v5 +// GFX12: v_dual_cndmask_b32 v255, -1, v4 :: v_dual_subrev_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x4c,0xca,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v6, null, v5 :: v_dual_subrev_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_cndmask_b32 v6, null, v5 :: v_dual_subrev_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x4c,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_add_f32 v6, v1, v3 +// GFX12: v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_add_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x48,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: v_dual_add_f32 v6, v255, v3 +// GFX12: v_dual_fmaak_f32 v255, v1, 
v2, 0xaf123456 :: v_dual_add_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x48,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_add_f32 v6, v2, v3 +// GFX12: v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_add_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x48,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_add_f32 v6, v3, v3 +// GFX12: v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_add_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x48,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_add_f32 v6, v4, v3 +// GFX12: v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_add_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x48,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, s105, v2, 0xaf123456 :: v_dual_add_f32 v6, s105, v3 +// GFX12: v_dual_fmaak_f32 v255, s105, v2, 0xaf123456 :: v_dual_add_f32 v6, s105, v3 ; encoding: [0x69,0x04,0x48,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, s1, v2, 0xaf123456 :: v_dual_add_f32 v6, s1, v3 +// GFX12: v_dual_fmaak_f32 v255, s1, v2, 0xaf123456 :: v_dual_add_f32 v6, s1, v3 ; encoding: [0x01,0x04,0x48,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, ttmp15, v2, 0xaf123456 :: v_dual_add_f32 v6, ttmp15, v3 +// GFX12: v_dual_fmaak_f32 v255, ttmp15, v2, 0xaf123456 :: v_dual_add_f32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x48,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_fmaak_f32 v255, exec_hi, v2, 0xaf123456 :: v_dual_add_f32 v6, exec_hi, v3 +// GFX12: v_dual_fmaak_f32 v255, exec_hi, v2, 0xaf123456 :: v_dual_add_f32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x48,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, exec_lo, v2, 0xaf123456 :: v_dual_add_f32 v6, exec_lo, v3 +// GFX12: v_dual_fmaak_f32 v255, exec_lo, v2, 0xaf123456 :: v_dual_add_f32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x48,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, m0, v2, 0xaf123456 :: v_dual_add_f32 v6, m0, v3 +// GFX12: v_dual_fmaak_f32 v255, m0, v2, 0xaf123456 :: v_dual_add_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x48,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, vcc_hi, v2, 0xaf123456 :: v_dual_add_f32 v6, vcc_hi, v3 +// GFX12: v_dual_fmaak_f32 v255, vcc_hi, v2, 0xaf123456 :: v_dual_add_f32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x48,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, vcc_lo, v2, 0xaf123456 :: v_dual_add_f32 v6, vcc_lo, v3 +// GFX12: v_dual_fmaak_f32 v255, vcc_lo, v2, 0xaf123456 :: v_dual_add_f32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x48,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_add_f32 v6, null, v3 +// GFX12: v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_add_f32 v6, null, v3 ; encoding: [0xff,0x04,0x48,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, src_scc, v2, 0xaf123456 :: v_dual_add_f32 v6, -1, v3 +// GFX12: 
v_dual_fmaak_f32 v255, src_scc, v2, 0xaf123456 :: v_dual_add_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x48,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_add_f32 v6, 0.5, v2 +// GFX12: v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_add_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x48,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_add_f32 v6, src_scc, v5 +// GFX12: v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_add_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x48,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_add_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_add_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x48,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_add_nc_u32 v6, v1, v3 +// GFX12: v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_add_nc_u32 v6, v1, v3 ; encoding: [0x04,0x05,0x60,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: v_dual_add_nc_u32 v6, v255, v3 +// GFX12: v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: v_dual_add_nc_u32 v6, v255, v3 ; encoding: [0x01,0x05,0x60,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_add_nc_u32 v6, v2, v3 +// GFX12: v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_add_nc_u32 v6, v2, v3 ; encoding: 
[0xff,0x05,0x60,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_add_nc_u32 v6, v3, v3 +// GFX12: v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_add_nc_u32 v6, v3, v3 ; encoding: [0x02,0x05,0x60,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_add_nc_u32 v6, v4, v3 +// GFX12: v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_add_nc_u32 v6, v4, v3 ; encoding: [0x03,0x05,0x60,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, s105, v2, 0xaf123456 :: v_dual_add_nc_u32 v6, s105, v3 +// GFX12: v_dual_fmaak_f32 v255, s105, v2, 0xaf123456 :: v_dual_add_nc_u32 v6, s105, v3 ; encoding: [0x69,0x04,0x60,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, s1, v2, 0xaf123456 :: v_dual_add_nc_u32 v6, s1, v3 +// GFX12: v_dual_fmaak_f32 v255, s1, v2, 0xaf123456 :: v_dual_add_nc_u32 v6, s1, v3 ; encoding: [0x01,0x04,0x60,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, ttmp15, v2, 0xaf123456 :: v_dual_add_nc_u32 v6, ttmp15, v3 +// GFX12: v_dual_fmaak_f32 v255, ttmp15, v2, 0xaf123456 :: v_dual_add_nc_u32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x60,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, exec_hi, v2, 0xaf123456 :: v_dual_add_nc_u32 v6, exec_hi, v3 +// GFX12: v_dual_fmaak_f32 v255, exec_hi, v2, 0xaf123456 :: v_dual_add_nc_u32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x60,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_fmaak_f32 v255, exec_lo, v2, 0xaf123456 :: v_dual_add_nc_u32 v6, exec_lo, v3 +// GFX12: v_dual_fmaak_f32 v255, exec_lo, v2, 0xaf123456 :: v_dual_add_nc_u32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x60,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, m0, v2, 0xaf123456 :: v_dual_add_nc_u32 v6, m0, v3 +// GFX12: v_dual_fmaak_f32 v255, m0, v2, 0xaf123456 :: v_dual_add_nc_u32 v6, m0, v3 ; encoding: [0x7d,0x04,0x60,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, vcc_hi, v2, 0xaf123456 :: v_dual_add_nc_u32 v6, vcc_hi, v3 +// GFX12: v_dual_fmaak_f32 v255, vcc_hi, v2, 0xaf123456 :: v_dual_add_nc_u32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x60,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, vcc_lo, v2, 0xaf123456 :: v_dual_add_nc_u32 v6, vcc_lo, v3 +// GFX12: v_dual_fmaak_f32 v255, vcc_lo, v2, 0xaf123456 :: v_dual_add_nc_u32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x60,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_add_nc_u32 v6, null, v3 +// GFX12: v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_add_nc_u32 v6, null, v3 ; encoding: [0xff,0x04,0x60,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, src_scc, v2, 0xaf123456 :: v_dual_add_nc_u32 v6, -1, v3 +// GFX12: v_dual_fmaak_f32 v255, src_scc, v2, 0xaf123456 :: v_dual_add_nc_u32 v6, -1, v3 ; encoding: [0xfd,0x04,0x60,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_add_nc_u32 v6, 0.5, 
v2 +// GFX12: v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_add_nc_u32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x60,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_add_nc_u32 v6, src_scc, v5 +// GFX12: v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_add_nc_u32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x60,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_add_nc_u32 v255, 0xaf123456, v4 +// GFX12: v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_add_nc_u32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x60,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_cndmask_b32 v6, v1, v3 +// GFX12: v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_cndmask_b32 v6, v1, v3 ; encoding: [0x04,0x05,0x52,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: v_dual_cndmask_b32 v6, v255, v3 +// GFX12: v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: v_dual_cndmask_b32 v6, v255, v3 ; encoding: [0x01,0x05,0x52,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_cndmask_b32 v6, v2, v3 +// GFX12: v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_cndmask_b32 v6, v2, v3 ; encoding: [0xff,0x05,0x52,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_cndmask_b32 v6, v3, v3 +// GFX12: v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_cndmask_b32 v6, v3, v3 ; encoding: 
[0x02,0x05,0x52,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_cndmask_b32 v6, v4, v3 +// GFX12: v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_cndmask_b32 v6, v4, v3 ; encoding: [0x03,0x05,0x52,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_cndmask_b32 v6, null, v3 +// GFX12: v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_cndmask_b32 v6, null, v3 ; encoding: [0xff,0x04,0x52,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_cndmask_b32 v6, -1, v2 +// GFX12: v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_cndmask_b32 v6, -1, v2 ; encoding: [0xf0,0x06,0x52,0xc8,0xc1,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_cndmask_b32 v6, 0.5, v5 +// GFX12: v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_cndmask_b32 v6, 0.5, v5 ; encoding: [0xc1,0x08,0x52,0xc8,0xf0,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_cndmask_b32 v255, 0xaf123456, v4 +// GFX12: v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_cndmask_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x52,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_fmaak_f32 v6, v1, v3, 0xaf123456 +// GFX12: v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_fmaak_f32 v6, v1, v3, 0xaf123456 ; encoding: [0x04,0x05,0x42,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: 
error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: v_dual_fmaak_f32 v6, v255, v3, 0xaf123456 +// GFX12: v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: v_dual_fmaak_f32 v6, v255, v3, 0xaf123456 ; encoding: [0x01,0x05,0x42,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_fmaak_f32 v6, v2, v3, 0xaf123456 +// GFX12: v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_fmaak_f32 v6, v2, v3, 0xaf123456 ; encoding: [0xff,0x05,0x42,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_fmaak_f32 v6, v3, v3, 0xaf123456 +// GFX12: v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_fmaak_f32 v6, v3, v3, 0xaf123456 ; encoding: [0x02,0x05,0x42,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_fmaak_f32 v6, v4, v3, 0xaf123456 +// GFX12: v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_fmaak_f32 v6, v4, v3, 0xaf123456 ; encoding: [0x03,0x05,0x42,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, s105, v2, 0xaf123456 :: v_dual_fmaak_f32 v6, s105, v3, 0xaf123456 +// GFX12: v_dual_fmaak_f32 v255, s105, v2, 0xaf123456 :: v_dual_fmaak_f32 v6, s105, v3, 0xaf123456 ; encoding: [0x69,0x04,0x42,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, s1, v2, 0xaf123456 :: v_dual_fmaak_f32 v6, s1, v3, 0xaf123456 +// GFX12: v_dual_fmaak_f32 v255, s1, v2, 0xaf123456 :: v_dual_fmaak_f32 v6, s1, v3, 0xaf123456 ; encoding: [0x01,0x04,0x42,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction 
requires wavesize=32 + +v_dual_fmaak_f32 v255, ttmp15, v2, 0xaf123456 :: v_dual_fmaak_f32 v6, ttmp15, v3, 0xaf123456 +// GFX12: v_dual_fmaak_f32 v255, ttmp15, v2, 0xaf123456 :: v_dual_fmaak_f32 v6, ttmp15, v3, 0xaf123456 ; encoding: [0x7b,0x04,0x42,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, exec_hi, v2, 0xaf123456 :: v_dual_fmaak_f32 v6, exec_hi, v3, 0xaf123456 +// GFX12: v_dual_fmaak_f32 v255, exec_hi, v2, 0xaf123456 :: v_dual_fmaak_f32 v6, exec_hi, v3, 0xaf123456 ; encoding: [0x7f,0x04,0x42,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, exec_lo, v2, 0xaf123456 :: v_dual_fmaak_f32 v6, exec_lo, v3, 0xaf123456 +// GFX12: v_dual_fmaak_f32 v255, exec_lo, v2, 0xaf123456 :: v_dual_fmaak_f32 v6, exec_lo, v3, 0xaf123456 ; encoding: [0x7e,0x04,0x42,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, m0, v2, 0xaf123456 :: v_dual_fmaak_f32 v6, m0, v3, 0xaf123456 +// GFX12: v_dual_fmaak_f32 v255, m0, v2, 0xaf123456 :: v_dual_fmaak_f32 v6, m0, v3, 0xaf123456 ; encoding: [0x7d,0x04,0x42,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, vcc_hi, v2, 0xaf123456 :: v_dual_fmaak_f32 v6, vcc_hi, v3, 0xaf123456 +// GFX12: v_dual_fmaak_f32 v255, vcc_hi, v2, 0xaf123456 :: v_dual_fmaak_f32 v6, vcc_hi, v3, 0xaf123456 ; encoding: [0x6b,0x04,0x42,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, vcc_lo, v2, 0xaf123456 :: v_dual_fmaak_f32 v6, vcc_lo, v3, 0xaf123456 +// GFX12: v_dual_fmaak_f32 v255, vcc_lo, v2, 0xaf123456 :: v_dual_fmaak_f32 v6, vcc_lo, v3, 0xaf123456 ; encoding: 
[0x6a,0x04,0x42,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_fmaak_f32 v6, null, v3, 0xaf123456 +// GFX12: v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_fmaak_f32 v6, null, v3, 0xaf123456 ; encoding: [0xff,0x04,0x42,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, src_scc, v2, 0xaf123456 :: v_dual_fmaak_f32 v6, -1, v3, 0xaf123456 +// GFX12: v_dual_fmaak_f32 v255, src_scc, v2, 0xaf123456 :: v_dual_fmaak_f32 v6, -1, v3, 0xaf123456 ; encoding: [0xfd,0x04,0x42,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_fmaak_f32 v6, 0.5, v2, 0xaf123456 +// GFX12: v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_fmaak_f32 v6, 0.5, v2, 0xaf123456 ; encoding: [0xf0,0x06,0x42,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_fmaak_f32 v6, src_scc, v5, 0xaf123456 +// GFX12: v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_fmaak_f32 v6, src_scc, v5, 0xaf123456 ; encoding: [0xc1,0x08,0x42,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_fmaak_f32 v255, 0xaf123456, v4, 0xaf123456 +// GFX12: v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_fmaak_f32 v255, 0xaf123456, v4, 0xaf123456 ; encoding: [0x7c,0x0a,0x42,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_fmac_f32 v6, v1, v3 +// GFX12: v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_fmac_f32 v6, v1, v3 ; encoding: 
[0x04,0x05,0x40,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: v_dual_fmac_f32 v6, v255, v3 +// GFX12: v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: v_dual_fmac_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x40,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_fmac_f32 v6, v2, v3 +// GFX12: v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_fmac_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x40,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_fmac_f32 v6, v3, v3 +// GFX12: v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_fmac_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x40,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_fmac_f32 v6, v4, v3 +// GFX12: v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_fmac_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x40,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, s105, v2, 0xaf123456 :: v_dual_fmac_f32 v6, s105, v3 +// GFX12: v_dual_fmaak_f32 v255, s105, v2, 0xaf123456 :: v_dual_fmac_f32 v6, s105, v3 ; encoding: [0x69,0x04,0x40,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, s1, v2, 0xaf123456 :: v_dual_fmac_f32 v6, s1, v3 +// GFX12: v_dual_fmaak_f32 v255, s1, v2, 0xaf123456 :: v_dual_fmac_f32 v6, s1, v3 ; encoding: [0x01,0x04,0x40,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, ttmp15, v2, 
0xaf123456 :: v_dual_fmac_f32 v6, ttmp15, v3 +// GFX12: v_dual_fmaak_f32 v255, ttmp15, v2, 0xaf123456 :: v_dual_fmac_f32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x40,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, exec_hi, v2, 0xaf123456 :: v_dual_fmac_f32 v6, exec_hi, v3 +// GFX12: v_dual_fmaak_f32 v255, exec_hi, v2, 0xaf123456 :: v_dual_fmac_f32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x40,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, exec_lo, v2, 0xaf123456 :: v_dual_fmac_f32 v6, exec_lo, v3 +// GFX12: v_dual_fmaak_f32 v255, exec_lo, v2, 0xaf123456 :: v_dual_fmac_f32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x40,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, m0, v2, 0xaf123456 :: v_dual_fmac_f32 v6, m0, v3 +// GFX12: v_dual_fmaak_f32 v255, m0, v2, 0xaf123456 :: v_dual_fmac_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x40,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, vcc_hi, v2, 0xaf123456 :: v_dual_fmac_f32 v6, vcc_hi, v3 +// GFX12: v_dual_fmaak_f32 v255, vcc_hi, v2, 0xaf123456 :: v_dual_fmac_f32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x40,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, vcc_lo, v2, 0xaf123456 :: v_dual_fmac_f32 v6, vcc_lo, v3 +// GFX12: v_dual_fmaak_f32 v255, vcc_lo, v2, 0xaf123456 :: v_dual_fmac_f32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x40,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_fmac_f32 v6, null, v3 +// GFX12: v_dual_fmaak_f32 v255, 0xaf123456, v2, 
0xaf123456 :: v_dual_fmac_f32 v6, null, v3 ; encoding: [0xff,0x04,0x40,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, src_scc, v2, 0xaf123456 :: v_dual_fmac_f32 v6, -1, v3 +// GFX12: v_dual_fmaak_f32 v255, src_scc, v2, 0xaf123456 :: v_dual_fmac_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x40,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_fmac_f32 v6, 0.5, v2 +// GFX12: v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_fmac_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x40,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_fmac_f32 v6, src_scc, v5 +// GFX12: v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_fmac_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x40,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_fmac_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_fmac_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x40,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v4, v255, 0xaf123456 :: v_dual_fmamk_f32 v6, v1, 0xaf123456, v255 +// GFX12: v_dual_fmaak_f32 v255, v4, v255, 0xaf123456 :: v_dual_fmamk_f32 v6, v1, 0xaf123456, v255 ; encoding: [0x04,0xff,0x45,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v1, v255, 0xaf123456 :: v_dual_fmamk_f32 v6, v255, 0xaf123456, v255 +// GFX12: v_dual_fmaak_f32 v255, v1, v255, 0xaf123456 :: v_dual_fmamk_f32 v6, v255, 0xaf123456, v255 ; encoding: 
[0x01,0xff,0x45,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v255, v255, 0xaf123456 :: v_dual_fmamk_f32 v6, v2, 0xaf123456, v255 +// GFX12: v_dual_fmaak_f32 v255, v255, v255, 0xaf123456 :: v_dual_fmamk_f32 v6, v2, 0xaf123456, v255 ; encoding: [0xff,0xff,0x45,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v2, v255, 0xaf123456 :: v_dual_fmamk_f32 v6, v3, 0xaf123456, v255 +// GFX12: v_dual_fmaak_f32 v255, v2, v255, 0xaf123456 :: v_dual_fmamk_f32 v6, v3, 0xaf123456, v255 ; encoding: [0x02,0xff,0x45,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v3, v255, 0xaf123456 :: v_dual_fmamk_f32 v6, v4, 0xaf123456, v255 +// GFX12: v_dual_fmaak_f32 v255, v3, v255, 0xaf123456 :: v_dual_fmamk_f32 v6, v4, 0xaf123456, v255 ; encoding: [0x03,0xff,0x45,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, s105, v255, 0xaf123456 :: v_dual_fmamk_f32 v6, s105, 0xaf123456, v255 +// GFX12: v_dual_fmaak_f32 v255, s105, v255, 0xaf123456 :: v_dual_fmamk_f32 v6, s105, 0xaf123456, v255 ; encoding: [0x69,0xfe,0x45,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, s1, v255, 0xaf123456 :: v_dual_fmamk_f32 v6, s1, 0xaf123456, v255 +// GFX12: v_dual_fmaak_f32 v255, s1, v255, 0xaf123456 :: v_dual_fmamk_f32 v6, s1, 0xaf123456, v255 ; encoding: [0x01,0xfe,0x45,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, ttmp15, v255, 0xaf123456 :: v_dual_fmamk_f32 v6, ttmp15, 0xaf123456, v255 +// GFX12: v_dual_fmaak_f32 v255, ttmp15, v255, 0xaf123456 :: v_dual_fmamk_f32 
v6, ttmp15, 0xaf123456, v255 ; encoding: [0x7b,0xfe,0x45,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, exec_hi, v255, 0xaf123456 :: v_dual_fmamk_f32 v6, exec_hi, 0xaf123456, v255 +// GFX12: v_dual_fmaak_f32 v255, exec_hi, v255, 0xaf123456 :: v_dual_fmamk_f32 v6, exec_hi, 0xaf123456, v255 ; encoding: [0x7f,0xfe,0x45,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, exec_lo, v255, 0xaf123456 :: v_dual_fmamk_f32 v6, exec_lo, 0xaf123456, v255 +// GFX12: v_dual_fmaak_f32 v255, exec_lo, v255, 0xaf123456 :: v_dual_fmamk_f32 v6, exec_lo, 0xaf123456, v255 ; encoding: [0x7e,0xfe,0x45,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, m0, v255, 0xaf123456 :: v_dual_fmamk_f32 v6, m0, 0xaf123456, v255 +// GFX12: v_dual_fmaak_f32 v255, m0, v255, 0xaf123456 :: v_dual_fmamk_f32 v6, m0, 0xaf123456, v255 ; encoding: [0x7d,0xfe,0x45,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, vcc_hi, v255, 0xaf123456 :: v_dual_fmamk_f32 v6, vcc_hi, 0xaf123456, v255 +// GFX12: v_dual_fmaak_f32 v255, vcc_hi, v255, 0xaf123456 :: v_dual_fmamk_f32 v6, vcc_hi, 0xaf123456, v255 ; encoding: [0x6b,0xfe,0x45,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, vcc_lo, v255, 0xaf123456 :: v_dual_fmamk_f32 v6, vcc_lo, 0xaf123456, v255 +// GFX12: v_dual_fmaak_f32 v255, vcc_lo, v255, 0xaf123456 :: v_dual_fmamk_f32 v6, vcc_lo, 0xaf123456, v255 ; encoding: [0x6a,0xfe,0x45,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, 0xaf123456, v255, 0xaf123456 :: v_dual_fmamk_f32 v6, 
null, 0xaf123456, v255 +// GFX12: v_dual_fmaak_f32 v255, 0xaf123456, v255, 0xaf123456 :: v_dual_fmamk_f32 v6, null, 0xaf123456, v255 ; encoding: [0xff,0xfe,0x45,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, src_scc, v255, 0xaf123456 :: v_dual_fmamk_f32 v6, -1, 0xaf123456, v255 +// GFX12: v_dual_fmaak_f32 v255, src_scc, v255, 0xaf123456 :: v_dual_fmamk_f32 v6, -1, 0xaf123456, v255 ; encoding: [0xfd,0xfe,0x45,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_fmamk_f32 v6, 0.5, 0xaf123456, v255 +// GFX12: v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_fmamk_f32 v6, 0.5, 0xaf123456, v255 ; encoding: [0xf0,0x06,0x44,0xc8,0xf0,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_fmamk_f32 v6, src_scc, 0xaf123456, v255 +// GFX12: v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_fmamk_f32 v6, src_scc, 0xaf123456, v255 ; encoding: [0xc1,0x08,0x44,0xc8,0xfd,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v4 +// GFX12: v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x44,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_lshlrev_b32 v6, v1, v3 +// GFX12: v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_lshlrev_b32 v6, v1, v3 ; encoding: [0x04,0x05,0x62,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: 
v_dual_lshlrev_b32 v6, v255, v3 +// GFX12: v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: v_dual_lshlrev_b32 v6, v255, v3 ; encoding: [0x01,0x05,0x62,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_lshlrev_b32 v6, v2, v3 +// GFX12: v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_lshlrev_b32 v6, v2, v3 ; encoding: [0xff,0x05,0x62,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_lshlrev_b32 v6, v3, v3 +// GFX12: v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_lshlrev_b32 v6, v3, v3 ; encoding: [0x02,0x05,0x62,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_lshlrev_b32 v6, v4, v3 +// GFX12: v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_lshlrev_b32 v6, v4, v3 ; encoding: [0x03,0x05,0x62,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, s105, v2, 0xaf123456 :: v_dual_lshlrev_b32 v6, s105, v3 +// GFX12: v_dual_fmaak_f32 v255, s105, v2, 0xaf123456 :: v_dual_lshlrev_b32 v6, s105, v3 ; encoding: [0x69,0x04,0x62,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, s1, v2, 0xaf123456 :: v_dual_lshlrev_b32 v6, s1, v3 +// GFX12: v_dual_fmaak_f32 v255, s1, v2, 0xaf123456 :: v_dual_lshlrev_b32 v6, s1, v3 ; encoding: [0x01,0x04,0x62,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, ttmp15, v2, 0xaf123456 :: v_dual_lshlrev_b32 v6, ttmp15, v3 +// GFX12: v_dual_fmaak_f32 v255, ttmp15, v2, 0xaf123456 :: v_dual_lshlrev_b32 v6, ttmp15, v3 ; 
encoding: [0x7b,0x04,0x62,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, exec_hi, v2, 0xaf123456 :: v_dual_lshlrev_b32 v6, exec_hi, v3 +// GFX12: v_dual_fmaak_f32 v255, exec_hi, v2, 0xaf123456 :: v_dual_lshlrev_b32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x62,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, exec_lo, v2, 0xaf123456 :: v_dual_lshlrev_b32 v6, exec_lo, v3 +// GFX12: v_dual_fmaak_f32 v255, exec_lo, v2, 0xaf123456 :: v_dual_lshlrev_b32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x62,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, m0, v2, 0xaf123456 :: v_dual_lshlrev_b32 v6, m0, v3 +// GFX12: v_dual_fmaak_f32 v255, m0, v2, 0xaf123456 :: v_dual_lshlrev_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0x62,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, vcc_hi, v2, 0xaf123456 :: v_dual_lshlrev_b32 v6, vcc_hi, v3 +// GFX12: v_dual_fmaak_f32 v255, vcc_hi, v2, 0xaf123456 :: v_dual_lshlrev_b32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x62,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, vcc_lo, v2, 0xaf123456 :: v_dual_lshlrev_b32 v6, vcc_lo, v3 +// GFX12: v_dual_fmaak_f32 v255, vcc_lo, v2, 0xaf123456 :: v_dual_lshlrev_b32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x62,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_lshlrev_b32 v6, null, v3 +// GFX12: v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_lshlrev_b32 v6, null, v3 ; encoding: 
[0xff,0x04,0x62,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, src_scc, v2, 0xaf123456 :: v_dual_lshlrev_b32 v6, -1, v3 +// GFX12: v_dual_fmaak_f32 v255, src_scc, v2, 0xaf123456 :: v_dual_lshlrev_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0x62,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_lshlrev_b32 v6, 0.5, v2 +// GFX12: v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_lshlrev_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x62,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_lshlrev_b32 v6, src_scc, v5 +// GFX12: v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_lshlrev_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x62,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_lshlrev_b32 v255, 0xaf123456, v4 +// GFX12: v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_lshlrev_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x62,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_max_num_f32 v6, v1, v3 +// GFX12: v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_max_num_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x54,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: v_dual_max_num_f32 v6, v255, v3 +// GFX12: v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: v_dual_max_num_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x54,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction 
requires wavesize=32 + +v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_max_num_f32 v6, v2, v3 +// GFX12: v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_max_num_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x54,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_max_num_f32 v6, v3, v3 +// GFX12: v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_max_num_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x54,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_max_num_f32 v6, v4, v3 +// GFX12: v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_max_num_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x54,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, s105, v2, 0xaf123456 :: v_dual_max_num_f32 v6, s105, v3 +// GFX12: v_dual_fmaak_f32 v255, s105, v2, 0xaf123456 :: v_dual_max_num_f32 v6, s105, v3 ; encoding: [0x69,0x04,0x54,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, s1, v2, 0xaf123456 :: v_dual_max_num_f32 v6, s1, v3 +// GFX12: v_dual_fmaak_f32 v255, s1, v2, 0xaf123456 :: v_dual_max_num_f32 v6, s1, v3 ; encoding: [0x01,0x04,0x54,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, ttmp15, v2, 0xaf123456 :: v_dual_max_num_f32 v6, ttmp15, v3 +// GFX12: v_dual_fmaak_f32 v255, ttmp15, v2, 0xaf123456 :: v_dual_max_num_f32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x54,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, exec_hi, v2, 0xaf123456 :: v_dual_max_num_f32 v6, exec_hi, v3 +// GFX12: 
v_dual_fmaak_f32 v255, exec_hi, v2, 0xaf123456 :: v_dual_max_num_f32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x54,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, exec_lo, v2, 0xaf123456 :: v_dual_max_num_f32 v6, exec_lo, v3 +// GFX12: v_dual_fmaak_f32 v255, exec_lo, v2, 0xaf123456 :: v_dual_max_num_f32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x54,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, m0, v2, 0xaf123456 :: v_dual_max_num_f32 v6, m0, v3 +// GFX12: v_dual_fmaak_f32 v255, m0, v2, 0xaf123456 :: v_dual_max_num_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x54,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, vcc_hi, v2, 0xaf123456 :: v_dual_max_num_f32 v6, vcc_hi, v3 +// GFX12: v_dual_fmaak_f32 v255, vcc_hi, v2, 0xaf123456 :: v_dual_max_num_f32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x54,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, vcc_lo, v2, 0xaf123456 :: v_dual_max_num_f32 v6, vcc_lo, v3 +// GFX12: v_dual_fmaak_f32 v255, vcc_lo, v2, 0xaf123456 :: v_dual_max_num_f32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x54,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_max_num_f32 v6, null, v3 +// GFX12: v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_max_num_f32 v6, null, v3 ; encoding: [0xff,0x04,0x54,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, src_scc, v2, 0xaf123456 :: v_dual_max_num_f32 v6, -1, v3 +// GFX12: v_dual_fmaak_f32 v255, src_scc, v2, 0xaf123456 :: v_dual_max_num_f32 
v6, -1, v3 ; encoding: [0xfd,0x04,0x54,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_max_num_f32 v6, 0.5, v2 +// GFX12: v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_max_num_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x54,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_max_num_f32 v6, src_scc, v5 +// GFX12: v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_max_num_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x54,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_max_num_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_max_num_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x54,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_min_num_f32 v6, v1, v3 +// GFX12: v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_min_num_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x56,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: v_dual_min_num_f32 v6, v255, v3 +// GFX12: v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: v_dual_min_num_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x56,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_min_num_f32 v6, v2, v3 +// GFX12: v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_min_num_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x56,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: 
error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_min_num_f32 v6, v3, v3 +// GFX12: v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_min_num_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x56,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_min_num_f32 v6, v4, v3 +// GFX12: v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_min_num_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x56,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, s105, v2, 0xaf123456 :: v_dual_min_num_f32 v6, s105, v3 +// GFX12: v_dual_fmaak_f32 v255, s105, v2, 0xaf123456 :: v_dual_min_num_f32 v6, s105, v3 ; encoding: [0x69,0x04,0x56,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, s1, v2, 0xaf123456 :: v_dual_min_num_f32 v6, s1, v3 +// GFX12: v_dual_fmaak_f32 v255, s1, v2, 0xaf123456 :: v_dual_min_num_f32 v6, s1, v3 ; encoding: [0x01,0x04,0x56,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, ttmp15, v2, 0xaf123456 :: v_dual_min_num_f32 v6, ttmp15, v3 +// GFX12: v_dual_fmaak_f32 v255, ttmp15, v2, 0xaf123456 :: v_dual_min_num_f32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x56,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, exec_hi, v2, 0xaf123456 :: v_dual_min_num_f32 v6, exec_hi, v3 +// GFX12: v_dual_fmaak_f32 v255, exec_hi, v2, 0xaf123456 :: v_dual_min_num_f32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x56,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, exec_lo, v2, 0xaf123456 :: v_dual_min_num_f32 
v6, exec_lo, v3 +// GFX12: v_dual_fmaak_f32 v255, exec_lo, v2, 0xaf123456 :: v_dual_min_num_f32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x56,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, m0, v2, 0xaf123456 :: v_dual_min_num_f32 v6, m0, v3 +// GFX12: v_dual_fmaak_f32 v255, m0, v2, 0xaf123456 :: v_dual_min_num_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x56,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, vcc_hi, v2, 0xaf123456 :: v_dual_min_num_f32 v6, vcc_hi, v3 +// GFX12: v_dual_fmaak_f32 v255, vcc_hi, v2, 0xaf123456 :: v_dual_min_num_f32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x56,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, vcc_lo, v2, 0xaf123456 :: v_dual_min_num_f32 v6, vcc_lo, v3 +// GFX12: v_dual_fmaak_f32 v255, vcc_lo, v2, 0xaf123456 :: v_dual_min_num_f32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x56,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_min_num_f32 v6, null, v3 +// GFX12: v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_min_num_f32 v6, null, v3 ; encoding: [0xff,0x04,0x56,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, src_scc, v2, 0xaf123456 :: v_dual_min_num_f32 v6, -1, v3 +// GFX12: v_dual_fmaak_f32 v255, src_scc, v2, 0xaf123456 :: v_dual_min_num_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x56,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_min_num_f32 v6, 0.5, v2 +// GFX12: v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: 
v_dual_min_num_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x56,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_min_num_f32 v6, src_scc, v5 +// GFX12: v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_min_num_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x56,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_min_num_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_min_num_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x56,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v4, v255, 0xaf123456 :: v_dual_mov_b32 v6, v1 +// GFX12: v_dual_fmaak_f32 v255, v4, v255, 0xaf123456 :: v_dual_mov_b32 v6, v1 ; encoding: [0x04,0xff,0x51,0xc8,0x01,0x01,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v1, v255, 0xaf123456 :: v_dual_mov_b32 v6, v255 +// GFX12: v_dual_fmaak_f32 v255, v1, v255, 0xaf123456 :: v_dual_mov_b32 v6, v255 ; encoding: [0x01,0xff,0x51,0xc8,0xff,0x01,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v255, v255, 0xaf123456 :: v_dual_mov_b32 v6, v2 +// GFX12: v_dual_fmaak_f32 v255, v255, v255, 0xaf123456 :: v_dual_mov_b32 v6, v2 ; encoding: [0xff,0xff,0x51,0xc8,0x02,0x01,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v2, v255, 0xaf123456 :: v_dual_mov_b32 v6, v3 +// GFX12: v_dual_fmaak_f32 v255, v2, v255, 0xaf123456 :: v_dual_mov_b32 v6, v3 ; encoding: [0x02,0xff,0x51,0xc8,0x03,0x01,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_fmaak_f32 v255, v3, v255, 0xaf123456 :: v_dual_mov_b32 v6, v4 +// GFX12: v_dual_fmaak_f32 v255, v3, v255, 0xaf123456 :: v_dual_mov_b32 v6, v4 ; encoding: [0x03,0xff,0x51,0xc8,0x04,0x01,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, s105, v255, 0xaf123456 :: v_dual_mov_b32 v6, s105 +// GFX12: v_dual_fmaak_f32 v255, s105, v255, 0xaf123456 :: v_dual_mov_b32 v6, s105 ; encoding: [0x69,0xfe,0x51,0xc8,0x69,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, s1, v255, 0xaf123456 :: v_dual_mov_b32 v6, s1 +// GFX12: v_dual_fmaak_f32 v255, s1, v255, 0xaf123456 :: v_dual_mov_b32 v6, s1 ; encoding: [0x01,0xfe,0x51,0xc8,0x01,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, ttmp15, v255, 0xaf123456 :: v_dual_mov_b32 v6, ttmp15 +// GFX12: v_dual_fmaak_f32 v255, ttmp15, v255, 0xaf123456 :: v_dual_mov_b32 v6, ttmp15 ; encoding: [0x7b,0xfe,0x51,0xc8,0x7b,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, exec_hi, v255, 0xaf123456 :: v_dual_mov_b32 v6, exec_hi +// GFX12: v_dual_fmaak_f32 v255, exec_hi, v255, 0xaf123456 :: v_dual_mov_b32 v6, exec_hi ; encoding: [0x7f,0xfe,0x51,0xc8,0x7f,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, exec_lo, v255, 0xaf123456 :: v_dual_mov_b32 v6, exec_lo +// GFX12: v_dual_fmaak_f32 v255, exec_lo, v255, 0xaf123456 :: v_dual_mov_b32 v6, exec_lo ; encoding: [0x7e,0xfe,0x51,0xc8,0x7e,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, m0, v255, 0xaf123456 :: v_dual_mov_b32 v6, m0 +// GFX12: v_dual_fmaak_f32 v255, m0, v255, 0xaf123456 :: v_dual_mov_b32 v6, m0 ; 
encoding: [0x7d,0xfe,0x51,0xc8,0x7d,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, vcc_hi, v255, 0xaf123456 :: v_dual_mov_b32 v6, vcc_hi +// GFX12: v_dual_fmaak_f32 v255, vcc_hi, v255, 0xaf123456 :: v_dual_mov_b32 v6, vcc_hi ; encoding: [0x6b,0xfe,0x51,0xc8,0x6b,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, vcc_lo, v255, 0xaf123456 :: v_dual_mov_b32 v6, vcc_lo +// GFX12: v_dual_fmaak_f32 v255, vcc_lo, v255, 0xaf123456 :: v_dual_mov_b32 v6, vcc_lo ; encoding: [0x6a,0xfe,0x51,0xc8,0x6a,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, 0xaf123456, v255, 0xaf123456 :: v_dual_mov_b32 v6, null +// GFX12: v_dual_fmaak_f32 v255, 0xaf123456, v255, 0xaf123456 :: v_dual_mov_b32 v6, null ; encoding: [0xff,0xfe,0x51,0xc8,0x7c,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, src_scc, v255, 0xaf123456 :: v_dual_mov_b32 v6, -1 +// GFX12: v_dual_fmaak_f32 v255, src_scc, v255, 0xaf123456 :: v_dual_mov_b32 v6, -1 ; encoding: [0xfd,0xfe,0x51,0xc8,0xc1,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_mov_b32 v6, 0.5 +// GFX12: v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_mov_b32 v6, 0.5 ; encoding: [0xf0,0x06,0x50,0xc8,0xf0,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_mov_b32 v6, src_scc +// GFX12: v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_mov_b32 v6, src_scc ; encoding: [0xc1,0x08,0x50,0xc8,0xfd,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 
v6, null, v5, 0xaf123456 :: v_dual_mov_b32 v255, 0xaf123456 +// GFX12: v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_mov_b32 v255, 0xaf123456 ; encoding: [0x7c,0x0a,0x50,0xc8,0xff,0x00,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, v1, v3 +// GFX12: v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x4e,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, v255, v3 +// GFX12: v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x4e,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, v2, v3 +// GFX12: v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x4e,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, v3, v3 +// GFX12: v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x4e,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, v4, v3 +// GFX12: v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x4e,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, s105, v2, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, s105, v3 +// GFX12: v_dual_fmaak_f32 
v255, s105, v2, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, s105, v3 ; encoding: [0x69,0x04,0x4e,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, s1, v2, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, s1, v3 +// GFX12: v_dual_fmaak_f32 v255, s1, v2, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, s1, v3 ; encoding: [0x01,0x04,0x4e,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, ttmp15, v2, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, ttmp15, v3 +// GFX12: v_dual_fmaak_f32 v255, ttmp15, v2, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x4e,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, exec_hi, v2, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, exec_hi, v3 +// GFX12: v_dual_fmaak_f32 v255, exec_hi, v2, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x4e,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, exec_lo, v2, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, exec_lo, v3 +// GFX12: v_dual_fmaak_f32 v255, exec_lo, v2, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x4e,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, m0, v2, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, m0, v3 +// GFX12: v_dual_fmaak_f32 v255, m0, v2, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x4e,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, vcc_hi, v2, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, vcc_hi, v3 +// GFX12: v_dual_fmaak_f32 v255, vcc_hi, v2, 0xaf123456 
:: v_dual_mul_dx9_zero_f32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x4e,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, vcc_lo, v2, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, vcc_lo, v3 +// GFX12: v_dual_fmaak_f32 v255, vcc_lo, v2, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x4e,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, null, v3 +// GFX12: v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, null, v3 ; encoding: [0xff,0x04,0x4e,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, src_scc, v2, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, -1, v3 +// GFX12: v_dual_fmaak_f32 v255, src_scc, v2, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x4e,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, 0.5, v2 +// GFX12: v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x4e,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, src_scc, v5 +// GFX12: v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x4e,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: 
v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x4e,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_mul_f32 v6, v1, v3 +// GFX12: v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_mul_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x46,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: v_dual_mul_f32 v6, v255, v3 +// GFX12: v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: v_dual_mul_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x46,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_mul_f32 v6, v2, v3 +// GFX12: v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_mul_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x46,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_mul_f32 v6, v3, v3 +// GFX12: v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_mul_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x46,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_mul_f32 v6, v4, v3 +// GFX12: v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_mul_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x46,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, s105, v2, 0xaf123456 :: v_dual_mul_f32 v6, s105, v3 +// GFX12: v_dual_fmaak_f32 v255, s105, v2, 0xaf123456 :: v_dual_mul_f32 v6, s105, v3 ; encoding: [0x69,0x04,0x46,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_fmaak_f32 v255, s1, v2, 0xaf123456 :: v_dual_mul_f32 v6, s1, v3 +// GFX12: v_dual_fmaak_f32 v255, s1, v2, 0xaf123456 :: v_dual_mul_f32 v6, s1, v3 ; encoding: [0x01,0x04,0x46,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, ttmp15, v2, 0xaf123456 :: v_dual_mul_f32 v6, ttmp15, v3 +// GFX12: v_dual_fmaak_f32 v255, ttmp15, v2, 0xaf123456 :: v_dual_mul_f32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x46,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, exec_hi, v2, 0xaf123456 :: v_dual_mul_f32 v6, exec_hi, v3 +// GFX12: v_dual_fmaak_f32 v255, exec_hi, v2, 0xaf123456 :: v_dual_mul_f32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x46,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, exec_lo, v2, 0xaf123456 :: v_dual_mul_f32 v6, exec_lo, v3 +// GFX12: v_dual_fmaak_f32 v255, exec_lo, v2, 0xaf123456 :: v_dual_mul_f32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x46,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, m0, v2, 0xaf123456 :: v_dual_mul_f32 v6, m0, v3 +// GFX12: v_dual_fmaak_f32 v255, m0, v2, 0xaf123456 :: v_dual_mul_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x46,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, vcc_hi, v2, 0xaf123456 :: v_dual_mul_f32 v6, vcc_hi, v3 +// GFX12: v_dual_fmaak_f32 v255, vcc_hi, v2, 0xaf123456 :: v_dual_mul_f32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x46,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, vcc_lo, v2, 0xaf123456 :: v_dual_mul_f32 v6, vcc_lo, v3 +// GFX12: v_dual_fmaak_f32 v255, vcc_lo, v2, 
0xaf123456 :: v_dual_mul_f32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x46,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_mul_f32 v6, null, v3 +// GFX12: v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_mul_f32 v6, null, v3 ; encoding: [0xff,0x04,0x46,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, src_scc, v2, 0xaf123456 :: v_dual_mul_f32 v6, -1, v3 +// GFX12: v_dual_fmaak_f32 v255, src_scc, v2, 0xaf123456 :: v_dual_mul_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x46,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_mul_f32 v6, 0.5, v2 +// GFX12: v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_mul_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x46,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_mul_f32 v6, src_scc, v5 +// GFX12: v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_mul_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x46,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_mul_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_mul_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x46,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_sub_f32 v6, v1, v3 +// GFX12: v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_sub_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x4a,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: v_dual_sub_f32 v6, v255, v3 +// GFX12: v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: v_dual_sub_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x4a,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_sub_f32 v6, v2, v3 +// GFX12: v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_sub_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x4a,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_sub_f32 v6, v3, v3 +// GFX12: v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_sub_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x4a,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_sub_f32 v6, v4, v3 +// GFX12: v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_sub_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x4a,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, s105, v2, 0xaf123456 :: v_dual_sub_f32 v6, s105, v3 +// GFX12: v_dual_fmaak_f32 v255, s105, v2, 0xaf123456 :: v_dual_sub_f32 v6, s105, v3 ; encoding: [0x69,0x04,0x4a,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, s1, v2, 0xaf123456 :: v_dual_sub_f32 v6, s1, v3 +// GFX12: v_dual_fmaak_f32 v255, s1, v2, 0xaf123456 :: v_dual_sub_f32 v6, s1, v3 ; encoding: [0x01,0x04,0x4a,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, ttmp15, v2, 0xaf123456 :: v_dual_sub_f32 v6, ttmp15, v3 +// GFX12: v_dual_fmaak_f32 v255, ttmp15, v2, 
0xaf123456 :: v_dual_sub_f32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x4a,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, exec_hi, v2, 0xaf123456 :: v_dual_sub_f32 v6, exec_hi, v3 +// GFX12: v_dual_fmaak_f32 v255, exec_hi, v2, 0xaf123456 :: v_dual_sub_f32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x4a,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, exec_lo, v2, 0xaf123456 :: v_dual_sub_f32 v6, exec_lo, v3 +// GFX12: v_dual_fmaak_f32 v255, exec_lo, v2, 0xaf123456 :: v_dual_sub_f32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x4a,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, m0, v2, 0xaf123456 :: v_dual_sub_f32 v6, m0, v3 +// GFX12: v_dual_fmaak_f32 v255, m0, v2, 0xaf123456 :: v_dual_sub_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x4a,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, vcc_hi, v2, 0xaf123456 :: v_dual_sub_f32 v6, vcc_hi, v3 +// GFX12: v_dual_fmaak_f32 v255, vcc_hi, v2, 0xaf123456 :: v_dual_sub_f32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x4a,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, vcc_lo, v2, 0xaf123456 :: v_dual_sub_f32 v6, vcc_lo, v3 +// GFX12: v_dual_fmaak_f32 v255, vcc_lo, v2, 0xaf123456 :: v_dual_sub_f32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x4a,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_sub_f32 v6, null, v3 +// GFX12: v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_sub_f32 v6, null, v3 ; encoding: 
[0xff,0x04,0x4a,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, src_scc, v2, 0xaf123456 :: v_dual_sub_f32 v6, -1, v3 +// GFX12: v_dual_fmaak_f32 v255, src_scc, v2, 0xaf123456 :: v_dual_sub_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x4a,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_sub_f32 v6, 0.5, v2 +// GFX12: v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_sub_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x4a,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_sub_f32 v6, src_scc, v5 +// GFX12: v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_sub_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x4a,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_sub_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_sub_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x4a,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_subrev_f32 v6, v1, v3 +// GFX12: v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_subrev_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x4c,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: v_dual_subrev_f32 v6, v255, v3 +// GFX12: v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: v_dual_subrev_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x4c,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_subrev_f32 v6, v2, v3 +// GFX12: v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_subrev_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x4c,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_subrev_f32 v6, v3, v3 +// GFX12: v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_subrev_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x4c,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_subrev_f32 v6, v4, v3 +// GFX12: v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_subrev_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x4c,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, s105, v2, 0xaf123456 :: v_dual_subrev_f32 v6, s105, v3 +// GFX12: v_dual_fmaak_f32 v255, s105, v2, 0xaf123456 :: v_dual_subrev_f32 v6, s105, v3 ; encoding: [0x69,0x04,0x4c,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, s1, v2, 0xaf123456 :: v_dual_subrev_f32 v6, s1, v3 +// GFX12: v_dual_fmaak_f32 v255, s1, v2, 0xaf123456 :: v_dual_subrev_f32 v6, s1, v3 ; encoding: [0x01,0x04,0x4c,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, ttmp15, v2, 0xaf123456 :: v_dual_subrev_f32 v6, ttmp15, v3 +// GFX12: v_dual_fmaak_f32 v255, ttmp15, v2, 0xaf123456 :: v_dual_subrev_f32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x4c,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, exec_hi, v2, 0xaf123456 :: v_dual_subrev_f32 v6, exec_hi, v3 +// GFX12: v_dual_fmaak_f32 v255, exec_hi, v2, 
0xaf123456 :: v_dual_subrev_f32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x4c,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, exec_lo, v2, 0xaf123456 :: v_dual_subrev_f32 v6, exec_lo, v3 +// GFX12: v_dual_fmaak_f32 v255, exec_lo, v2, 0xaf123456 :: v_dual_subrev_f32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x4c,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, m0, v2, 0xaf123456 :: v_dual_subrev_f32 v6, m0, v3 +// GFX12: v_dual_fmaak_f32 v255, m0, v2, 0xaf123456 :: v_dual_subrev_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x4c,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, vcc_hi, v2, 0xaf123456 :: v_dual_subrev_f32 v6, vcc_hi, v3 +// GFX12: v_dual_fmaak_f32 v255, vcc_hi, v2, 0xaf123456 :: v_dual_subrev_f32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x4c,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, vcc_lo, v2, 0xaf123456 :: v_dual_subrev_f32 v6, vcc_lo, v3 +// GFX12: v_dual_fmaak_f32 v255, vcc_lo, v2, 0xaf123456 :: v_dual_subrev_f32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x4c,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_subrev_f32 v6, null, v3 +// GFX12: v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_subrev_f32 v6, null, v3 ; encoding: [0xff,0x04,0x4c,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, src_scc, v2, 0xaf123456 :: v_dual_subrev_f32 v6, -1, v3 +// GFX12: v_dual_fmaak_f32 v255, src_scc, v2, 0xaf123456 :: v_dual_subrev_f32 v6, -1, v3 ; encoding: 
[0xfd,0x04,0x4c,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_subrev_f32 v6, 0.5, v2 +// GFX12: v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_subrev_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x4c,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_subrev_f32 v6, src_scc, v5 +// GFX12: v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_subrev_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x4c,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_subrev_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_subrev_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x4c,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v4, v2 :: v_dual_add_f32 v6, v1, v3 +// GFX12: v_dual_fmac_f32 v255, v4, v2 :: v_dual_add_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x08,0xc8,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v1, v2 :: v_dual_add_f32 v6, v255, v3 +// GFX12: v_dual_fmac_f32 v255, v1, v2 :: v_dual_add_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x08,0xc8,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v255, v2 :: v_dual_add_f32 v6, v2, v3 +// GFX12: v_dual_fmac_f32 v255, v255, v2 :: v_dual_add_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x08,0xc8,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v2, v2 :: v_dual_add_f32 v6, v3, v3 +// GFX12: v_dual_fmac_f32 v255, v2, v2 :: v_dual_add_f32 v6, v3, v3 ; encoding: 
[0x02,0x05,0x08,0xc8,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v3, v2 :: v_dual_add_f32 v6, v4, v3 +// GFX12: v_dual_fmac_f32 v255, v3, v2 :: v_dual_add_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x08,0xc8,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s105, v2 :: v_dual_add_f32 v6, s1, v3 +// GFX12: v_dual_fmac_f32 v255, s105, v2 :: v_dual_add_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x08,0xc8,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s1, v2 :: v_dual_add_f32 v6, s105, v3 +// GFX12: v_dual_fmac_f32 v255, s1, v2 :: v_dual_add_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x08,0xc8,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_add_f32 v6, vcc_lo, v3 +// GFX12: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_add_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x08,0xc8,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_add_f32 v6, vcc_hi, v3 +// GFX12: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_add_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x08,0xc8,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_add_f32 v6, ttmp15, v3 +// GFX12: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_add_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x08,0xc8,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, m0, v2 :: v_dual_add_f32 v6, m0, v3 +// GFX12: v_dual_fmac_f32 v255, m0, v2 :: v_dual_add_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x08,0xc8,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_add_f32 v6, 
exec_lo, v3 +// GFX12: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_add_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x08,0xc8,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_add_f32 v6, exec_hi, v3 +// GFX12: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_add_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x08,0xc8,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_add_f32 v6, null, v3 +// GFX12: v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_add_f32 v6, null, v3 ; encoding: [0xff,0x04,0x08,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_add_f32 v6, -1, v3 +// GFX12: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_add_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x08,0xc8,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_add_f32 v6, 0.5, v2 +// GFX12: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_add_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x08,0xc8,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, -1, v4 :: v_dual_add_f32 v6, src_scc, v5 +// GFX12: v_dual_fmac_f32 v255, -1, v4 :: v_dual_add_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x08,0xc8,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v6, null, v5 :: v_dual_add_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_fmac_f32 v6, null, v5 :: v_dual_add_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x08,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v4, v2 :: v_dual_add_nc_u32 v6, v1, v3 +// GFX12: v_dual_fmac_f32 v255, v4, v2 :: v_dual_add_nc_u32 v6, v1, v3 ; 
encoding: [0x04,0x05,0x20,0xc8,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v1, v2 :: v_dual_add_nc_u32 v6, v255, v3 +// GFX12: v_dual_fmac_f32 v255, v1, v2 :: v_dual_add_nc_u32 v6, v255, v3 ; encoding: [0x01,0x05,0x20,0xc8,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v255, v2 :: v_dual_add_nc_u32 v6, v2, v3 +// GFX12: v_dual_fmac_f32 v255, v255, v2 :: v_dual_add_nc_u32 v6, v2, v3 ; encoding: [0xff,0x05,0x20,0xc8,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v2, v2 :: v_dual_add_nc_u32 v6, v3, v3 +// GFX12: v_dual_fmac_f32 v255, v2, v2 :: v_dual_add_nc_u32 v6, v3, v3 ; encoding: [0x02,0x05,0x20,0xc8,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v3, v2 :: v_dual_add_nc_u32 v6, v4, v3 +// GFX12: v_dual_fmac_f32 v255, v3, v2 :: v_dual_add_nc_u32 v6, v4, v3 ; encoding: [0x03,0x05,0x20,0xc8,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s105, v2 :: v_dual_add_nc_u32 v6, s1, v3 +// GFX12: v_dual_fmac_f32 v255, s105, v2 :: v_dual_add_nc_u32 v6, s1, v3 ; encoding: [0x69,0x04,0x20,0xc8,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s1, v2 :: v_dual_add_nc_u32 v6, s105, v3 +// GFX12: v_dual_fmac_f32 v255, s1, v2 :: v_dual_add_nc_u32 v6, s105, v3 ; encoding: [0x01,0x04,0x20,0xc8,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v6, vcc_lo, v3 +// GFX12: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x20,0xc8,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_hi, 
v2 :: v_dual_add_nc_u32 v6, vcc_hi, v3 +// GFX12: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x20,0xc8,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v6, ttmp15, v3 +// GFX12: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x20,0xc8,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, m0, v2 :: v_dual_add_nc_u32 v6, m0, v3 +// GFX12: v_dual_fmac_f32 v255, m0, v2 :: v_dual_add_nc_u32 v6, m0, v3 ; encoding: [0x7d,0x04,0x20,0xc8,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v6, exec_lo, v3 +// GFX12: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x20,0xc8,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v6, exec_hi, v3 +// GFX12: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x20,0xc8,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_add_nc_u32 v6, null, v3 +// GFX12: v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_add_nc_u32 v6, null, v3 ; encoding: [0xff,0x04,0x20,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_add_nc_u32 v6, -1, v3 +// GFX12: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_add_nc_u32 v6, -1, v3 ; encoding: [0xfd,0x04,0x20,0xc8,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_add_nc_u32 v6, 0.5, v2 +// GFX12: v_dual_fmac_f32 v255, 
0.5, v3 :: v_dual_add_nc_u32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x20,0xc8,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, -1, v4 :: v_dual_add_nc_u32 v6, src_scc, v5 +// GFX12: v_dual_fmac_f32 v255, -1, v4 :: v_dual_add_nc_u32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x20,0xc8,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v6, null, v5 :: v_dual_add_nc_u32 v255, 0xaf123456, v4 +// GFX12: v_dual_fmac_f32 v6, null, v5 :: v_dual_add_nc_u32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x20,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v4, v2 :: v_dual_cndmask_b32 v6, v1, v3 +// GFX12: v_dual_fmac_f32 v255, v4, v2 :: v_dual_cndmask_b32 v6, v1, v3 ; encoding: [0x04,0x05,0x12,0xc8,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v1, v2 :: v_dual_cndmask_b32 v6, v255, v3 +// GFX12: v_dual_fmac_f32 v255, v1, v2 :: v_dual_cndmask_b32 v6, v255, v3 ; encoding: [0x01,0x05,0x12,0xc8,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v255, v2 :: v_dual_cndmask_b32 v6, v2, v3 +// GFX12: v_dual_fmac_f32 v255, v255, v2 :: v_dual_cndmask_b32 v6, v2, v3 ; encoding: [0xff,0x05,0x12,0xc8,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v2, v2 :: v_dual_cndmask_b32 v6, v3, v3 +// GFX12: v_dual_fmac_f32 v255, v2, v2 :: v_dual_cndmask_b32 v6, v3, v3 ; encoding: [0x02,0x05,0x12,0xc8,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v3, v2 :: v_dual_cndmask_b32 v6, v4, v3 +// GFX12: v_dual_fmac_f32 v255, v3, v2 :: v_dual_cndmask_b32 v6, v4, v3 ; encoding: [0x03,0x05,0x12,0xc8,0x04,0x07,0x06,0xff] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s105, v2 :: v_dual_cndmask_b32 v6, s105, v3 +// GFX12: v_dual_fmac_f32 v255, s105, v2 :: v_dual_cndmask_b32 v6, s105, v3 ; encoding: [0x69,0x04,0x12,0xc8,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s1, v3 +// GFX12: v_dual_fmac_f32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s1, v3 ; encoding: [0x01,0x04,0x12,0xc8,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v6, ttmp15, v3 +// GFX12: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x12,0xc8,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v6, exec_hi, v3 +// GFX12: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x12,0xc8,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v6, exec_lo, v3 +// GFX12: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x12,0xc8,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, m0, v2 :: v_dual_cndmask_b32 v6, m0, v3 +// GFX12: v_dual_fmac_f32 v255, m0, v2 :: v_dual_cndmask_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0x12,0xc8,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v6, vcc_hi, v3 +// GFX12: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x12,0xc8,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_lo, v2 
:: v_dual_cndmask_b32 v6, vcc_lo, v3 +// GFX12: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x12,0xc8,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_cndmask_b32 v6, null, v3 +// GFX12: v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_cndmask_b32 v6, null, v3 ; encoding: [0xff,0x04,0x12,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_cndmask_b32 v6, -1, v3 +// GFX12: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_cndmask_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0x12,0xc8,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_cndmask_b32 v6, 0.5, v2 +// GFX12: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_cndmask_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x12,0xc8,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, -1, v4 :: v_dual_cndmask_b32 v6, src_scc, v5 +// GFX12: v_dual_fmac_f32 v255, -1, v4 :: v_dual_cndmask_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x12,0xc8,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v6, null, v5 :: v_dual_cndmask_b32 v255, 0xaf123456, v4 +// GFX12: v_dual_fmac_f32 v6, null, v5 :: v_dual_cndmask_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x12,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v4, v2 :: v_dual_fmaak_f32 v6, v1, v3, 0xaf123456 +// GFX12: v_dual_fmac_f32 v255, v4, v2 :: v_dual_fmaak_f32 v6, v1, v3, 0xaf123456 ; encoding: [0x04,0x05,0x02,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v1, v2 :: 
v_dual_fmaak_f32 v6, v255, v3, 0xaf123456 +// GFX12: v_dual_fmac_f32 v255, v1, v2 :: v_dual_fmaak_f32 v6, v255, v3, 0xaf123456 ; encoding: [0x01,0x05,0x02,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v255, v2 :: v_dual_fmaak_f32 v6, v2, v3, 0xaf123456 +// GFX12: v_dual_fmac_f32 v255, v255, v2 :: v_dual_fmaak_f32 v6, v2, v3, 0xaf123456 ; encoding: [0xff,0x05,0x02,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v2, v2 :: v_dual_fmaak_f32 v6, v3, v3, 0xaf123456 +// GFX12: v_dual_fmac_f32 v255, v2, v2 :: v_dual_fmaak_f32 v6, v3, v3, 0xaf123456 ; encoding: [0x02,0x05,0x02,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v3, v2 :: v_dual_fmaak_f32 v6, v4, v3, 0xaf123456 +// GFX12: v_dual_fmac_f32 v255, v3, v2 :: v_dual_fmaak_f32 v6, v4, v3, 0xaf123456 ; encoding: [0x03,0x05,0x02,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s105, v2 :: v_dual_fmaak_f32 v6, s105, v3, 0xaf123456 +// GFX12: v_dual_fmac_f32 v255, s105, v2 :: v_dual_fmaak_f32 v6, s105, v3, 0xaf123456 ; encoding: [0x69,0x04,0x02,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s1, v2 :: v_dual_fmaak_f32 v6, s1, v3, 0xaf123456 +// GFX12: v_dual_fmac_f32 v255, s1, v2 :: v_dual_fmaak_f32 v6, s1, v3, 0xaf123456 ; encoding: [0x01,0x04,0x02,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_fmaak_f32 v6, ttmp15, v3, 0xaf123456 +// GFX12: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_fmaak_f32 v6, ttmp15, v3, 0xaf123456 ; encoding: 
[0x7b,0x04,0x02,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_fmaak_f32 v6, exec_hi, v3, 0xaf123456 +// GFX12: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_fmaak_f32 v6, exec_hi, v3, 0xaf123456 ; encoding: [0x7f,0x04,0x02,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_fmaak_f32 v6, exec_lo, v3, 0xaf123456 +// GFX12: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_fmaak_f32 v6, exec_lo, v3, 0xaf123456 ; encoding: [0x7e,0x04,0x02,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, m0, v2 :: v_dual_fmaak_f32 v6, m0, v3, 0xaf123456 +// GFX12: v_dual_fmac_f32 v255, m0, v2 :: v_dual_fmaak_f32 v6, m0, v3, 0xaf123456 ; encoding: [0x7d,0x04,0x02,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_fmaak_f32 v6, vcc_hi, v3, 0xaf123456 +// GFX12: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_fmaak_f32 v6, vcc_hi, v3, 0xaf123456 ; encoding: [0x6b,0x04,0x02,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_fmaak_f32 v6, vcc_lo, v3, 0xaf123456 +// GFX12: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_fmaak_f32 v6, vcc_lo, v3, 0xaf123456 ; encoding: [0x6a,0x04,0x02,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_fmaak_f32 v6, null, v3, 0xaf123456 +// GFX12: v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_fmaak_f32 v6, null, v3, 0xaf123456 ; encoding: [0xff,0x04,0x02,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_fmaak_f32 v6, -1, v3, 0xaf123456 +// GFX12: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_fmaak_f32 v6, -1, v3, 0xaf123456 ; encoding: [0xfd,0x04,0x02,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_fmaak_f32 v6, 0.5, v2, 0xaf123456 +// GFX12: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_fmaak_f32 v6, 0.5, v2, 0xaf123456 ; encoding: [0xf0,0x06,0x02,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, -1, v4 :: v_dual_fmaak_f32 v6, src_scc, v5, 0xaf123456 +// GFX12: v_dual_fmac_f32 v255, -1, v4 :: v_dual_fmaak_f32 v6, src_scc, v5, 0xaf123456 ; encoding: [0xc1,0x08,0x02,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v6, null, v5 :: v_dual_fmaak_f32 v255, 0xaf123456, v4, 0xaf123456 +// GFX12: v_dual_fmac_f32 v6, null, v5 :: v_dual_fmaak_f32 v255, 0xaf123456, v4, 0xaf123456 ; encoding: [0x7c,0x0a,0x02,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v4, v2 :: v_dual_fmac_f32 v6, v1, v3 +// GFX12: v_dual_fmac_f32 v255, v4, v2 :: v_dual_fmac_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x00,0xc8,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v1, v2 :: v_dual_fmac_f32 v6, v255, v3 +// GFX12: v_dual_fmac_f32 v255, v1, v2 :: v_dual_fmac_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x00,0xc8,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v255, v2 :: v_dual_fmac_f32 v6, v2, v3 +// GFX12: v_dual_fmac_f32 v255, v255, v2 :: v_dual_fmac_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x00,0xc8,0x02,0x07,0x06,0xff] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v2, v2 :: v_dual_fmac_f32 v6, v3, v3 +// GFX12: v_dual_fmac_f32 v255, v2, v2 :: v_dual_fmac_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x00,0xc8,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v3, v2 :: v_dual_fmac_f32 v6, v4, v3 +// GFX12: v_dual_fmac_f32 v255, v3, v2 :: v_dual_fmac_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x00,0xc8,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s105, v2 :: v_dual_fmac_f32 v6, s1, v3 +// GFX12: v_dual_fmac_f32 v255, s105, v2 :: v_dual_fmac_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x00,0xc8,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s1, v2 :: v_dual_fmac_f32 v6, s105, v3 +// GFX12: v_dual_fmac_f32 v255, s1, v2 :: v_dual_fmac_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x00,0xc8,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_fmac_f32 v6, vcc_lo, v3 +// GFX12: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_fmac_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x00,0xc8,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_fmac_f32 v6, vcc_hi, v3 +// GFX12: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_fmac_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x00,0xc8,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_fmac_f32 v6, ttmp15, v3 +// GFX12: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_fmac_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x00,0xc8,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, m0, v2 :: v_dual_fmac_f32 v6, m0, v3 +// GFX12: v_dual_fmac_f32 v255, m0, 
v2 :: v_dual_fmac_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x00,0xc8,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v6, exec_lo, v3 +// GFX12: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x00,0xc8,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v6, exec_hi, v3 +// GFX12: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x00,0xc8,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_fmac_f32 v6, null, v3 +// GFX12: v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_fmac_f32 v6, null, v3 ; encoding: [0xff,0x04,0x00,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_fmac_f32 v6, -1, v3 +// GFX12: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_fmac_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x00,0xc8,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_fmac_f32 v6, 0.5, v2 +// GFX12: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_fmac_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x00,0xc8,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, -1, v4 :: v_dual_fmac_f32 v6, src_scc, v5 +// GFX12: v_dual_fmac_f32 v255, -1, v4 :: v_dual_fmac_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x00,0xc8,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v6, null, v5 :: v_dual_fmac_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_fmac_f32 v6, null, v5 :: v_dual_fmac_f32 v255, 0xaf123456, v4 ; encoding: 
[0x7c,0x0a,0x00,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v4, v255 :: v_dual_fmamk_f32 v6, v1, 0xaf123456, v4 +// GFX12: v_dual_fmac_f32 v255, v4, v255 :: v_dual_fmamk_f32 v6, v1, 0xaf123456, v4 ; encoding: [0x04,0xff,0x05,0xc8,0x01,0x09,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v1, v255 :: v_dual_fmamk_f32 v6, v255, 0xaf123456, v4 +// GFX12: v_dual_fmac_f32 v255, v1, v255 :: v_dual_fmamk_f32 v6, v255, 0xaf123456, v4 ; encoding: [0x01,0xff,0x05,0xc8,0xff,0x09,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v255, v255 :: v_dual_fmamk_f32 v6, v2, 0xaf123456, v4 +// GFX12: v_dual_fmac_f32 v255, v255, v255 :: v_dual_fmamk_f32 v6, v2, 0xaf123456, v4 ; encoding: [0xff,0xff,0x05,0xc8,0x02,0x09,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v2, v255 :: v_dual_fmamk_f32 v6, v3, 0xaf123456, v4 +// GFX12: v_dual_fmac_f32 v255, v2, v255 :: v_dual_fmamk_f32 v6, v3, 0xaf123456, v4 ; encoding: [0x02,0xff,0x05,0xc8,0x03,0x09,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v3, v255 :: v_dual_fmamk_f32 v6, v4, 0xaf123456, v4 +// GFX12: v_dual_fmac_f32 v255, v3, v255 :: v_dual_fmamk_f32 v6, v4, 0xaf123456, v4 ; encoding: [0x03,0xff,0x05,0xc8,0x04,0x09,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s105, v255 :: v_dual_fmamk_f32 v6, s105, 0xaf123456, v4 +// GFX12: v_dual_fmac_f32 v255, s105, v255 :: v_dual_fmamk_f32 v6, s105, 0xaf123456, v4 ; encoding: [0x69,0xfe,0x05,0xc8,0x69,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 
v255, s1, v255 :: v_dual_fmamk_f32 v6, s1, 0xaf123456, v4 +// GFX12: v_dual_fmac_f32 v255, s1, v255 :: v_dual_fmamk_f32 v6, s1, 0xaf123456, v4 ; encoding: [0x01,0xfe,0x05,0xc8,0x01,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, ttmp15, v255 :: v_dual_fmamk_f32 v6, ttmp15, 0xaf123456, v4 +// GFX12: v_dual_fmac_f32 v255, ttmp15, v255 :: v_dual_fmamk_f32 v6, ttmp15, 0xaf123456, v4 ; encoding: [0x7b,0xfe,0x05,0xc8,0x7b,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_hi, v255 :: v_dual_fmamk_f32 v6, exec_hi, 0xaf123456, v4 +// GFX12: v_dual_fmac_f32 v255, exec_hi, v255 :: v_dual_fmamk_f32 v6, exec_hi, 0xaf123456, v4 ; encoding: [0x7f,0xfe,0x05,0xc8,0x7f,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_lo, v255 :: v_dual_fmamk_f32 v6, exec_lo, 0xaf123456, v4 +// GFX12: v_dual_fmac_f32 v255, exec_lo, v255 :: v_dual_fmamk_f32 v6, exec_lo, 0xaf123456, v4 ; encoding: [0x7e,0xfe,0x05,0xc8,0x7e,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, m0, v255 :: v_dual_fmamk_f32 v6, m0, 0xaf123456, v4 +// GFX12: v_dual_fmac_f32 v255, m0, v255 :: v_dual_fmamk_f32 v6, m0, 0xaf123456, v4 ; encoding: [0x7d,0xfe,0x05,0xc8,0x7d,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_hi, v255 :: v_dual_fmamk_f32 v6, vcc_hi, 0xaf123456, v4 +// GFX12: v_dual_fmac_f32 v255, vcc_hi, v255 :: v_dual_fmamk_f32 v6, vcc_hi, 0xaf123456, v4 ; encoding: [0x6b,0xfe,0x05,0xc8,0x6b,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_lo, v255 :: v_dual_fmamk_f32 v6, vcc_lo, 0xaf123456, v4 +// GFX12: v_dual_fmac_f32 
v255, vcc_lo, v255 :: v_dual_fmamk_f32 v6, vcc_lo, 0xaf123456, v4 ; encoding: [0x6a,0xfe,0x05,0xc8,0x6a,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, null, 0xaf123456, v4 +// GFX12: v_dual_fmac_f32 v255, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, null, 0xaf123456, v4 ; encoding: [0xff,0xfe,0x05,0xc8,0x7c,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, src_scc, v255 :: v_dual_fmamk_f32 v6, -1, 0xaf123456, v4 +// GFX12: v_dual_fmac_f32 v255, src_scc, v255 :: v_dual_fmamk_f32 v6, -1, 0xaf123456, v4 ; encoding: [0xfd,0xfe,0x05,0xc8,0xc1,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_fmamk_f32 v6, 0.5, 0xaf123456, v4 +// GFX12: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_fmamk_f32 v6, 0.5, 0xaf123456, v4 ; encoding: [0xf0,0x06,0x04,0xc8,0xf0,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, -1, v4 :: v_dual_fmamk_f32 v6, src_scc, 0xaf123456, v4 +// GFX12: v_dual_fmac_f32 v255, -1, v4 :: v_dual_fmamk_f32 v6, src_scc, 0xaf123456, v4 ; encoding: [0xc1,0x08,0x04,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v6, null, v5 :: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 +// GFX12: v_dual_fmac_f32 v6, null, v5 :: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 ; encoding: [0x7c,0x0a,0x04,0xc8,0xff,0xfe,0xff,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v4, v2 :: v_dual_lshlrev_b32 v6, v1, v3 +// GFX12: v_dual_fmac_f32 v255, v4, v2 :: v_dual_lshlrev_b32 v6, v1, v3 ; encoding: [0x04,0x05,0x22,0xc8,0x01,0x07,0x06,0xff] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v1, v2 :: v_dual_lshlrev_b32 v6, v255, v3 +// GFX12: v_dual_fmac_f32 v255, v1, v2 :: v_dual_lshlrev_b32 v6, v255, v3 ; encoding: [0x01,0x05,0x22,0xc8,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v255, v2 :: v_dual_lshlrev_b32 v6, v2, v3 +// GFX12: v_dual_fmac_f32 v255, v255, v2 :: v_dual_lshlrev_b32 v6, v2, v3 ; encoding: [0xff,0x05,0x22,0xc8,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v2, v2 :: v_dual_lshlrev_b32 v6, v3, v3 +// GFX12: v_dual_fmac_f32 v255, v2, v2 :: v_dual_lshlrev_b32 v6, v3, v3 ; encoding: [0x02,0x05,0x22,0xc8,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v3, v2 :: v_dual_lshlrev_b32 v6, v4, v3 +// GFX12: v_dual_fmac_f32 v255, v3, v2 :: v_dual_lshlrev_b32 v6, v4, v3 ; encoding: [0x03,0x05,0x22,0xc8,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s105, v2 :: v_dual_lshlrev_b32 v6, s1, v3 +// GFX12: v_dual_fmac_f32 v255, s105, v2 :: v_dual_lshlrev_b32 v6, s1, v3 ; encoding: [0x69,0x04,0x22,0xc8,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s1, v2 :: v_dual_lshlrev_b32 v6, s105, v3 +// GFX12: v_dual_fmac_f32 v255, s1, v2 :: v_dual_lshlrev_b32 v6, s105, v3 ; encoding: [0x01,0x04,0x22,0xc8,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v6, vcc_lo, v3 +// GFX12: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x22,0xc8,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v6, vcc_hi, v3 +// GFX12: 
v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x22,0xc8,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v6, ttmp15, v3 +// GFX12: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x22,0xc8,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, m0, v2 :: v_dual_lshlrev_b32 v6, m0, v3 +// GFX12: v_dual_fmac_f32 v255, m0, v2 :: v_dual_lshlrev_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0x22,0xc8,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v6, exec_lo, v3 +// GFX12: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x22,0xc8,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v6, exec_hi, v3 +// GFX12: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x22,0xc8,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_lshlrev_b32 v6, null, v3 +// GFX12: v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_lshlrev_b32 v6, null, v3 ; encoding: [0xff,0x04,0x22,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v6, -1, v3 +// GFX12: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0x22,0xc8,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v6, 0.5, v2 +// GFX12: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v6, 
0.5, v2 ; encoding: [0xf0,0x06,0x22,0xc8,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, -1, v4 :: v_dual_lshlrev_b32 v6, src_scc, v5 +// GFX12: v_dual_fmac_f32 v255, -1, v4 :: v_dual_lshlrev_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x22,0xc8,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v6, null, v5 :: v_dual_lshlrev_b32 v255, 0xaf123456, v4 +// GFX12: v_dual_fmac_f32 v6, null, v5 :: v_dual_lshlrev_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x22,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v4, v2 :: v_dual_max_num_f32 v6, v1, v3 +// GFX12: v_dual_fmac_f32 v255, v4, v2 :: v_dual_max_num_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x14,0xc8,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v1, v2 :: v_dual_max_num_f32 v6, v255, v3 +// GFX12: v_dual_fmac_f32 v255, v1, v2 :: v_dual_max_num_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x14,0xc8,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v255, v2 :: v_dual_max_num_f32 v6, v2, v3 +// GFX12: v_dual_fmac_f32 v255, v255, v2 :: v_dual_max_num_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x14,0xc8,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v2, v2 :: v_dual_max_num_f32 v6, v3, v3 +// GFX12: v_dual_fmac_f32 v255, v2, v2 :: v_dual_max_num_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x14,0xc8,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v3, v2 :: v_dual_max_num_f32 v6, v4, v3 +// GFX12: v_dual_fmac_f32 v255, v3, v2 :: v_dual_max_num_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x14,0xc8,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction 
requires wavesize=32 + +v_dual_fmac_f32 v255, s105, v2 :: v_dual_max_num_f32 v6, s1, v3 +// GFX12: v_dual_fmac_f32 v255, s105, v2 :: v_dual_max_num_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x14,0xc8,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s1, v2 :: v_dual_max_num_f32 v6, s105, v3 +// GFX12: v_dual_fmac_f32 v255, s1, v2 :: v_dual_max_num_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x14,0xc8,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_max_num_f32 v6, vcc_lo, v3 +// GFX12: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_max_num_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x14,0xc8,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_max_num_f32 v6, vcc_hi, v3 +// GFX12: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_max_num_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x14,0xc8,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_max_num_f32 v6, ttmp15, v3 +// GFX12: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_max_num_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x14,0xc8,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, m0, v2 :: v_dual_max_num_f32 v6, m0, v3 +// GFX12: v_dual_fmac_f32 v255, m0, v2 :: v_dual_max_num_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x14,0xc8,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v6, exec_lo, v3 +// GFX12: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x14,0xc8,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v6, exec_hi, v3 
+// GFX12: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x14,0xc8,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_max_num_f32 v6, null, v3 +// GFX12: v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_max_num_f32 v6, null, v3 ; encoding: [0xff,0x04,0x14,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_max_num_f32 v6, -1, v3 +// GFX12: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_max_num_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x14,0xc8,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_max_num_f32 v6, 0.5, v2 +// GFX12: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_max_num_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x14,0xc8,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, -1, v4 :: v_dual_max_num_f32 v6, src_scc, v5 +// GFX12: v_dual_fmac_f32 v255, -1, v4 :: v_dual_max_num_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x14,0xc8,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v6, null, v5 :: v_dual_max_num_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_fmac_f32 v6, null, v5 :: v_dual_max_num_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x14,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v4, v2 :: v_dual_min_num_f32 v6, v1, v3 +// GFX12: v_dual_fmac_f32 v255, v4, v2 :: v_dual_min_num_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x16,0xc8,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v1, v2 :: v_dual_min_num_f32 v6, v255, v3 +// GFX12: v_dual_fmac_f32 v255, v1, v2 :: 
v_dual_min_num_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x16,0xc8,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v255, v2 :: v_dual_min_num_f32 v6, v2, v3 +// GFX12: v_dual_fmac_f32 v255, v255, v2 :: v_dual_min_num_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x16,0xc8,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v2, v2 :: v_dual_min_num_f32 v6, v3, v3 +// GFX12: v_dual_fmac_f32 v255, v2, v2 :: v_dual_min_num_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x16,0xc8,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v3, v2 :: v_dual_min_num_f32 v6, v4, v3 +// GFX12: v_dual_fmac_f32 v255, v3, v2 :: v_dual_min_num_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x16,0xc8,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s105, v2 :: v_dual_min_num_f32 v6, s1, v3 +// GFX12: v_dual_fmac_f32 v255, s105, v2 :: v_dual_min_num_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x16,0xc8,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s1, v2 :: v_dual_min_num_f32 v6, s105, v3 +// GFX12: v_dual_fmac_f32 v255, s1, v2 :: v_dual_min_num_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x16,0xc8,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_min_num_f32 v6, vcc_lo, v3 +// GFX12: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_min_num_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x16,0xc8,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_min_num_f32 v6, vcc_hi, v3 +// GFX12: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_min_num_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x16,0xc8,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_min_num_f32 v6, ttmp15, v3 +// GFX12: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_min_num_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x16,0xc8,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, m0, v2 :: v_dual_min_num_f32 v6, m0, v3 +// GFX12: v_dual_fmac_f32 v255, m0, v2 :: v_dual_min_num_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x16,0xc8,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v6, exec_lo, v3 +// GFX12: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x16,0xc8,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v6, exec_hi, v3 +// GFX12: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x16,0xc8,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_min_num_f32 v6, null, v3 +// GFX12: v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_min_num_f32 v6, null, v3 ; encoding: [0xff,0x04,0x16,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_min_num_f32 v6, -1, v3 +// GFX12: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_min_num_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x16,0xc8,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_min_num_f32 v6, 0.5, v2 +// GFX12: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_min_num_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x16,0xc8,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, -1, v4 
:: v_dual_min_num_f32 v6, src_scc, v5 +// GFX12: v_dual_fmac_f32 v255, -1, v4 :: v_dual_min_num_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x16,0xc8,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v6, null, v5 :: v_dual_min_num_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_fmac_f32 v6, null, v5 :: v_dual_min_num_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x16,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v4, v255 :: v_dual_mov_b32 v6, v1 +// GFX12: v_dual_fmac_f32 v255, v4, v255 :: v_dual_mov_b32 v6, v1 ; encoding: [0x04,0xff,0x11,0xc8,0x01,0x01,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v1, v255 :: v_dual_mov_b32 v6, v255 +// GFX12: v_dual_fmac_f32 v255, v1, v255 :: v_dual_mov_b32 v6, v255 ; encoding: [0x01,0xff,0x11,0xc8,0xff,0x01,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v255, v255 :: v_dual_mov_b32 v6, v2 +// GFX12: v_dual_fmac_f32 v255, v255, v255 :: v_dual_mov_b32 v6, v2 ; encoding: [0xff,0xff,0x11,0xc8,0x02,0x01,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v2, v255 :: v_dual_mov_b32 v6, v3 +// GFX12: v_dual_fmac_f32 v255, v2, v255 :: v_dual_mov_b32 v6, v3 ; encoding: [0x02,0xff,0x11,0xc8,0x03,0x01,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v3, v255 :: v_dual_mov_b32 v6, v4 +// GFX12: v_dual_fmac_f32 v255, v3, v255 :: v_dual_mov_b32 v6, v4 ; encoding: [0x03,0xff,0x11,0xc8,0x04,0x01,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s105, v255 :: v_dual_mov_b32 v6, s1 +// GFX12: v_dual_fmac_f32 v255, s105, v255 :: v_dual_mov_b32 v6, s1 ; encoding: [0x69,0xfe,0x11,0xc8,0x01,0x00,0x06,0xff] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s1, v255 :: v_dual_mov_b32 v6, s105 +// GFX12: v_dual_fmac_f32 v255, s1, v255 :: v_dual_mov_b32 v6, s105 ; encoding: [0x01,0xfe,0x11,0xc8,0x69,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, ttmp15, v255 :: v_dual_mov_b32 v6, vcc_lo +// GFX12: v_dual_fmac_f32 v255, ttmp15, v255 :: v_dual_mov_b32 v6, vcc_lo ; encoding: [0x7b,0xfe,0x11,0xc8,0x6a,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_hi, v255 :: v_dual_mov_b32 v6, vcc_hi +// GFX12: v_dual_fmac_f32 v255, exec_hi, v255 :: v_dual_mov_b32 v6, vcc_hi ; encoding: [0x7f,0xfe,0x11,0xc8,0x6b,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_lo, v255 :: v_dual_mov_b32 v6, ttmp15 +// GFX12: v_dual_fmac_f32 v255, exec_lo, v255 :: v_dual_mov_b32 v6, ttmp15 ; encoding: [0x7e,0xfe,0x11,0xc8,0x7b,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, m0, v255 :: v_dual_mov_b32 v6, m0 +// GFX12: v_dual_fmac_f32 v255, m0, v255 :: v_dual_mov_b32 v6, m0 ; encoding: [0x7d,0xfe,0x11,0xc8,0x7d,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_hi, v255 :: v_dual_mov_b32 v6, exec_lo +// GFX12: v_dual_fmac_f32 v255, vcc_hi, v255 :: v_dual_mov_b32 v6, exec_lo ; encoding: [0x6b,0xfe,0x11,0xc8,0x7e,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_lo, v255 :: v_dual_mov_b32 v6, exec_hi +// GFX12: v_dual_fmac_f32 v255, vcc_lo, v255 :: v_dual_mov_b32 v6, exec_hi ; encoding: [0x6a,0xfe,0x11,0xc8,0x7f,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0xaf123456, v255 :: v_dual_mov_b32 v6, null +// GFX12: v_dual_fmac_f32 v255, 
0xaf123456, v255 :: v_dual_mov_b32 v6, null ; encoding: [0xff,0xfe,0x11,0xc8,0x7c,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, src_scc, v255 :: v_dual_mov_b32 v6, -1 +// GFX12: v_dual_fmac_f32 v255, src_scc, v255 :: v_dual_mov_b32 v6, -1 ; encoding: [0xfd,0xfe,0x11,0xc8,0xc1,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_mov_b32 v6, 0.5 +// GFX12: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_mov_b32 v6, 0.5 ; encoding: [0xf0,0x06,0x10,0xc8,0xf0,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, -1, v4 :: v_dual_mov_b32 v6, src_scc +// GFX12: v_dual_fmac_f32 v255, -1, v4 :: v_dual_mov_b32 v6, src_scc ; encoding: [0xc1,0x08,0x10,0xc8,0xfd,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v6, null, v5 :: v_dual_mov_b32 v255, 0xaf123456 +// GFX12: v_dual_fmac_f32 v6, null, v5 :: v_dual_mov_b32 v255, 0xaf123456 ; encoding: [0x7c,0x0a,0x10,0xc8,0xff,0x00,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v6, v1, v3 +// GFX12: v_dual_fmac_f32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x0e,0xc8,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v6, v255, v3 +// GFX12: v_dual_fmac_f32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x0e,0xc8,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v6, v2, v3 +// GFX12: v_dual_fmac_f32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x0e,0xc8,0x02,0x07,0x06,0xff] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v6, v3, v3 +// GFX12: v_dual_fmac_f32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x0e,0xc8,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v6, v4, v3 +// GFX12: v_dual_fmac_f32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x0e,0xc8,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v6, s1, v3 +// GFX12: v_dual_fmac_f32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x0e,0xc8,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v6, s105, v3 +// GFX12: v_dual_fmac_f32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x0e,0xc8,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_lo, v3 +// GFX12: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x0e,0xc8,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_hi, v3 +// GFX12: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x0e,0xc8,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, ttmp15, v3 +// GFX12: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x0e,0xc8,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction 
requires wavesize=32 + +v_dual_fmac_f32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v6, m0, v3 +// GFX12: v_dual_fmac_f32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x0e,0xc8,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_lo, v3 +// GFX12: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x0e,0xc8,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_hi, v3 +// GFX12: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x0e,0xc8,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_mul_dx9_zero_f32 v6, null, v3 +// GFX12: v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_mul_dx9_zero_f32 v6, null, v3 ; encoding: [0xff,0x04,0x0e,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v6, -1, v3 +// GFX12: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x0e,0xc8,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v6, 0.5, v2 +// GFX12: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x0e,0xc8,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v6, src_scc, v5 +// GFX12: v_dual_fmac_f32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x0e,0xc8,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction 
requires wavesize=32 + +v_dual_fmac_f32 v6, null, v5 :: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_fmac_f32 v6, null, v5 :: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x0e,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v4, v2 :: v_dual_mul_f32 v6, v1, v3 +// GFX12: v_dual_fmac_f32 v255, v4, v2 :: v_dual_mul_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x06,0xc8,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v1, v2 :: v_dual_mul_f32 v6, v255, v3 +// GFX12: v_dual_fmac_f32 v255, v1, v2 :: v_dual_mul_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x06,0xc8,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v255, v2 :: v_dual_mul_f32 v6, v2, v3 +// GFX12: v_dual_fmac_f32 v255, v255, v2 :: v_dual_mul_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x06,0xc8,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v2, v2 :: v_dual_mul_f32 v6, v3, v3 +// GFX12: v_dual_fmac_f32 v255, v2, v2 :: v_dual_mul_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x06,0xc8,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v3, v2 :: v_dual_mul_f32 v6, v4, v3 +// GFX12: v_dual_fmac_f32 v255, v3, v2 :: v_dual_mul_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x06,0xc8,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s105, v2 :: v_dual_mul_f32 v6, s1, v3 +// GFX12: v_dual_fmac_f32 v255, s105, v2 :: v_dual_mul_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x06,0xc8,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s1, v2 :: v_dual_mul_f32 v6, s105, v3 +// GFX12: v_dual_fmac_f32 v255, s1, v2 :: v_dual_mul_f32 v6, s105, v3 ; 
encoding: [0x01,0x04,0x06,0xc8,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_mul_f32 v6, vcc_lo, v3 +// GFX12: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_mul_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x06,0xc8,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_mul_f32 v6, vcc_hi, v3 +// GFX12: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_mul_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x06,0xc8,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_mul_f32 v6, ttmp15, v3 +// GFX12: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_mul_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x06,0xc8,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, m0, v2 :: v_dual_mul_f32 v6, m0, v3 +// GFX12: v_dual_fmac_f32 v255, m0, v2 :: v_dual_mul_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x06,0xc8,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_mul_f32 v6, exec_lo, v3 +// GFX12: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_mul_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x06,0xc8,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_mul_f32 v6, exec_hi, v3 +// GFX12: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_mul_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x06,0xc8,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_mul_f32 v6, null, v3 +// GFX12: v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_mul_f32 v6, null, v3 ; encoding: [0xff,0x04,0x06,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction 
requires wavesize=32 + +v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_mul_f32 v6, -1, v3 +// GFX12: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_mul_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x06,0xc8,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_mul_f32 v6, 0.5, v2 +// GFX12: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_mul_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x06,0xc8,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, -1, v4 :: v_dual_mul_f32 v6, src_scc, v5 +// GFX12: v_dual_fmac_f32 v255, -1, v4 :: v_dual_mul_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x06,0xc8,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v6, null, v5 :: v_dual_mul_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_fmac_f32 v6, null, v5 :: v_dual_mul_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x06,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v4, v2 :: v_dual_sub_f32 v6, v1, v3 +// GFX12: v_dual_fmac_f32 v255, v4, v2 :: v_dual_sub_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x0a,0xc8,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v1, v2 :: v_dual_sub_f32 v6, v255, v3 +// GFX12: v_dual_fmac_f32 v255, v1, v2 :: v_dual_sub_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x0a,0xc8,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v255, v2 :: v_dual_sub_f32 v6, v2, v3 +// GFX12: v_dual_fmac_f32 v255, v255, v2 :: v_dual_sub_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x0a,0xc8,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v2, v2 :: v_dual_sub_f32 v6, v3, v3 +// GFX12: v_dual_fmac_f32 v255, v2, v2 :: v_dual_sub_f32 v6, v3, v3 ; 
encoding: [0x02,0x05,0x0a,0xc8,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v3, v2 :: v_dual_sub_f32 v6, v4, v3 +// GFX12: v_dual_fmac_f32 v255, v3, v2 :: v_dual_sub_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x0a,0xc8,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s105, v2 :: v_dual_sub_f32 v6, s1, v3 +// GFX12: v_dual_fmac_f32 v255, s105, v2 :: v_dual_sub_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x0a,0xc8,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s1, v2 :: v_dual_sub_f32 v6, s105, v3 +// GFX12: v_dual_fmac_f32 v255, s1, v2 :: v_dual_sub_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x0a,0xc8,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_sub_f32 v6, vcc_lo, v3 +// GFX12: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_sub_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x0a,0xc8,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_sub_f32 v6, vcc_hi, v3 +// GFX12: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_sub_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x0a,0xc8,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_sub_f32 v6, ttmp15, v3 +// GFX12: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_sub_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x0a,0xc8,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, m0, v2 :: v_dual_sub_f32 v6, m0, v3 +// GFX12: v_dual_fmac_f32 v255, m0, v2 :: v_dual_sub_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x0a,0xc8,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_hi, v2 :: 
v_dual_sub_f32 v6, exec_lo, v3 +// GFX12: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_sub_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x0a,0xc8,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_sub_f32 v6, exec_hi, v3 +// GFX12: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_sub_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x0a,0xc8,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_sub_f32 v6, null, v3 +// GFX12: v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_sub_f32 v6, null, v3 ; encoding: [0xff,0x04,0x0a,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_sub_f32 v6, -1, v3 +// GFX12: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_sub_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x0a,0xc8,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_sub_f32 v6, 0.5, v2 +// GFX12: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_sub_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x0a,0xc8,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, -1, v4 :: v_dual_sub_f32 v6, src_scc, v5 +// GFX12: v_dual_fmac_f32 v255, -1, v4 :: v_dual_sub_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x0a,0xc8,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v6, null, v5 :: v_dual_sub_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_fmac_f32 v6, null, v5 :: v_dual_sub_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x0a,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v4, v2 :: v_dual_subrev_f32 v6, v1, v3 +// GFX12: v_dual_fmac_f32 v255, v4, v2 :: v_dual_subrev_f32 
v6, v1, v3 ; encoding: [0x04,0x05,0x0c,0xc8,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v1, v2 :: v_dual_subrev_f32 v6, v255, v3 +// GFX12: v_dual_fmac_f32 v255, v1, v2 :: v_dual_subrev_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x0c,0xc8,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v255, v2 :: v_dual_subrev_f32 v6, v2, v3 +// GFX12: v_dual_fmac_f32 v255, v255, v2 :: v_dual_subrev_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x0c,0xc8,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v2, v2 :: v_dual_subrev_f32 v6, v3, v3 +// GFX12: v_dual_fmac_f32 v255, v2, v2 :: v_dual_subrev_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x0c,0xc8,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v3, v2 :: v_dual_subrev_f32 v6, v4, v3 +// GFX12: v_dual_fmac_f32 v255, v3, v2 :: v_dual_subrev_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x0c,0xc8,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s105, v2 :: v_dual_subrev_f32 v6, s1, v3 +// GFX12: v_dual_fmac_f32 v255, s105, v2 :: v_dual_subrev_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x0c,0xc8,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s1, v2 :: v_dual_subrev_f32 v6, s105, v3 +// GFX12: v_dual_fmac_f32 v255, s1, v2 :: v_dual_subrev_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x0c,0xc8,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_subrev_f32 v6, vcc_lo, v3 +// GFX12: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_subrev_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x0c,0xc8,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 
v255, exec_hi, v2 :: v_dual_subrev_f32 v6, vcc_hi, v3 +// GFX12: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_subrev_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x0c,0xc8,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_subrev_f32 v6, ttmp15, v3 +// GFX12: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_subrev_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x0c,0xc8,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, m0, v2 :: v_dual_subrev_f32 v6, m0, v3 +// GFX12: v_dual_fmac_f32 v255, m0, v2 :: v_dual_subrev_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x0c,0xc8,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v6, exec_lo, v3 +// GFX12: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x0c,0xc8,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v6, exec_hi, v3 +// GFX12: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x0c,0xc8,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_subrev_f32 v6, null, v3 +// GFX12: v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_subrev_f32 v6, null, v3 ; encoding: [0xff,0x04,0x0c,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_subrev_f32 v6, -1, v3 +// GFX12: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_subrev_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x0c,0xc8,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_subrev_f32 v6, 0.5, v2 +// GFX12: 
v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_subrev_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x0c,0xc8,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, -1, v4 :: v_dual_subrev_f32 v6, src_scc, v5 +// GFX12: v_dual_fmac_f32 v255, -1, v4 :: v_dual_subrev_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x0c,0xc8,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v6, null, v5 :: v_dual_subrev_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_fmac_f32 v6, null, v5 :: v_dual_subrev_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x0c,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_add_f32 v6, v1, v255 +// GFX12: v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_add_f32 v6, v1, v255 ; encoding: [0x04,0xff,0x89,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_add_f32 v6, v255, v255 +// GFX12: v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_add_f32 v6, v255, v255 ; encoding: [0x01,0xff,0x89,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_add_f32 v6, v2, v255 +// GFX12: v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_add_f32 v6, v2, v255 ; encoding: [0xff,0xff,0x89,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_add_f32 v6, v3, v255 +// GFX12: v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_add_f32 v6, v3, v255 ; encoding: [0x02,0xff,0x89,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_add_f32 v6, v4, v255 +// GFX12: v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_add_f32 v6, v4, v255 ; encoding: [0x03,0xff,0x89,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_add_f32 v6, s105, v255 +// GFX12: v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_add_f32 v6, s105, v255 ; encoding: [0x69,0xfe,0x89,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_add_f32 v6, s1, v255 +// GFX12: v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_add_f32 v6, s1, v255 ; encoding: [0x01,0xfe,0x89,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_add_f32 v6, ttmp15, v255 +// GFX12: v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_add_f32 v6, ttmp15, v255 ; encoding: [0x7b,0xfe,0x89,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_add_f32 v6, exec_hi, v255 +// GFX12: v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_add_f32 v6, exec_hi, v255 ; encoding: [0x7f,0xfe,0x89,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_add_f32 v6, exec_lo, v255 +// GFX12: v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_add_f32 v6, exec_lo, v255 ; encoding: [0x7e,0xfe,0x89,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_add_f32 v6, m0, v255 +// GFX12: 
v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_add_f32 v6, m0, v255 ; encoding: [0x7d,0xfe,0x89,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_add_f32 v6, vcc_hi, v255 +// GFX12: v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_add_f32 v6, vcc_hi, v255 ; encoding: [0x6b,0xfe,0x89,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_add_f32 v6, vcc_lo, v255 +// GFX12: v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_add_f32 v6, vcc_lo, v255 ; encoding: [0x6a,0xfe,0x89,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_add_f32 v6, null, v255 +// GFX12: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_add_f32 v6, null, v255 ; encoding: [0xff,0xfe,0x89,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_add_f32 v6, -1, v255 +// GFX12: v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_add_f32 v6, -1, v255 ; encoding: [0xfd,0xfe,0x89,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_add_f32 v6, 0.5, v3 +// GFX12: v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_add_f32 v6, 0.5, v3 ; encoding: [0xf0,0xfe,0x89,0xc8,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_add_f32 v6, src_scc, v4 +// GFX12: v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_add_f32 v6, src_scc, v4 ; encoding: 
[0xc1,0xfe,0x89,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_add_f32 v255, 0xaf123456, v5 +// GFX12: v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_add_f32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x08,0x88,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, v1, v255 +// GFX12: v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, v1, v255 ; encoding: [0x04,0xff,0xa1,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, v255, v255 +// GFX12: v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, v255, v255 ; encoding: [0x01,0xff,0xa1,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, v2, v255 +// GFX12: v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, v2, v255 ; encoding: [0xff,0xff,0xa1,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, v3, v255 +// GFX12: v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, v3, v255 ; encoding: [0x02,0xff,0xa1,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, v4, v255 +// GFX12: v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, v4, v255 ; encoding: [0x03,0xff,0xa1,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, s105, v255 +// GFX12: v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, s105, v255 ; encoding: [0x69,0xfe,0xa1,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, s1, v255 +// GFX12: v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, s1, v255 ; encoding: [0x01,0xfe,0xa1,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, ttmp15, v255 +// GFX12: v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, ttmp15, v255 ; encoding: [0x7b,0xfe,0xa1,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, exec_hi, v255 +// GFX12: v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, exec_hi, v255 ; encoding: [0x7f,0xfe,0xa1,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, exec_lo, v255 +// GFX12: v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, exec_lo, v255 ; encoding: [0x7e,0xfe,0xa1,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, m0, v255 +// GFX12: v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, m0, v255 ; encoding: [0x7d,0xfe,0xa1,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 
v255, vcc_hi, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, vcc_hi, v255 +// GFX12: v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, vcc_hi, v255 ; encoding: [0x6b,0xfe,0xa1,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, vcc_lo, v255 +// GFX12: v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, vcc_lo, v255 ; encoding: [0x6a,0xfe,0xa1,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, null, v255 +// GFX12: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, null, v255 ; encoding: [0xff,0xfe,0xa1,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, -1, v255 +// GFX12: v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, -1, v255 ; encoding: [0xfd,0xfe,0xa1,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, 0.5, v3 +// GFX12: v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, 0.5, v3 ; encoding: [0xf0,0xfe,0xa1,0xc8,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, src_scc, v4 +// GFX12: v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, src_scc, v4 ; encoding: [0xc1,0xfe,0xa1,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_add_nc_u32 v255, 
0xaf123456, v5 +// GFX12: v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_add_nc_u32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x08,0xa0,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_cndmask_b32 v6, v1, v255 +// GFX12: v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_cndmask_b32 v6, v1, v255 ; encoding: [0x04,0xff,0x93,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_cndmask_b32 v6, v255, v255 +// GFX12: v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_cndmask_b32 v6, v255, v255 ; encoding: [0x01,0xff,0x93,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_cndmask_b32 v6, v2, v255 +// GFX12: v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_cndmask_b32 v6, v2, v255 ; encoding: [0xff,0xff,0x93,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_cndmask_b32 v6, v3, v255 +// GFX12: v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_cndmask_b32 v6, v3, v255 ; encoding: [0x02,0xff,0x93,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_cndmask_b32 v6, v4, v255 +// GFX12: v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_cndmask_b32 v6, v4, v255 ; encoding: [0x03,0xff,0x93,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_cndmask_b32 v6, null, v255 +// GFX12: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: 
v_dual_cndmask_b32 v6, null, v255 ; encoding: [0xff,0xfe,0x93,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_cndmask_b32 v6, -1, v3 +// GFX12: v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_cndmask_b32 v6, -1, v3 ; encoding: [0xf0,0xfe,0x93,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_cndmask_b32 v6, 0.5, v4 +// GFX12: v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_cndmask_b32 v6, 0.5, v4 ; encoding: [0xc1,0xfe,0x93,0xc8,0xf0,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_cndmask_b32 v255, 0xaf123456, v5 +// GFX12: v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_cndmask_b32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x08,0x92,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_fmaak_f32 v6, v1, v255, 0xaf123456 +// GFX12: v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_fmaak_f32 v6, v1, v255, 0xaf123456 ; encoding: [0x04,0xff,0x83,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_fmaak_f32 v6, v255, v255, 0xaf123456 +// GFX12: v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_fmaak_f32 v6, v255, v255, 0xaf123456 ; encoding: [0x01,0xff,0x83,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_fmaak_f32 v6, v2, v255, 0xaf123456 +// GFX12: v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_fmaak_f32 v6, v2, v255, 0xaf123456 
; encoding: [0xff,0xff,0x83,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_fmaak_f32 v6, v3, v255, 0xaf123456 +// GFX12: v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_fmaak_f32 v6, v3, v255, 0xaf123456 ; encoding: [0x02,0xff,0x83,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_fmaak_f32 v6, v4, v255, 0xaf123456 +// GFX12: v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_fmaak_f32 v6, v4, v255, 0xaf123456 ; encoding: [0x03,0xff,0x83,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_fmaak_f32 v6, s105, v255, 0xaf123456 +// GFX12: v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_fmaak_f32 v6, s105, v255, 0xaf123456 ; encoding: [0x69,0xfe,0x83,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_fmaak_f32 v6, s1, v255, 0xaf123456 +// GFX12: v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_fmaak_f32 v6, s1, v255, 0xaf123456 ; encoding: [0x01,0xfe,0x83,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_fmaak_f32 v6, ttmp15, v255, 0xaf123456 +// GFX12: v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_fmaak_f32 v6, ttmp15, v255, 0xaf123456 ; encoding: [0x7b,0xfe,0x83,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_fmaak_f32 v6, exec_hi, v255, 0xaf123456 +// GFX12: v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, 
v255 :: v_dual_fmaak_f32 v6, exec_hi, v255, 0xaf123456 ; encoding: [0x7f,0xfe,0x83,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_fmaak_f32 v6, exec_lo, v255, 0xaf123456 +// GFX12: v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_fmaak_f32 v6, exec_lo, v255, 0xaf123456 ; encoding: [0x7e,0xfe,0x83,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_fmaak_f32 v6, m0, v255, 0xaf123456 +// GFX12: v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_fmaak_f32 v6, m0, v255, 0xaf123456 ; encoding: [0x7d,0xfe,0x83,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_fmaak_f32 v6, vcc_hi, v255, 0xaf123456 +// GFX12: v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_fmaak_f32 v6, vcc_hi, v255, 0xaf123456 ; encoding: [0x6b,0xfe,0x83,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_fmaak_f32 v6, vcc_lo, v255, 0xaf123456 +// GFX12: v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_fmaak_f32 v6, vcc_lo, v255, 0xaf123456 ; encoding: [0x6a,0xfe,0x83,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_fmaak_f32 v6, null, v255, 0xaf123456 +// GFX12: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_fmaak_f32 v6, null, v255, 0xaf123456 ; encoding: [0xff,0xfe,0x83,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: 
v_dual_fmaak_f32 v6, -1, v255, 0xaf123456 +// GFX12: v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_fmaak_f32 v6, -1, v255, 0xaf123456 ; encoding: [0xfd,0xfe,0x83,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_fmaak_f32 v6, 0.5, v3, 0xaf123456 +// GFX12: v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_fmaak_f32 v6, 0.5, v3, 0xaf123456 ; encoding: [0xf0,0xfe,0x83,0xc8,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_fmaak_f32 v6, src_scc, v4, 0xaf123456 +// GFX12: v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_fmaak_f32 v6, src_scc, v4, 0xaf123456 ; encoding: [0xc1,0xfe,0x83,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_fmaak_f32 v255, 0xaf123456, v5, 0xaf123456 +// GFX12: v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_fmaak_f32 v255, 0xaf123456, v5, 0xaf123456 ; encoding: [0x7c,0x08,0x82,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_fmac_f32 v6, v1, v255 +// GFX12: v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_fmac_f32 v6, v1, v255 ; encoding: [0x04,0xff,0x81,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_fmac_f32 v6, v255, v255 +// GFX12: v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_fmac_f32 v6, v255, v255 ; encoding: [0x01,0xff,0x81,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: 
v_dual_fmac_f32 v6, v2, v255 +// GFX12: v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_fmac_f32 v6, v2, v255 ; encoding: [0xff,0xff,0x81,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_fmac_f32 v6, v3, v255 +// GFX12: v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_fmac_f32 v6, v3, v255 ; encoding: [0x02,0xff,0x81,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_fmac_f32 v6, v4, v255 +// GFX12: v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_fmac_f32 v6, v4, v255 ; encoding: [0x03,0xff,0x81,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_fmac_f32 v6, s105, v255 +// GFX12: v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_fmac_f32 v6, s105, v255 ; encoding: [0x69,0xfe,0x81,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_fmac_f32 v6, s1, v255 +// GFX12: v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_fmac_f32 v6, s1, v255 ; encoding: [0x01,0xfe,0x81,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_fmac_f32 v6, ttmp15, v255 +// GFX12: v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_fmac_f32 v6, ttmp15, v255 ; encoding: [0x7b,0xfe,0x81,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_fmac_f32 v6, exec_hi, v255 +// GFX12: v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_fmac_f32 
v6, exec_hi, v255 ; encoding: [0x7f,0xfe,0x81,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_fmac_f32 v6, exec_lo, v255 +// GFX12: v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_fmac_f32 v6, exec_lo, v255 ; encoding: [0x7e,0xfe,0x81,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_fmac_f32 v6, m0, v255 +// GFX12: v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_fmac_f32 v6, m0, v255 ; encoding: [0x7d,0xfe,0x81,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_fmac_f32 v6, vcc_hi, v255 +// GFX12: v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_fmac_f32 v6, vcc_hi, v255 ; encoding: [0x6b,0xfe,0x81,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_fmac_f32 v6, vcc_lo, v255 +// GFX12: v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_fmac_f32 v6, vcc_lo, v255 ; encoding: [0x6a,0xfe,0x81,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_fmac_f32 v6, null, v255 +// GFX12: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_fmac_f32 v6, null, v255 ; encoding: [0xff,0xfe,0x81,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_fmac_f32 v6, -1, v255 +// GFX12: v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_fmac_f32 v6, -1, v255 ; encoding: 
[0xfd,0xfe,0x81,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_fmac_f32 v6, 0.5, v3 +// GFX12: v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_fmac_f32 v6, 0.5, v3 ; encoding: [0xf0,0xfe,0x81,0xc8,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_fmac_f32 v6, src_scc, v4 +// GFX12: v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_fmac_f32 v6, src_scc, v4 ; encoding: [0xc1,0xfe,0x81,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_fmac_f32 v255, 0xaf123456, v5 +// GFX12: v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_fmac_f32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x08,0x80,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, v1, 0xaf123456, v4 +// GFX12: v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, v1, 0xaf123456, v4 ; encoding: [0x04,0xff,0x85,0xc8,0x01,0x09,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, v255, 0xaf123456, v4 +// GFX12: v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, v255, 0xaf123456, v4 ; encoding: [0x01,0xff,0x85,0xc8,0xff,0x09,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, v2, 0xaf123456, v4 +// GFX12: v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, v2, 0xaf123456, v4 ; encoding: 
[0xff,0xff,0x85,0xc8,0x02,0x09,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, v3, 0xaf123456, v4 +// GFX12: v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, v3, 0xaf123456, v4 ; encoding: [0x02,0xff,0x85,0xc8,0x03,0x09,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, v4, 0xaf123456, v4 +// GFX12: v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, v4, 0xaf123456, v4 ; encoding: [0x03,0xff,0x85,0xc8,0x04,0x09,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, s105, 0xaf123456, v4 +// GFX12: v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, s105, 0xaf123456, v4 ; encoding: [0x69,0xfe,0x85,0xc8,0x69,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, s1, 0xaf123456, v4 +// GFX12: v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, s1, 0xaf123456, v4 ; encoding: [0x01,0xfe,0x85,0xc8,0x01,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, ttmp15, 0xaf123456, v4 +// GFX12: v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, ttmp15, 0xaf123456, v4 ; encoding: [0x7b,0xfe,0x85,0xc8,0x7b,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, exec_hi, 0xaf123456, v4 +// GFX12: v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, 
exec_hi, 0xaf123456, v4 ; encoding: [0x7f,0xfe,0x85,0xc8,0x7f,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, exec_lo, 0xaf123456, v4 +// GFX12: v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, exec_lo, 0xaf123456, v4 ; encoding: [0x7e,0xfe,0x85,0xc8,0x7e,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, m0, 0xaf123456, v4 +// GFX12: v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, m0, 0xaf123456, v4 ; encoding: [0x7d,0xfe,0x85,0xc8,0x7d,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, vcc_hi, 0xaf123456, v4 +// GFX12: v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, vcc_hi, 0xaf123456, v4 ; encoding: [0x6b,0xfe,0x85,0xc8,0x6b,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, vcc_lo, 0xaf123456, v4 +// GFX12: v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, vcc_lo, 0xaf123456, v4 ; encoding: [0x6a,0xfe,0x85,0xc8,0x6a,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, null, 0xaf123456, v4 +// GFX12: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, null, 0xaf123456, v4 ; encoding: [0xff,0xfe,0x85,0xc8,0x7c,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, -1, 0xaf123456, v4 +// GFX12: 
v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, -1, 0xaf123456, v4 ; encoding: [0xfd,0xfe,0x85,0xc8,0xc1,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, 0.5, 0xaf123456, v4 +// GFX12: v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, 0.5, 0xaf123456, v4 ; encoding: [0xf0,0xfe,0x85,0xc8,0xf0,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, src_scc, 0xaf123456, v4 +// GFX12: v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, src_scc, 0xaf123456, v4 ; encoding: [0xc1,0xfe,0x85,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 +// GFX12: v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 ; encoding: [0x7c,0x08,0x84,0xc8,0xff,0xfe,0xff,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, v1, v255 +// GFX12: v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, v1, v255 ; encoding: [0x04,0xff,0xa3,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, v255, v255 +// GFX12: v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, v255, v255 ; encoding: [0x01,0xff,0xa3,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, v2, v255 +// GFX12: 
v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, v2, v255 ; encoding: [0xff,0xff,0xa3,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, v3, v255 +// GFX12: v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, v3, v255 ; encoding: [0x02,0xff,0xa3,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, v4, v255 +// GFX12: v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, v4, v255 ; encoding: [0x03,0xff,0xa3,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, s105, v255 +// GFX12: v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, s105, v255 ; encoding: [0x69,0xfe,0xa3,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, s1, v255 +// GFX12: v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, s1, v255 ; encoding: [0x01,0xfe,0xa3,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, ttmp15, v255 +// GFX12: v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, ttmp15, v255 ; encoding: [0x7b,0xfe,0xa3,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, exec_hi, v255 +// GFX12: v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_lshlrev_b32 
v6, exec_hi, v255 ; encoding: [0x7f,0xfe,0xa3,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, exec_lo, v255 +// GFX12: v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, exec_lo, v255 ; encoding: [0x7e,0xfe,0xa3,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, m0, v255 +// GFX12: v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, m0, v255 ; encoding: [0x7d,0xfe,0xa3,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, vcc_hi, v255 +// GFX12: v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, vcc_hi, v255 ; encoding: [0x6b,0xfe,0xa3,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, vcc_lo, v255 +// GFX12: v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, vcc_lo, v255 ; encoding: [0x6a,0xfe,0xa3,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, null, v255 +// GFX12: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, null, v255 ; encoding: [0xff,0xfe,0xa3,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, -1, v255 +// GFX12: v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, -1, v255 ; 
encoding: [0xfd,0xfe,0xa3,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, 0.5, v3 +// GFX12: v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, 0.5, v3 ; encoding: [0xf0,0xfe,0xa3,0xc8,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, src_scc, v4 +// GFX12: v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, src_scc, v4 ; encoding: [0xc1,0xfe,0xa3,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_lshlrev_b32 v255, 0xaf123456, v5 +// GFX12: v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_lshlrev_b32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x08,0xa2,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_max_num_f32 v6, v1, v255 +// GFX12: v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_max_num_f32 v6, v1, v255 ; encoding: [0x04,0xff,0x95,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_max_num_f32 v6, v255, v255 +// GFX12: v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_max_num_f32 v6, v255, v255 ; encoding: [0x01,0xff,0x95,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_max_num_f32 v6, v2, v255 +// GFX12: v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_max_num_f32 v6, v2, v255 ; encoding: [0xff,0xff,0x95,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// 
W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_max_num_f32 v6, v3, v255 +// GFX12: v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_max_num_f32 v6, v3, v255 ; encoding: [0x02,0xff,0x95,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_max_num_f32 v6, v4, v255 +// GFX12: v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_max_num_f32 v6, v4, v255 ; encoding: [0x03,0xff,0x95,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_max_num_f32 v6, s105, v255 +// GFX12: v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_max_num_f32 v6, s105, v255 ; encoding: [0x69,0xfe,0x95,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_max_num_f32 v6, s1, v255 +// GFX12: v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_max_num_f32 v6, s1, v255 ; encoding: [0x01,0xfe,0x95,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_max_num_f32 v6, ttmp15, v255 +// GFX12: v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_max_num_f32 v6, ttmp15, v255 ; encoding: [0x7b,0xfe,0x95,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_max_num_f32 v6, exec_hi, v255 +// GFX12: v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_max_num_f32 v6, exec_hi, v255 ; encoding: [0x7f,0xfe,0x95,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 
+ +v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_max_num_f32 v6, exec_lo, v255 +// GFX12: v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_max_num_f32 v6, exec_lo, v255 ; encoding: [0x7e,0xfe,0x95,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_max_num_f32 v6, m0, v255 +// GFX12: v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_max_num_f32 v6, m0, v255 ; encoding: [0x7d,0xfe,0x95,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_max_num_f32 v6, vcc_hi, v255 +// GFX12: v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_max_num_f32 v6, vcc_hi, v255 ; encoding: [0x6b,0xfe,0x95,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_max_num_f32 v6, vcc_lo, v255 +// GFX12: v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_max_num_f32 v6, vcc_lo, v255 ; encoding: [0x6a,0xfe,0x95,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_max_num_f32 v6, null, v255 +// GFX12: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_max_num_f32 v6, null, v255 ; encoding: [0xff,0xfe,0x95,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_max_num_f32 v6, -1, v255 +// GFX12: v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_max_num_f32 v6, -1, v255 ; encoding: [0xfd,0xfe,0x95,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, 
0.5, 0xaf123456, v255 :: v_dual_max_num_f32 v6, 0.5, v3 +// GFX12: v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_max_num_f32 v6, 0.5, v3 ; encoding: [0xf0,0xfe,0x95,0xc8,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_max_num_f32 v6, src_scc, v4 +// GFX12: v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_max_num_f32 v6, src_scc, v4 ; encoding: [0xc1,0xfe,0x95,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_max_num_f32 v255, 0xaf123456, v5 +// GFX12: v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_max_num_f32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x08,0x94,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_min_num_f32 v6, v1, v255 +// GFX12: v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_min_num_f32 v6, v1, v255 ; encoding: [0x04,0xff,0x97,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_min_num_f32 v6, v255, v255 +// GFX12: v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_min_num_f32 v6, v255, v255 ; encoding: [0x01,0xff,0x97,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_min_num_f32 v6, v2, v255 +// GFX12: v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_min_num_f32 v6, v2, v255 ; encoding: [0xff,0xff,0x97,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_min_num_f32 v6, v3, v255 +// GFX12: v_dual_fmamk_f32 
v255, v2, 0xaf123456, v255 :: v_dual_min_num_f32 v6, v3, v255 ; encoding: [0x02,0xff,0x97,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_min_num_f32 v6, v4, v255 +// GFX12: v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_min_num_f32 v6, v4, v255 ; encoding: [0x03,0xff,0x97,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_min_num_f32 v6, s105, v255 +// GFX12: v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_min_num_f32 v6, s105, v255 ; encoding: [0x69,0xfe,0x97,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_min_num_f32 v6, s1, v255 +// GFX12: v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_min_num_f32 v6, s1, v255 ; encoding: [0x01,0xfe,0x97,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_min_num_f32 v6, ttmp15, v255 +// GFX12: v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_min_num_f32 v6, ttmp15, v255 ; encoding: [0x7b,0xfe,0x97,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_min_num_f32 v6, exec_hi, v255 +// GFX12: v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_min_num_f32 v6, exec_hi, v255 ; encoding: [0x7f,0xfe,0x97,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_min_num_f32 v6, exec_lo, v255 +// GFX12: v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_min_num_f32 
v6, exec_lo, v255 ; encoding: [0x7e,0xfe,0x97,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_min_num_f32 v6, m0, v255 +// GFX12: v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_min_num_f32 v6, m0, v255 ; encoding: [0x7d,0xfe,0x97,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_min_num_f32 v6, vcc_hi, v255 +// GFX12: v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_min_num_f32 v6, vcc_hi, v255 ; encoding: [0x6b,0xfe,0x97,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_min_num_f32 v6, vcc_lo, v255 +// GFX12: v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_min_num_f32 v6, vcc_lo, v255 ; encoding: [0x6a,0xfe,0x97,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_min_num_f32 v6, null, v255 +// GFX12: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_min_num_f32 v6, null, v255 ; encoding: [0xff,0xfe,0x97,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_min_num_f32 v6, -1, v255 +// GFX12: v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_min_num_f32 v6, -1, v255 ; encoding: [0xfd,0xfe,0x97,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_min_num_f32 v6, 0.5, v3 +// GFX12: v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_min_num_f32 v6, 0.5, v3 ; encoding: 
[0xf0,0xfe,0x97,0xc8,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_min_num_f32 v6, src_scc, v4 +// GFX12: v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_min_num_f32 v6, src_scc, v4 ; encoding: [0xc1,0xfe,0x97,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_min_num_f32 v255, 0xaf123456, v5 +// GFX12: v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_min_num_f32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x08,0x96,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_mov_b32 v6, v1 +// GFX12: v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_mov_b32 v6, v1 ; encoding: [0x04,0xff,0x91,0xc8,0x01,0x01,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_mov_b32 v6, v255 +// GFX12: v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_mov_b32 v6, v255 ; encoding: [0x01,0xff,0x91,0xc8,0xff,0x01,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_mov_b32 v6, v2 +// GFX12: v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_mov_b32 v6, v2 ; encoding: [0xff,0xff,0x91,0xc8,0x02,0x01,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_mov_b32 v6, v3 +// GFX12: v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_mov_b32 v6, v3 ; encoding: [0x02,0xff,0x91,0xc8,0x03,0x01,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v3, 
0xaf123456, v255 :: v_dual_mov_b32 v6, v4 +// GFX12: v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_mov_b32 v6, v4 ; encoding: [0x03,0xff,0x91,0xc8,0x04,0x01,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_mov_b32 v6, s105 +// GFX12: v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_mov_b32 v6, s105 ; encoding: [0x69,0xfe,0x91,0xc8,0x69,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_mov_b32 v6, s1 +// GFX12: v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_mov_b32 v6, s1 ; encoding: [0x01,0xfe,0x91,0xc8,0x01,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_mov_b32 v6, ttmp15 +// GFX12: v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_mov_b32 v6, ttmp15 ; encoding: [0x7b,0xfe,0x91,0xc8,0x7b,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_mov_b32 v6, exec_hi +// GFX12: v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_mov_b32 v6, exec_hi ; encoding: [0x7f,0xfe,0x91,0xc8,0x7f,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_mov_b32 v6, exec_lo +// GFX12: v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_mov_b32 v6, exec_lo ; encoding: [0x7e,0xfe,0x91,0xc8,0x7e,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_mov_b32 v6, m0 +// GFX12: v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_mov_b32 v6, m0 ; encoding: 
[0x7d,0xfe,0x91,0xc8,0x7d,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_mov_b32 v6, vcc_hi +// GFX12: v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_mov_b32 v6, vcc_hi ; encoding: [0x6b,0xfe,0x91,0xc8,0x6b,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_mov_b32 v6, vcc_lo +// GFX12: v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_mov_b32 v6, vcc_lo ; encoding: [0x6a,0xfe,0x91,0xc8,0x6a,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_mov_b32 v6, null +// GFX12: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_mov_b32 v6, null ; encoding: [0xff,0xfe,0x91,0xc8,0x7c,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_mov_b32 v6, -1 +// GFX12: v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_mov_b32 v6, -1 ; encoding: [0xfd,0xfe,0x91,0xc8,0xc1,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_mov_b32 v6, 0.5 +// GFX12: v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_mov_b32 v6, 0.5 ; encoding: [0xf0,0xfe,0x91,0xc8,0xf0,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_mov_b32 v6, src_scc +// GFX12: v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_mov_b32 v6, src_scc ; encoding: [0xc1,0xfe,0x91,0xc8,0xfd,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 
v6, null, 0xaf123456, v4 :: v_dual_mov_b32 v255, 0xaf123456 +// GFX12: v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_mov_b32 v255, 0xaf123456 ; encoding: [0x7c,0x08,0x90,0xc8,0xff,0x00,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 v6, v1, v255 +// GFX12: v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 v6, v1, v255 ; encoding: [0x04,0xff,0x8f,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 v6, v255, v255 +// GFX12: v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 v6, v255, v255 ; encoding: [0x01,0xff,0x8f,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 v6, v2, v255 +// GFX12: v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 v6, v2, v255 ; encoding: [0xff,0xff,0x8f,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 v6, v3, v255 +// GFX12: v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 v6, v3, v255 ; encoding: [0x02,0xff,0x8f,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 v6, v4, v255 +// GFX12: v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 v6, v4, v255 ; encoding: [0x03,0xff,0x8f,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 
v6, s105, v255 +// GFX12: v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 v6, s105, v255 ; encoding: [0x69,0xfe,0x8f,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 v6, s1, v255 +// GFX12: v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 v6, s1, v255 ; encoding: [0x01,0xfe,0x8f,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 v6, ttmp15, v255 +// GFX12: v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 v6, ttmp15, v255 ; encoding: [0x7b,0xfe,0x8f,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 v6, exec_hi, v255 +// GFX12: v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 v6, exec_hi, v255 ; encoding: [0x7f,0xfe,0x8f,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 v6, exec_lo, v255 +// GFX12: v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 v6, exec_lo, v255 ; encoding: [0x7e,0xfe,0x8f,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 v6, m0, v255 +// GFX12: v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 v6, m0, v255 ; encoding: [0x7d,0xfe,0x8f,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: 
v_dual_mul_dx9_zero_f32 v6, vcc_hi, v255 +// GFX12: v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 v6, vcc_hi, v255 ; encoding: [0x6b,0xfe,0x8f,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 v6, vcc_lo, v255 +// GFX12: v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 v6, vcc_lo, v255 ; encoding: [0x6a,0xfe,0x8f,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 v6, null, v255 +// GFX12: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 v6, null, v255 ; encoding: [0xff,0xfe,0x8f,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 v6, -1, v255 +// GFX12: v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 v6, -1, v255 ; encoding: [0xfd,0xfe,0x8f,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 v6, 0.5, v3 +// GFX12: v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 v6, 0.5, v3 ; encoding: [0xf0,0xfe,0x8f,0xc8,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 v6, src_scc, v4 +// GFX12: v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 v6, src_scc, v4 ; encoding: [0xc1,0xfe,0x8f,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v6, null, 
0xaf123456, v4 :: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v5 +// GFX12: v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x08,0x8e,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_mul_f32 v6, v1, v255 +// GFX12: v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_mul_f32 v6, v1, v255 ; encoding: [0x04,0xff,0x87,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_mul_f32 v6, v255, v255 +// GFX12: v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_mul_f32 v6, v255, v255 ; encoding: [0x01,0xff,0x87,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_mul_f32 v6, v2, v255 +// GFX12: v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_mul_f32 v6, v2, v255 ; encoding: [0xff,0xff,0x87,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_mul_f32 v6, v3, v255 +// GFX12: v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_mul_f32 v6, v3, v255 ; encoding: [0x02,0xff,0x87,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_mul_f32 v6, v4, v255 +// GFX12: v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_mul_f32 v6, v4, v255 ; encoding: [0x03,0xff,0x87,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_mul_f32 v6, s105, v255 +// GFX12: v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: 
v_dual_mul_f32 v6, s105, v255 ; encoding: [0x69,0xfe,0x87,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_mul_f32 v6, s1, v255 +// GFX12: v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_mul_f32 v6, s1, v255 ; encoding: [0x01,0xfe,0x87,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_mul_f32 v6, ttmp15, v255 +// GFX12: v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_mul_f32 v6, ttmp15, v255 ; encoding: [0x7b,0xfe,0x87,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_mul_f32 v6, exec_hi, v255 +// GFX12: v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_mul_f32 v6, exec_hi, v255 ; encoding: [0x7f,0xfe,0x87,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_mul_f32 v6, exec_lo, v255 +// GFX12: v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_mul_f32 v6, exec_lo, v255 ; encoding: [0x7e,0xfe,0x87,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_mul_f32 v6, m0, v255 +// GFX12: v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_mul_f32 v6, m0, v255 ; encoding: [0x7d,0xfe,0x87,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_mul_f32 v6, vcc_hi, v255 +// GFX12: v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_mul_f32 v6, vcc_hi, v255 ; encoding: 
[0x6b,0xfe,0x87,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_mul_f32 v6, vcc_lo, v255 +// GFX12: v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_mul_f32 v6, vcc_lo, v255 ; encoding: [0x6a,0xfe,0x87,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_mul_f32 v6, null, v255 +// GFX12: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_mul_f32 v6, null, v255 ; encoding: [0xff,0xfe,0x87,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_mul_f32 v6, -1, v255 +// GFX12: v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_mul_f32 v6, -1, v255 ; encoding: [0xfd,0xfe,0x87,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_mul_f32 v6, 0.5, v3 +// GFX12: v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_mul_f32 v6, 0.5, v3 ; encoding: [0xf0,0xfe,0x87,0xc8,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_mul_f32 v6, src_scc, v4 +// GFX12: v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_mul_f32 v6, src_scc, v4 ; encoding: [0xc1,0xfe,0x87,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_mul_f32 v255, 0xaf123456, v5 +// GFX12: v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_mul_f32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x08,0x86,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: 
error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_sub_f32 v6, v1, v255 +// GFX12: v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_sub_f32 v6, v1, v255 ; encoding: [0x04,0xff,0x8b,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_sub_f32 v6, v255, v255 +// GFX12: v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_sub_f32 v6, v255, v255 ; encoding: [0x01,0xff,0x8b,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_sub_f32 v6, v2, v255 +// GFX12: v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_sub_f32 v6, v2, v255 ; encoding: [0xff,0xff,0x8b,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_sub_f32 v6, v3, v255 +// GFX12: v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_sub_f32 v6, v3, v255 ; encoding: [0x02,0xff,0x8b,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_sub_f32 v6, v4, v255 +// GFX12: v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_sub_f32 v6, v4, v255 ; encoding: [0x03,0xff,0x8b,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_sub_f32 v6, s105, v255 +// GFX12: v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_sub_f32 v6, s105, v255 ; encoding: [0x69,0xfe,0x8b,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_sub_f32 v6, s1, v255 +// GFX12: 
v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_sub_f32 v6, s1, v255 ; encoding: [0x01,0xfe,0x8b,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_sub_f32 v6, ttmp15, v255 +// GFX12: v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_sub_f32 v6, ttmp15, v255 ; encoding: [0x7b,0xfe,0x8b,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_sub_f32 v6, exec_hi, v255 +// GFX12: v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_sub_f32 v6, exec_hi, v255 ; encoding: [0x7f,0xfe,0x8b,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_sub_f32 v6, exec_lo, v255 +// GFX12: v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_sub_f32 v6, exec_lo, v255 ; encoding: [0x7e,0xfe,0x8b,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_sub_f32 v6, m0, v255 +// GFX12: v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_sub_f32 v6, m0, v255 ; encoding: [0x7d,0xfe,0x8b,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_sub_f32 v6, vcc_hi, v255 +// GFX12: v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_sub_f32 v6, vcc_hi, v255 ; encoding: [0x6b,0xfe,0x8b,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_sub_f32 v6, vcc_lo, v255 +// GFX12: v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_sub_f32 v6, vcc_lo, 
v255 ; encoding: [0x6a,0xfe,0x8b,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_sub_f32 v6, null, v255 +// GFX12: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_sub_f32 v6, null, v255 ; encoding: [0xff,0xfe,0x8b,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_sub_f32 v6, -1, v255 +// GFX12: v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_sub_f32 v6, -1, v255 ; encoding: [0xfd,0xfe,0x8b,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_sub_f32 v6, 0.5, v3 +// GFX12: v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_sub_f32 v6, 0.5, v3 ; encoding: [0xf0,0xfe,0x8b,0xc8,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_sub_f32 v6, src_scc, v4 +// GFX12: v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_sub_f32 v6, src_scc, v4 ; encoding: [0xc1,0xfe,0x8b,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_sub_f32 v255, 0xaf123456, v5 +// GFX12: v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_sub_f32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x08,0x8a,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_subrev_f32 v6, v1, v255 +// GFX12: v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_subrev_f32 v6, v1, v255 ; encoding: [0x04,0xff,0x8d,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_subrev_f32 v6, v255, v255 +// GFX12: v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_subrev_f32 v6, v255, v255 ; encoding: [0x01,0xff,0x8d,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_subrev_f32 v6, v2, v255 +// GFX12: v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_subrev_f32 v6, v2, v255 ; encoding: [0xff,0xff,0x8d,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_subrev_f32 v6, v3, v255 +// GFX12: v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_subrev_f32 v6, v3, v255 ; encoding: [0x02,0xff,0x8d,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_subrev_f32 v6, v4, v255 +// GFX12: v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_subrev_f32 v6, v4, v255 ; encoding: [0x03,0xff,0x8d,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_subrev_f32 v6, s105, v255 +// GFX12: v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_subrev_f32 v6, s105, v255 ; encoding: [0x69,0xfe,0x8d,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_subrev_f32 v6, s1, v255 +// GFX12: v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_subrev_f32 v6, s1, v255 ; encoding: [0x01,0xfe,0x8d,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, 
v255 :: v_dual_subrev_f32 v6, ttmp15, v255 +// GFX12: v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_subrev_f32 v6, ttmp15, v255 ; encoding: [0x7b,0xfe,0x8d,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_subrev_f32 v6, exec_hi, v255 +// GFX12: v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_subrev_f32 v6, exec_hi, v255 ; encoding: [0x7f,0xfe,0x8d,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_subrev_f32 v6, exec_lo, v255 +// GFX12: v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_subrev_f32 v6, exec_lo, v255 ; encoding: [0x7e,0xfe,0x8d,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_subrev_f32 v6, m0, v255 +// GFX12: v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_subrev_f32 v6, m0, v255 ; encoding: [0x7d,0xfe,0x8d,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_subrev_f32 v6, vcc_hi, v255 +// GFX12: v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_subrev_f32 v6, vcc_hi, v255 ; encoding: [0x6b,0xfe,0x8d,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_subrev_f32 v6, vcc_lo, v255 +// GFX12: v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_subrev_f32 v6, vcc_lo, v255 ; encoding: [0x6a,0xfe,0x8d,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_subrev_f32 v6, 
null, v255 +// GFX12: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_subrev_f32 v6, null, v255 ; encoding: [0xff,0xfe,0x8d,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_subrev_f32 v6, -1, v255 +// GFX12: v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_subrev_f32 v6, -1, v255 ; encoding: [0xfd,0xfe,0x8d,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_subrev_f32 v6, 0.5, v3 +// GFX12: v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_subrev_f32 v6, 0.5, v3 ; encoding: [0xf0,0xfe,0x8d,0xc8,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_subrev_f32 v6, src_scc, v4 +// GFX12: v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_subrev_f32 v6, src_scc, v4 ; encoding: [0xc1,0xfe,0x8d,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_subrev_f32 v255, 0xaf123456, v5 +// GFX12: v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_subrev_f32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x08,0x8c,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v4, v2 :: v_dual_add_f32 v6, v1, v3 +// GFX12: v_dual_max_num_f32 v255, v4, v2 :: v_dual_add_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x88,0xca,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v1, v2 :: v_dual_add_f32 v6, v255, v3 +// GFX12: v_dual_max_num_f32 v255, v1, v2 :: v_dual_add_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x88,0xca,0xff,0x07,0x06,0xff] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v255, v2 :: v_dual_add_f32 v6, v2, v3 +// GFX12: v_dual_max_num_f32 v255, v255, v2 :: v_dual_add_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x88,0xca,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v2, v2 :: v_dual_add_f32 v6, v3, v3 +// GFX12: v_dual_max_num_f32 v255, v2, v2 :: v_dual_add_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x88,0xca,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v3, v2 :: v_dual_add_f32 v6, v4, v3 +// GFX12: v_dual_max_num_f32 v255, v3, v2 :: v_dual_add_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x88,0xca,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s105, v2 :: v_dual_add_f32 v6, s1, v3 +// GFX12: v_dual_max_num_f32 v255, s105, v2 :: v_dual_add_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x88,0xca,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s1, v2 :: v_dual_add_f32 v6, s105, v3 +// GFX12: v_dual_max_num_f32 v255, s1, v2 :: v_dual_add_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x88,0xca,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_add_f32 v6, vcc_lo, v3 +// GFX12: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_add_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x88,0xca,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_add_f32 v6, vcc_hi, v3 +// GFX12: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_add_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x88,0xca,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_add_f32 v6, ttmp15, v3 +// GFX12: 
v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_add_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x88,0xca,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, m0, v2 :: v_dual_add_f32 v6, m0, v3 +// GFX12: v_dual_max_num_f32 v255, m0, v2 :: v_dual_add_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x88,0xca,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_add_f32 v6, exec_lo, v3 +// GFX12: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_add_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x88,0xca,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_add_f32 v6, exec_hi, v3 +// GFX12: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_add_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x88,0xca,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0xaf123456, v2 :: v_dual_add_f32 v6, null, v3 +// GFX12: v_dual_max_num_f32 v255, 0xaf123456, v2 :: v_dual_add_f32 v6, null, v3 ; encoding: [0xff,0x04,0x88,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_add_f32 v6, -1, v3 +// GFX12: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_add_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x88,0xca,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_add_f32 v6, 0.5, v2 +// GFX12: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_add_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x88,0xca,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, -1, v4 :: v_dual_add_f32 v6, src_scc, v5 +// GFX12: v_dual_max_num_f32 v255, -1, v4 :: v_dual_add_f32 v6, src_scc, v5 ; encoding: 
[0xc1,0x08,0x88,0xca,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v6, null, v5 :: v_dual_add_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_max_num_f32 v6, null, v5 :: v_dual_add_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x88,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v4, v2 :: v_dual_add_nc_u32 v6, v1, v3 +// GFX12: v_dual_max_num_f32 v255, v4, v2 :: v_dual_add_nc_u32 v6, v1, v3 ; encoding: [0x04,0x05,0xa0,0xca,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v1, v2 :: v_dual_add_nc_u32 v6, v255, v3 +// GFX12: v_dual_max_num_f32 v255, v1, v2 :: v_dual_add_nc_u32 v6, v255, v3 ; encoding: [0x01,0x05,0xa0,0xca,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v255, v2 :: v_dual_add_nc_u32 v6, v2, v3 +// GFX12: v_dual_max_num_f32 v255, v255, v2 :: v_dual_add_nc_u32 v6, v2, v3 ; encoding: [0xff,0x05,0xa0,0xca,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v2, v2 :: v_dual_add_nc_u32 v6, v3, v3 +// GFX12: v_dual_max_num_f32 v255, v2, v2 :: v_dual_add_nc_u32 v6, v3, v3 ; encoding: [0x02,0x05,0xa0,0xca,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v3, v2 :: v_dual_add_nc_u32 v6, v4, v3 +// GFX12: v_dual_max_num_f32 v255, v3, v2 :: v_dual_add_nc_u32 v6, v4, v3 ; encoding: [0x03,0x05,0xa0,0xca,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s105, v2 :: v_dual_add_nc_u32 v6, s1, v3 +// GFX12: v_dual_max_num_f32 v255, s105, v2 :: v_dual_add_nc_u32 v6, s1, v3 ; encoding: [0x69,0x04,0xa0,0xca,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_max_num_f32 v255, s1, v2 :: v_dual_add_nc_u32 v6, s105, v3 +// GFX12: v_dual_max_num_f32 v255, s1, v2 :: v_dual_add_nc_u32 v6, s105, v3 ; encoding: [0x01,0x04,0xa0,0xca,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v6, vcc_lo, v3 +// GFX12: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xa0,0xca,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v6, vcc_hi, v3 +// GFX12: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xa0,0xca,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v6, ttmp15, v3 +// GFX12: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xa0,0xca,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, m0, v2 :: v_dual_add_nc_u32 v6, m0, v3 +// GFX12: v_dual_max_num_f32 v255, m0, v2 :: v_dual_add_nc_u32 v6, m0, v3 ; encoding: [0x7d,0x04,0xa0,0xca,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v6, exec_lo, v3 +// GFX12: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xa0,0xca,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v6, exec_hi, v3 +// GFX12: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xa0,0xca,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0xaf123456, 
v2 :: v_dual_add_nc_u32 v6, null, v3 +// GFX12: v_dual_max_num_f32 v255, 0xaf123456, v2 :: v_dual_add_nc_u32 v6, null, v3 ; encoding: [0xff,0x04,0xa0,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_add_nc_u32 v6, -1, v3 +// GFX12: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_add_nc_u32 v6, -1, v3 ; encoding: [0xfd,0x04,0xa0,0xca,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_add_nc_u32 v6, 0.5, v2 +// GFX12: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_add_nc_u32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xa0,0xca,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, -1, v4 :: v_dual_add_nc_u32 v6, src_scc, v5 +// GFX12: v_dual_max_num_f32 v255, -1, v4 :: v_dual_add_nc_u32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xa0,0xca,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v6, null, v5 :: v_dual_add_nc_u32 v255, 0xaf123456, v4 +// GFX12: v_dual_max_num_f32 v6, null, v5 :: v_dual_add_nc_u32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xa0,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v4, v2 :: v_dual_cndmask_b32 v6, v1, v3 +// GFX12: v_dual_max_num_f32 v255, v4, v2 :: v_dual_cndmask_b32 v6, v1, v3 ; encoding: [0x04,0x05,0x92,0xca,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v1, v2 :: v_dual_cndmask_b32 v6, v255, v3 +// GFX12: v_dual_max_num_f32 v255, v1, v2 :: v_dual_cndmask_b32 v6, v255, v3 ; encoding: [0x01,0x05,0x92,0xca,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v255, v2 :: v_dual_cndmask_b32 v6, v2, v3 
+// GFX12: v_dual_max_num_f32 v255, v255, v2 :: v_dual_cndmask_b32 v6, v2, v3 ; encoding: [0xff,0x05,0x92,0xca,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v2, v2 :: v_dual_cndmask_b32 v6, v3, v3 +// GFX12: v_dual_max_num_f32 v255, v2, v2 :: v_dual_cndmask_b32 v6, v3, v3 ; encoding: [0x02,0x05,0x92,0xca,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v3, v2 :: v_dual_cndmask_b32 v6, v4, v3 +// GFX12: v_dual_max_num_f32 v255, v3, v2 :: v_dual_cndmask_b32 v6, v4, v3 ; encoding: [0x03,0x05,0x92,0xca,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s105, v2 :: v_dual_cndmask_b32 v6, s105, v3 +// GFX12: v_dual_max_num_f32 v255, s105, v2 :: v_dual_cndmask_b32 v6, s105, v3 ; encoding: [0x69,0x04,0x92,0xca,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s1, v3 +// GFX12: v_dual_max_num_f32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s1, v3 ; encoding: [0x01,0x04,0x92,0xca,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v6, ttmp15, v3 +// GFX12: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x92,0xca,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v6, exec_hi, v3 +// GFX12: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x92,0xca,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v6, exec_lo, v3 +// GFX12: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v6, 
exec_lo, v3 ; encoding: [0x7e,0x04,0x92,0xca,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, m0, v2 :: v_dual_cndmask_b32 v6, m0, v3 +// GFX12: v_dual_max_num_f32 v255, m0, v2 :: v_dual_cndmask_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0x92,0xca,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v6, vcc_hi, v3 +// GFX12: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x92,0xca,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v6, vcc_lo, v3 +// GFX12: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x92,0xca,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0xaf123456, v2 :: v_dual_cndmask_b32 v6, null, v3 +// GFX12: v_dual_max_num_f32 v255, 0xaf123456, v2 :: v_dual_cndmask_b32 v6, null, v3 ; encoding: [0xff,0x04,0x92,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_cndmask_b32 v6, -1, v3 +// GFX12: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_cndmask_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0x92,0xca,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_cndmask_b32 v6, 0.5, v2 +// GFX12: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_cndmask_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x92,0xca,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, -1, v4 :: v_dual_cndmask_b32 v6, src_scc, v5 +// GFX12: v_dual_max_num_f32 v255, -1, v4 :: v_dual_cndmask_b32 v6, src_scc, v5 ; encoding: 
[0xc1,0x08,0x92,0xca,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v6, null, v5 :: v_dual_cndmask_b32 v255, 0xaf123456, v4 +// GFX12: v_dual_max_num_f32 v6, null, v5 :: v_dual_cndmask_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x92,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v4, v2 :: v_dual_fmaak_f32 v6, v1, v3, 0xaf123456 +// GFX12: v_dual_max_num_f32 v255, v4, v2 :: v_dual_fmaak_f32 v6, v1, v3, 0xaf123456 ; encoding: [0x04,0x05,0x82,0xca,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v1, v2 :: v_dual_fmaak_f32 v6, v255, v3, 0xaf123456 +// GFX12: v_dual_max_num_f32 v255, v1, v2 :: v_dual_fmaak_f32 v6, v255, v3, 0xaf123456 ; encoding: [0x01,0x05,0x82,0xca,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v255, v2 :: v_dual_fmaak_f32 v6, v2, v3, 0xaf123456 +// GFX12: v_dual_max_num_f32 v255, v255, v2 :: v_dual_fmaak_f32 v6, v2, v3, 0xaf123456 ; encoding: [0xff,0x05,0x82,0xca,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v2, v2 :: v_dual_fmaak_f32 v6, v3, v3, 0xaf123456 +// GFX12: v_dual_max_num_f32 v255, v2, v2 :: v_dual_fmaak_f32 v6, v3, v3, 0xaf123456 ; encoding: [0x02,0x05,0x82,0xca,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v3, v2 :: v_dual_fmaak_f32 v6, v4, v3, 0xaf123456 +// GFX12: v_dual_max_num_f32 v255, v3, v2 :: v_dual_fmaak_f32 v6, v4, v3, 0xaf123456 ; encoding: [0x03,0x05,0x82,0xca,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s105, v2 
:: v_dual_fmaak_f32 v6, s105, v3, 0xaf123456 +// GFX12: v_dual_max_num_f32 v255, s105, v2 :: v_dual_fmaak_f32 v6, s105, v3, 0xaf123456 ; encoding: [0x69,0x04,0x82,0xca,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s1, v2 :: v_dual_fmaak_f32 v6, s1, v3, 0xaf123456 +// GFX12: v_dual_max_num_f32 v255, s1, v2 :: v_dual_fmaak_f32 v6, s1, v3, 0xaf123456 ; encoding: [0x01,0x04,0x82,0xca,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_fmaak_f32 v6, ttmp15, v3, 0xaf123456 +// GFX12: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_fmaak_f32 v6, ttmp15, v3, 0xaf123456 ; encoding: [0x7b,0x04,0x82,0xca,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_fmaak_f32 v6, exec_hi, v3, 0xaf123456 +// GFX12: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_fmaak_f32 v6, exec_hi, v3, 0xaf123456 ; encoding: [0x7f,0x04,0x82,0xca,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_fmaak_f32 v6, exec_lo, v3, 0xaf123456 +// GFX12: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_fmaak_f32 v6, exec_lo, v3, 0xaf123456 ; encoding: [0x7e,0x04,0x82,0xca,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, m0, v2 :: v_dual_fmaak_f32 v6, m0, v3, 0xaf123456 +// GFX12: v_dual_max_num_f32 v255, m0, v2 :: v_dual_fmaak_f32 v6, m0, v3, 0xaf123456 ; encoding: [0x7d,0x04,0x82,0xca,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_fmaak_f32 v6, vcc_hi, v3, 0xaf123456 +// GFX12: v_dual_max_num_f32 v255, vcc_hi, 
v2 :: v_dual_fmaak_f32 v6, vcc_hi, v3, 0xaf123456 ; encoding: [0x6b,0x04,0x82,0xca,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_fmaak_f32 v6, vcc_lo, v3, 0xaf123456 +// GFX12: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_fmaak_f32 v6, vcc_lo, v3, 0xaf123456 ; encoding: [0x6a,0x04,0x82,0xca,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0xaf123456, v2 :: v_dual_fmaak_f32 v6, null, v3, 0xaf123456 +// GFX12: v_dual_max_num_f32 v255, 0xaf123456, v2 :: v_dual_fmaak_f32 v6, null, v3, 0xaf123456 ; encoding: [0xff,0x04,0x82,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_fmaak_f32 v6, -1, v3, 0xaf123456 +// GFX12: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_fmaak_f32 v6, -1, v3, 0xaf123456 ; encoding: [0xfd,0x04,0x82,0xca,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_fmaak_f32 v6, 0.5, v2, 0xaf123456 +// GFX12: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_fmaak_f32 v6, 0.5, v2, 0xaf123456 ; encoding: [0xf0,0x06,0x82,0xca,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, -1, v4 :: v_dual_fmaak_f32 v6, src_scc, v5, 0xaf123456 +// GFX12: v_dual_max_num_f32 v255, -1, v4 :: v_dual_fmaak_f32 v6, src_scc, v5, 0xaf123456 ; encoding: [0xc1,0x08,0x82,0xca,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v6, null, v5 :: v_dual_fmaak_f32 v255, 0xaf123456, v4, 0xaf123456 +// GFX12: v_dual_max_num_f32 v6, null, v5 :: v_dual_fmaak_f32 v255, 0xaf123456, v4, 0xaf123456 ; encoding: 
[0x7c,0x0a,0x82,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v4, v2 :: v_dual_fmac_f32 v6, v1, v3 +// GFX12: v_dual_max_num_f32 v255, v4, v2 :: v_dual_fmac_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x80,0xca,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v1, v2 :: v_dual_fmac_f32 v6, v255, v3 +// GFX12: v_dual_max_num_f32 v255, v1, v2 :: v_dual_fmac_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x80,0xca,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v255, v2 :: v_dual_fmac_f32 v6, v2, v3 +// GFX12: v_dual_max_num_f32 v255, v255, v2 :: v_dual_fmac_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x80,0xca,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v2, v2 :: v_dual_fmac_f32 v6, v3, v3 +// GFX12: v_dual_max_num_f32 v255, v2, v2 :: v_dual_fmac_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x80,0xca,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v3, v2 :: v_dual_fmac_f32 v6, v4, v3 +// GFX12: v_dual_max_num_f32 v255, v3, v2 :: v_dual_fmac_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x80,0xca,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s105, v2 :: v_dual_fmac_f32 v6, s1, v3 +// GFX12: v_dual_max_num_f32 v255, s105, v2 :: v_dual_fmac_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x80,0xca,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s1, v2 :: v_dual_fmac_f32 v6, s105, v3 +// GFX12: v_dual_max_num_f32 v255, s1, v2 :: v_dual_fmac_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x80,0xca,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 
v255, ttmp15, v2 :: v_dual_fmac_f32 v6, vcc_lo, v3 +// GFX12: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_fmac_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x80,0xca,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_fmac_f32 v6, vcc_hi, v3 +// GFX12: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_fmac_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x80,0xca,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_fmac_f32 v6, ttmp15, v3 +// GFX12: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_fmac_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x80,0xca,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, m0, v2 :: v_dual_fmac_f32 v6, m0, v3 +// GFX12: v_dual_max_num_f32 v255, m0, v2 :: v_dual_fmac_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x80,0xca,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v6, exec_lo, v3 +// GFX12: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x80,0xca,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v6, exec_hi, v3 +// GFX12: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x80,0xca,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0xaf123456, v2 :: v_dual_fmac_f32 v6, null, v3 +// GFX12: v_dual_max_num_f32 v255, 0xaf123456, v2 :: v_dual_fmac_f32 v6, null, v3 ; encoding: [0xff,0x04,0x80,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_fmac_f32 v6, -1, 
v3 +// GFX12: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_fmac_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x80,0xca,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_fmac_f32 v6, 0.5, v2 +// GFX12: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_fmac_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x80,0xca,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, -1, v4 :: v_dual_fmac_f32 v6, src_scc, v5 +// GFX12: v_dual_max_num_f32 v255, -1, v4 :: v_dual_fmac_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x80,0xca,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v6, null, v5 :: v_dual_fmac_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_max_num_f32 v6, null, v5 :: v_dual_fmac_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x80,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v4, v255 :: v_dual_fmamk_f32 v6, v1, 0xaf123456, v255 +// GFX12: v_dual_max_num_f32 v255, v4, v255 :: v_dual_fmamk_f32 v6, v1, 0xaf123456, v255 ; encoding: [0x04,0xff,0x85,0xca,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v1, v255 :: v_dual_fmamk_f32 v6, v255, 0xaf123456, v255 +// GFX12: v_dual_max_num_f32 v255, v1, v255 :: v_dual_fmamk_f32 v6, v255, 0xaf123456, v255 ; encoding: [0x01,0xff,0x85,0xca,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v255, v255 :: v_dual_fmamk_f32 v6, v2, 0xaf123456, v255 +// GFX12: v_dual_max_num_f32 v255, v255, v255 :: v_dual_fmamk_f32 v6, v2, 0xaf123456, v255 ; encoding: [0xff,0xff,0x85,0xca,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_max_num_f32 v255, v2, v255 :: v_dual_fmamk_f32 v6, v3, 0xaf123456, v255 +// GFX12: v_dual_max_num_f32 v255, v2, v255 :: v_dual_fmamk_f32 v6, v3, 0xaf123456, v255 ; encoding: [0x02,0xff,0x85,0xca,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v3, v255 :: v_dual_fmamk_f32 v6, v4, 0xaf123456, v255 +// GFX12: v_dual_max_num_f32 v255, v3, v255 :: v_dual_fmamk_f32 v6, v4, 0xaf123456, v255 ; encoding: [0x03,0xff,0x85,0xca,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s105, v255 :: v_dual_fmamk_f32 v6, s105, 0xaf123456, v255 +// GFX12: v_dual_max_num_f32 v255, s105, v255 :: v_dual_fmamk_f32 v6, s105, 0xaf123456, v255 ; encoding: [0x69,0xfe,0x85,0xca,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s1, v255 :: v_dual_fmamk_f32 v6, s1, 0xaf123456, v255 +// GFX12: v_dual_max_num_f32 v255, s1, v255 :: v_dual_fmamk_f32 v6, s1, 0xaf123456, v255 ; encoding: [0x01,0xfe,0x85,0xca,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, ttmp15, v255 :: v_dual_fmamk_f32 v6, ttmp15, 0xaf123456, v255 +// GFX12: v_dual_max_num_f32 v255, ttmp15, v255 :: v_dual_fmamk_f32 v6, ttmp15, 0xaf123456, v255 ; encoding: [0x7b,0xfe,0x85,0xca,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_hi, v255 :: v_dual_fmamk_f32 v6, exec_hi, 0xaf123456, v255 +// GFX12: v_dual_max_num_f32 v255, exec_hi, v255 :: v_dual_fmamk_f32 v6, exec_hi, 0xaf123456, v255 ; encoding: [0x7f,0xfe,0x85,0xca,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_lo, v255 :: v_dual_fmamk_f32 v6, 
exec_lo, 0xaf123456, v255 +// GFX12: v_dual_max_num_f32 v255, exec_lo, v255 :: v_dual_fmamk_f32 v6, exec_lo, 0xaf123456, v255 ; encoding: [0x7e,0xfe,0x85,0xca,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, m0, v255 :: v_dual_fmamk_f32 v6, m0, 0xaf123456, v255 +// GFX12: v_dual_max_num_f32 v255, m0, v255 :: v_dual_fmamk_f32 v6, m0, 0xaf123456, v255 ; encoding: [0x7d,0xfe,0x85,0xca,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_hi, v255 :: v_dual_fmamk_f32 v6, vcc_hi, 0xaf123456, v255 +// GFX12: v_dual_max_num_f32 v255, vcc_hi, v255 :: v_dual_fmamk_f32 v6, vcc_hi, 0xaf123456, v255 ; encoding: [0x6b,0xfe,0x85,0xca,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_lo, v255 :: v_dual_fmamk_f32 v6, vcc_lo, 0xaf123456, v255 +// GFX12: v_dual_max_num_f32 v255, vcc_lo, v255 :: v_dual_fmamk_f32 v6, vcc_lo, 0xaf123456, v255 ; encoding: [0x6a,0xfe,0x85,0xca,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, null, 0xaf123456, v255 +// GFX12: v_dual_max_num_f32 v255, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, null, 0xaf123456, v255 ; encoding: [0xff,0xfe,0x85,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, src_scc, v255 :: v_dual_fmamk_f32 v6, -1, 0xaf123456, v255 +// GFX12: v_dual_max_num_f32 v255, src_scc, v255 :: v_dual_fmamk_f32 v6, -1, 0xaf123456, v255 ; encoding: [0xfd,0xfe,0x85,0xca,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_fmamk_f32 v6, 0.5, 0xaf123456, v255 +// GFX12: 
v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_fmamk_f32 v6, 0.5, 0xaf123456, v255 ; encoding: [0xf0,0x06,0x84,0xca,0xf0,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, -1, v4 :: v_dual_fmamk_f32 v6, src_scc, 0xaf123456, v255 +// GFX12: v_dual_max_num_f32 v255, -1, v4 :: v_dual_fmamk_f32 v6, src_scc, 0xaf123456, v255 ; encoding: [0xc1,0x08,0x84,0xca,0xfd,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v6, null, v5 :: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v4 +// GFX12: v_dual_max_num_f32 v6, null, v5 :: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x84,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v4, v2 :: v_dual_lshlrev_b32 v6, v1, v3 +// GFX12: v_dual_max_num_f32 v255, v4, v2 :: v_dual_lshlrev_b32 v6, v1, v3 ; encoding: [0x04,0x05,0xa2,0xca,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v1, v2 :: v_dual_lshlrev_b32 v6, v255, v3 +// GFX12: v_dual_max_num_f32 v255, v1, v2 :: v_dual_lshlrev_b32 v6, v255, v3 ; encoding: [0x01,0x05,0xa2,0xca,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v255, v2 :: v_dual_lshlrev_b32 v6, v2, v3 +// GFX12: v_dual_max_num_f32 v255, v255, v2 :: v_dual_lshlrev_b32 v6, v2, v3 ; encoding: [0xff,0x05,0xa2,0xca,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v2, v2 :: v_dual_lshlrev_b32 v6, v3, v3 +// GFX12: v_dual_max_num_f32 v255, v2, v2 :: v_dual_lshlrev_b32 v6, v3, v3 ; encoding: [0x02,0x05,0xa2,0xca,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v3, v2 :: 
v_dual_lshlrev_b32 v6, v4, v3 +// GFX12: v_dual_max_num_f32 v255, v3, v2 :: v_dual_lshlrev_b32 v6, v4, v3 ; encoding: [0x03,0x05,0xa2,0xca,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s105, v2 :: v_dual_lshlrev_b32 v6, s1, v3 +// GFX12: v_dual_max_num_f32 v255, s105, v2 :: v_dual_lshlrev_b32 v6, s1, v3 ; encoding: [0x69,0x04,0xa2,0xca,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s1, v2 :: v_dual_lshlrev_b32 v6, s105, v3 +// GFX12: v_dual_max_num_f32 v255, s1, v2 :: v_dual_lshlrev_b32 v6, s105, v3 ; encoding: [0x01,0x04,0xa2,0xca,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v6, vcc_lo, v3 +// GFX12: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xa2,0xca,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v6, vcc_hi, v3 +// GFX12: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xa2,0xca,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v6, ttmp15, v3 +// GFX12: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xa2,0xca,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, m0, v2 :: v_dual_lshlrev_b32 v6, m0, v3 +// GFX12: v_dual_max_num_f32 v255, m0, v2 :: v_dual_lshlrev_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0xa2,0xca,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v6, exec_lo, v3 +// GFX12: v_dual_max_num_f32 
v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xa2,0xca,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v6, exec_hi, v3 +// GFX12: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xa2,0xca,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0xaf123456, v2 :: v_dual_lshlrev_b32 v6, null, v3 +// GFX12: v_dual_max_num_f32 v255, 0xaf123456, v2 :: v_dual_lshlrev_b32 v6, null, v3 ; encoding: [0xff,0x04,0xa2,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v6, -1, v3 +// GFX12: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0xa2,0xca,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v6, 0.5, v2 +// GFX12: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xa2,0xca,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, -1, v4 :: v_dual_lshlrev_b32 v6, src_scc, v5 +// GFX12: v_dual_max_num_f32 v255, -1, v4 :: v_dual_lshlrev_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xa2,0xca,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v6, null, v5 :: v_dual_lshlrev_b32 v255, 0xaf123456, v4 +// GFX12: v_dual_max_num_f32 v6, null, v5 :: v_dual_lshlrev_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xa2,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v4, v2 :: v_dual_max_num_f32 v6, v1, v3 +// GFX12: v_dual_max_num_f32 
v255, v4, v2 :: v_dual_max_num_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x94,0xca,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v1, v2 :: v_dual_max_num_f32 v6, v255, v3 +// GFX12: v_dual_max_num_f32 v255, v1, v2 :: v_dual_max_num_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x94,0xca,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v255, v2 :: v_dual_max_num_f32 v6, v2, v3 +// GFX12: v_dual_max_num_f32 v255, v255, v2 :: v_dual_max_num_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x94,0xca,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v2, v2 :: v_dual_max_num_f32 v6, v3, v3 +// GFX12: v_dual_max_num_f32 v255, v2, v2 :: v_dual_max_num_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x94,0xca,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v3, v2 :: v_dual_max_num_f32 v6, v4, v3 +// GFX12: v_dual_max_num_f32 v255, v3, v2 :: v_dual_max_num_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x94,0xca,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s105, v2 :: v_dual_max_num_f32 v6, s1, v3 +// GFX12: v_dual_max_num_f32 v255, s105, v2 :: v_dual_max_num_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x94,0xca,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s1, v2 :: v_dual_max_num_f32 v6, s105, v3 +// GFX12: v_dual_max_num_f32 v255, s1, v2 :: v_dual_max_num_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x94,0xca,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_max_num_f32 v6, vcc_lo, v3 +// GFX12: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_max_num_f32 v6, vcc_lo, v3 ; encoding: 
[0x7b,0x04,0x94,0xca,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_max_num_f32 v6, vcc_hi, v3 +// GFX12: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_max_num_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x94,0xca,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_max_num_f32 v6, ttmp15, v3 +// GFX12: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_max_num_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x94,0xca,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, m0, v2 :: v_dual_max_num_f32 v6, m0, v3 +// GFX12: v_dual_max_num_f32 v255, m0, v2 :: v_dual_max_num_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x94,0xca,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v6, exec_lo, v3 +// GFX12: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x94,0xca,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v6, exec_hi, v3 +// GFX12: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x94,0xca,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0xaf123456, v2 :: v_dual_max_num_f32 v6, null, v3 +// GFX12: v_dual_max_num_f32 v255, 0xaf123456, v2 :: v_dual_max_num_f32 v6, null, v3 ; encoding: [0xff,0x04,0x94,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_max_num_f32 v6, -1, v3 +// GFX12: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_max_num_f32 v6, -1, v3 ; encoding: 
[0xfd,0x04,0x94,0xca,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_max_num_f32 v6, 0.5, v2 +// GFX12: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_max_num_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x94,0xca,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, -1, v4 :: v_dual_max_num_f32 v6, src_scc, v5 +// GFX12: v_dual_max_num_f32 v255, -1, v4 :: v_dual_max_num_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x94,0xca,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v6, null, v5 :: v_dual_max_num_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_max_num_f32 v6, null, v5 :: v_dual_max_num_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x94,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v4, v2 :: v_dual_min_num_f32 v6, v1, v3 +// GFX12: v_dual_max_num_f32 v255, v4, v2 :: v_dual_min_num_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x96,0xca,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v1, v2 :: v_dual_min_num_f32 v6, v255, v3 +// GFX12: v_dual_max_num_f32 v255, v1, v2 :: v_dual_min_num_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x96,0xca,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v255, v2 :: v_dual_min_num_f32 v6, v2, v3 +// GFX12: v_dual_max_num_f32 v255, v255, v2 :: v_dual_min_num_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x96,0xca,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v2, v2 :: v_dual_min_num_f32 v6, v3, v3 +// GFX12: v_dual_max_num_f32 v255, v2, v2 :: v_dual_min_num_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x96,0xca,0x03,0x07,0x06,0xff] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v3, v2 :: v_dual_min_num_f32 v6, v4, v3 +// GFX12: v_dual_max_num_f32 v255, v3, v2 :: v_dual_min_num_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x96,0xca,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s105, v2 :: v_dual_min_num_f32 v6, s1, v3 +// GFX12: v_dual_max_num_f32 v255, s105, v2 :: v_dual_min_num_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x96,0xca,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s1, v2 :: v_dual_min_num_f32 v6, s105, v3 +// GFX12: v_dual_max_num_f32 v255, s1, v2 :: v_dual_min_num_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x96,0xca,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_min_num_f32 v6, vcc_lo, v3 +// GFX12: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_min_num_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x96,0xca,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_min_num_f32 v6, vcc_hi, v3 +// GFX12: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_min_num_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x96,0xca,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_min_num_f32 v6, ttmp15, v3 +// GFX12: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_min_num_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x96,0xca,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, m0, v2 :: v_dual_min_num_f32 v6, m0, v3 +// GFX12: v_dual_max_num_f32 v255, m0, v2 :: v_dual_min_num_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x96,0xca,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v6, exec_lo, v3 +// GFX12: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x96,0xca,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v6, exec_hi, v3 +// GFX12: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x96,0xca,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0xaf123456, v2 :: v_dual_min_num_f32 v6, null, v3 +// GFX12: v_dual_max_num_f32 v255, 0xaf123456, v2 :: v_dual_min_num_f32 v6, null, v3 ; encoding: [0xff,0x04,0x96,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_min_num_f32 v6, -1, v3 +// GFX12: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_min_num_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x96,0xca,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_min_num_f32 v6, 0.5, v2 +// GFX12: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_min_num_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x96,0xca,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, -1, v4 :: v_dual_min_num_f32 v6, src_scc, v5 +// GFX12: v_dual_max_num_f32 v255, -1, v4 :: v_dual_min_num_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x96,0xca,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v6, null, v5 :: v_dual_min_num_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_max_num_f32 v6, null, v5 :: v_dual_min_num_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x96,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_max_num_f32 v255, v4, v255 :: v_dual_mov_b32 v6, v1 +// GFX12: v_dual_max_num_f32 v255, v4, v255 :: v_dual_mov_b32 v6, v1 ; encoding: [0x04,0xff,0x91,0xca,0x01,0x01,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v1, v255 :: v_dual_mov_b32 v6, v255 +// GFX12: v_dual_max_num_f32 v255, v1, v255 :: v_dual_mov_b32 v6, v255 ; encoding: [0x01,0xff,0x91,0xca,0xff,0x01,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v255, v255 :: v_dual_mov_b32 v6, v2 +// GFX12: v_dual_max_num_f32 v255, v255, v255 :: v_dual_mov_b32 v6, v2 ; encoding: [0xff,0xff,0x91,0xca,0x02,0x01,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v2, v255 :: v_dual_mov_b32 v6, v3 +// GFX12: v_dual_max_num_f32 v255, v2, v255 :: v_dual_mov_b32 v6, v3 ; encoding: [0x02,0xff,0x91,0xca,0x03,0x01,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v3, v255 :: v_dual_mov_b32 v6, v4 +// GFX12: v_dual_max_num_f32 v255, v3, v255 :: v_dual_mov_b32 v6, v4 ; encoding: [0x03,0xff,0x91,0xca,0x04,0x01,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s105, v255 :: v_dual_mov_b32 v6, s1 +// GFX12: v_dual_max_num_f32 v255, s105, v255 :: v_dual_mov_b32 v6, s1 ; encoding: [0x69,0xfe,0x91,0xca,0x01,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s1, v255 :: v_dual_mov_b32 v6, s105 +// GFX12: v_dual_max_num_f32 v255, s1, v255 :: v_dual_mov_b32 v6, s105 ; encoding: [0x01,0xfe,0x91,0xca,0x69,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, ttmp15, v255 :: v_dual_mov_b32 v6, vcc_lo +// GFX12: v_dual_max_num_f32 v255, ttmp15, v255 :: v_dual_mov_b32 v6, vcc_lo ; encoding: 
[0x7b,0xfe,0x91,0xca,0x6a,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_hi, v255 :: v_dual_mov_b32 v6, vcc_hi +// GFX12: v_dual_max_num_f32 v255, exec_hi, v255 :: v_dual_mov_b32 v6, vcc_hi ; encoding: [0x7f,0xfe,0x91,0xca,0x6b,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_lo, v255 :: v_dual_mov_b32 v6, ttmp15 +// GFX12: v_dual_max_num_f32 v255, exec_lo, v255 :: v_dual_mov_b32 v6, ttmp15 ; encoding: [0x7e,0xfe,0x91,0xca,0x7b,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, m0, v255 :: v_dual_mov_b32 v6, m0 +// GFX12: v_dual_max_num_f32 v255, m0, v255 :: v_dual_mov_b32 v6, m0 ; encoding: [0x7d,0xfe,0x91,0xca,0x7d,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_hi, v255 :: v_dual_mov_b32 v6, exec_lo +// GFX12: v_dual_max_num_f32 v255, vcc_hi, v255 :: v_dual_mov_b32 v6, exec_lo ; encoding: [0x6b,0xfe,0x91,0xca,0x7e,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_lo, v255 :: v_dual_mov_b32 v6, exec_hi +// GFX12: v_dual_max_num_f32 v255, vcc_lo, v255 :: v_dual_mov_b32 v6, exec_hi ; encoding: [0x6a,0xfe,0x91,0xca,0x7f,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0xaf123456, v255 :: v_dual_mov_b32 v6, null +// GFX12: v_dual_max_num_f32 v255, 0xaf123456, v255 :: v_dual_mov_b32 v6, null ; encoding: [0xff,0xfe,0x91,0xca,0x7c,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, src_scc, v255 :: v_dual_mov_b32 v6, -1 +// GFX12: v_dual_max_num_f32 v255, src_scc, v255 :: v_dual_mov_b32 v6, -1 ; encoding: [0xfd,0xfe,0x91,0xca,0xc1,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction 
requires wavesize=32 + +v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_mov_b32 v6, 0.5 +// GFX12: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_mov_b32 v6, 0.5 ; encoding: [0xf0,0x06,0x90,0xca,0xf0,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, -1, v4 :: v_dual_mov_b32 v6, src_scc +// GFX12: v_dual_max_num_f32 v255, -1, v4 :: v_dual_mov_b32 v6, src_scc ; encoding: [0xc1,0x08,0x90,0xca,0xfd,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v6, null, v5 :: v_dual_mov_b32 v255, 0xaf123456 +// GFX12: v_dual_max_num_f32 v6, null, v5 :: v_dual_mov_b32 v255, 0xaf123456 ; encoding: [0x7c,0x0a,0x90,0xca,0xff,0x00,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v6, v1, v3 +// GFX12: v_dual_max_num_f32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x8e,0xca,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v6, v255, v3 +// GFX12: v_dual_max_num_f32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x8e,0xca,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v6, v2, v3 +// GFX12: v_dual_max_num_f32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x8e,0xca,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v6, v3, v3 +// GFX12: v_dual_max_num_f32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x8e,0xca,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v3, v2 :: 
v_dual_mul_dx9_zero_f32 v6, v4, v3 +// GFX12: v_dual_max_num_f32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x8e,0xca,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v6, s1, v3 +// GFX12: v_dual_max_num_f32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x8e,0xca,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v6, s105, v3 +// GFX12: v_dual_max_num_f32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x8e,0xca,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_lo, v3 +// GFX12: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x8e,0xca,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_hi, v3 +// GFX12: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x8e,0xca,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, ttmp15, v3 +// GFX12: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x8e,0xca,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v6, m0, v3 +// GFX12: v_dual_max_num_f32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x8e,0xca,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_hi, v2 
:: v_dual_mul_dx9_zero_f32 v6, exec_lo, v3 +// GFX12: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x8e,0xca,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_hi, v3 +// GFX12: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x8e,0xca,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0xaf123456, v2 :: v_dual_mul_dx9_zero_f32 v6, null, v3 +// GFX12: v_dual_max_num_f32 v255, 0xaf123456, v2 :: v_dual_mul_dx9_zero_f32 v6, null, v3 ; encoding: [0xff,0x04,0x8e,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v6, -1, v3 +// GFX12: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x8e,0xca,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v6, 0.5, v2 +// GFX12: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x8e,0xca,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v6, src_scc, v5 +// GFX12: v_dual_max_num_f32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x8e,0xca,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v6, null, v5 :: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_max_num_f32 v6, null, v5 :: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x8e,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: 
error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v4, v2 :: v_dual_mul_f32 v6, v1, v3 +// GFX12: v_dual_max_num_f32 v255, v4, v2 :: v_dual_mul_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x86,0xca,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v1, v2 :: v_dual_mul_f32 v6, v255, v3 +// GFX12: v_dual_max_num_f32 v255, v1, v2 :: v_dual_mul_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x86,0xca,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v255, v2 :: v_dual_mul_f32 v6, v2, v3 +// GFX12: v_dual_max_num_f32 v255, v255, v2 :: v_dual_mul_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x86,0xca,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v2, v2 :: v_dual_mul_f32 v6, v3, v3 +// GFX12: v_dual_max_num_f32 v255, v2, v2 :: v_dual_mul_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x86,0xca,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v3, v2 :: v_dual_mul_f32 v6, v4, v3 +// GFX12: v_dual_max_num_f32 v255, v3, v2 :: v_dual_mul_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x86,0xca,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s105, v2 :: v_dual_mul_f32 v6, s1, v3 +// GFX12: v_dual_max_num_f32 v255, s105, v2 :: v_dual_mul_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x86,0xca,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s1, v2 :: v_dual_mul_f32 v6, s105, v3 +// GFX12: v_dual_max_num_f32 v255, s1, v2 :: v_dual_mul_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x86,0xca,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_mul_f32 v6, vcc_lo, v3 +// GFX12: v_dual_max_num_f32 v255, ttmp15, v2 :: 
v_dual_mul_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x86,0xca,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_mul_f32 v6, vcc_hi, v3 +// GFX12: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_mul_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x86,0xca,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_mul_f32 v6, ttmp15, v3 +// GFX12: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_mul_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x86,0xca,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, m0, v2 :: v_dual_mul_f32 v6, m0, v3 +// GFX12: v_dual_max_num_f32 v255, m0, v2 :: v_dual_mul_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x86,0xca,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_mul_f32 v6, exec_lo, v3 +// GFX12: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_mul_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x86,0xca,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_mul_f32 v6, exec_hi, v3 +// GFX12: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_mul_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x86,0xca,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0xaf123456, v2 :: v_dual_mul_f32 v6, null, v3 +// GFX12: v_dual_max_num_f32 v255, 0xaf123456, v2 :: v_dual_mul_f32 v6, null, v3 ; encoding: [0xff,0x04,0x86,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_mul_f32 v6, -1, v3 +// GFX12: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_mul_f32 v6, -1, v3 ; encoding: 
[0xfd,0x04,0x86,0xca,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_mul_f32 v6, 0.5, v2 +// GFX12: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_mul_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x86,0xca,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, -1, v4 :: v_dual_mul_f32 v6, src_scc, v5 +// GFX12: v_dual_max_num_f32 v255, -1, v4 :: v_dual_mul_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x86,0xca,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v6, null, v5 :: v_dual_mul_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_max_num_f32 v6, null, v5 :: v_dual_mul_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x86,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v4, v2 :: v_dual_sub_f32 v6, v1, v3 +// GFX12: v_dual_max_num_f32 v255, v4, v2 :: v_dual_sub_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x8a,0xca,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v1, v2 :: v_dual_sub_f32 v6, v255, v3 +// GFX12: v_dual_max_num_f32 v255, v1, v2 :: v_dual_sub_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x8a,0xca,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v255, v2 :: v_dual_sub_f32 v6, v2, v3 +// GFX12: v_dual_max_num_f32 v255, v255, v2 :: v_dual_sub_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x8a,0xca,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v2, v2 :: v_dual_sub_f32 v6, v3, v3 +// GFX12: v_dual_max_num_f32 v255, v2, v2 :: v_dual_sub_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x8a,0xca,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_max_num_f32 v255, v3, v2 :: v_dual_sub_f32 v6, v4, v3 +// GFX12: v_dual_max_num_f32 v255, v3, v2 :: v_dual_sub_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x8a,0xca,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s105, v2 :: v_dual_sub_f32 v6, s1, v3 +// GFX12: v_dual_max_num_f32 v255, s105, v2 :: v_dual_sub_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x8a,0xca,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s1, v2 :: v_dual_sub_f32 v6, s105, v3 +// GFX12: v_dual_max_num_f32 v255, s1, v2 :: v_dual_sub_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x8a,0xca,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_sub_f32 v6, vcc_lo, v3 +// GFX12: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_sub_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x8a,0xca,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_sub_f32 v6, vcc_hi, v3 +// GFX12: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_sub_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x8a,0xca,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_sub_f32 v6, ttmp15, v3 +// GFX12: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_sub_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x8a,0xca,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, m0, v2 :: v_dual_sub_f32 v6, m0, v3 +// GFX12: v_dual_max_num_f32 v255, m0, v2 :: v_dual_sub_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x8a,0xca,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_sub_f32 v6, exec_lo, v3 +// GFX12: v_dual_max_num_f32 v255, vcc_hi, v2 :: 
v_dual_sub_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x8a,0xca,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_sub_f32 v6, exec_hi, v3 +// GFX12: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_sub_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x8a,0xca,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0xaf123456, v2 :: v_dual_sub_f32 v6, null, v3 +// GFX12: v_dual_max_num_f32 v255, 0xaf123456, v2 :: v_dual_sub_f32 v6, null, v3 ; encoding: [0xff,0x04,0x8a,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_sub_f32 v6, -1, v3 +// GFX12: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_sub_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x8a,0xca,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_sub_f32 v6, 0.5, v2 +// GFX12: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_sub_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x8a,0xca,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, -1, v4 :: v_dual_sub_f32 v6, src_scc, v5 +// GFX12: v_dual_max_num_f32 v255, -1, v4 :: v_dual_sub_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x8a,0xca,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v6, null, v5 :: v_dual_sub_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_max_num_f32 v6, null, v5 :: v_dual_sub_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x8a,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v4, v2 :: v_dual_subrev_f32 v6, v1, v3 +// GFX12: v_dual_max_num_f32 v255, v4, v2 :: v_dual_subrev_f32 v6, v1, v3 ; encoding: 
[0x04,0x05,0x8c,0xca,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v1, v2 :: v_dual_subrev_f32 v6, v255, v3 +// GFX12: v_dual_max_num_f32 v255, v1, v2 :: v_dual_subrev_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x8c,0xca,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v255, v2 :: v_dual_subrev_f32 v6, v2, v3 +// GFX12: v_dual_max_num_f32 v255, v255, v2 :: v_dual_subrev_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x8c,0xca,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v2, v2 :: v_dual_subrev_f32 v6, v3, v3 +// GFX12: v_dual_max_num_f32 v255, v2, v2 :: v_dual_subrev_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x8c,0xca,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v3, v2 :: v_dual_subrev_f32 v6, v4, v3 +// GFX12: v_dual_max_num_f32 v255, v3, v2 :: v_dual_subrev_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x8c,0xca,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s105, v2 :: v_dual_subrev_f32 v6, s1, v3 +// GFX12: v_dual_max_num_f32 v255, s105, v2 :: v_dual_subrev_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x8c,0xca,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s1, v2 :: v_dual_subrev_f32 v6, s105, v3 +// GFX12: v_dual_max_num_f32 v255, s1, v2 :: v_dual_subrev_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x8c,0xca,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_subrev_f32 v6, vcc_lo, v3 +// GFX12: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_subrev_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x8c,0xca,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_subrev_f32 v6, vcc_hi, v3 +// GFX12: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_subrev_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x8c,0xca,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_subrev_f32 v6, ttmp15, v3 +// GFX12: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_subrev_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x8c,0xca,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, m0, v2 :: v_dual_subrev_f32 v6, m0, v3 +// GFX12: v_dual_max_num_f32 v255, m0, v2 :: v_dual_subrev_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x8c,0xca,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v6, exec_lo, v3 +// GFX12: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x8c,0xca,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v6, exec_hi, v3 +// GFX12: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x8c,0xca,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0xaf123456, v2 :: v_dual_subrev_f32 v6, null, v3 +// GFX12: v_dual_max_num_f32 v255, 0xaf123456, v2 :: v_dual_subrev_f32 v6, null, v3 ; encoding: [0xff,0x04,0x8c,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_subrev_f32 v6, -1, v3 +// GFX12: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_subrev_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x8c,0xca,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 
v255, 0.5, v3 :: v_dual_subrev_f32 v6, 0.5, v2 +// GFX12: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_subrev_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x8c,0xca,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, -1, v4 :: v_dual_subrev_f32 v6, src_scc, v5 +// GFX12: v_dual_max_num_f32 v255, -1, v4 :: v_dual_subrev_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x8c,0xca,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v6, null, v5 :: v_dual_subrev_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_max_num_f32 v6, null, v5 :: v_dual_subrev_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x8c,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v4, v2 :: v_dual_add_f32 v6, v1, v3 +// GFX12: v_dual_min_num_f32 v255, v4, v2 :: v_dual_add_f32 v6, v1, v3 ; encoding: [0x04,0x05,0xc8,0xca,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v1, v2 :: v_dual_add_f32 v6, v255, v3 +// GFX12: v_dual_min_num_f32 v255, v1, v2 :: v_dual_add_f32 v6, v255, v3 ; encoding: [0x01,0x05,0xc8,0xca,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v255, v2 :: v_dual_add_f32 v6, v2, v3 +// GFX12: v_dual_min_num_f32 v255, v255, v2 :: v_dual_add_f32 v6, v2, v3 ; encoding: [0xff,0x05,0xc8,0xca,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v2, v2 :: v_dual_add_f32 v6, v3, v3 +// GFX12: v_dual_min_num_f32 v255, v2, v2 :: v_dual_add_f32 v6, v3, v3 ; encoding: [0x02,0x05,0xc8,0xca,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v3, v2 :: v_dual_add_f32 v6, v4, v3 +// GFX12: v_dual_min_num_f32 v255, v3, v2 :: v_dual_add_f32 v6, 
v4, v3 ; encoding: [0x03,0x05,0xc8,0xca,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s105, v2 :: v_dual_add_f32 v6, s1, v3 +// GFX12: v_dual_min_num_f32 v255, s105, v2 :: v_dual_add_f32 v6, s1, v3 ; encoding: [0x69,0x04,0xc8,0xca,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s1, v2 :: v_dual_add_f32 v6, s105, v3 +// GFX12: v_dual_min_num_f32 v255, s1, v2 :: v_dual_add_f32 v6, s105, v3 ; encoding: [0x01,0x04,0xc8,0xca,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_add_f32 v6, vcc_lo, v3 +// GFX12: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_add_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xc8,0xca,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_add_f32 v6, vcc_hi, v3 +// GFX12: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_add_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xc8,0xca,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_add_f32 v6, ttmp15, v3 +// GFX12: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_add_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xc8,0xca,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, m0, v2 :: v_dual_add_f32 v6, m0, v3 +// GFX12: v_dual_min_num_f32 v255, m0, v2 :: v_dual_add_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0xc8,0xca,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_add_f32 v6, exec_lo, v3 +// GFX12: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_add_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xc8,0xca,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction 
requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_add_f32 v6, exec_hi, v3 +// GFX12: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_add_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xc8,0xca,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_add_f32 v6, null, v3 +// GFX12: v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_add_f32 v6, null, v3 ; encoding: [0xff,0x04,0xc8,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_add_f32 v6, -1, v3 +// GFX12: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_add_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0xc8,0xca,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_add_f32 v6, 0.5, v2 +// GFX12: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_add_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xc8,0xca,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, -1, v4 :: v_dual_add_f32 v6, src_scc, v5 +// GFX12: v_dual_min_num_f32 v255, -1, v4 :: v_dual_add_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xc8,0xca,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v6, null, v5 :: v_dual_add_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_min_num_f32 v6, null, v5 :: v_dual_add_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xc8,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v4, v2 :: v_dual_add_nc_u32 v6, v1, v3 +// GFX12: v_dual_min_num_f32 v255, v4, v2 :: v_dual_add_nc_u32 v6, v1, v3 ; encoding: [0x04,0x05,0xe0,0xca,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v1, v2 :: 
v_dual_add_nc_u32 v6, v255, v3 +// GFX12: v_dual_min_num_f32 v255, v1, v2 :: v_dual_add_nc_u32 v6, v255, v3 ; encoding: [0x01,0x05,0xe0,0xca,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v255, v2 :: v_dual_add_nc_u32 v6, v2, v3 +// GFX12: v_dual_min_num_f32 v255, v255, v2 :: v_dual_add_nc_u32 v6, v2, v3 ; encoding: [0xff,0x05,0xe0,0xca,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v2, v2 :: v_dual_add_nc_u32 v6, v3, v3 +// GFX12: v_dual_min_num_f32 v255, v2, v2 :: v_dual_add_nc_u32 v6, v3, v3 ; encoding: [0x02,0x05,0xe0,0xca,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v3, v2 :: v_dual_add_nc_u32 v6, v4, v3 +// GFX12: v_dual_min_num_f32 v255, v3, v2 :: v_dual_add_nc_u32 v6, v4, v3 ; encoding: [0x03,0x05,0xe0,0xca,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s105, v2 :: v_dual_add_nc_u32 v6, s1, v3 +// GFX12: v_dual_min_num_f32 v255, s105, v2 :: v_dual_add_nc_u32 v6, s1, v3 ; encoding: [0x69,0x04,0xe0,0xca,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s1, v2 :: v_dual_add_nc_u32 v6, s105, v3 +// GFX12: v_dual_min_num_f32 v255, s1, v2 :: v_dual_add_nc_u32 v6, s105, v3 ; encoding: [0x01,0x04,0xe0,0xca,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v6, vcc_lo, v3 +// GFX12: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xe0,0xca,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v6, vcc_hi, v3 +// GFX12: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v6, 
vcc_hi, v3 ; encoding: [0x7f,0x04,0xe0,0xca,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v6, ttmp15, v3 +// GFX12: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xe0,0xca,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, m0, v2 :: v_dual_add_nc_u32 v6, m0, v3 +// GFX12: v_dual_min_num_f32 v255, m0, v2 :: v_dual_add_nc_u32 v6, m0, v3 ; encoding: [0x7d,0x04,0xe0,0xca,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v6, exec_lo, v3 +// GFX12: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xe0,0xca,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v6, exec_hi, v3 +// GFX12: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xe0,0xca,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_add_nc_u32 v6, null, v3 +// GFX12: v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_add_nc_u32 v6, null, v3 ; encoding: [0xff,0x04,0xe0,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_add_nc_u32 v6, -1, v3 +// GFX12: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_add_nc_u32 v6, -1, v3 ; encoding: [0xfd,0x04,0xe0,0xca,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_add_nc_u32 v6, 0.5, v2 +// GFX12: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_add_nc_u32 v6, 0.5, v2 ; encoding: 
[0xf0,0x06,0xe0,0xca,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, -1, v4 :: v_dual_add_nc_u32 v6, src_scc, v5 +// GFX12: v_dual_min_num_f32 v255, -1, v4 :: v_dual_add_nc_u32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xe0,0xca,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v6, null, v5 :: v_dual_add_nc_u32 v255, 0xaf123456, v4 +// GFX12: v_dual_min_num_f32 v6, null, v5 :: v_dual_add_nc_u32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xe0,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v4, v2 :: v_dual_cndmask_b32 v6, v1, v3 +// GFX12: v_dual_min_num_f32 v255, v4, v2 :: v_dual_cndmask_b32 v6, v1, v3 ; encoding: [0x04,0x05,0xd2,0xca,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v1, v2 :: v_dual_cndmask_b32 v6, v255, v3 +// GFX12: v_dual_min_num_f32 v255, v1, v2 :: v_dual_cndmask_b32 v6, v255, v3 ; encoding: [0x01,0x05,0xd2,0xca,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v255, v2 :: v_dual_cndmask_b32 v6, v2, v3 +// GFX12: v_dual_min_num_f32 v255, v255, v2 :: v_dual_cndmask_b32 v6, v2, v3 ; encoding: [0xff,0x05,0xd2,0xca,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v2, v2 :: v_dual_cndmask_b32 v6, v3, v3 +// GFX12: v_dual_min_num_f32 v255, v2, v2 :: v_dual_cndmask_b32 v6, v3, v3 ; encoding: [0x02,0x05,0xd2,0xca,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v3, v2 :: v_dual_cndmask_b32 v6, v4, v3 +// GFX12: v_dual_min_num_f32 v255, v3, v2 :: v_dual_cndmask_b32 v6, v4, v3 ; encoding: [0x03,0x05,0xd2,0xca,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: 
error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s105, v2 :: v_dual_cndmask_b32 v6, s105, v3 +// GFX12: v_dual_min_num_f32 v255, s105, v2 :: v_dual_cndmask_b32 v6, s105, v3 ; encoding: [0x69,0x04,0xd2,0xca,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s1, v3 +// GFX12: v_dual_min_num_f32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s1, v3 ; encoding: [0x01,0x04,0xd2,0xca,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v6, ttmp15, v3 +// GFX12: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0xd2,0xca,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v6, exec_hi, v3 +// GFX12: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0xd2,0xca,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v6, exec_lo, v3 +// GFX12: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0xd2,0xca,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, m0, v2 :: v_dual_cndmask_b32 v6, m0, v3 +// GFX12: v_dual_min_num_f32 v255, m0, v2 :: v_dual_cndmask_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0xd2,0xca,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v6, vcc_hi, v3 +// GFX12: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0xd2,0xca,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v6, vcc_lo, v3 +// GFX12: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0xd2,0xca,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_cndmask_b32 v6, null, v3 +// GFX12: v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_cndmask_b32 v6, null, v3 ; encoding: [0xff,0x04,0xd2,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_cndmask_b32 v6, -1, v3 +// GFX12: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_cndmask_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0xd2,0xca,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_cndmask_b32 v6, 0.5, v2 +// GFX12: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_cndmask_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xd2,0xca,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, -1, v4 :: v_dual_cndmask_b32 v6, src_scc, v5 +// GFX12: v_dual_min_num_f32 v255, -1, v4 :: v_dual_cndmask_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xd2,0xca,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v6, null, v5 :: v_dual_cndmask_b32 v255, 0xaf123456, v4 +// GFX12: v_dual_min_num_f32 v6, null, v5 :: v_dual_cndmask_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xd2,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v4, v2 :: v_dual_fmaak_f32 v6, v1, v3, 0xaf123456 +// GFX12: v_dual_min_num_f32 v255, v4, v2 :: v_dual_fmaak_f32 v6, v1, v3, 0xaf123456 ; encoding: [0x04,0x05,0xc2,0xca,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v1, v2 :: v_dual_fmaak_f32 v6, v255, v3, 0xaf123456 +// GFX12: v_dual_min_num_f32 v255, v1, v2 :: v_dual_fmaak_f32 v6, v255, v3, 0xaf123456 ; encoding: [0x01,0x05,0xc2,0xca,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v255, v2 :: v_dual_fmaak_f32 v6, v2, v3, 0xaf123456 +// GFX12: v_dual_min_num_f32 v255, v255, v2 :: v_dual_fmaak_f32 v6, v2, v3, 0xaf123456 ; encoding: [0xff,0x05,0xc2,0xca,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v2, v2 :: v_dual_fmaak_f32 v6, v3, v3, 0xaf123456 +// GFX12: v_dual_min_num_f32 v255, v2, v2 :: v_dual_fmaak_f32 v6, v3, v3, 0xaf123456 ; encoding: [0x02,0x05,0xc2,0xca,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v3, v2 :: v_dual_fmaak_f32 v6, v4, v3, 0xaf123456 +// GFX12: v_dual_min_num_f32 v255, v3, v2 :: v_dual_fmaak_f32 v6, v4, v3, 0xaf123456 ; encoding: [0x03,0x05,0xc2,0xca,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s105, v2 :: v_dual_fmaak_f32 v6, s105, v3, 0xaf123456 +// GFX12: v_dual_min_num_f32 v255, s105, v2 :: v_dual_fmaak_f32 v6, s105, v3, 0xaf123456 ; encoding: [0x69,0x04,0xc2,0xca,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s1, v2 :: v_dual_fmaak_f32 v6, s1, v3, 0xaf123456 +// GFX12: v_dual_min_num_f32 v255, s1, v2 :: v_dual_fmaak_f32 v6, s1, v3, 0xaf123456 ; encoding: [0x01,0x04,0xc2,0xca,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_fmaak_f32 v6, ttmp15, v3, 0xaf123456 +// GFX12: 
v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_fmaak_f32 v6, ttmp15, v3, 0xaf123456 ; encoding: [0x7b,0x04,0xc2,0xca,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_fmaak_f32 v6, exec_hi, v3, 0xaf123456 +// GFX12: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_fmaak_f32 v6, exec_hi, v3, 0xaf123456 ; encoding: [0x7f,0x04,0xc2,0xca,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_fmaak_f32 v6, exec_lo, v3, 0xaf123456 +// GFX12: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_fmaak_f32 v6, exec_lo, v3, 0xaf123456 ; encoding: [0x7e,0x04,0xc2,0xca,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, m0, v2 :: v_dual_fmaak_f32 v6, m0, v3, 0xaf123456 +// GFX12: v_dual_min_num_f32 v255, m0, v2 :: v_dual_fmaak_f32 v6, m0, v3, 0xaf123456 ; encoding: [0x7d,0x04,0xc2,0xca,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_fmaak_f32 v6, vcc_hi, v3, 0xaf123456 +// GFX12: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_fmaak_f32 v6, vcc_hi, v3, 0xaf123456 ; encoding: [0x6b,0x04,0xc2,0xca,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_fmaak_f32 v6, vcc_lo, v3, 0xaf123456 +// GFX12: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_fmaak_f32 v6, vcc_lo, v3, 0xaf123456 ; encoding: [0x6a,0x04,0xc2,0xca,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_fmaak_f32 v6, null, v3, 0xaf123456 +// GFX12: v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_fmaak_f32 v6, 
null, v3, 0xaf123456 ; encoding: [0xff,0x04,0xc2,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_fmaak_f32 v6, -1, v3, 0xaf123456 +// GFX12: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_fmaak_f32 v6, -1, v3, 0xaf123456 ; encoding: [0xfd,0x04,0xc2,0xca,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_fmaak_f32 v6, 0.5, v2, 0xaf123456 +// GFX12: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_fmaak_f32 v6, 0.5, v2, 0xaf123456 ; encoding: [0xf0,0x06,0xc2,0xca,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, -1, v4 :: v_dual_fmaak_f32 v6, src_scc, v5, 0xaf123456 +// GFX12: v_dual_min_num_f32 v255, -1, v4 :: v_dual_fmaak_f32 v6, src_scc, v5, 0xaf123456 ; encoding: [0xc1,0x08,0xc2,0xca,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v6, null, v5 :: v_dual_fmaak_f32 v255, 0xaf123456, v4, 0xaf123456 +// GFX12: v_dual_min_num_f32 v6, null, v5 :: v_dual_fmaak_f32 v255, 0xaf123456, v4, 0xaf123456 ; encoding: [0x7c,0x0a,0xc2,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v4, v2 :: v_dual_fmac_f32 v6, v1, v3 +// GFX12: v_dual_min_num_f32 v255, v4, v2 :: v_dual_fmac_f32 v6, v1, v3 ; encoding: [0x04,0x05,0xc0,0xca,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v1, v2 :: v_dual_fmac_f32 v6, v255, v3 +// GFX12: v_dual_min_num_f32 v255, v1, v2 :: v_dual_fmac_f32 v6, v255, v3 ; encoding: [0x01,0x05,0xc0,0xca,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v255, v2 
:: v_dual_fmac_f32 v6, v2, v3 +// GFX12: v_dual_min_num_f32 v255, v255, v2 :: v_dual_fmac_f32 v6, v2, v3 ; encoding: [0xff,0x05,0xc0,0xca,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v2, v2 :: v_dual_fmac_f32 v6, v3, v3 +// GFX12: v_dual_min_num_f32 v255, v2, v2 :: v_dual_fmac_f32 v6, v3, v3 ; encoding: [0x02,0x05,0xc0,0xca,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v3, v2 :: v_dual_fmac_f32 v6, v4, v3 +// GFX12: v_dual_min_num_f32 v255, v3, v2 :: v_dual_fmac_f32 v6, v4, v3 ; encoding: [0x03,0x05,0xc0,0xca,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s105, v2 :: v_dual_fmac_f32 v6, s1, v3 +// GFX12: v_dual_min_num_f32 v255, s105, v2 :: v_dual_fmac_f32 v6, s1, v3 ; encoding: [0x69,0x04,0xc0,0xca,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s1, v2 :: v_dual_fmac_f32 v6, s105, v3 +// GFX12: v_dual_min_num_f32 v255, s1, v2 :: v_dual_fmac_f32 v6, s105, v3 ; encoding: [0x01,0x04,0xc0,0xca,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_fmac_f32 v6, vcc_lo, v3 +// GFX12: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_fmac_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xc0,0xca,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_fmac_f32 v6, vcc_hi, v3 +// GFX12: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_fmac_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xc0,0xca,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_fmac_f32 v6, ttmp15, v3 +// GFX12: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_fmac_f32 v6, ttmp15, v3 ; 
encoding: [0x7e,0x04,0xc0,0xca,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, m0, v2 :: v_dual_fmac_f32 v6, m0, v3 +// GFX12: v_dual_min_num_f32 v255, m0, v2 :: v_dual_fmac_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0xc0,0xca,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v6, exec_lo, v3 +// GFX12: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xc0,0xca,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v6, exec_hi, v3 +// GFX12: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xc0,0xca,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_fmac_f32 v6, null, v3 +// GFX12: v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_fmac_f32 v6, null, v3 ; encoding: [0xff,0x04,0xc0,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_fmac_f32 v6, -1, v3 +// GFX12: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_fmac_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0xc0,0xca,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_fmac_f32 v6, 0.5, v2 +// GFX12: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_fmac_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xc0,0xca,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, -1, v4 :: v_dual_fmac_f32 v6, src_scc, v5 +// GFX12: v_dual_min_num_f32 v255, -1, v4 :: v_dual_fmac_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xc0,0xca,0xfd,0x0a,0x06,0xff] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v6, null, v5 :: v_dual_fmac_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_min_num_f32 v6, null, v5 :: v_dual_fmac_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xc0,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v4, v255 :: v_dual_fmamk_f32 v6, v1, 0xaf123456, v255 +// GFX12: v_dual_min_num_f32 v255, v4, v255 :: v_dual_fmamk_f32 v6, v1, 0xaf123456, v255 ; encoding: [0x04,0xff,0xc5,0xca,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v1, v255 :: v_dual_fmamk_f32 v6, v255, 0xaf123456, v255 +// GFX12: v_dual_min_num_f32 v255, v1, v255 :: v_dual_fmamk_f32 v6, v255, 0xaf123456, v255 ; encoding: [0x01,0xff,0xc5,0xca,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v255, v255 :: v_dual_fmamk_f32 v6, v2, 0xaf123456, v255 +// GFX12: v_dual_min_num_f32 v255, v255, v255 :: v_dual_fmamk_f32 v6, v2, 0xaf123456, v255 ; encoding: [0xff,0xff,0xc5,0xca,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v2, v255 :: v_dual_fmamk_f32 v6, v3, 0xaf123456, v255 +// GFX12: v_dual_min_num_f32 v255, v2, v255 :: v_dual_fmamk_f32 v6, v3, 0xaf123456, v255 ; encoding: [0x02,0xff,0xc5,0xca,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v3, v255 :: v_dual_fmamk_f32 v6, v4, 0xaf123456, v255 +// GFX12: v_dual_min_num_f32 v255, v3, v255 :: v_dual_fmamk_f32 v6, v4, 0xaf123456, v255 ; encoding: [0x03,0xff,0xc5,0xca,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s105, v255 :: v_dual_fmamk_f32 
v6, s105, 0xaf123456, v255 +// GFX12: v_dual_min_num_f32 v255, s105, v255 :: v_dual_fmamk_f32 v6, s105, 0xaf123456, v255 ; encoding: [0x69,0xfe,0xc5,0xca,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s1, v255 :: v_dual_fmamk_f32 v6, s1, 0xaf123456, v255 +// GFX12: v_dual_min_num_f32 v255, s1, v255 :: v_dual_fmamk_f32 v6, s1, 0xaf123456, v255 ; encoding: [0x01,0xfe,0xc5,0xca,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, ttmp15, v255 :: v_dual_fmamk_f32 v6, ttmp15, 0xaf123456, v255 +// GFX12: v_dual_min_num_f32 v255, ttmp15, v255 :: v_dual_fmamk_f32 v6, ttmp15, 0xaf123456, v255 ; encoding: [0x7b,0xfe,0xc5,0xca,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_hi, v255 :: v_dual_fmamk_f32 v6, exec_hi, 0xaf123456, v255 +// GFX12: v_dual_min_num_f32 v255, exec_hi, v255 :: v_dual_fmamk_f32 v6, exec_hi, 0xaf123456, v255 ; encoding: [0x7f,0xfe,0xc5,0xca,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_lo, v255 :: v_dual_fmamk_f32 v6, exec_lo, 0xaf123456, v255 +// GFX12: v_dual_min_num_f32 v255, exec_lo, v255 :: v_dual_fmamk_f32 v6, exec_lo, 0xaf123456, v255 ; encoding: [0x7e,0xfe,0xc5,0xca,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, m0, v255 :: v_dual_fmamk_f32 v6, m0, 0xaf123456, v255 +// GFX12: v_dual_min_num_f32 v255, m0, v255 :: v_dual_fmamk_f32 v6, m0, 0xaf123456, v255 ; encoding: [0x7d,0xfe,0xc5,0xca,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_hi, v255 :: v_dual_fmamk_f32 v6, vcc_hi, 0xaf123456, v255 +// GFX12: 
v_dual_min_num_f32 v255, vcc_hi, v255 :: v_dual_fmamk_f32 v6, vcc_hi, 0xaf123456, v255 ; encoding: [0x6b,0xfe,0xc5,0xca,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_lo, v255 :: v_dual_fmamk_f32 v6, vcc_lo, 0xaf123456, v255 +// GFX12: v_dual_min_num_f32 v255, vcc_lo, v255 :: v_dual_fmamk_f32 v6, vcc_lo, 0xaf123456, v255 ; encoding: [0x6a,0xfe,0xc5,0xca,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, null, 0xaf123456, v255 +// GFX12: v_dual_min_num_f32 v255, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, null, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xc5,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, src_scc, v255 :: v_dual_fmamk_f32 v6, -1, 0xaf123456, v255 +// GFX12: v_dual_min_num_f32 v255, src_scc, v255 :: v_dual_fmamk_f32 v6, -1, 0xaf123456, v255 ; encoding: [0xfd,0xfe,0xc5,0xca,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_fmamk_f32 v6, 0.5, 0xaf123456, v255 +// GFX12: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_fmamk_f32 v6, 0.5, 0xaf123456, v255 ; encoding: [0xf0,0x06,0xc4,0xca,0xf0,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, -1, v4 :: v_dual_fmamk_f32 v6, src_scc, 0xaf123456, v255 +// GFX12: v_dual_min_num_f32 v255, -1, v4 :: v_dual_fmamk_f32 v6, src_scc, 0xaf123456, v255 ; encoding: [0xc1,0x08,0xc4,0xca,0xfd,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v6, null, v5 :: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v4 +// GFX12: v_dual_min_num_f32 v6, null, v5 :: 
v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xc4,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v4, v2 :: v_dual_lshlrev_b32 v6, v1, v3 +// GFX12: v_dual_min_num_f32 v255, v4, v2 :: v_dual_lshlrev_b32 v6, v1, v3 ; encoding: [0x04,0x05,0xe2,0xca,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v1, v2 :: v_dual_lshlrev_b32 v6, v255, v3 +// GFX12: v_dual_min_num_f32 v255, v1, v2 :: v_dual_lshlrev_b32 v6, v255, v3 ; encoding: [0x01,0x05,0xe2,0xca,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v255, v2 :: v_dual_lshlrev_b32 v6, v2, v3 +// GFX12: v_dual_min_num_f32 v255, v255, v2 :: v_dual_lshlrev_b32 v6, v2, v3 ; encoding: [0xff,0x05,0xe2,0xca,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v2, v2 :: v_dual_lshlrev_b32 v6, v3, v3 +// GFX12: v_dual_min_num_f32 v255, v2, v2 :: v_dual_lshlrev_b32 v6, v3, v3 ; encoding: [0x02,0x05,0xe2,0xca,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v3, v2 :: v_dual_lshlrev_b32 v6, v4, v3 +// GFX12: v_dual_min_num_f32 v255, v3, v2 :: v_dual_lshlrev_b32 v6, v4, v3 ; encoding: [0x03,0x05,0xe2,0xca,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s105, v2 :: v_dual_lshlrev_b32 v6, s1, v3 +// GFX12: v_dual_min_num_f32 v255, s105, v2 :: v_dual_lshlrev_b32 v6, s1, v3 ; encoding: [0x69,0x04,0xe2,0xca,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s1, v2 :: v_dual_lshlrev_b32 v6, s105, v3 +// GFX12: v_dual_min_num_f32 v255, s1, v2 :: v_dual_lshlrev_b32 v6, s105, v3 ; encoding: 
[0x01,0x04,0xe2,0xca,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v6, vcc_lo, v3 +// GFX12: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xe2,0xca,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v6, vcc_hi, v3 +// GFX12: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xe2,0xca,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v6, ttmp15, v3 +// GFX12: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xe2,0xca,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, m0, v2 :: v_dual_lshlrev_b32 v6, m0, v3 +// GFX12: v_dual_min_num_f32 v255, m0, v2 :: v_dual_lshlrev_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0xe2,0xca,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v6, exec_lo, v3 +// GFX12: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xe2,0xca,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v6, exec_hi, v3 +// GFX12: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xe2,0xca,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_lshlrev_b32 v6, null, v3 +// GFX12: v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_lshlrev_b32 v6, null, v3 ; encoding: 
[0xff,0x04,0xe2,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v6, -1, v3 +// GFX12: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0xe2,0xca,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v6, 0.5, v2 +// GFX12: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xe2,0xca,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, -1, v4 :: v_dual_lshlrev_b32 v6, src_scc, v5 +// GFX12: v_dual_min_num_f32 v255, -1, v4 :: v_dual_lshlrev_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xe2,0xca,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v6, null, v5 :: v_dual_lshlrev_b32 v255, 0xaf123456, v4 +// GFX12: v_dual_min_num_f32 v6, null, v5 :: v_dual_lshlrev_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xe2,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v4, v2 :: v_dual_max_num_f32 v6, v1, v3 +// GFX12: v_dual_min_num_f32 v255, v4, v2 :: v_dual_max_num_f32 v6, v1, v3 ; encoding: [0x04,0x05,0xd4,0xca,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v1, v2 :: v_dual_max_num_f32 v6, v255, v3 +// GFX12: v_dual_min_num_f32 v255, v1, v2 :: v_dual_max_num_f32 v6, v255, v3 ; encoding: [0x01,0x05,0xd4,0xca,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v255, v2 :: v_dual_max_num_f32 v6, v2, v3 +// GFX12: v_dual_min_num_f32 v255, v255, v2 :: v_dual_max_num_f32 v6, v2, v3 ; encoding: 
[0xff,0x05,0xd4,0xca,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v2, v2 :: v_dual_max_num_f32 v6, v3, v3 +// GFX12: v_dual_min_num_f32 v255, v2, v2 :: v_dual_max_num_f32 v6, v3, v3 ; encoding: [0x02,0x05,0xd4,0xca,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v3, v2 :: v_dual_max_num_f32 v6, v4, v3 +// GFX12: v_dual_min_num_f32 v255, v3, v2 :: v_dual_max_num_f32 v6, v4, v3 ; encoding: [0x03,0x05,0xd4,0xca,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s105, v2 :: v_dual_max_num_f32 v6, s1, v3 +// GFX12: v_dual_min_num_f32 v255, s105, v2 :: v_dual_max_num_f32 v6, s1, v3 ; encoding: [0x69,0x04,0xd4,0xca,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s1, v2 :: v_dual_max_num_f32 v6, s105, v3 +// GFX12: v_dual_min_num_f32 v255, s1, v2 :: v_dual_max_num_f32 v6, s105, v3 ; encoding: [0x01,0x04,0xd4,0xca,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_max_num_f32 v6, vcc_lo, v3 +// GFX12: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_max_num_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xd4,0xca,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_max_num_f32 v6, vcc_hi, v3 +// GFX12: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_max_num_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xd4,0xca,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_max_num_f32 v6, ttmp15, v3 +// GFX12: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_max_num_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xd4,0xca,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: 
error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, m0, v2 :: v_dual_max_num_f32 v6, m0, v3 +// GFX12: v_dual_min_num_f32 v255, m0, v2 :: v_dual_max_num_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0xd4,0xca,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v6, exec_lo, v3 +// GFX12: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xd4,0xca,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v6, exec_hi, v3 +// GFX12: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xd4,0xca,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_max_num_f32 v6, null, v3 +// GFX12: v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_max_num_f32 v6, null, v3 ; encoding: [0xff,0x04,0xd4,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_max_num_f32 v6, -1, v3 +// GFX12: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_max_num_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0xd4,0xca,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_max_num_f32 v6, 0.5, v2 +// GFX12: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_max_num_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xd4,0xca,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, -1, v4 :: v_dual_max_num_f32 v6, src_scc, v5 +// GFX12: v_dual_min_num_f32 v255, -1, v4 :: v_dual_max_num_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xd4,0xca,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_min_num_f32 v6, null, v5 :: v_dual_max_num_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_min_num_f32 v6, null, v5 :: v_dual_max_num_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xd4,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v4, v2 :: v_dual_min_num_f32 v6, v1, v3 +// GFX12: v_dual_min_num_f32 v255, v4, v2 :: v_dual_min_num_f32 v6, v1, v3 ; encoding: [0x04,0x05,0xd6,0xca,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v1, v2 :: v_dual_min_num_f32 v6, v255, v3 +// GFX12: v_dual_min_num_f32 v255, v1, v2 :: v_dual_min_num_f32 v6, v255, v3 ; encoding: [0x01,0x05,0xd6,0xca,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v255, v2 :: v_dual_min_num_f32 v6, v2, v3 +// GFX12: v_dual_min_num_f32 v255, v255, v2 :: v_dual_min_num_f32 v6, v2, v3 ; encoding: [0xff,0x05,0xd6,0xca,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v2, v2 :: v_dual_min_num_f32 v6, v3, v3 +// GFX12: v_dual_min_num_f32 v255, v2, v2 :: v_dual_min_num_f32 v6, v3, v3 ; encoding: [0x02,0x05,0xd6,0xca,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v3, v2 :: v_dual_min_num_f32 v6, v4, v3 +// GFX12: v_dual_min_num_f32 v255, v3, v2 :: v_dual_min_num_f32 v6, v4, v3 ; encoding: [0x03,0x05,0xd6,0xca,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s105, v2 :: v_dual_min_num_f32 v6, s1, v3 +// GFX12: v_dual_min_num_f32 v255, s105, v2 :: v_dual_min_num_f32 v6, s1, v3 ; encoding: [0x69,0x04,0xd6,0xca,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s1, v2 :: v_dual_min_num_f32 v6, s105, 
v3 +// GFX12: v_dual_min_num_f32 v255, s1, v2 :: v_dual_min_num_f32 v6, s105, v3 ; encoding: [0x01,0x04,0xd6,0xca,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_min_num_f32 v6, vcc_lo, v3 +// GFX12: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_min_num_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xd6,0xca,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_min_num_f32 v6, vcc_hi, v3 +// GFX12: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_min_num_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xd6,0xca,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_min_num_f32 v6, ttmp15, v3 +// GFX12: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_min_num_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xd6,0xca,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, m0, v2 :: v_dual_min_num_f32 v6, m0, v3 +// GFX12: v_dual_min_num_f32 v255, m0, v2 :: v_dual_min_num_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0xd6,0xca,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v6, exec_lo, v3 +// GFX12: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xd6,0xca,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v6, exec_hi, v3 +// GFX12: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xd6,0xca,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_min_num_f32 v6, null, v3 +// GFX12: 
v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_min_num_f32 v6, null, v3 ; encoding: [0xff,0x04,0xd6,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_min_num_f32 v6, -1, v3 +// GFX12: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_min_num_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0xd6,0xca,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_min_num_f32 v6, 0.5, v2 +// GFX12: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_min_num_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xd6,0xca,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, -1, v4 :: v_dual_min_num_f32 v6, src_scc, v5 +// GFX12: v_dual_min_num_f32 v255, -1, v4 :: v_dual_min_num_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xd6,0xca,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v6, null, v5 :: v_dual_min_num_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_min_num_f32 v6, null, v5 :: v_dual_min_num_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xd6,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v4, v255 :: v_dual_mov_b32 v6, v1 +// GFX12: v_dual_min_num_f32 v255, v4, v255 :: v_dual_mov_b32 v6, v1 ; encoding: [0x04,0xff,0xd1,0xca,0x01,0x01,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v1, v255 :: v_dual_mov_b32 v6, v255 +// GFX12: v_dual_min_num_f32 v255, v1, v255 :: v_dual_mov_b32 v6, v255 ; encoding: [0x01,0xff,0xd1,0xca,0xff,0x01,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v255, v255 :: v_dual_mov_b32 v6, v2 +// GFX12: v_dual_min_num_f32 v255, v255, v255 :: v_dual_mov_b32 v6, v2 
; encoding: [0xff,0xff,0xd1,0xca,0x02,0x01,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v2, v255 :: v_dual_mov_b32 v6, v3 +// GFX12: v_dual_min_num_f32 v255, v2, v255 :: v_dual_mov_b32 v6, v3 ; encoding: [0x02,0xff,0xd1,0xca,0x03,0x01,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v3, v255 :: v_dual_mov_b32 v6, v4 +// GFX12: v_dual_min_num_f32 v255, v3, v255 :: v_dual_mov_b32 v6, v4 ; encoding: [0x03,0xff,0xd1,0xca,0x04,0x01,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s105, v255 :: v_dual_mov_b32 v6, s1 +// GFX12: v_dual_min_num_f32 v255, s105, v255 :: v_dual_mov_b32 v6, s1 ; encoding: [0x69,0xfe,0xd1,0xca,0x01,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s1, v255 :: v_dual_mov_b32 v6, s105 +// GFX12: v_dual_min_num_f32 v255, s1, v255 :: v_dual_mov_b32 v6, s105 ; encoding: [0x01,0xfe,0xd1,0xca,0x69,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, ttmp15, v255 :: v_dual_mov_b32 v6, vcc_lo +// GFX12: v_dual_min_num_f32 v255, ttmp15, v255 :: v_dual_mov_b32 v6, vcc_lo ; encoding: [0x7b,0xfe,0xd1,0xca,0x6a,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_hi, v255 :: v_dual_mov_b32 v6, vcc_hi +// GFX12: v_dual_min_num_f32 v255, exec_hi, v255 :: v_dual_mov_b32 v6, vcc_hi ; encoding: [0x7f,0xfe,0xd1,0xca,0x6b,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_lo, v255 :: v_dual_mov_b32 v6, ttmp15 +// GFX12: v_dual_min_num_f32 v255, exec_lo, v255 :: v_dual_mov_b32 v6, ttmp15 ; encoding: [0x7e,0xfe,0xd1,0xca,0x7b,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, m0, 
v255 :: v_dual_mov_b32 v6, m0 +// GFX12: v_dual_min_num_f32 v255, m0, v255 :: v_dual_mov_b32 v6, m0 ; encoding: [0x7d,0xfe,0xd1,0xca,0x7d,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_hi, v255 :: v_dual_mov_b32 v6, exec_lo +// GFX12: v_dual_min_num_f32 v255, vcc_hi, v255 :: v_dual_mov_b32 v6, exec_lo ; encoding: [0x6b,0xfe,0xd1,0xca,0x7e,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_lo, v255 :: v_dual_mov_b32 v6, exec_hi +// GFX12: v_dual_min_num_f32 v255, vcc_lo, v255 :: v_dual_mov_b32 v6, exec_hi ; encoding: [0x6a,0xfe,0xd1,0xca,0x7f,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0xaf123456, v255 :: v_dual_mov_b32 v6, null +// GFX12: v_dual_min_num_f32 v255, 0xaf123456, v255 :: v_dual_mov_b32 v6, null ; encoding: [0xff,0xfe,0xd1,0xca,0x7c,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, src_scc, v255 :: v_dual_mov_b32 v6, -1 +// GFX12: v_dual_min_num_f32 v255, src_scc, v255 :: v_dual_mov_b32 v6, -1 ; encoding: [0xfd,0xfe,0xd1,0xca,0xc1,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_mov_b32 v6, 0.5 +// GFX12: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_mov_b32 v6, 0.5 ; encoding: [0xf0,0x06,0xd0,0xca,0xf0,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, -1, v4 :: v_dual_mov_b32 v6, src_scc +// GFX12: v_dual_min_num_f32 v255, -1, v4 :: v_dual_mov_b32 v6, src_scc ; encoding: [0xc1,0x08,0xd0,0xca,0xfd,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v6, null, v5 :: v_dual_mov_b32 v255, 0xaf123456 +// GFX12: v_dual_min_num_f32 v6, null, v5 :: v_dual_mov_b32 v255, 0xaf123456 ; 
encoding: [0x7c,0x0a,0xd0,0xca,0xff,0x00,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v6, v1, v3 +// GFX12: v_dual_min_num_f32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v6, v1, v3 ; encoding: [0x04,0x05,0xce,0xca,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v6, v255, v3 +// GFX12: v_dual_min_num_f32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v6, v255, v3 ; encoding: [0x01,0x05,0xce,0xca,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v6, v2, v3 +// GFX12: v_dual_min_num_f32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v6, v2, v3 ; encoding: [0xff,0x05,0xce,0xca,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v6, v3, v3 +// GFX12: v_dual_min_num_f32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v6, v3, v3 ; encoding: [0x02,0x05,0xce,0xca,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v6, v4, v3 +// GFX12: v_dual_min_num_f32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v6, v4, v3 ; encoding: [0x03,0x05,0xce,0xca,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v6, s1, v3 +// GFX12: v_dual_min_num_f32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v6, s1, v3 ; encoding: [0x69,0x04,0xce,0xca,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v6, s105, v3 +// GFX12: v_dual_min_num_f32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v6, s105, v3 ; encoding: 
[0x01,0x04,0xce,0xca,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_lo, v3 +// GFX12: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xce,0xca,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_hi, v3 +// GFX12: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xce,0xca,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, ttmp15, v3 +// GFX12: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xce,0xca,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v6, m0, v3 +// GFX12: v_dual_min_num_f32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0xce,0xca,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_lo, v3 +// GFX12: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xce,0xca,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_hi, v3 +// GFX12: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xce,0xca,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_mul_dx9_zero_f32 v6, null, v3 +// GFX12: v_dual_min_num_f32 v255, 0xaf123456, v2 :: 
v_dual_mul_dx9_zero_f32 v6, null, v3 ; encoding: [0xff,0x04,0xce,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v6, -1, v3 +// GFX12: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0xce,0xca,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v6, 0.5, v2 +// GFX12: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xce,0xca,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v6, src_scc, v5 +// GFX12: v_dual_min_num_f32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xce,0xca,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v6, null, v5 :: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_min_num_f32 v6, null, v5 :: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xce,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v4, v2 :: v_dual_mul_f32 v6, v1, v3 +// GFX12: v_dual_min_num_f32 v255, v4, v2 :: v_dual_mul_f32 v6, v1, v3 ; encoding: [0x04,0x05,0xc6,0xca,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v1, v2 :: v_dual_mul_f32 v6, v255, v3 +// GFX12: v_dual_min_num_f32 v255, v1, v2 :: v_dual_mul_f32 v6, v255, v3 ; encoding: [0x01,0x05,0xc6,0xca,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v255, v2 :: v_dual_mul_f32 v6, v2, v3 +// GFX12: v_dual_min_num_f32 v255, v255, v2 :: 
v_dual_mul_f32 v6, v2, v3 ; encoding: [0xff,0x05,0xc6,0xca,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v2, v2 :: v_dual_mul_f32 v6, v3, v3 +// GFX12: v_dual_min_num_f32 v255, v2, v2 :: v_dual_mul_f32 v6, v3, v3 ; encoding: [0x02,0x05,0xc6,0xca,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v3, v2 :: v_dual_mul_f32 v6, v4, v3 +// GFX12: v_dual_min_num_f32 v255, v3, v2 :: v_dual_mul_f32 v6, v4, v3 ; encoding: [0x03,0x05,0xc6,0xca,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s105, v2 :: v_dual_mul_f32 v6, s1, v3 +// GFX12: v_dual_min_num_f32 v255, s105, v2 :: v_dual_mul_f32 v6, s1, v3 ; encoding: [0x69,0x04,0xc6,0xca,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s1, v2 :: v_dual_mul_f32 v6, s105, v3 +// GFX12: v_dual_min_num_f32 v255, s1, v2 :: v_dual_mul_f32 v6, s105, v3 ; encoding: [0x01,0x04,0xc6,0xca,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_mul_f32 v6, vcc_lo, v3 +// GFX12: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_mul_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xc6,0xca,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_mul_f32 v6, vcc_hi, v3 +// GFX12: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_mul_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xc6,0xca,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_mul_f32 v6, ttmp15, v3 +// GFX12: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_mul_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xc6,0xca,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_min_num_f32 v255, m0, v2 :: v_dual_mul_f32 v6, m0, v3 +// GFX12: v_dual_min_num_f32 v255, m0, v2 :: v_dual_mul_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0xc6,0xca,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_mul_f32 v6, exec_lo, v3 +// GFX12: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_mul_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xc6,0xca,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_mul_f32 v6, exec_hi, v3 +// GFX12: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_mul_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xc6,0xca,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_mul_f32 v6, null, v3 +// GFX12: v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_mul_f32 v6, null, v3 ; encoding: [0xff,0x04,0xc6,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_mul_f32 v6, -1, v3 +// GFX12: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_mul_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0xc6,0xca,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_mul_f32 v6, 0.5, v2 +// GFX12: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_mul_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xc6,0xca,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, -1, v4 :: v_dual_mul_f32 v6, src_scc, v5 +// GFX12: v_dual_min_num_f32 v255, -1, v4 :: v_dual_mul_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xc6,0xca,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v6, null, v5 :: v_dual_mul_f32 
v255, 0xaf123456, v4 +// GFX12: v_dual_min_num_f32 v6, null, v5 :: v_dual_mul_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xc6,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v4, v2 :: v_dual_sub_f32 v6, v1, v3 +// GFX12: v_dual_min_num_f32 v255, v4, v2 :: v_dual_sub_f32 v6, v1, v3 ; encoding: [0x04,0x05,0xca,0xca,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v1, v2 :: v_dual_sub_f32 v6, v255, v3 +// GFX12: v_dual_min_num_f32 v255, v1, v2 :: v_dual_sub_f32 v6, v255, v3 ; encoding: [0x01,0x05,0xca,0xca,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v255, v2 :: v_dual_sub_f32 v6, v2, v3 +// GFX12: v_dual_min_num_f32 v255, v255, v2 :: v_dual_sub_f32 v6, v2, v3 ; encoding: [0xff,0x05,0xca,0xca,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v2, v2 :: v_dual_sub_f32 v6, v3, v3 +// GFX12: v_dual_min_num_f32 v255, v2, v2 :: v_dual_sub_f32 v6, v3, v3 ; encoding: [0x02,0x05,0xca,0xca,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v3, v2 :: v_dual_sub_f32 v6, v4, v3 +// GFX12: v_dual_min_num_f32 v255, v3, v2 :: v_dual_sub_f32 v6, v4, v3 ; encoding: [0x03,0x05,0xca,0xca,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s105, v2 :: v_dual_sub_f32 v6, s1, v3 +// GFX12: v_dual_min_num_f32 v255, s105, v2 :: v_dual_sub_f32 v6, s1, v3 ; encoding: [0x69,0x04,0xca,0xca,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s1, v2 :: v_dual_sub_f32 v6, s105, v3 +// GFX12: v_dual_min_num_f32 v255, s1, v2 :: v_dual_sub_f32 v6, s105, v3 ; encoding: 
[0x01,0x04,0xca,0xca,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_sub_f32 v6, vcc_lo, v3 +// GFX12: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_sub_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xca,0xca,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_sub_f32 v6, vcc_hi, v3 +// GFX12: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_sub_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xca,0xca,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_sub_f32 v6, ttmp15, v3 +// GFX12: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_sub_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xca,0xca,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, m0, v2 :: v_dual_sub_f32 v6, m0, v3 +// GFX12: v_dual_min_num_f32 v255, m0, v2 :: v_dual_sub_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0xca,0xca,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_sub_f32 v6, exec_lo, v3 +// GFX12: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_sub_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xca,0xca,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_sub_f32 v6, exec_hi, v3 +// GFX12: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_sub_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xca,0xca,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_sub_f32 v6, null, v3 +// GFX12: v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_sub_f32 v6, null, v3 ; encoding: [0xff,0x04,0xca,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_sub_f32 v6, -1, v3 +// GFX12: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_sub_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0xca,0xca,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_sub_f32 v6, 0.5, v2 +// GFX12: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_sub_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xca,0xca,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, -1, v4 :: v_dual_sub_f32 v6, src_scc, v5 +// GFX12: v_dual_min_num_f32 v255, -1, v4 :: v_dual_sub_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xca,0xca,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v6, null, v5 :: v_dual_sub_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_min_num_f32 v6, null, v5 :: v_dual_sub_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xca,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v4, v2 :: v_dual_subrev_f32 v6, v1, v3 +// GFX12: v_dual_min_num_f32 v255, v4, v2 :: v_dual_subrev_f32 v6, v1, v3 ; encoding: [0x04,0x05,0xcc,0xca,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v1, v2 :: v_dual_subrev_f32 v6, v255, v3 +// GFX12: v_dual_min_num_f32 v255, v1, v2 :: v_dual_subrev_f32 v6, v255, v3 ; encoding: [0x01,0x05,0xcc,0xca,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v255, v2 :: v_dual_subrev_f32 v6, v2, v3 +// GFX12: v_dual_min_num_f32 v255, v255, v2 :: v_dual_subrev_f32 v6, v2, v3 ; encoding: [0xff,0x05,0xcc,0xca,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v2, v2 :: 
v_dual_subrev_f32 v6, v3, v3 +// GFX12: v_dual_min_num_f32 v255, v2, v2 :: v_dual_subrev_f32 v6, v3, v3 ; encoding: [0x02,0x05,0xcc,0xca,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v3, v2 :: v_dual_subrev_f32 v6, v4, v3 +// GFX12: v_dual_min_num_f32 v255, v3, v2 :: v_dual_subrev_f32 v6, v4, v3 ; encoding: [0x03,0x05,0xcc,0xca,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s105, v2 :: v_dual_subrev_f32 v6, s1, v3 +// GFX12: v_dual_min_num_f32 v255, s105, v2 :: v_dual_subrev_f32 v6, s1, v3 ; encoding: [0x69,0x04,0xcc,0xca,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s1, v2 :: v_dual_subrev_f32 v6, s105, v3 +// GFX12: v_dual_min_num_f32 v255, s1, v2 :: v_dual_subrev_f32 v6, s105, v3 ; encoding: [0x01,0x04,0xcc,0xca,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_subrev_f32 v6, vcc_lo, v3 +// GFX12: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_subrev_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xcc,0xca,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_subrev_f32 v6, vcc_hi, v3 +// GFX12: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_subrev_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xcc,0xca,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_subrev_f32 v6, ttmp15, v3 +// GFX12: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_subrev_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xcc,0xca,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, m0, v2 :: v_dual_subrev_f32 v6, m0, v3 +// GFX12: v_dual_min_num_f32 v255, m0, v2 :: 
v_dual_subrev_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0xcc,0xca,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v6, exec_lo, v3 +// GFX12: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xcc,0xca,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v6, exec_hi, v3 +// GFX12: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xcc,0xca,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_subrev_f32 v6, null, v3 +// GFX12: v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_subrev_f32 v6, null, v3 ; encoding: [0xff,0x04,0xcc,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_subrev_f32 v6, -1, v3 +// GFX12: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_subrev_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0xcc,0xca,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_subrev_f32 v6, 0.5, v2 +// GFX12: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_subrev_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xcc,0xca,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, -1, v4 :: v_dual_subrev_f32 v6, src_scc, v5 +// GFX12: v_dual_min_num_f32 v255, -1, v4 :: v_dual_subrev_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xcc,0xca,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v6, null, v5 :: v_dual_subrev_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_min_num_f32 v6, null, v5 :: v_dual_subrev_f32 v255, 
0xaf123456, v4 ; encoding: [0x7c,0x0a,0xcc,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v4 :: v_dual_add_f32 v6, v1, v255 +// GFX12: v_dual_mov_b32 v255, v4 :: v_dual_add_f32 v6, v1, v255 ; encoding: [0x04,0x01,0x08,0xca,0x01,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v1 :: v_dual_add_f32 v6, v255, v255 +// GFX12: v_dual_mov_b32 v255, v1 :: v_dual_add_f32 v6, v255, v255 ; encoding: [0x01,0x01,0x08,0xca,0xff,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v255 :: v_dual_add_f32 v6, v2, v255 +// GFX12: v_dual_mov_b32 v255, v255 :: v_dual_add_f32 v6, v2, v255 ; encoding: [0xff,0x01,0x08,0xca,0x02,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v2 :: v_dual_add_f32 v6, v3, v255 +// GFX12: v_dual_mov_b32 v255, v2 :: v_dual_add_f32 v6, v3, v255 ; encoding: [0x02,0x01,0x08,0xca,0x03,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v3 :: v_dual_add_f32 v6, v4, v255 +// GFX12: v_dual_mov_b32 v255, v3 :: v_dual_add_f32 v6, v4, v255 ; encoding: [0x03,0x01,0x08,0xca,0x04,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s105 :: v_dual_add_f32 v6, s1, v255 +// GFX12: v_dual_mov_b32 v255, s105 :: v_dual_add_f32 v6, s1, v255 ; encoding: [0x69,0x00,0x08,0xca,0x01,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s1 :: v_dual_add_f32 v6, s105, v255 +// GFX12: v_dual_mov_b32 v255, s1 :: v_dual_add_f32 v6, s105, v255 ; encoding: [0x01,0x00,0x08,0xca,0x69,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, ttmp15 :: v_dual_add_f32 v6, vcc_lo, v255 +// GFX12: v_dual_mov_b32 
v255, ttmp15 :: v_dual_add_f32 v6, vcc_lo, v255 ; encoding: [0x7b,0x00,0x08,0xca,0x6a,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_hi :: v_dual_add_f32 v6, vcc_hi, v255 +// GFX12: v_dual_mov_b32 v255, exec_hi :: v_dual_add_f32 v6, vcc_hi, v255 ; encoding: [0x7f,0x00,0x08,0xca,0x6b,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_lo :: v_dual_add_f32 v6, ttmp15, v255 +// GFX12: v_dual_mov_b32 v255, exec_lo :: v_dual_add_f32 v6, ttmp15, v255 ; encoding: [0x7e,0x00,0x08,0xca,0x7b,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, m0 :: v_dual_add_f32 v6, m0, v255 +// GFX12: v_dual_mov_b32 v255, m0 :: v_dual_add_f32 v6, m0, v255 ; encoding: [0x7d,0x00,0x08,0xca,0x7d,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_hi :: v_dual_add_f32 v6, exec_lo, v255 +// GFX12: v_dual_mov_b32 v255, vcc_hi :: v_dual_add_f32 v6, exec_lo, v255 ; encoding: [0x6b,0x00,0x08,0xca,0x7e,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_lo :: v_dual_add_f32 v6, exec_hi, v255 +// GFX12: v_dual_mov_b32 v255, vcc_lo :: v_dual_add_f32 v6, exec_hi, v255 ; encoding: [0x6a,0x00,0x08,0xca,0x7f,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0xaf123456 :: v_dual_add_f32 v6, null, v255 +// GFX12: v_dual_mov_b32 v255, 0xaf123456 :: v_dual_add_f32 v6, null, v255 ; encoding: [0xff,0x00,0x08,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, src_scc :: v_dual_add_f32 v6, -1, v255 +// GFX12: v_dual_mov_b32 v255, src_scc :: v_dual_add_f32 v6, -1, v255 ; encoding: [0xfd,0x00,0x08,0xca,0xc1,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0.5 :: v_dual_add_f32 v6, 0.5, v3 +// GFX12: v_dual_mov_b32 v255, 0.5 :: v_dual_add_f32 v6, 0.5, v3 ; encoding: [0xf0,0x00,0x08,0xca,0xf0,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, -1 :: v_dual_add_f32 v6, src_scc, v4 +// GFX12: v_dual_mov_b32 v255, -1 :: v_dual_add_f32 v6, src_scc, v4 ; encoding: [0xc1,0x00,0x08,0xca,0xfd,0x08,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v6, null :: v_dual_add_f32 v255, 0xaf123456, v5 +// GFX12: v_dual_mov_b32 v6, null :: v_dual_add_f32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x00,0x08,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v4 :: v_dual_add_nc_u32 v6, v1, v255 +// GFX12: v_dual_mov_b32 v255, v4 :: v_dual_add_nc_u32 v6, v1, v255 ; encoding: [0x04,0x01,0x20,0xca,0x01,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v1 :: v_dual_add_nc_u32 v6, v255, v255 +// GFX12: v_dual_mov_b32 v255, v1 :: v_dual_add_nc_u32 v6, v255, v255 ; encoding: [0x01,0x01,0x20,0xca,0xff,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v255 :: v_dual_add_nc_u32 v6, v2, v255 +// GFX12: v_dual_mov_b32 v255, v255 :: v_dual_add_nc_u32 v6, v2, v255 ; encoding: [0xff,0x01,0x20,0xca,0x02,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v2 :: v_dual_add_nc_u32 v6, v3, v255 +// GFX12: v_dual_mov_b32 v255, v2 :: v_dual_add_nc_u32 v6, v3, v255 ; encoding: [0x02,0x01,0x20,0xca,0x03,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v3 :: v_dual_add_nc_u32 v6, v4, v255 +// GFX12: v_dual_mov_b32 v255, v3 :: v_dual_add_nc_u32 v6, v4, v255 ; encoding: 
[0x03,0x01,0x20,0xca,0x04,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s105 :: v_dual_add_nc_u32 v6, s1, v255 +// GFX12: v_dual_mov_b32 v255, s105 :: v_dual_add_nc_u32 v6, s1, v255 ; encoding: [0x69,0x00,0x20,0xca,0x01,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s1 :: v_dual_add_nc_u32 v6, s105, v255 +// GFX12: v_dual_mov_b32 v255, s1 :: v_dual_add_nc_u32 v6, s105, v255 ; encoding: [0x01,0x00,0x20,0xca,0x69,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, ttmp15 :: v_dual_add_nc_u32 v6, vcc_lo, v255 +// GFX12: v_dual_mov_b32 v255, ttmp15 :: v_dual_add_nc_u32 v6, vcc_lo, v255 ; encoding: [0x7b,0x00,0x20,0xca,0x6a,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_hi :: v_dual_add_nc_u32 v6, vcc_hi, v255 +// GFX12: v_dual_mov_b32 v255, exec_hi :: v_dual_add_nc_u32 v6, vcc_hi, v255 ; encoding: [0x7f,0x00,0x20,0xca,0x6b,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_lo :: v_dual_add_nc_u32 v6, ttmp15, v255 +// GFX12: v_dual_mov_b32 v255, exec_lo :: v_dual_add_nc_u32 v6, ttmp15, v255 ; encoding: [0x7e,0x00,0x20,0xca,0x7b,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, m0 :: v_dual_add_nc_u32 v6, m0, v255 +// GFX12: v_dual_mov_b32 v255, m0 :: v_dual_add_nc_u32 v6, m0, v255 ; encoding: [0x7d,0x00,0x20,0xca,0x7d,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_hi :: v_dual_add_nc_u32 v6, exec_lo, v255 +// GFX12: v_dual_mov_b32 v255, vcc_hi :: v_dual_add_nc_u32 v6, exec_lo, v255 ; encoding: [0x6b,0x00,0x20,0xca,0x7e,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_lo :: 
v_dual_add_nc_u32 v6, exec_hi, v255 +// GFX12: v_dual_mov_b32 v255, vcc_lo :: v_dual_add_nc_u32 v6, exec_hi, v255 ; encoding: [0x6a,0x00,0x20,0xca,0x7f,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0xaf123456 :: v_dual_add_nc_u32 v6, null, v255 +// GFX12: v_dual_mov_b32 v255, 0xaf123456 :: v_dual_add_nc_u32 v6, null, v255 ; encoding: [0xff,0x00,0x20,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, src_scc :: v_dual_add_nc_u32 v6, -1, v255 +// GFX12: v_dual_mov_b32 v255, src_scc :: v_dual_add_nc_u32 v6, -1, v255 ; encoding: [0xfd,0x00,0x20,0xca,0xc1,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0.5 :: v_dual_add_nc_u32 v6, 0.5, v3 +// GFX12: v_dual_mov_b32 v255, 0.5 :: v_dual_add_nc_u32 v6, 0.5, v3 ; encoding: [0xf0,0x00,0x20,0xca,0xf0,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, -1 :: v_dual_add_nc_u32 v6, src_scc, v4 +// GFX12: v_dual_mov_b32 v255, -1 :: v_dual_add_nc_u32 v6, src_scc, v4 ; encoding: [0xc1,0x00,0x20,0xca,0xfd,0x08,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v6, null :: v_dual_add_nc_u32 v255, 0xaf123456, v5 +// GFX12: v_dual_mov_b32 v6, null :: v_dual_add_nc_u32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x00,0x20,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v4 :: v_dual_cndmask_b32 v6, v1, v255 +// GFX12: v_dual_mov_b32 v255, v4 :: v_dual_cndmask_b32 v6, v1, v255 ; encoding: [0x04,0x01,0x12,0xca,0x01,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v1 :: v_dual_cndmask_b32 v6, v255, v255 +// GFX12: v_dual_mov_b32 v255, v1 :: v_dual_cndmask_b32 v6, v255, v255 ; encoding: 
[0x01,0x01,0x12,0xca,0xff,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v255 :: v_dual_cndmask_b32 v6, v2, v255 +// GFX12: v_dual_mov_b32 v255, v255 :: v_dual_cndmask_b32 v6, v2, v255 ; encoding: [0xff,0x01,0x12,0xca,0x02,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v2 :: v_dual_cndmask_b32 v6, v3, v255 +// GFX12: v_dual_mov_b32 v255, v2 :: v_dual_cndmask_b32 v6, v3, v255 ; encoding: [0x02,0x01,0x12,0xca,0x03,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v3 :: v_dual_cndmask_b32 v6, v4, v255 +// GFX12: v_dual_mov_b32 v255, v3 :: v_dual_cndmask_b32 v6, v4, v255 ; encoding: [0x03,0x01,0x12,0xca,0x04,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s105 :: v_dual_cndmask_b32 v6, s105, v255 +// GFX12: v_dual_mov_b32 v255, s105 :: v_dual_cndmask_b32 v6, s105, v255 ; encoding: [0x69,0x00,0x12,0xca,0x69,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s1 :: v_dual_cndmask_b32 v6, s1, v255 +// GFX12: v_dual_mov_b32 v255, s1 :: v_dual_cndmask_b32 v6, s1, v255 ; encoding: [0x01,0x00,0x12,0xca,0x01,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, ttmp15 :: v_dual_cndmask_b32 v6, ttmp15, v255 +// GFX12: v_dual_mov_b32 v255, ttmp15 :: v_dual_cndmask_b32 v6, ttmp15, v255 ; encoding: [0x7b,0x00,0x12,0xca,0x7b,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_hi :: v_dual_cndmask_b32 v6, exec_hi, v255 +// GFX12: v_dual_mov_b32 v255, exec_hi :: v_dual_cndmask_b32 v6, exec_hi, v255 ; encoding: [0x7f,0x00,0x12,0xca,0x7f,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_lo :: v_dual_cndmask_b32 
v6, exec_lo, v255 +// GFX12: v_dual_mov_b32 v255, exec_lo :: v_dual_cndmask_b32 v6, exec_lo, v255 ; encoding: [0x7e,0x00,0x12,0xca,0x7e,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, m0 :: v_dual_cndmask_b32 v6, m0, v255 +// GFX12: v_dual_mov_b32 v255, m0 :: v_dual_cndmask_b32 v6, m0, v255 ; encoding: [0x7d,0x00,0x12,0xca,0x7d,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_hi :: v_dual_cndmask_b32 v6, vcc_hi, v255 +// GFX12: v_dual_mov_b32 v255, vcc_hi :: v_dual_cndmask_b32 v6, vcc_hi, v255 ; encoding: [0x6b,0x00,0x12,0xca,0x6b,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_lo :: v_dual_cndmask_b32 v6, vcc_lo, v255 +// GFX12: v_dual_mov_b32 v255, vcc_lo :: v_dual_cndmask_b32 v6, vcc_lo, v255 ; encoding: [0x6a,0x00,0x12,0xca,0x6a,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0xaf123456 :: v_dual_cndmask_b32 v6, null, v255 +// GFX12: v_dual_mov_b32 v255, 0xaf123456 :: v_dual_cndmask_b32 v6, null, v255 ; encoding: [0xff,0x00,0x12,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, src_scc :: v_dual_cndmask_b32 v6, -1, v255 +// GFX12: v_dual_mov_b32 v255, src_scc :: v_dual_cndmask_b32 v6, -1, v255 ; encoding: [0xfd,0x00,0x12,0xca,0xc1,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0.5 :: v_dual_cndmask_b32 v6, 0.5, v3 +// GFX12: v_dual_mov_b32 v255, 0.5 :: v_dual_cndmask_b32 v6, 0.5, v3 ; encoding: [0xf0,0x00,0x12,0xca,0xf0,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, -1 :: v_dual_cndmask_b32 v6, src_scc, v4 +// GFX12: v_dual_mov_b32 v255, -1 :: v_dual_cndmask_b32 v6, src_scc, v4 ; encoding: 
[0xc1,0x00,0x12,0xca,0xfd,0x08,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v6, null :: v_dual_cndmask_b32 v255, 0xaf123456, v5 +// GFX12: v_dual_mov_b32 v6, null :: v_dual_cndmask_b32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x00,0x12,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v4 :: v_dual_fmaak_f32 v6, v1, v255, 0xaf123456 +// GFX12: v_dual_mov_b32 v255, v4 :: v_dual_fmaak_f32 v6, v1, v255, 0xaf123456 ; encoding: [0x04,0x01,0x02,0xca,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v1 :: v_dual_fmaak_f32 v6, v255, v255, 0xaf123456 +// GFX12: v_dual_mov_b32 v255, v1 :: v_dual_fmaak_f32 v6, v255, v255, 0xaf123456 ; encoding: [0x01,0x01,0x02,0xca,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v255 :: v_dual_fmaak_f32 v6, v2, v255, 0xaf123456 +// GFX12: v_dual_mov_b32 v255, v255 :: v_dual_fmaak_f32 v6, v2, v255, 0xaf123456 ; encoding: [0xff,0x01,0x02,0xca,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v2 :: v_dual_fmaak_f32 v6, v3, v255, 0xaf123456 +// GFX12: v_dual_mov_b32 v255, v2 :: v_dual_fmaak_f32 v6, v3, v255, 0xaf123456 ; encoding: [0x02,0x01,0x02,0xca,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v3 :: v_dual_fmaak_f32 v6, v4, v255, 0xaf123456 +// GFX12: v_dual_mov_b32 v255, v3 :: v_dual_fmaak_f32 v6, v4, v255, 0xaf123456 ; encoding: [0x03,0x01,0x02,0xca,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s105 :: v_dual_fmaak_f32 v6, s105, v255, 0xaf123456 +// GFX12: v_dual_mov_b32 v255, s105 
:: v_dual_fmaak_f32 v6, s105, v255, 0xaf123456 ; encoding: [0x69,0x00,0x02,0xca,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s1 :: v_dual_fmaak_f32 v6, s1, v255, 0xaf123456 +// GFX12: v_dual_mov_b32 v255, s1 :: v_dual_fmaak_f32 v6, s1, v255, 0xaf123456 ; encoding: [0x01,0x00,0x02,0xca,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, ttmp15 :: v_dual_fmaak_f32 v6, ttmp15, v255, 0xaf123456 +// GFX12: v_dual_mov_b32 v255, ttmp15 :: v_dual_fmaak_f32 v6, ttmp15, v255, 0xaf123456 ; encoding: [0x7b,0x00,0x02,0xca,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_hi :: v_dual_fmaak_f32 v6, exec_hi, v255, 0xaf123456 +// GFX12: v_dual_mov_b32 v255, exec_hi :: v_dual_fmaak_f32 v6, exec_hi, v255, 0xaf123456 ; encoding: [0x7f,0x00,0x02,0xca,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_lo :: v_dual_fmaak_f32 v6, exec_lo, v255, 0xaf123456 +// GFX12: v_dual_mov_b32 v255, exec_lo :: v_dual_fmaak_f32 v6, exec_lo, v255, 0xaf123456 ; encoding: [0x7e,0x00,0x02,0xca,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, m0 :: v_dual_fmaak_f32 v6, m0, v255, 0xaf123456 +// GFX12: v_dual_mov_b32 v255, m0 :: v_dual_fmaak_f32 v6, m0, v255, 0xaf123456 ; encoding: [0x7d,0x00,0x02,0xca,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_hi :: v_dual_fmaak_f32 v6, vcc_hi, v255, 0xaf123456 +// GFX12: v_dual_mov_b32 v255, vcc_hi :: v_dual_fmaak_f32 v6, vcc_hi, v255, 0xaf123456 ; encoding: [0x6b,0x00,0x02,0xca,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_lo :: v_dual_fmaak_f32 v6, vcc_lo, v255, 0xaf123456 +// GFX12: v_dual_mov_b32 v255, vcc_lo :: v_dual_fmaak_f32 v6, vcc_lo, v255, 0xaf123456 ; encoding: [0x6a,0x00,0x02,0xca,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0xaf123456 :: v_dual_fmaak_f32 v6, null, v255, 0xaf123456 +// GFX12: v_dual_mov_b32 v255, 0xaf123456 :: v_dual_fmaak_f32 v6, null, v255, 0xaf123456 ; encoding: [0xff,0x00,0x02,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, src_scc :: v_dual_fmaak_f32 v6, -1, v255, 0xaf123456 +// GFX12: v_dual_mov_b32 v255, src_scc :: v_dual_fmaak_f32 v6, -1, v255, 0xaf123456 ; encoding: [0xfd,0x00,0x02,0xca,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0.5 :: v_dual_fmaak_f32 v6, 0.5, v3, 0xaf123456 +// GFX12: v_dual_mov_b32 v255, 0.5 :: v_dual_fmaak_f32 v6, 0.5, v3, 0xaf123456 ; encoding: [0xf0,0x00,0x02,0xca,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, -1 :: v_dual_fmaak_f32 v6, src_scc, v4, 0xaf123456 +// GFX12: v_dual_mov_b32 v255, -1 :: v_dual_fmaak_f32 v6, src_scc, v4, 0xaf123456 ; encoding: [0xc1,0x00,0x02,0xca,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v6, null :: v_dual_fmaak_f32 v255, 0xaf123456, v5, 0xaf123456 +// GFX12: v_dual_mov_b32 v6, null :: v_dual_fmaak_f32 v255, 0xaf123456, v5, 0xaf123456 ; encoding: [0x7c,0x00,0x02,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v4 :: v_dual_fmac_f32 v6, v1, v255 +// GFX12: v_dual_mov_b32 v255, v4 :: v_dual_fmac_f32 v6, v1, v255 ; 
encoding: [0x04,0x01,0x00,0xca,0x01,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v1 :: v_dual_fmac_f32 v6, v255, v255 +// GFX12: v_dual_mov_b32 v255, v1 :: v_dual_fmac_f32 v6, v255, v255 ; encoding: [0x01,0x01,0x00,0xca,0xff,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v255 :: v_dual_fmac_f32 v6, v2, v255 +// GFX12: v_dual_mov_b32 v255, v255 :: v_dual_fmac_f32 v6, v2, v255 ; encoding: [0xff,0x01,0x00,0xca,0x02,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v2 :: v_dual_fmac_f32 v6, v3, v255 +// GFX12: v_dual_mov_b32 v255, v2 :: v_dual_fmac_f32 v6, v3, v255 ; encoding: [0x02,0x01,0x00,0xca,0x03,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v3 :: v_dual_fmac_f32 v6, v4, v255 +// GFX12: v_dual_mov_b32 v255, v3 :: v_dual_fmac_f32 v6, v4, v255 ; encoding: [0x03,0x01,0x00,0xca,0x04,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s105 :: v_dual_fmac_f32 v6, s1, v255 +// GFX12: v_dual_mov_b32 v255, s105 :: v_dual_fmac_f32 v6, s1, v255 ; encoding: [0x69,0x00,0x00,0xca,0x01,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s1 :: v_dual_fmac_f32 v6, s105, v255 +// GFX12: v_dual_mov_b32 v255, s1 :: v_dual_fmac_f32 v6, s105, v255 ; encoding: [0x01,0x00,0x00,0xca,0x69,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, ttmp15 :: v_dual_fmac_f32 v6, vcc_lo, v255 +// GFX12: v_dual_mov_b32 v255, ttmp15 :: v_dual_fmac_f32 v6, vcc_lo, v255 ; encoding: [0x7b,0x00,0x00,0xca,0x6a,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_hi :: v_dual_fmac_f32 v6, vcc_hi, v255 +// GFX12: v_dual_mov_b32 v255, 
exec_hi :: v_dual_fmac_f32 v6, vcc_hi, v255 ; encoding: [0x7f,0x00,0x00,0xca,0x6b,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_lo :: v_dual_fmac_f32 v6, ttmp15, v255 +// GFX12: v_dual_mov_b32 v255, exec_lo :: v_dual_fmac_f32 v6, ttmp15, v255 ; encoding: [0x7e,0x00,0x00,0xca,0x7b,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, m0 :: v_dual_fmac_f32 v6, m0, v255 +// GFX12: v_dual_mov_b32 v255, m0 :: v_dual_fmac_f32 v6, m0, v255 ; encoding: [0x7d,0x00,0x00,0xca,0x7d,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_hi :: v_dual_fmac_f32 v6, exec_lo, v255 +// GFX12: v_dual_mov_b32 v255, vcc_hi :: v_dual_fmac_f32 v6, exec_lo, v255 ; encoding: [0x6b,0x00,0x00,0xca,0x7e,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_lo :: v_dual_fmac_f32 v6, exec_hi, v255 +// GFX12: v_dual_mov_b32 v255, vcc_lo :: v_dual_fmac_f32 v6, exec_hi, v255 ; encoding: [0x6a,0x00,0x00,0xca,0x7f,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0xaf123456 :: v_dual_fmac_f32 v6, null, v255 +// GFX12: v_dual_mov_b32 v255, 0xaf123456 :: v_dual_fmac_f32 v6, null, v255 ; encoding: [0xff,0x00,0x00,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, src_scc :: v_dual_fmac_f32 v6, -1, v255 +// GFX12: v_dual_mov_b32 v255, src_scc :: v_dual_fmac_f32 v6, -1, v255 ; encoding: [0xfd,0x00,0x00,0xca,0xc1,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0.5 :: v_dual_fmac_f32 v6, 0.5, v3 +// GFX12: v_dual_mov_b32 v255, 0.5 :: v_dual_fmac_f32 v6, 0.5, v3 ; encoding: [0xf0,0x00,0x00,0xca,0xf0,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction 
requires wavesize=32 + +v_dual_mov_b32 v255, -1 :: v_dual_fmac_f32 v6, src_scc, v4 +// GFX12: v_dual_mov_b32 v255, -1 :: v_dual_fmac_f32 v6, src_scc, v4 ; encoding: [0xc1,0x00,0x00,0xca,0xfd,0x08,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v6, null :: v_dual_fmac_f32 v255, 0xaf123456, v5 +// GFX12: v_dual_mov_b32 v6, null :: v_dual_fmac_f32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x00,0x00,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v4 :: v_dual_fmamk_f32 v6, v1, 0xaf123456, v255 +// GFX12: v_dual_mov_b32 v255, v4 :: v_dual_fmamk_f32 v6, v1, 0xaf123456, v255 ; encoding: [0x04,0x01,0x04,0xca,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v1 :: v_dual_fmamk_f32 v6, v255, 0xaf123456, v255 +// GFX12: v_dual_mov_b32 v255, v1 :: v_dual_fmamk_f32 v6, v255, 0xaf123456, v255 ; encoding: [0x01,0x01,0x04,0xca,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v255 :: v_dual_fmamk_f32 v6, v2, 0xaf123456, v255 +// GFX12: v_dual_mov_b32 v255, v255 :: v_dual_fmamk_f32 v6, v2, 0xaf123456, v255 ; encoding: [0xff,0x01,0x04,0xca,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v2 :: v_dual_fmamk_f32 v6, v3, 0xaf123456, v255 +// GFX12: v_dual_mov_b32 v255, v2 :: v_dual_fmamk_f32 v6, v3, 0xaf123456, v255 ; encoding: [0x02,0x01,0x04,0xca,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v3 :: v_dual_fmamk_f32 v6, v4, 0xaf123456, v255 +// GFX12: v_dual_mov_b32 v255, v3 :: v_dual_fmamk_f32 v6, v4, 0xaf123456, v255 ; encoding: [0x03,0x01,0x04,0xca,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s105 :: v_dual_fmamk_f32 v6, s105, 0xaf123456, v255 +// GFX12: v_dual_mov_b32 v255, s105 :: v_dual_fmamk_f32 v6, s105, 0xaf123456, v255 ; encoding: [0x69,0x00,0x04,0xca,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s1 :: v_dual_fmamk_f32 v6, s1, 0xaf123456, v255 +// GFX12: v_dual_mov_b32 v255, s1 :: v_dual_fmamk_f32 v6, s1, 0xaf123456, v255 ; encoding: [0x01,0x00,0x04,0xca,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, ttmp15 :: v_dual_fmamk_f32 v6, ttmp15, 0xaf123456, v255 +// GFX12: v_dual_mov_b32 v255, ttmp15 :: v_dual_fmamk_f32 v6, ttmp15, 0xaf123456, v255 ; encoding: [0x7b,0x00,0x04,0xca,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_hi :: v_dual_fmamk_f32 v6, exec_hi, 0xaf123456, v255 +// GFX12: v_dual_mov_b32 v255, exec_hi :: v_dual_fmamk_f32 v6, exec_hi, 0xaf123456, v255 ; encoding: [0x7f,0x00,0x04,0xca,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_lo :: v_dual_fmamk_f32 v6, exec_lo, 0xaf123456, v255 +// GFX12: v_dual_mov_b32 v255, exec_lo :: v_dual_fmamk_f32 v6, exec_lo, 0xaf123456, v255 ; encoding: [0x7e,0x00,0x04,0xca,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, m0 :: v_dual_fmamk_f32 v6, m0, 0xaf123456, v255 +// GFX12: v_dual_mov_b32 v255, m0 :: v_dual_fmamk_f32 v6, m0, 0xaf123456, v255 ; encoding: [0x7d,0x00,0x04,0xca,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_hi :: v_dual_fmamk_f32 v6, vcc_hi, 0xaf123456, v255 +// GFX12: v_dual_mov_b32 
v255, vcc_hi :: v_dual_fmamk_f32 v6, vcc_hi, 0xaf123456, v255 ; encoding: [0x6b,0x00,0x04,0xca,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_lo :: v_dual_fmamk_f32 v6, vcc_lo, 0xaf123456, v255 +// GFX12: v_dual_mov_b32 v255, vcc_lo :: v_dual_fmamk_f32 v6, vcc_lo, 0xaf123456, v255 ; encoding: [0x6a,0x00,0x04,0xca,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0xaf123456 :: v_dual_fmamk_f32 v6, null, 0xaf123456, v255 +// GFX12: v_dual_mov_b32 v255, 0xaf123456 :: v_dual_fmamk_f32 v6, null, 0xaf123456, v255 ; encoding: [0xff,0x00,0x04,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, src_scc :: v_dual_fmamk_f32 v6, -1, 0xaf123456, v255 +// GFX12: v_dual_mov_b32 v255, src_scc :: v_dual_fmamk_f32 v6, -1, 0xaf123456, v255 ; encoding: [0xfd,0x00,0x04,0xca,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0.5 :: v_dual_fmamk_f32 v6, 0.5, 0xaf123456, v255 +// GFX12: v_dual_mov_b32 v255, 0.5 :: v_dual_fmamk_f32 v6, 0.5, 0xaf123456, v255 ; encoding: [0xf0,0x00,0x04,0xca,0xf0,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, -1 :: v_dual_fmamk_f32 v6, src_scc, 0xaf123456, v255 +// GFX12: v_dual_mov_b32 v255, -1 :: v_dual_fmamk_f32 v6, src_scc, 0xaf123456, v255 ; encoding: [0xc1,0x00,0x04,0xca,0xfd,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v6, null :: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v4 +// GFX12: v_dual_mov_b32 v6, null :: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v4 ; encoding: [0x7c,0x00,0x04,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v4 :: v_dual_lshlrev_b32 v6, v1, v255 +// GFX12: v_dual_mov_b32 v255, v4 :: v_dual_lshlrev_b32 v6, v1, v255 ; encoding: [0x04,0x01,0x22,0xca,0x01,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v1 :: v_dual_lshlrev_b32 v6, v255, v255 +// GFX12: v_dual_mov_b32 v255, v1 :: v_dual_lshlrev_b32 v6, v255, v255 ; encoding: [0x01,0x01,0x22,0xca,0xff,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v255 :: v_dual_lshlrev_b32 v6, v2, v255 +// GFX12: v_dual_mov_b32 v255, v255 :: v_dual_lshlrev_b32 v6, v2, v255 ; encoding: [0xff,0x01,0x22,0xca,0x02,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v2 :: v_dual_lshlrev_b32 v6, v3, v255 +// GFX12: v_dual_mov_b32 v255, v2 :: v_dual_lshlrev_b32 v6, v3, v255 ; encoding: [0x02,0x01,0x22,0xca,0x03,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v3 :: v_dual_lshlrev_b32 v6, v4, v255 +// GFX12: v_dual_mov_b32 v255, v3 :: v_dual_lshlrev_b32 v6, v4, v255 ; encoding: [0x03,0x01,0x22,0xca,0x04,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s105 :: v_dual_lshlrev_b32 v6, s1, v255 +// GFX12: v_dual_mov_b32 v255, s105 :: v_dual_lshlrev_b32 v6, s1, v255 ; encoding: [0x69,0x00,0x22,0xca,0x01,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s1 :: v_dual_lshlrev_b32 v6, s105, v255 +// GFX12: v_dual_mov_b32 v255, s1 :: v_dual_lshlrev_b32 v6, s105, v255 ; encoding: [0x01,0x00,0x22,0xca,0x69,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, ttmp15 :: v_dual_lshlrev_b32 v6, vcc_lo, v255 +// GFX12: v_dual_mov_b32 v255, ttmp15 :: v_dual_lshlrev_b32 v6, vcc_lo, 
v255 ; encoding: [0x7b,0x00,0x22,0xca,0x6a,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_hi :: v_dual_lshlrev_b32 v6, vcc_hi, v255 +// GFX12: v_dual_mov_b32 v255, exec_hi :: v_dual_lshlrev_b32 v6, vcc_hi, v255 ; encoding: [0x7f,0x00,0x22,0xca,0x6b,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_lo :: v_dual_lshlrev_b32 v6, ttmp15, v255 +// GFX12: v_dual_mov_b32 v255, exec_lo :: v_dual_lshlrev_b32 v6, ttmp15, v255 ; encoding: [0x7e,0x00,0x22,0xca,0x7b,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, m0 :: v_dual_lshlrev_b32 v6, m0, v255 +// GFX12: v_dual_mov_b32 v255, m0 :: v_dual_lshlrev_b32 v6, m0, v255 ; encoding: [0x7d,0x00,0x22,0xca,0x7d,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_hi :: v_dual_lshlrev_b32 v6, exec_lo, v255 +// GFX12: v_dual_mov_b32 v255, vcc_hi :: v_dual_lshlrev_b32 v6, exec_lo, v255 ; encoding: [0x6b,0x00,0x22,0xca,0x7e,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_lo :: v_dual_lshlrev_b32 v6, exec_hi, v255 +// GFX12: v_dual_mov_b32 v255, vcc_lo :: v_dual_lshlrev_b32 v6, exec_hi, v255 ; encoding: [0x6a,0x00,0x22,0xca,0x7f,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0xaf123456 :: v_dual_lshlrev_b32 v6, null, v255 +// GFX12: v_dual_mov_b32 v255, 0xaf123456 :: v_dual_lshlrev_b32 v6, null, v255 ; encoding: [0xff,0x00,0x22,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, src_scc :: v_dual_lshlrev_b32 v6, -1, v255 +// GFX12: v_dual_mov_b32 v255, src_scc :: v_dual_lshlrev_b32 v6, -1, v255 ; encoding: [0xfd,0x00,0x22,0xca,0xc1,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: 
error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0.5 :: v_dual_lshlrev_b32 v6, 0.5, v3 +// GFX12: v_dual_mov_b32 v255, 0.5 :: v_dual_lshlrev_b32 v6, 0.5, v3 ; encoding: [0xf0,0x00,0x22,0xca,0xf0,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, -1 :: v_dual_lshlrev_b32 v6, src_scc, v4 +// GFX12: v_dual_mov_b32 v255, -1 :: v_dual_lshlrev_b32 v6, src_scc, v4 ; encoding: [0xc1,0x00,0x22,0xca,0xfd,0x08,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v6, null :: v_dual_lshlrev_b32 v255, 0xaf123456, v5 +// GFX12: v_dual_mov_b32 v6, null :: v_dual_lshlrev_b32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x00,0x22,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v4 :: v_dual_max_num_f32 v6, v1, v255 +// GFX12: v_dual_mov_b32 v255, v4 :: v_dual_max_num_f32 v6, v1, v255 ; encoding: [0x04,0x01,0x14,0xca,0x01,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v1 :: v_dual_max_num_f32 v6, v255, v255 +// GFX12: v_dual_mov_b32 v255, v1 :: v_dual_max_num_f32 v6, v255, v255 ; encoding: [0x01,0x01,0x14,0xca,0xff,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v255 :: v_dual_max_num_f32 v6, v2, v255 +// GFX12: v_dual_mov_b32 v255, v255 :: v_dual_max_num_f32 v6, v2, v255 ; encoding: [0xff,0x01,0x14,0xca,0x02,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v2 :: v_dual_max_num_f32 v6, v3, v255 +// GFX12: v_dual_mov_b32 v255, v2 :: v_dual_max_num_f32 v6, v3, v255 ; encoding: [0x02,0x01,0x14,0xca,0x03,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v3 :: v_dual_max_num_f32 v6, v4, v255 +// GFX12: v_dual_mov_b32 v255, v3 :: v_dual_max_num_f32 v6, 
v4, v255 ; encoding: [0x03,0x01,0x14,0xca,0x04,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s105 :: v_dual_max_num_f32 v6, s1, v255 +// GFX12: v_dual_mov_b32 v255, s105 :: v_dual_max_num_f32 v6, s1, v255 ; encoding: [0x69,0x00,0x14,0xca,0x01,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s1 :: v_dual_max_num_f32 v6, s105, v255 +// GFX12: v_dual_mov_b32 v255, s1 :: v_dual_max_num_f32 v6, s105, v255 ; encoding: [0x01,0x00,0x14,0xca,0x69,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, ttmp15 :: v_dual_max_num_f32 v6, vcc_lo, v255 +// GFX12: v_dual_mov_b32 v255, ttmp15 :: v_dual_max_num_f32 v6, vcc_lo, v255 ; encoding: [0x7b,0x00,0x14,0xca,0x6a,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_hi :: v_dual_max_num_f32 v6, vcc_hi, v255 +// GFX12: v_dual_mov_b32 v255, exec_hi :: v_dual_max_num_f32 v6, vcc_hi, v255 ; encoding: [0x7f,0x00,0x14,0xca,0x6b,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_lo :: v_dual_max_num_f32 v6, ttmp15, v255 +// GFX12: v_dual_mov_b32 v255, exec_lo :: v_dual_max_num_f32 v6, ttmp15, v255 ; encoding: [0x7e,0x00,0x14,0xca,0x7b,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, m0 :: v_dual_max_num_f32 v6, m0, v255 +// GFX12: v_dual_mov_b32 v255, m0 :: v_dual_max_num_f32 v6, m0, v255 ; encoding: [0x7d,0x00,0x14,0xca,0x7d,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_hi :: v_dual_max_num_f32 v6, exec_lo, v255 +// GFX12: v_dual_mov_b32 v255, vcc_hi :: v_dual_max_num_f32 v6, exec_lo, v255 ; encoding: [0x6b,0x00,0x14,0xca,0x7e,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_mov_b32 v255, vcc_lo :: v_dual_max_num_f32 v6, exec_hi, v255 +// GFX12: v_dual_mov_b32 v255, vcc_lo :: v_dual_max_num_f32 v6, exec_hi, v255 ; encoding: [0x6a,0x00,0x14,0xca,0x7f,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0xaf123456 :: v_dual_max_num_f32 v6, null, v255 +// GFX12: v_dual_mov_b32 v255, 0xaf123456 :: v_dual_max_num_f32 v6, null, v255 ; encoding: [0xff,0x00,0x14,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, src_scc :: v_dual_max_num_f32 v6, -1, v255 +// GFX12: v_dual_mov_b32 v255, src_scc :: v_dual_max_num_f32 v6, -1, v255 ; encoding: [0xfd,0x00,0x14,0xca,0xc1,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0.5 :: v_dual_max_num_f32 v6, 0.5, v3 +// GFX12: v_dual_mov_b32 v255, 0.5 :: v_dual_max_num_f32 v6, 0.5, v3 ; encoding: [0xf0,0x00,0x14,0xca,0xf0,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, -1 :: v_dual_max_num_f32 v6, src_scc, v4 +// GFX12: v_dual_mov_b32 v255, -1 :: v_dual_max_num_f32 v6, src_scc, v4 ; encoding: [0xc1,0x00,0x14,0xca,0xfd,0x08,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v6, null :: v_dual_max_num_f32 v255, 0xaf123456, v5 +// GFX12: v_dual_mov_b32 v6, null :: v_dual_max_num_f32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x00,0x14,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v4 :: v_dual_min_num_f32 v6, v1, v255 +// GFX12: v_dual_mov_b32 v255, v4 :: v_dual_min_num_f32 v6, v1, v255 ; encoding: [0x04,0x01,0x16,0xca,0x01,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v1 :: v_dual_min_num_f32 v6, v255, v255 +// GFX12: v_dual_mov_b32 v255, v1 :: 
v_dual_min_num_f32 v6, v255, v255 ; encoding: [0x01,0x01,0x16,0xca,0xff,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v255 :: v_dual_min_num_f32 v6, v2, v255 +// GFX12: v_dual_mov_b32 v255, v255 :: v_dual_min_num_f32 v6, v2, v255 ; encoding: [0xff,0x01,0x16,0xca,0x02,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v2 :: v_dual_min_num_f32 v6, v3, v255 +// GFX12: v_dual_mov_b32 v255, v2 :: v_dual_min_num_f32 v6, v3, v255 ; encoding: [0x02,0x01,0x16,0xca,0x03,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v3 :: v_dual_min_num_f32 v6, v4, v255 +// GFX12: v_dual_mov_b32 v255, v3 :: v_dual_min_num_f32 v6, v4, v255 ; encoding: [0x03,0x01,0x16,0xca,0x04,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s105 :: v_dual_min_num_f32 v6, s1, v255 +// GFX12: v_dual_mov_b32 v255, s105 :: v_dual_min_num_f32 v6, s1, v255 ; encoding: [0x69,0x00,0x16,0xca,0x01,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s1 :: v_dual_min_num_f32 v6, s105, v255 +// GFX12: v_dual_mov_b32 v255, s1 :: v_dual_min_num_f32 v6, s105, v255 ; encoding: [0x01,0x00,0x16,0xca,0x69,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, ttmp15 :: v_dual_min_num_f32 v6, vcc_lo, v255 +// GFX12: v_dual_mov_b32 v255, ttmp15 :: v_dual_min_num_f32 v6, vcc_lo, v255 ; encoding: [0x7b,0x00,0x16,0xca,0x6a,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_hi :: v_dual_min_num_f32 v6, vcc_hi, v255 +// GFX12: v_dual_mov_b32 v255, exec_hi :: v_dual_min_num_f32 v6, vcc_hi, v255 ; encoding: [0x7f,0x00,0x16,0xca,0x6b,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_mov_b32 v255, exec_lo :: v_dual_min_num_f32 v6, ttmp15, v255 +// GFX12: v_dual_mov_b32 v255, exec_lo :: v_dual_min_num_f32 v6, ttmp15, v255 ; encoding: [0x7e,0x00,0x16,0xca,0x7b,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, m0 :: v_dual_min_num_f32 v6, m0, v255 +// GFX12: v_dual_mov_b32 v255, m0 :: v_dual_min_num_f32 v6, m0, v255 ; encoding: [0x7d,0x00,0x16,0xca,0x7d,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_hi :: v_dual_min_num_f32 v6, exec_lo, v255 +// GFX12: v_dual_mov_b32 v255, vcc_hi :: v_dual_min_num_f32 v6, exec_lo, v255 ; encoding: [0x6b,0x00,0x16,0xca,0x7e,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_lo :: v_dual_min_num_f32 v6, exec_hi, v255 +// GFX12: v_dual_mov_b32 v255, vcc_lo :: v_dual_min_num_f32 v6, exec_hi, v255 ; encoding: [0x6a,0x00,0x16,0xca,0x7f,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0xaf123456 :: v_dual_min_num_f32 v6, null, v255 +// GFX12: v_dual_mov_b32 v255, 0xaf123456 :: v_dual_min_num_f32 v6, null, v255 ; encoding: [0xff,0x00,0x16,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, src_scc :: v_dual_min_num_f32 v6, -1, v255 +// GFX12: v_dual_mov_b32 v255, src_scc :: v_dual_min_num_f32 v6, -1, v255 ; encoding: [0xfd,0x00,0x16,0xca,0xc1,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0.5 :: v_dual_min_num_f32 v6, 0.5, v3 +// GFX12: v_dual_mov_b32 v255, 0.5 :: v_dual_min_num_f32 v6, 0.5, v3 ; encoding: [0xf0,0x00,0x16,0xca,0xf0,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, -1 :: v_dual_min_num_f32 v6, src_scc, v4 +// GFX12: v_dual_mov_b32 v255, -1 :: 
v_dual_min_num_f32 v6, src_scc, v4 ; encoding: [0xc1,0x00,0x16,0xca,0xfd,0x08,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v6, null :: v_dual_min_num_f32 v255, 0xaf123456, v5 +// GFX12: v_dual_mov_b32 v6, null :: v_dual_min_num_f32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x00,0x16,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v4 :: v_dual_mov_b32 v6, v1 +// GFX12: v_dual_mov_b32 v255, v4 :: v_dual_mov_b32 v6, v1 ; encoding: [0x04,0x01,0x10,0xca,0x01,0x01,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v1 :: v_dual_mov_b32 v6, v255 +// GFX12: v_dual_mov_b32 v255, v1 :: v_dual_mov_b32 v6, v255 ; encoding: [0x01,0x01,0x10,0xca,0xff,0x01,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v255 :: v_dual_mov_b32 v6, v2 +// GFX12: v_dual_mov_b32 v255, v255 :: v_dual_mov_b32 v6, v2 ; encoding: [0xff,0x01,0x10,0xca,0x02,0x01,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v2 :: v_dual_mov_b32 v6, v3 +// GFX12: v_dual_mov_b32 v255, v2 :: v_dual_mov_b32 v6, v3 ; encoding: [0x02,0x01,0x10,0xca,0x03,0x01,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v3 :: v_dual_mov_b32 v6, v4 +// GFX12: v_dual_mov_b32 v255, v3 :: v_dual_mov_b32 v6, v4 ; encoding: [0x03,0x01,0x10,0xca,0x04,0x01,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s105 :: v_dual_mov_b32 v6, s1 +// GFX12: v_dual_mov_b32 v255, s105 :: v_dual_mov_b32 v6, s1 ; encoding: [0x69,0x00,0x10,0xca,0x01,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s1 :: v_dual_mov_b32 v6, s105 +// GFX12: v_dual_mov_b32 v255, s1 :: v_dual_mov_b32 v6, s105 ; encoding: 
[0x01,0x00,0x10,0xca,0x69,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, ttmp15 :: v_dual_mov_b32 v6, vcc_lo +// GFX12: v_dual_mov_b32 v255, ttmp15 :: v_dual_mov_b32 v6, vcc_lo ; encoding: [0x7b,0x00,0x10,0xca,0x6a,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_hi :: v_dual_mov_b32 v6, vcc_hi +// GFX12: v_dual_mov_b32 v255, exec_hi :: v_dual_mov_b32 v6, vcc_hi ; encoding: [0x7f,0x00,0x10,0xca,0x6b,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_lo :: v_dual_mov_b32 v6, ttmp15 +// GFX12: v_dual_mov_b32 v255, exec_lo :: v_dual_mov_b32 v6, ttmp15 ; encoding: [0x7e,0x00,0x10,0xca,0x7b,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, m0 :: v_dual_mov_b32 v6, m0 +// GFX12: v_dual_mov_b32 v255, m0 :: v_dual_mov_b32 v6, m0 ; encoding: [0x7d,0x00,0x10,0xca,0x7d,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_hi :: v_dual_mov_b32 v6, exec_lo +// GFX12: v_dual_mov_b32 v255, vcc_hi :: v_dual_mov_b32 v6, exec_lo ; encoding: [0x6b,0x00,0x10,0xca,0x7e,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_lo :: v_dual_mov_b32 v6, exec_hi +// GFX12: v_dual_mov_b32 v255, vcc_lo :: v_dual_mov_b32 v6, exec_hi ; encoding: [0x6a,0x00,0x10,0xca,0x7f,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0xaf123456 :: v_dual_mov_b32 v6, null +// GFX12: v_dual_mov_b32 v255, 0xaf123456 :: v_dual_mov_b32 v6, null ; encoding: [0xff,0x00,0x10,0xca,0x7c,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, src_scc :: v_dual_mov_b32 v6, -1 +// GFX12: v_dual_mov_b32 v255, src_scc :: v_dual_mov_b32 
v6, -1 ; encoding: [0xfd,0x00,0x10,0xca,0xc1,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0.5 :: v_dual_mov_b32 v6, 0.5 +// GFX12: v_dual_mov_b32 v255, 0.5 :: v_dual_mov_b32 v6, 0.5 ; encoding: [0xf0,0x00,0x10,0xca,0xf0,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, -1 :: v_dual_mov_b32 v6, src_scc +// GFX12: v_dual_mov_b32 v255, -1 :: v_dual_mov_b32 v6, src_scc ; encoding: [0xc1,0x00,0x10,0xca,0xfd,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v6, null :: v_dual_mov_b32 v255, 0xaf123456 +// GFX12: v_dual_mov_b32 v6, null :: v_dual_mov_b32 v255, 0xaf123456 ; encoding: [0x7c,0x00,0x10,0xca,0xff,0x00,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v4 :: v_dual_mul_dx9_zero_f32 v6, v1, v255 +// GFX12: v_dual_mov_b32 v255, v4 :: v_dual_mul_dx9_zero_f32 v6, v1, v255 ; encoding: [0x04,0x01,0x0e,0xca,0x01,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v1 :: v_dual_mul_dx9_zero_f32 v6, v255, v255 +// GFX12: v_dual_mov_b32 v255, v1 :: v_dual_mul_dx9_zero_f32 v6, v255, v255 ; encoding: [0x01,0x01,0x0e,0xca,0xff,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v255 :: v_dual_mul_dx9_zero_f32 v6, v2, v255 +// GFX12: v_dual_mov_b32 v255, v255 :: v_dual_mul_dx9_zero_f32 v6, v2, v255 ; encoding: [0xff,0x01,0x0e,0xca,0x02,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v2 :: v_dual_mul_dx9_zero_f32 v6, v3, v255 +// GFX12: v_dual_mov_b32 v255, v2 :: v_dual_mul_dx9_zero_f32 v6, v3, v255 ; encoding: [0x02,0x01,0x0e,0xca,0x03,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v3 :: 
v_dual_mul_dx9_zero_f32 v6, v4, v255 +// GFX12: v_dual_mov_b32 v255, v3 :: v_dual_mul_dx9_zero_f32 v6, v4, v255 ; encoding: [0x03,0x01,0x0e,0xca,0x04,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s105 :: v_dual_mul_dx9_zero_f32 v6, s1, v255 +// GFX12: v_dual_mov_b32 v255, s105 :: v_dual_mul_dx9_zero_f32 v6, s1, v255 ; encoding: [0x69,0x00,0x0e,0xca,0x01,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s1 :: v_dual_mul_dx9_zero_f32 v6, s105, v255 +// GFX12: v_dual_mov_b32 v255, s1 :: v_dual_mul_dx9_zero_f32 v6, s105, v255 ; encoding: [0x01,0x00,0x0e,0xca,0x69,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, ttmp15 :: v_dual_mul_dx9_zero_f32 v6, vcc_lo, v255 +// GFX12: v_dual_mov_b32 v255, ttmp15 :: v_dual_mul_dx9_zero_f32 v6, vcc_lo, v255 ; encoding: [0x7b,0x00,0x0e,0xca,0x6a,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_hi :: v_dual_mul_dx9_zero_f32 v6, vcc_hi, v255 +// GFX12: v_dual_mov_b32 v255, exec_hi :: v_dual_mul_dx9_zero_f32 v6, vcc_hi, v255 ; encoding: [0x7f,0x00,0x0e,0xca,0x6b,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_lo :: v_dual_mul_dx9_zero_f32 v6, ttmp15, v255 +// GFX12: v_dual_mov_b32 v255, exec_lo :: v_dual_mul_dx9_zero_f32 v6, ttmp15, v255 ; encoding: [0x7e,0x00,0x0e,0xca,0x7b,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, m0 :: v_dual_mul_dx9_zero_f32 v6, m0, v255 +// GFX12: v_dual_mov_b32 v255, m0 :: v_dual_mul_dx9_zero_f32 v6, m0, v255 ; encoding: [0x7d,0x00,0x0e,0xca,0x7d,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_hi :: v_dual_mul_dx9_zero_f32 v6, exec_lo, v255 +// GFX12: v_dual_mov_b32 v255, vcc_hi 
:: v_dual_mul_dx9_zero_f32 v6, exec_lo, v255 ; encoding: [0x6b,0x00,0x0e,0xca,0x7e,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_lo :: v_dual_mul_dx9_zero_f32 v6, exec_hi, v255 +// GFX12: v_dual_mov_b32 v255, vcc_lo :: v_dual_mul_dx9_zero_f32 v6, exec_hi, v255 ; encoding: [0x6a,0x00,0x0e,0xca,0x7f,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, null, v255 +// GFX12: v_dual_mov_b32 v255, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, null, v255 ; encoding: [0xff,0x00,0x0e,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, src_scc :: v_dual_mul_dx9_zero_f32 v6, -1, v255 +// GFX12: v_dual_mov_b32 v255, src_scc :: v_dual_mul_dx9_zero_f32 v6, -1, v255 ; encoding: [0xfd,0x00,0x0e,0xca,0xc1,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0.5 :: v_dual_mul_dx9_zero_f32 v6, 0.5, v3 +// GFX12: v_dual_mov_b32 v255, 0.5 :: v_dual_mul_dx9_zero_f32 v6, 0.5, v3 ; encoding: [0xf0,0x00,0x0e,0xca,0xf0,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, -1 :: v_dual_mul_dx9_zero_f32 v6, src_scc, v4 +// GFX12: v_dual_mov_b32 v255, -1 :: v_dual_mul_dx9_zero_f32 v6, src_scc, v4 ; encoding: [0xc1,0x00,0x0e,0xca,0xfd,0x08,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v6, null :: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v5 +// GFX12: v_dual_mov_b32 v6, null :: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x00,0x0e,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v4 :: v_dual_mul_f32 v6, v1, v255 +// GFX12: v_dual_mov_b32 v255, v4 :: v_dual_mul_f32 v6, v1, v255 ; 
encoding: [0x04,0x01,0x06,0xca,0x01,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v1 :: v_dual_mul_f32 v6, v255, v255 +// GFX12: v_dual_mov_b32 v255, v1 :: v_dual_mul_f32 v6, v255, v255 ; encoding: [0x01,0x01,0x06,0xca,0xff,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v255 :: v_dual_mul_f32 v6, v2, v255 +// GFX12: v_dual_mov_b32 v255, v255 :: v_dual_mul_f32 v6, v2, v255 ; encoding: [0xff,0x01,0x06,0xca,0x02,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v2 :: v_dual_mul_f32 v6, v3, v255 +// GFX12: v_dual_mov_b32 v255, v2 :: v_dual_mul_f32 v6, v3, v255 ; encoding: [0x02,0x01,0x06,0xca,0x03,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v3 :: v_dual_mul_f32 v6, v4, v255 +// GFX12: v_dual_mov_b32 v255, v3 :: v_dual_mul_f32 v6, v4, v255 ; encoding: [0x03,0x01,0x06,0xca,0x04,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s105 :: v_dual_mul_f32 v6, s1, v255 +// GFX12: v_dual_mov_b32 v255, s105 :: v_dual_mul_f32 v6, s1, v255 ; encoding: [0x69,0x00,0x06,0xca,0x01,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s1 :: v_dual_mul_f32 v6, s105, v255 +// GFX12: v_dual_mov_b32 v255, s1 :: v_dual_mul_f32 v6, s105, v255 ; encoding: [0x01,0x00,0x06,0xca,0x69,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, ttmp15 :: v_dual_mul_f32 v6, vcc_lo, v255 +// GFX12: v_dual_mov_b32 v255, ttmp15 :: v_dual_mul_f32 v6, vcc_lo, v255 ; encoding: [0x7b,0x00,0x06,0xca,0x6a,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_hi :: v_dual_mul_f32 v6, vcc_hi, v255 +// GFX12: v_dual_mov_b32 v255, exec_hi :: 
v_dual_mul_f32 v6, vcc_hi, v255 ; encoding: [0x7f,0x00,0x06,0xca,0x6b,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_lo :: v_dual_mul_f32 v6, ttmp15, v255 +// GFX12: v_dual_mov_b32 v255, exec_lo :: v_dual_mul_f32 v6, ttmp15, v255 ; encoding: [0x7e,0x00,0x06,0xca,0x7b,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, m0 :: v_dual_mul_f32 v6, m0, v255 +// GFX12: v_dual_mov_b32 v255, m0 :: v_dual_mul_f32 v6, m0, v255 ; encoding: [0x7d,0x00,0x06,0xca,0x7d,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_hi :: v_dual_mul_f32 v6, exec_lo, v255 +// GFX12: v_dual_mov_b32 v255, vcc_hi :: v_dual_mul_f32 v6, exec_lo, v255 ; encoding: [0x6b,0x00,0x06,0xca,0x7e,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_lo :: v_dual_mul_f32 v6, exec_hi, v255 +// GFX12: v_dual_mov_b32 v255, vcc_lo :: v_dual_mul_f32 v6, exec_hi, v255 ; encoding: [0x6a,0x00,0x06,0xca,0x7f,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0xaf123456 :: v_dual_mul_f32 v6, null, v255 +// GFX12: v_dual_mov_b32 v255, 0xaf123456 :: v_dual_mul_f32 v6, null, v255 ; encoding: [0xff,0x00,0x06,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, src_scc :: v_dual_mul_f32 v6, -1, v255 +// GFX12: v_dual_mov_b32 v255, src_scc :: v_dual_mul_f32 v6, -1, v255 ; encoding: [0xfd,0x00,0x06,0xca,0xc1,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0.5 :: v_dual_mul_f32 v6, 0.5, v3 +// GFX12: v_dual_mov_b32 v255, 0.5 :: v_dual_mul_f32 v6, 0.5, v3 ; encoding: [0xf0,0x00,0x06,0xca,0xf0,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_mov_b32 v255, -1 :: v_dual_mul_f32 v6, src_scc, v4 +// GFX12: v_dual_mov_b32 v255, -1 :: v_dual_mul_f32 v6, src_scc, v4 ; encoding: [0xc1,0x00,0x06,0xca,0xfd,0x08,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v6, null :: v_dual_mul_f32 v255, 0xaf123456, v5 +// GFX12: v_dual_mov_b32 v6, null :: v_dual_mul_f32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x00,0x06,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v4 :: v_dual_sub_f32 v6, v1, v255 +// GFX12: v_dual_mov_b32 v255, v4 :: v_dual_sub_f32 v6, v1, v255 ; encoding: [0x04,0x01,0x0a,0xca,0x01,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v1 :: v_dual_sub_f32 v6, v255, v255 +// GFX12: v_dual_mov_b32 v255, v1 :: v_dual_sub_f32 v6, v255, v255 ; encoding: [0x01,0x01,0x0a,0xca,0xff,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v255 :: v_dual_sub_f32 v6, v2, v255 +// GFX12: v_dual_mov_b32 v255, v255 :: v_dual_sub_f32 v6, v2, v255 ; encoding: [0xff,0x01,0x0a,0xca,0x02,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v2 :: v_dual_sub_f32 v6, v3, v255 +// GFX12: v_dual_mov_b32 v255, v2 :: v_dual_sub_f32 v6, v3, v255 ; encoding: [0x02,0x01,0x0a,0xca,0x03,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v3 :: v_dual_sub_f32 v6, v4, v255 +// GFX12: v_dual_mov_b32 v255, v3 :: v_dual_sub_f32 v6, v4, v255 ; encoding: [0x03,0x01,0x0a,0xca,0x04,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s105 :: v_dual_sub_f32 v6, s1, v255 +// GFX12: v_dual_mov_b32 v255, s105 :: v_dual_sub_f32 v6, s1, v255 ; encoding: [0x69,0x00,0x0a,0xca,0x01,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_mov_b32 v255, s1 :: v_dual_sub_f32 v6, s105, v255 +// GFX12: v_dual_mov_b32 v255, s1 :: v_dual_sub_f32 v6, s105, v255 ; encoding: [0x01,0x00,0x0a,0xca,0x69,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, ttmp15 :: v_dual_sub_f32 v6, vcc_lo, v255 +// GFX12: v_dual_mov_b32 v255, ttmp15 :: v_dual_sub_f32 v6, vcc_lo, v255 ; encoding: [0x7b,0x00,0x0a,0xca,0x6a,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_hi :: v_dual_sub_f32 v6, vcc_hi, v255 +// GFX12: v_dual_mov_b32 v255, exec_hi :: v_dual_sub_f32 v6, vcc_hi, v255 ; encoding: [0x7f,0x00,0x0a,0xca,0x6b,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_lo :: v_dual_sub_f32 v6, ttmp15, v255 +// GFX12: v_dual_mov_b32 v255, exec_lo :: v_dual_sub_f32 v6, ttmp15, v255 ; encoding: [0x7e,0x00,0x0a,0xca,0x7b,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, m0 :: v_dual_sub_f32 v6, m0, v255 +// GFX12: v_dual_mov_b32 v255, m0 :: v_dual_sub_f32 v6, m0, v255 ; encoding: [0x7d,0x00,0x0a,0xca,0x7d,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_hi :: v_dual_sub_f32 v6, exec_lo, v255 +// GFX12: v_dual_mov_b32 v255, vcc_hi :: v_dual_sub_f32 v6, exec_lo, v255 ; encoding: [0x6b,0x00,0x0a,0xca,0x7e,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_lo :: v_dual_sub_f32 v6, exec_hi, v255 +// GFX12: v_dual_mov_b32 v255, vcc_lo :: v_dual_sub_f32 v6, exec_hi, v255 ; encoding: [0x6a,0x00,0x0a,0xca,0x7f,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0xaf123456 :: v_dual_sub_f32 v6, null, v255 +// GFX12: v_dual_mov_b32 v255, 0xaf123456 :: v_dual_sub_f32 v6, null, v255 ; 
encoding: [0xff,0x00,0x0a,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, src_scc :: v_dual_sub_f32 v6, -1, v255 +// GFX12: v_dual_mov_b32 v255, src_scc :: v_dual_sub_f32 v6, -1, v255 ; encoding: [0xfd,0x00,0x0a,0xca,0xc1,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0.5 :: v_dual_sub_f32 v6, 0.5, v3 +// GFX12: v_dual_mov_b32 v255, 0.5 :: v_dual_sub_f32 v6, 0.5, v3 ; encoding: [0xf0,0x00,0x0a,0xca,0xf0,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, -1 :: v_dual_sub_f32 v6, src_scc, v4 +// GFX12: v_dual_mov_b32 v255, -1 :: v_dual_sub_f32 v6, src_scc, v4 ; encoding: [0xc1,0x00,0x0a,0xca,0xfd,0x08,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v6, null :: v_dual_sub_f32 v255, 0xaf123456, v5 +// GFX12: v_dual_mov_b32 v6, null :: v_dual_sub_f32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x00,0x0a,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v4 :: v_dual_subrev_f32 v6, v1, v255 +// GFX12: v_dual_mov_b32 v255, v4 :: v_dual_subrev_f32 v6, v1, v255 ; encoding: [0x04,0x01,0x0c,0xca,0x01,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v1 :: v_dual_subrev_f32 v6, v255, v255 +// GFX12: v_dual_mov_b32 v255, v1 :: v_dual_subrev_f32 v6, v255, v255 ; encoding: [0x01,0x01,0x0c,0xca,0xff,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v255 :: v_dual_subrev_f32 v6, v2, v255 +// GFX12: v_dual_mov_b32 v255, v255 :: v_dual_subrev_f32 v6, v2, v255 ; encoding: [0xff,0x01,0x0c,0xca,0x02,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v2 :: v_dual_subrev_f32 v6, 
v3, v255 +// GFX12: v_dual_mov_b32 v255, v2 :: v_dual_subrev_f32 v6, v3, v255 ; encoding: [0x02,0x01,0x0c,0xca,0x03,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v3 :: v_dual_subrev_f32 v6, v4, v255 +// GFX12: v_dual_mov_b32 v255, v3 :: v_dual_subrev_f32 v6, v4, v255 ; encoding: [0x03,0x01,0x0c,0xca,0x04,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s105 :: v_dual_subrev_f32 v6, s1, v255 +// GFX12: v_dual_mov_b32 v255, s105 :: v_dual_subrev_f32 v6, s1, v255 ; encoding: [0x69,0x00,0x0c,0xca,0x01,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s1 :: v_dual_subrev_f32 v6, s105, v255 +// GFX12: v_dual_mov_b32 v255, s1 :: v_dual_subrev_f32 v6, s105, v255 ; encoding: [0x01,0x00,0x0c,0xca,0x69,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, ttmp15 :: v_dual_subrev_f32 v6, vcc_lo, v255 +// GFX12: v_dual_mov_b32 v255, ttmp15 :: v_dual_subrev_f32 v6, vcc_lo, v255 ; encoding: [0x7b,0x00,0x0c,0xca,0x6a,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_hi :: v_dual_subrev_f32 v6, vcc_hi, v255 +// GFX12: v_dual_mov_b32 v255, exec_hi :: v_dual_subrev_f32 v6, vcc_hi, v255 ; encoding: [0x7f,0x00,0x0c,0xca,0x6b,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_lo :: v_dual_subrev_f32 v6, ttmp15, v255 +// GFX12: v_dual_mov_b32 v255, exec_lo :: v_dual_subrev_f32 v6, ttmp15, v255 ; encoding: [0x7e,0x00,0x0c,0xca,0x7b,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, m0 :: v_dual_subrev_f32 v6, m0, v255 +// GFX12: v_dual_mov_b32 v255, m0 :: v_dual_subrev_f32 v6, m0, v255 ; encoding: [0x7d,0x00,0x0c,0xca,0x7d,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_hi :: v_dual_subrev_f32 v6, exec_lo, v255 +// GFX12: v_dual_mov_b32 v255, vcc_hi :: v_dual_subrev_f32 v6, exec_lo, v255 ; encoding: [0x6b,0x00,0x0c,0xca,0x7e,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_lo :: v_dual_subrev_f32 v6, exec_hi, v255 +// GFX12: v_dual_mov_b32 v255, vcc_lo :: v_dual_subrev_f32 v6, exec_hi, v255 ; encoding: [0x6a,0x00,0x0c,0xca,0x7f,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0xaf123456 :: v_dual_subrev_f32 v6, null, v255 +// GFX12: v_dual_mov_b32 v255, 0xaf123456 :: v_dual_subrev_f32 v6, null, v255 ; encoding: [0xff,0x00,0x0c,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, src_scc :: v_dual_subrev_f32 v6, -1, v255 +// GFX12: v_dual_mov_b32 v255, src_scc :: v_dual_subrev_f32 v6, -1, v255 ; encoding: [0xfd,0x00,0x0c,0xca,0xc1,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0.5 :: v_dual_subrev_f32 v6, 0.5, v3 +// GFX12: v_dual_mov_b32 v255, 0.5 :: v_dual_subrev_f32 v6, 0.5, v3 ; encoding: [0xf0,0x00,0x0c,0xca,0xf0,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, -1 :: v_dual_subrev_f32 v6, src_scc, v4 +// GFX12: v_dual_mov_b32 v255, -1 :: v_dual_subrev_f32 v6, src_scc, v4 ; encoding: [0xc1,0x00,0x0c,0xca,0xfd,0x08,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v6, null :: v_dual_subrev_f32 v255, 0xaf123456, v5 +// GFX12: v_dual_mov_b32 v6, null :: v_dual_subrev_f32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x00,0x0c,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_add_f32 v6, v1, 
v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_add_f32 v6, v1, v3 ; encoding: [0x04,0x05,0xc8,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_add_f32 v6, v255, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_add_f32 v6, v255, v3 ; encoding: [0x01,0x05,0xc8,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_add_f32 v6, v2, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_add_f32 v6, v2, v3 ; encoding: [0xff,0x05,0xc8,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_add_f32 v6, v3, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_add_f32 v6, v3, v3 ; encoding: [0x02,0x05,0xc8,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_add_f32 v6, v4, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_add_f32 v6, v4, v3 ; encoding: [0x03,0x05,0xc8,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_add_f32 v6, s1, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_add_f32 v6, s1, v3 ; encoding: [0x69,0x04,0xc8,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_add_f32 v6, s105, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_add_f32 v6, s105, v3 ; encoding: [0x01,0x04,0xc8,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_add_f32 v6, vcc_lo, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_add_f32 v6, vcc_lo, v3 ; 
encoding: [0x7b,0x04,0xc8,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_add_f32 v6, vcc_hi, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_add_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xc8,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_add_f32 v6, ttmp15, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_add_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xc8,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_add_f32 v6, m0, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_add_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0xc8,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_add_f32 v6, exec_lo, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_add_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xc8,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_add_f32 v6, exec_hi, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_add_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xc8,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_add_f32 v6, null, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_add_f32 v6, null, v3 ; encoding: [0xff,0x04,0xc8,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_add_f32 v6, -1, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_add_f32 v6, -1, 
v3 ; encoding: [0xfd,0x04,0xc8,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_add_f32 v6, 0.5, v2 +// GFX12: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_add_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xc8,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_add_f32 v6, src_scc, v5 +// GFX12: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_add_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xc8,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_add_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_add_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xc8,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_add_nc_u32 v6, v1, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_add_nc_u32 v6, v1, v3 ; encoding: [0x04,0x05,0xe0,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_add_nc_u32 v6, v255, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_add_nc_u32 v6, v255, v3 ; encoding: [0x01,0x05,0xe0,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_add_nc_u32 v6, v2, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_add_nc_u32 v6, v2, v3 ; encoding: [0xff,0x05,0xe0,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_add_nc_u32 v6, v3, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_add_nc_u32 v6, v3, v3 ; encoding: 
[0x02,0x05,0xe0,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_add_nc_u32 v6, v4, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_add_nc_u32 v6, v4, v3 ; encoding: [0x03,0x05,0xe0,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_add_nc_u32 v6, s1, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_add_nc_u32 v6, s1, v3 ; encoding: [0x69,0x04,0xe0,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_add_nc_u32 v6, s105, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_add_nc_u32 v6, s105, v3 ; encoding: [0x01,0x04,0xe0,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v6, vcc_lo, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xe0,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v6, vcc_hi, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xe0,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v6, ttmp15, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xe0,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_add_nc_u32 v6, m0, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_add_nc_u32 v6, m0, v3 ; encoding: 
[0x7d,0x04,0xe0,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v6, exec_lo, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xe0,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v6, exec_hi, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xe0,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_add_nc_u32 v6, null, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_add_nc_u32 v6, null, v3 ; encoding: [0xff,0x04,0xe0,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_add_nc_u32 v6, -1, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_add_nc_u32 v6, -1, v3 ; encoding: [0xfd,0x04,0xe0,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_add_nc_u32 v6, 0.5, v2 +// GFX12: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_add_nc_u32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xe0,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_add_nc_u32 v6, src_scc, v5 +// GFX12: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_add_nc_u32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xe0,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_add_nc_u32 v255, 0xaf123456, v4 +// GFX12: v_dual_mul_dx9_zero_f32 v6, null, v5 :: 
v_dual_add_nc_u32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xe0,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_cndmask_b32 v6, v1, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_cndmask_b32 v6, v1, v3 ; encoding: [0x04,0x05,0xd2,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_cndmask_b32 v6, v255, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_cndmask_b32 v6, v255, v3 ; encoding: [0x01,0x05,0xd2,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_cndmask_b32 v6, v2, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_cndmask_b32 v6, v2, v3 ; encoding: [0xff,0x05,0xd2,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_cndmask_b32 v6, v3, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_cndmask_b32 v6, v3, v3 ; encoding: [0x02,0x05,0xd2,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_cndmask_b32 v6, v4, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_cndmask_b32 v6, v4, v3 ; encoding: [0x03,0x05,0xd2,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_cndmask_b32 v6, s105, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_cndmask_b32 v6, s105, v3 ; encoding: [0x69,0x04,0xd2,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s1, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: 
v_dual_cndmask_b32 v6, s1, v3 ; encoding: [0x01,0x04,0xd2,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v6, ttmp15, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0xd2,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v6, exec_hi, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0xd2,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v6, exec_lo, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0xd2,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_cndmask_b32 v6, m0, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_cndmask_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0xd2,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v6, vcc_hi, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0xd2,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v6, vcc_lo, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0xd2,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_cndmask_b32 v6, null, v3 +// GFX12: 
v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_cndmask_b32 v6, null, v3 ; encoding: [0xff,0x04,0xd2,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_cndmask_b32 v6, -1, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_cndmask_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0xd2,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_cndmask_b32 v6, 0.5, v2 +// GFX12: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_cndmask_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xd2,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_cndmask_b32 v6, src_scc, v5 +// GFX12: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_cndmask_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xd2,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_cndmask_b32 v255, 0xaf123456, v4 +// GFX12: v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_cndmask_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xd2,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_fmaak_f32 v6, v1, v3, 0xaf123456 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_fmaak_f32 v6, v1, v3, 0xaf123456 ; encoding: [0x04,0x05,0xc2,0xc9,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_fmaak_f32 v6, v255, v3, 0xaf123456 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_fmaak_f32 v6, v255, v3, 0xaf123456 ; encoding: [0x01,0x05,0xc2,0xc9,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_fmaak_f32 v6, v2, v3, 0xaf123456 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_fmaak_f32 v6, v2, v3, 0xaf123456 ; encoding: [0xff,0x05,0xc2,0xc9,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_fmaak_f32 v6, v3, v3, 0xaf123456 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_fmaak_f32 v6, v3, v3, 0xaf123456 ; encoding: [0x02,0x05,0xc2,0xc9,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_fmaak_f32 v6, v4, v3, 0xaf123456 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_fmaak_f32 v6, v4, v3, 0xaf123456 ; encoding: [0x03,0x05,0xc2,0xc9,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_fmaak_f32 v6, s105, v3, 0xaf123456 +// GFX12: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_fmaak_f32 v6, s105, v3, 0xaf123456 ; encoding: [0x69,0x04,0xc2,0xc9,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_fmaak_f32 v6, s1, v3, 0xaf123456 +// GFX12: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_fmaak_f32 v6, s1, v3, 0xaf123456 ; encoding: [0x01,0x04,0xc2,0xc9,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_fmaak_f32 v6, ttmp15, v3, 0xaf123456 +// GFX12: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_fmaak_f32 v6, ttmp15, v3, 0xaf123456 ; encoding: [0x7b,0x04,0xc2,0xc9,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 
exec_hi, v2 :: v_dual_fmaak_f32 v6, exec_hi, v3, 0xaf123456 +// GFX12: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_fmaak_f32 v6, exec_hi, v3, 0xaf123456 ; encoding: [0x7f,0x04,0xc2,0xc9,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_fmaak_f32 v6, exec_lo, v3, 0xaf123456 +// GFX12: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_fmaak_f32 v6, exec_lo, v3, 0xaf123456 ; encoding: [0x7e,0x04,0xc2,0xc9,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_fmaak_f32 v6, m0, v3, 0xaf123456 +// GFX12: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_fmaak_f32 v6, m0, v3, 0xaf123456 ; encoding: [0x7d,0x04,0xc2,0xc9,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_fmaak_f32 v6, vcc_hi, v3, 0xaf123456 +// GFX12: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_fmaak_f32 v6, vcc_hi, v3, 0xaf123456 ; encoding: [0x6b,0x04,0xc2,0xc9,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_fmaak_f32 v6, vcc_lo, v3, 0xaf123456 +// GFX12: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_fmaak_f32 v6, vcc_lo, v3, 0xaf123456 ; encoding: [0x6a,0x04,0xc2,0xc9,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_fmaak_f32 v6, null, v3, 0xaf123456 +// GFX12: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_fmaak_f32 v6, null, v3, 0xaf123456 ; encoding: [0xff,0x04,0xc2,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 
src_scc, v2 :: v_dual_fmaak_f32 v6, -1, v3, 0xaf123456 +// GFX12: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_fmaak_f32 v6, -1, v3, 0xaf123456 ; encoding: [0xfd,0x04,0xc2,0xc9,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_fmaak_f32 v6, 0.5, v2, 0xaf123456 +// GFX12: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_fmaak_f32 v6, 0.5, v2, 0xaf123456 ; encoding: [0xf0,0x06,0xc2,0xc9,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_fmaak_f32 v6, src_scc, v5, 0xaf123456 +// GFX12: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_fmaak_f32 v6, src_scc, v5, 0xaf123456 ; encoding: [0xc1,0x08,0xc2,0xc9,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_fmaak_f32 v255, 0xaf123456, v4, 0xaf123456 +// GFX12: v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_fmaak_f32 v255, 0xaf123456, v4, 0xaf123456 ; encoding: [0x7c,0x0a,0xc2,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_fmac_f32 v6, v1, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_fmac_f32 v6, v1, v3 ; encoding: [0x04,0x05,0xc0,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_fmac_f32 v6, v255, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_fmac_f32 v6, v255, v3 ; encoding: [0x01,0x05,0xc0,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_fmac_f32 v6, v2, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_fmac_f32 v6, v2, v3 ; encoding: 
[0xff,0x05,0xc0,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_fmac_f32 v6, v3, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_fmac_f32 v6, v3, v3 ; encoding: [0x02,0x05,0xc0,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_fmac_f32 v6, v4, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_fmac_f32 v6, v4, v3 ; encoding: [0x03,0x05,0xc0,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_fmac_f32 v6, s1, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_fmac_f32 v6, s1, v3 ; encoding: [0x69,0x04,0xc0,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_fmac_f32 v6, s105, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_fmac_f32 v6, s105, v3 ; encoding: [0x01,0x04,0xc0,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_fmac_f32 v6, vcc_lo, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_fmac_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xc0,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_fmac_f32 v6, vcc_hi, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_fmac_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xc0,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_fmac_f32 v6, ttmp15, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_fmac_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xc0,0xc9,0x7b,0x06,0x06,0xff] 
+// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_fmac_f32 v6, m0, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_fmac_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0xc0,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v6, exec_lo, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xc0,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v6, exec_hi, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xc0,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_fmac_f32 v6, null, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_fmac_f32 v6, null, v3 ; encoding: [0xff,0x04,0xc0,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_fmac_f32 v6, -1, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_fmac_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0xc0,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_fmac_f32 v6, 0.5, v2 +// GFX12: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_fmac_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xc0,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_fmac_f32 v6, src_scc, v5 +// GFX12: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_fmac_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xc0,0xc9,0xfd,0x0a,0x06,0xff] 
+// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_fmac_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_fmac_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xc0,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v4, v255 :: v_dual_fmamk_f32 v6, v1, 0xaf123456, v255 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v4, v255 :: v_dual_fmamk_f32 v6, v1, 0xaf123456, v255 ; encoding: [0x04,0xff,0xc5,0xc9,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v1, v255 :: v_dual_fmamk_f32 v6, v255, 0xaf123456, v255 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v1, v255 :: v_dual_fmamk_f32 v6, v255, 0xaf123456, v255 ; encoding: [0x01,0xff,0xc5,0xc9,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v255, v255 :: v_dual_fmamk_f32 v6, v2, 0xaf123456, v255 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v255, v255 :: v_dual_fmamk_f32 v6, v2, 0xaf123456, v255 ; encoding: [0xff,0xff,0xc5,0xc9,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v2, v255 :: v_dual_fmamk_f32 v6, v3, 0xaf123456, v255 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v2, v255 :: v_dual_fmamk_f32 v6, v3, 0xaf123456, v255 ; encoding: [0x02,0xff,0xc5,0xc9,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v3, v255 :: v_dual_fmamk_f32 v6, v4, 0xaf123456, v255 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v3, v255 :: v_dual_fmamk_f32 v6, v4, 0xaf123456, v255 ; encoding: [0x03,0xff,0xc5,0xc9,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction 
requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s105, v255 :: v_dual_fmamk_f32 v6, s105, 0xaf123456, v255 +// GFX12: v_dual_mul_dx9_zero_f32 v255, s105, v255 :: v_dual_fmamk_f32 v6, s105, 0xaf123456, v255 ; encoding: [0x69,0xfe,0xc5,0xc9,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s1, v255 :: v_dual_fmamk_f32 v6, s1, 0xaf123456, v255 +// GFX12: v_dual_mul_dx9_zero_f32 v255, s1, v255 :: v_dual_fmamk_f32 v6, s1, 0xaf123456, v255 ; encoding: [0x01,0xfe,0xc5,0xc9,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, ttmp15, v255 :: v_dual_fmamk_f32 v6, ttmp15, 0xaf123456, v255 +// GFX12: v_dual_mul_dx9_zero_f32 v255, ttmp15, v255 :: v_dual_fmamk_f32 v6, ttmp15, 0xaf123456, v255 ; encoding: [0x7b,0xfe,0xc5,0xc9,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_hi, v255 :: v_dual_fmamk_f32 v6, exec_hi, 0xaf123456, v255 +// GFX12: v_dual_mul_dx9_zero_f32 v255, exec_hi, v255 :: v_dual_fmamk_f32 v6, exec_hi, 0xaf123456, v255 ; encoding: [0x7f,0xfe,0xc5,0xc9,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_lo, v255 :: v_dual_fmamk_f32 v6, exec_lo, 0xaf123456, v255 +// GFX12: v_dual_mul_dx9_zero_f32 v255, exec_lo, v255 :: v_dual_fmamk_f32 v6, exec_lo, 0xaf123456, v255 ; encoding: [0x7e,0xfe,0xc5,0xc9,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, m0, v255 :: v_dual_fmamk_f32 v6, m0, 0xaf123456, v255 +// GFX12: v_dual_mul_dx9_zero_f32 v255, m0, v255 :: v_dual_fmamk_f32 v6, m0, 0xaf123456, v255 ; encoding: [0x7d,0xfe,0xc5,0xc9,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: 
error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_hi, v255 :: v_dual_fmamk_f32 v6, vcc_hi, 0xaf123456, v255 +// GFX12: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v255 :: v_dual_fmamk_f32 v6, vcc_hi, 0xaf123456, v255 ; encoding: [0x6b,0xfe,0xc5,0xc9,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_lo, v255 :: v_dual_fmamk_f32 v6, vcc_lo, 0xaf123456, v255 +// GFX12: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v255 :: v_dual_fmamk_f32 v6, vcc_lo, 0xaf123456, v255 ; encoding: [0x6a,0xfe,0xc5,0xc9,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, null, 0xaf123456, v255 +// GFX12: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, null, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xc5,0xc9,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, src_scc, v255 :: v_dual_fmamk_f32 v6, -1, 0xaf123456, v255 +// GFX12: v_dual_mul_dx9_zero_f32 v255, src_scc, v255 :: v_dual_fmamk_f32 v6, -1, 0xaf123456, v255 ; encoding: [0xfd,0xfe,0xc5,0xc9,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_fmamk_f32 v6, 0.5, 0xaf123456, v255 +// GFX12: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_fmamk_f32 v6, 0.5, 0xaf123456, v255 ; encoding: [0xf0,0x06,0xc4,0xc9,0xf0,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_fmamk_f32 v6, src_scc, 0xaf123456, v255 +// GFX12: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_fmamk_f32 v6, src_scc, 0xaf123456, v255 ; encoding: [0xc1,0x08,0xc4,0xc9,0xfd,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// 
W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v4 +// GFX12: v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xc4,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_lshlrev_b32 v6, v1, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_lshlrev_b32 v6, v1, v3 ; encoding: [0x04,0x05,0xe2,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_lshlrev_b32 v6, v255, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_lshlrev_b32 v6, v255, v3 ; encoding: [0x01,0x05,0xe2,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_lshlrev_b32 v6, v2, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_lshlrev_b32 v6, v2, v3 ; encoding: [0xff,0x05,0xe2,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_lshlrev_b32 v6, v3, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_lshlrev_b32 v6, v3, v3 ; encoding: [0x02,0x05,0xe2,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_lshlrev_b32 v6, v4, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_lshlrev_b32 v6, v4, v3 ; encoding: [0x03,0x05,0xe2,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_lshlrev_b32 v6, s1, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_lshlrev_b32 v6, s1, v3 ; encoding: 
[0x69,0x04,0xe2,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_lshlrev_b32 v6, s105, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_lshlrev_b32 v6, s105, v3 ; encoding: [0x01,0x04,0xe2,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v6, vcc_lo, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xe2,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v6, vcc_hi, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xe2,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v6, ttmp15, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xe2,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_lshlrev_b32 v6, m0, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_lshlrev_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0xe2,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v6, exec_lo, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xe2,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v6, exec_hi, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: 
v_dual_lshlrev_b32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xe2,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_lshlrev_b32 v6, null, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_lshlrev_b32 v6, null, v3 ; encoding: [0xff,0x04,0xe2,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v6, -1, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0xe2,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v6, 0.5, v2 +// GFX12: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xe2,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_lshlrev_b32 v6, src_scc, v5 +// GFX12: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_lshlrev_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xe2,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_lshlrev_b32 v255, 0xaf123456, v4 +// GFX12: v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_lshlrev_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xe2,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_max_num_f32 v6, v1, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_max_num_f32 v6, v1, v3 ; encoding: [0x04,0x05,0xd4,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_max_num_f32 v6, v255, v3 
+// GFX12: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_max_num_f32 v6, v255, v3 ; encoding: [0x01,0x05,0xd4,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_max_num_f32 v6, v2, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_max_num_f32 v6, v2, v3 ; encoding: [0xff,0x05,0xd4,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_max_num_f32 v6, v3, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_max_num_f32 v6, v3, v3 ; encoding: [0x02,0x05,0xd4,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_max_num_f32 v6, v4, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_max_num_f32 v6, v4, v3 ; encoding: [0x03,0x05,0xd4,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_max_num_f32 v6, s1, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_max_num_f32 v6, s1, v3 ; encoding: [0x69,0x04,0xd4,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_max_num_f32 v6, s105, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_max_num_f32 v6, s105, v3 ; encoding: [0x01,0x04,0xd4,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_max_num_f32 v6, vcc_lo, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_max_num_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xd4,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_max_num_f32 v6, vcc_hi, v3 +// GFX12: 
v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_max_num_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xd4,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_max_num_f32 v6, ttmp15, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_max_num_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xd4,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_max_num_f32 v6, m0, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_max_num_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0xd4,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v6, exec_lo, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xd4,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v6, exec_hi, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xd4,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_max_num_f32 v6, null, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_max_num_f32 v6, null, v3 ; encoding: [0xff,0x04,0xd4,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_max_num_f32 v6, -1, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_max_num_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0xd4,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 
v255, 0.5, v3 :: v_dual_max_num_f32 v6, 0.5, v2 +// GFX12: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_max_num_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xd4,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_max_num_f32 v6, src_scc, v5 +// GFX12: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_max_num_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xd4,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_max_num_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_max_num_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xd4,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_min_num_f32 v6, v1, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_min_num_f32 v6, v1, v3 ; encoding: [0x04,0x05,0xd6,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_min_num_f32 v6, v255, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_min_num_f32 v6, v255, v3 ; encoding: [0x01,0x05,0xd6,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_min_num_f32 v6, v2, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_min_num_f32 v6, v2, v3 ; encoding: [0xff,0x05,0xd6,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_min_num_f32 v6, v3, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_min_num_f32 v6, v3, v3 ; encoding: [0x02,0x05,0xd6,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_min_num_f32 v6, v4, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_min_num_f32 v6, v4, v3 ; encoding: [0x03,0x05,0xd6,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_min_num_f32 v6, s1, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_min_num_f32 v6, s1, v3 ; encoding: [0x69,0x04,0xd6,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_min_num_f32 v6, s105, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_min_num_f32 v6, s105, v3 ; encoding: [0x01,0x04,0xd6,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_min_num_f32 v6, vcc_lo, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_min_num_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xd6,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_min_num_f32 v6, vcc_hi, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_min_num_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xd6,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_min_num_f32 v6, ttmp15, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_min_num_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xd6,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_min_num_f32 v6, m0, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_min_num_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0xd6,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v6, exec_lo, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xd6,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v6, exec_hi, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xd6,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_min_num_f32 v6, null, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_min_num_f32 v6, null, v3 ; encoding: [0xff,0x04,0xd6,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_min_num_f32 v6, -1, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_min_num_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0xd6,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_min_num_f32 v6, 0.5, v2 +// GFX12: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_min_num_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xd6,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_min_num_f32 v6, src_scc, v5 +// GFX12: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_min_num_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xd6,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_min_num_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_min_num_f32 v255, 0xaf123456, v4 ; encoding: 
[0x7c,0x0a,0xd6,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v4, v255 :: v_dual_mov_b32 v6, v1 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v4, v255 :: v_dual_mov_b32 v6, v1 ; encoding: [0x04,0xff,0xd1,0xc9,0x01,0x01,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v1, v255 :: v_dual_mov_b32 v6, v255 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v1, v255 :: v_dual_mov_b32 v6, v255 ; encoding: [0x01,0xff,0xd1,0xc9,0xff,0x01,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v255, v255 :: v_dual_mov_b32 v6, v2 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v255, v255 :: v_dual_mov_b32 v6, v2 ; encoding: [0xff,0xff,0xd1,0xc9,0x02,0x01,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v2, v255 :: v_dual_mov_b32 v6, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v2, v255 :: v_dual_mov_b32 v6, v3 ; encoding: [0x02,0xff,0xd1,0xc9,0x03,0x01,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v3, v255 :: v_dual_mov_b32 v6, v4 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v3, v255 :: v_dual_mov_b32 v6, v4 ; encoding: [0x03,0xff,0xd1,0xc9,0x04,0x01,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s105, v255 :: v_dual_mov_b32 v6, s1 +// GFX12: v_dual_mul_dx9_zero_f32 v255, s105, v255 :: v_dual_mov_b32 v6, s1 ; encoding: [0x69,0xfe,0xd1,0xc9,0x01,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s1, v255 :: v_dual_mov_b32 v6, s105 +// GFX12: v_dual_mul_dx9_zero_f32 v255, s1, v255 :: v_dual_mov_b32 v6, s105 ; encoding: [0x01,0xfe,0xd1,0xc9,0x69,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, ttmp15, v255 :: v_dual_mov_b32 v6, vcc_lo +// GFX12: v_dual_mul_dx9_zero_f32 v255, ttmp15, v255 :: v_dual_mov_b32 v6, vcc_lo ; encoding: [0x7b,0xfe,0xd1,0xc9,0x6a,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_hi, v255 :: v_dual_mov_b32 v6, vcc_hi +// GFX12: v_dual_mul_dx9_zero_f32 v255, exec_hi, v255 :: v_dual_mov_b32 v6, vcc_hi ; encoding: [0x7f,0xfe,0xd1,0xc9,0x6b,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_lo, v255 :: v_dual_mov_b32 v6, ttmp15 +// GFX12: v_dual_mul_dx9_zero_f32 v255, exec_lo, v255 :: v_dual_mov_b32 v6, ttmp15 ; encoding: [0x7e,0xfe,0xd1,0xc9,0x7b,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, m0, v255 :: v_dual_mov_b32 v6, m0 +// GFX12: v_dual_mul_dx9_zero_f32 v255, m0, v255 :: v_dual_mov_b32 v6, m0 ; encoding: [0x7d,0xfe,0xd1,0xc9,0x7d,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_hi, v255 :: v_dual_mov_b32 v6, exec_lo +// GFX12: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v255 :: v_dual_mov_b32 v6, exec_lo ; encoding: [0x6b,0xfe,0xd1,0xc9,0x7e,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_lo, v255 :: v_dual_mov_b32 v6, exec_hi +// GFX12: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v255 :: v_dual_mov_b32 v6, exec_hi ; encoding: [0x6a,0xfe,0xd1,0xc9,0x7f,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v255 :: v_dual_mov_b32 v6, null +// GFX12: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v255 :: v_dual_mov_b32 v6, null ; encoding: [0xff,0xfe,0xd1,0xc9,0x7c,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_mul_dx9_zero_f32 v255, src_scc, v255 :: v_dual_mov_b32 v6, -1 +// GFX12: v_dual_mul_dx9_zero_f32 v255, src_scc, v255 :: v_dual_mov_b32 v6, -1 ; encoding: [0xfd,0xfe,0xd1,0xc9,0xc1,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_mov_b32 v6, 0.5 +// GFX12: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_mov_b32 v6, 0.5 ; encoding: [0xf0,0x06,0xd0,0xc9,0xf0,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_mov_b32 v6, src_scc +// GFX12: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_mov_b32 v6, src_scc ; encoding: [0xc1,0x08,0xd0,0xc9,0xfd,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_mov_b32 v255, 0xaf123456 +// GFX12: v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_mov_b32 v255, 0xaf123456 ; encoding: [0x7c,0x0a,0xd0,0xc9,0xff,0x00,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v6, v1, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v6, v1, v3 ; encoding: [0x04,0x05,0xce,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v6, v255, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v6, v255, v3 ; encoding: [0x01,0x05,0xce,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v6, v2, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v6, v2, v3 ; encoding: [0xff,0x05,0xce,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v6, v3, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v6, v3, v3 ; encoding: [0x02,0x05,0xce,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v6, v4, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v6, v4, v3 ; encoding: [0x03,0x05,0xce,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v6, s1, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v6, s1, v3 ; encoding: [0x69,0x04,0xce,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v6, s105, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v6, s105, v3 ; encoding: [0x01,0x04,0xce,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_lo, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xce,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_hi, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xce,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, ttmp15, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xce,0xc9,0x7b,0x06,0x06,0xff] 
+// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v6, m0, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0xce,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_lo, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xce,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_hi, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xce,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_mul_dx9_zero_f32 v6, null, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_mul_dx9_zero_f32 v6, null, v3 ; encoding: [0xff,0x04,0xce,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v6, -1, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0xce,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v6, 0.5, v2 +// GFX12: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xce,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v6, src_scc, v5 +// GFX12: v_dual_mul_dx9_zero_f32 
v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xce,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xce,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_mul_f32 v6, v1, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_mul_f32 v6, v1, v3 ; encoding: [0x04,0x05,0xc6,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_mul_f32 v6, v255, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_mul_f32 v6, v255, v3 ; encoding: [0x01,0x05,0xc6,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_mul_f32 v6, v2, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_mul_f32 v6, v2, v3 ; encoding: [0xff,0x05,0xc6,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_mul_f32 v6, v3, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_mul_f32 v6, v3, v3 ; encoding: [0x02,0x05,0xc6,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_mul_f32 v6, v4, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_mul_f32 v6, v4, v3 ; encoding: [0x03,0x05,0xc6,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_mul_f32 v6, s1, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_mul_f32 
v6, s1, v3 ; encoding: [0x69,0x04,0xc6,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_mul_f32 v6, s105, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_mul_f32 v6, s105, v3 ; encoding: [0x01,0x04,0xc6,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_mul_f32 v6, vcc_lo, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_mul_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xc6,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_mul_f32 v6, vcc_hi, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_mul_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xc6,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_mul_f32 v6, ttmp15, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_mul_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xc6,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_mul_f32 v6, m0, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_mul_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0xc6,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_mul_f32 v6, exec_lo, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_mul_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xc6,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_mul_f32 v6, exec_hi, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_mul_f32 v6, exec_hi, v3 ; encoding: 
[0x6a,0x04,0xc6,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_mul_f32 v6, null, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_mul_f32 v6, null, v3 ; encoding: [0xff,0x04,0xc6,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_mul_f32 v6, -1, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_mul_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0xc6,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_mul_f32 v6, 0.5, v2 +// GFX12: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_mul_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xc6,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_mul_f32 v6, src_scc, v5 +// GFX12: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_mul_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xc6,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_mul_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_mul_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xc6,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_sub_f32 v6, v1, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_sub_f32 v6, v1, v3 ; encoding: [0x04,0x05,0xca,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_sub_f32 v6, v255, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_sub_f32 v6, v255, v3 ; encoding: 
[0x01,0x05,0xca,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_sub_f32 v6, v2, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_sub_f32 v6, v2, v3 ; encoding: [0xff,0x05,0xca,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_sub_f32 v6, v3, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_sub_f32 v6, v3, v3 ; encoding: [0x02,0x05,0xca,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_sub_f32 v6, v4, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_sub_f32 v6, v4, v3 ; encoding: [0x03,0x05,0xca,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_sub_f32 v6, s1, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_sub_f32 v6, s1, v3 ; encoding: [0x69,0x04,0xca,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_sub_f32 v6, s105, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_sub_f32 v6, s105, v3 ; encoding: [0x01,0x04,0xca,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_sub_f32 v6, vcc_lo, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_sub_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xca,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_sub_f32 v6, vcc_hi, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_sub_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xca,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: 
error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_sub_f32 v6, ttmp15, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_sub_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xca,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_sub_f32 v6, m0, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_sub_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0xca,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_sub_f32 v6, exec_lo, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_sub_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xca,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_sub_f32 v6, exec_hi, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_sub_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xca,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_sub_f32 v6, null, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_sub_f32 v6, null, v3 ; encoding: [0xff,0x04,0xca,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_sub_f32 v6, -1, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_sub_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0xca,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_sub_f32 v6, 0.5, v2 +// GFX12: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_sub_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xca,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_sub_f32 v6, src_scc, v5 +// GFX12: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_sub_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xca,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_sub_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_sub_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xca,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_subrev_f32 v6, v1, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_subrev_f32 v6, v1, v3 ; encoding: [0x04,0x05,0xcc,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_subrev_f32 v6, v255, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_subrev_f32 v6, v255, v3 ; encoding: [0x01,0x05,0xcc,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_subrev_f32 v6, v2, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_subrev_f32 v6, v2, v3 ; encoding: [0xff,0x05,0xcc,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_subrev_f32 v6, v3, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_subrev_f32 v6, v3, v3 ; encoding: [0x02,0x05,0xcc,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_subrev_f32 v6, v4, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_subrev_f32 v6, v4, v3 ; encoding: [0x03,0x05,0xcc,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_subrev_f32 v6, s1, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_subrev_f32 v6, s1, v3 ; encoding: [0x69,0x04,0xcc,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_subrev_f32 v6, s105, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_subrev_f32 v6, s105, v3 ; encoding: [0x01,0x04,0xcc,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_subrev_f32 v6, vcc_lo, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_subrev_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xcc,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_subrev_f32 v6, vcc_hi, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_subrev_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xcc,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_subrev_f32 v6, ttmp15, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_subrev_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xcc,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_subrev_f32 v6, m0, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_subrev_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0xcc,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v6, exec_lo, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xcc,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction 
requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v6, exec_hi, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xcc,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_subrev_f32 v6, null, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_subrev_f32 v6, null, v3 ; encoding: [0xff,0x04,0xcc,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_subrev_f32 v6, -1, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_subrev_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0xcc,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_subrev_f32 v6, 0.5, v2 +// GFX12: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_subrev_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xcc,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_subrev_f32 v6, src_scc, v5 +// GFX12: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_subrev_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xcc,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_subrev_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_subrev_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xcc,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v4, v2 :: v_dual_add_f32 v6, v1, v3 +// GFX12: v_dual_mul_f32 v255, v4, v2 :: v_dual_add_f32 v6, v1, v3 ; encoding: [0x04,0x05,0xc8,0xc8,0x01,0x07,0x06,0xff] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v1, v2 :: v_dual_add_f32 v6, v255, v3 +// GFX12: v_dual_mul_f32 v255, v1, v2 :: v_dual_add_f32 v6, v255, v3 ; encoding: [0x01,0x05,0xc8,0xc8,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v255, v2 :: v_dual_add_f32 v6, v2, v3 +// GFX12: v_dual_mul_f32 v255, v255, v2 :: v_dual_add_f32 v6, v2, v3 ; encoding: [0xff,0x05,0xc8,0xc8,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v2, v2 :: v_dual_add_f32 v6, v3, v3 +// GFX12: v_dual_mul_f32 v255, v2, v2 :: v_dual_add_f32 v6, v3, v3 ; encoding: [0x02,0x05,0xc8,0xc8,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v3, v2 :: v_dual_add_f32 v6, v4, v3 +// GFX12: v_dual_mul_f32 v255, v3, v2 :: v_dual_add_f32 v6, v4, v3 ; encoding: [0x03,0x05,0xc8,0xc8,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s105, v2 :: v_dual_add_f32 v6, s1, v3 +// GFX12: v_dual_mul_f32 v255, s105, v2 :: v_dual_add_f32 v6, s1, v3 ; encoding: [0x69,0x04,0xc8,0xc8,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s1, v2 :: v_dual_add_f32 v6, s105, v3 +// GFX12: v_dual_mul_f32 v255, s1, v2 :: v_dual_add_f32 v6, s105, v3 ; encoding: [0x01,0x04,0xc8,0xc8,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_add_f32 v6, vcc_lo, v3 +// GFX12: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_add_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xc8,0xc8,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_add_f32 v6, vcc_hi, v3 +// GFX12: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_add_f32 v6, vcc_hi, v3 ; 
encoding: [0x7f,0x04,0xc8,0xc8,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_add_f32 v6, ttmp15, v3 +// GFX12: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_add_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xc8,0xc8,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, m0, v2 :: v_dual_add_f32 v6, m0, v3 +// GFX12: v_dual_mul_f32 v255, m0, v2 :: v_dual_add_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0xc8,0xc8,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_add_f32 v6, exec_lo, v3 +// GFX12: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_add_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xc8,0xc8,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_add_f32 v6, exec_hi, v3 +// GFX12: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_add_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xc8,0xc8,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_add_f32 v6, null, v3 +// GFX12: v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_add_f32 v6, null, v3 ; encoding: [0xff,0x04,0xc8,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, src_scc, v2 :: v_dual_add_f32 v6, -1, v3 +// GFX12: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_add_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0xc8,0xc8,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0.5, v3 :: v_dual_add_f32 v6, 0.5, v2 +// GFX12: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_add_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xc8,0xc8,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_mul_f32 v255, -1, v4 :: v_dual_add_f32 v6, src_scc, v5 +// GFX12: v_dual_mul_f32 v255, -1, v4 :: v_dual_add_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xc8,0xc8,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v6, null, v5 :: v_dual_add_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_mul_f32 v6, null, v5 :: v_dual_add_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xc8,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v4, v2 :: v_dual_add_nc_u32 v6, v1, v3 +// GFX12: v_dual_mul_f32 v255, v4, v2 :: v_dual_add_nc_u32 v6, v1, v3 ; encoding: [0x04,0x05,0xe0,0xc8,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v1, v2 :: v_dual_add_nc_u32 v6, v255, v3 +// GFX12: v_dual_mul_f32 v255, v1, v2 :: v_dual_add_nc_u32 v6, v255, v3 ; encoding: [0x01,0x05,0xe0,0xc8,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v255, v2 :: v_dual_add_nc_u32 v6, v2, v3 +// GFX12: v_dual_mul_f32 v255, v255, v2 :: v_dual_add_nc_u32 v6, v2, v3 ; encoding: [0xff,0x05,0xe0,0xc8,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v2, v2 :: v_dual_add_nc_u32 v6, v3, v3 +// GFX12: v_dual_mul_f32 v255, v2, v2 :: v_dual_add_nc_u32 v6, v3, v3 ; encoding: [0x02,0x05,0xe0,0xc8,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v3, v2 :: v_dual_add_nc_u32 v6, v4, v3 +// GFX12: v_dual_mul_f32 v255, v3, v2 :: v_dual_add_nc_u32 v6, v4, v3 ; encoding: [0x03,0x05,0xe0,0xc8,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s105, v2 :: v_dual_add_nc_u32 v6, s1, v3 +// GFX12: v_dual_mul_f32 v255, s105, v2 :: v_dual_add_nc_u32 v6, s1, v3 ; encoding: 
[0x69,0x04,0xe0,0xc8,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s1, v2 :: v_dual_add_nc_u32 v6, s105, v3 +// GFX12: v_dual_mul_f32 v255, s1, v2 :: v_dual_add_nc_u32 v6, s105, v3 ; encoding: [0x01,0x04,0xe0,0xc8,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v6, vcc_lo, v3 +// GFX12: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xe0,0xc8,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v6, vcc_hi, v3 +// GFX12: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xe0,0xc8,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v6, ttmp15, v3 +// GFX12: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xe0,0xc8,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, m0, v2 :: v_dual_add_nc_u32 v6, m0, v3 +// GFX12: v_dual_mul_f32 v255, m0, v2 :: v_dual_add_nc_u32 v6, m0, v3 ; encoding: [0x7d,0x04,0xe0,0xc8,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v6, exec_lo, v3 +// GFX12: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xe0,0xc8,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v6, exec_hi, v3 +// GFX12: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xe0,0xc8,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_add_nc_u32 v6, null, v3 +// GFX12: v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_add_nc_u32 v6, null, v3 ; encoding: [0xff,0x04,0xe0,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, src_scc, v2 :: v_dual_add_nc_u32 v6, -1, v3 +// GFX12: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_add_nc_u32 v6, -1, v3 ; encoding: [0xfd,0x04,0xe0,0xc8,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0.5, v3 :: v_dual_add_nc_u32 v6, 0.5, v2 +// GFX12: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_add_nc_u32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xe0,0xc8,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, -1, v4 :: v_dual_add_nc_u32 v6, src_scc, v5 +// GFX12: v_dual_mul_f32 v255, -1, v4 :: v_dual_add_nc_u32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xe0,0xc8,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v6, null, v5 :: v_dual_add_nc_u32 v255, 0xaf123456, v4 +// GFX12: v_dual_mul_f32 v6, null, v5 :: v_dual_add_nc_u32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xe0,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v4, v2 :: v_dual_cndmask_b32 v6, v1, v3 +// GFX12: v_dual_mul_f32 v255, v4, v2 :: v_dual_cndmask_b32 v6, v1, v3 ; encoding: [0x04,0x05,0xd2,0xc8,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v1, v2 :: v_dual_cndmask_b32 v6, v255, v3 +// GFX12: v_dual_mul_f32 v255, v1, v2 :: v_dual_cndmask_b32 v6, v255, v3 ; encoding: [0x01,0x05,0xd2,0xc8,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v255, v2 :: v_dual_cndmask_b32 v6, v2, v3 +// GFX12: 
v_dual_mul_f32 v255, v255, v2 :: v_dual_cndmask_b32 v6, v2, v3 ; encoding: [0xff,0x05,0xd2,0xc8,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v2, v2 :: v_dual_cndmask_b32 v6, v3, v3 +// GFX12: v_dual_mul_f32 v255, v2, v2 :: v_dual_cndmask_b32 v6, v3, v3 ; encoding: [0x02,0x05,0xd2,0xc8,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v3, v2 :: v_dual_cndmask_b32 v6, v4, v3 +// GFX12: v_dual_mul_f32 v255, v3, v2 :: v_dual_cndmask_b32 v6, v4, v3 ; encoding: [0x03,0x05,0xd2,0xc8,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s105, v2 :: v_dual_cndmask_b32 v6, s105, v3 +// GFX12: v_dual_mul_f32 v255, s105, v2 :: v_dual_cndmask_b32 v6, s105, v3 ; encoding: [0x69,0x04,0xd2,0xc8,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s1, v3 +// GFX12: v_dual_mul_f32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s1, v3 ; encoding: [0x01,0x04,0xd2,0xc8,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v6, ttmp15, v3 +// GFX12: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0xd2,0xc8,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v6, exec_hi, v3 +// GFX12: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0xd2,0xc8,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v6, exec_lo, v3 +// GFX12: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0xd2,0xc8,0x7e,0x06,0x06,0xff] +// 
W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, m0, v2 :: v_dual_cndmask_b32 v6, m0, v3 +// GFX12: v_dual_mul_f32 v255, m0, v2 :: v_dual_cndmask_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0xd2,0xc8,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v6, vcc_hi, v3 +// GFX12: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0xd2,0xc8,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v6, vcc_lo, v3 +// GFX12: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0xd2,0xc8,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_cndmask_b32 v6, null, v3 +// GFX12: v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_cndmask_b32 v6, null, v3 ; encoding: [0xff,0x04,0xd2,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, src_scc, v2 :: v_dual_cndmask_b32 v6, -1, v3 +// GFX12: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_cndmask_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0xd2,0xc8,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0.5, v3 :: v_dual_cndmask_b32 v6, 0.5, v2 +// GFX12: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_cndmask_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xd2,0xc8,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, -1, v4 :: v_dual_cndmask_b32 v6, src_scc, v5 +// GFX12: v_dual_mul_f32 v255, -1, v4 :: v_dual_cndmask_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xd2,0xc8,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v6, 
null, v5 :: v_dual_cndmask_b32 v255, 0xaf123456, v4 +// GFX12: v_dual_mul_f32 v6, null, v5 :: v_dual_cndmask_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xd2,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v4, v2 :: v_dual_fmaak_f32 v6, v1, v3, 0xaf123456 +// GFX12: v_dual_mul_f32 v255, v4, v2 :: v_dual_fmaak_f32 v6, v1, v3, 0xaf123456 ; encoding: [0x04,0x05,0xc2,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v1, v2 :: v_dual_fmaak_f32 v6, v255, v3, 0xaf123456 +// GFX12: v_dual_mul_f32 v255, v1, v2 :: v_dual_fmaak_f32 v6, v255, v3, 0xaf123456 ; encoding: [0x01,0x05,0xc2,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v255, v2 :: v_dual_fmaak_f32 v6, v2, v3, 0xaf123456 +// GFX12: v_dual_mul_f32 v255, v255, v2 :: v_dual_fmaak_f32 v6, v2, v3, 0xaf123456 ; encoding: [0xff,0x05,0xc2,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v2, v2 :: v_dual_fmaak_f32 v6, v3, v3, 0xaf123456 +// GFX12: v_dual_mul_f32 v255, v2, v2 :: v_dual_fmaak_f32 v6, v3, v3, 0xaf123456 ; encoding: [0x02,0x05,0xc2,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v3, v2 :: v_dual_fmaak_f32 v6, v4, v3, 0xaf123456 +// GFX12: v_dual_mul_f32 v255, v3, v2 :: v_dual_fmaak_f32 v6, v4, v3, 0xaf123456 ; encoding: [0x03,0x05,0xc2,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s105, v2 :: v_dual_fmaak_f32 v6, s105, v3, 0xaf123456 +// GFX12: v_dual_mul_f32 v255, s105, v2 :: v_dual_fmaak_f32 v6, s105, v3, 0xaf123456 ; encoding: 
[0x69,0x04,0xc2,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s1, v2 :: v_dual_fmaak_f32 v6, s1, v3, 0xaf123456 +// GFX12: v_dual_mul_f32 v255, s1, v2 :: v_dual_fmaak_f32 v6, s1, v3, 0xaf123456 ; encoding: [0x01,0x04,0xc2,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_fmaak_f32 v6, ttmp15, v3, 0xaf123456 +// GFX12: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_fmaak_f32 v6, ttmp15, v3, 0xaf123456 ; encoding: [0x7b,0x04,0xc2,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_fmaak_f32 v6, exec_hi, v3, 0xaf123456 +// GFX12: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_fmaak_f32 v6, exec_hi, v3, 0xaf123456 ; encoding: [0x7f,0x04,0xc2,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_fmaak_f32 v6, exec_lo, v3, 0xaf123456 +// GFX12: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_fmaak_f32 v6, exec_lo, v3, 0xaf123456 ; encoding: [0x7e,0x04,0xc2,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, m0, v2 :: v_dual_fmaak_f32 v6, m0, v3, 0xaf123456 +// GFX12: v_dual_mul_f32 v255, m0, v2 :: v_dual_fmaak_f32 v6, m0, v3, 0xaf123456 ; encoding: [0x7d,0x04,0xc2,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_fmaak_f32 v6, vcc_hi, v3, 0xaf123456 +// GFX12: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_fmaak_f32 v6, vcc_hi, v3, 0xaf123456 ; encoding: [0x6b,0x04,0xc2,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_fmaak_f32 v6, vcc_lo, v3, 0xaf123456 +// GFX12: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_fmaak_f32 v6, vcc_lo, v3, 0xaf123456 ; encoding: [0x6a,0x04,0xc2,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_fmaak_f32 v6, null, v3, 0xaf123456 +// GFX12: v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_fmaak_f32 v6, null, v3, 0xaf123456 ; encoding: [0xff,0x04,0xc2,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, src_scc, v2 :: v_dual_fmaak_f32 v6, -1, v3, 0xaf123456 +// GFX12: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_fmaak_f32 v6, -1, v3, 0xaf123456 ; encoding: [0xfd,0x04,0xc2,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0.5, v3 :: v_dual_fmaak_f32 v6, 0.5, v2, 0xaf123456 +// GFX12: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_fmaak_f32 v6, 0.5, v2, 0xaf123456 ; encoding: [0xf0,0x06,0xc2,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, -1, v4 :: v_dual_fmaak_f32 v6, src_scc, v5, 0xaf123456 +// GFX12: v_dual_mul_f32 v255, -1, v4 :: v_dual_fmaak_f32 v6, src_scc, v5, 0xaf123456 ; encoding: [0xc1,0x08,0xc2,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v6, null, v5 :: v_dual_fmaak_f32 v255, 0xaf123456, v4, 0xaf123456 +// GFX12: v_dual_mul_f32 v6, null, v5 :: v_dual_fmaak_f32 v255, 0xaf123456, v4, 0xaf123456 ; encoding: [0x7c,0x0a,0xc2,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v4, v2 :: v_dual_fmac_f32 v6, v1, v3 +// GFX12: v_dual_mul_f32 v255, v4, v2 :: v_dual_fmac_f32 v6, v1, 
v3 ; encoding: [0x04,0x05,0xc0,0xc8,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v1, v2 :: v_dual_fmac_f32 v6, v255, v3 +// GFX12: v_dual_mul_f32 v255, v1, v2 :: v_dual_fmac_f32 v6, v255, v3 ; encoding: [0x01,0x05,0xc0,0xc8,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v255, v2 :: v_dual_fmac_f32 v6, v2, v3 +// GFX12: v_dual_mul_f32 v255, v255, v2 :: v_dual_fmac_f32 v6, v2, v3 ; encoding: [0xff,0x05,0xc0,0xc8,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v2, v2 :: v_dual_fmac_f32 v6, v3, v3 +// GFX12: v_dual_mul_f32 v255, v2, v2 :: v_dual_fmac_f32 v6, v3, v3 ; encoding: [0x02,0x05,0xc0,0xc8,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v3, v2 :: v_dual_fmac_f32 v6, v4, v3 +// GFX12: v_dual_mul_f32 v255, v3, v2 :: v_dual_fmac_f32 v6, v4, v3 ; encoding: [0x03,0x05,0xc0,0xc8,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s105, v2 :: v_dual_fmac_f32 v6, s1, v3 +// GFX12: v_dual_mul_f32 v255, s105, v2 :: v_dual_fmac_f32 v6, s1, v3 ; encoding: [0x69,0x04,0xc0,0xc8,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s1, v2 :: v_dual_fmac_f32 v6, s105, v3 +// GFX12: v_dual_mul_f32 v255, s1, v2 :: v_dual_fmac_f32 v6, s105, v3 ; encoding: [0x01,0x04,0xc0,0xc8,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_fmac_f32 v6, vcc_lo, v3 +// GFX12: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_fmac_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xc0,0xc8,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_fmac_f32 v6, vcc_hi, v3 
+// GFX12: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_fmac_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xc0,0xc8,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_fmac_f32 v6, ttmp15, v3 +// GFX12: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_fmac_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xc0,0xc8,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, m0, v2 :: v_dual_fmac_f32 v6, m0, v3 +// GFX12: v_dual_mul_f32 v255, m0, v2 :: v_dual_fmac_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0xc0,0xc8,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v6, exec_lo, v3 +// GFX12: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xc0,0xc8,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v6, exec_hi, v3 +// GFX12: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xc0,0xc8,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_fmac_f32 v6, null, v3 +// GFX12: v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_fmac_f32 v6, null, v3 ; encoding: [0xff,0x04,0xc0,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, src_scc, v2 :: v_dual_fmac_f32 v6, -1, v3 +// GFX12: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_fmac_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0xc0,0xc8,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0.5, v3 :: v_dual_fmac_f32 v6, 0.5, v2 +// GFX12: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_fmac_f32 v6, 0.5, v2 ; encoding: 
[0xf0,0x06,0xc0,0xc8,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, -1, v4 :: v_dual_fmac_f32 v6, src_scc, v5 +// GFX12: v_dual_mul_f32 v255, -1, v4 :: v_dual_fmac_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xc0,0xc8,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v6, null, v5 :: v_dual_fmac_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_mul_f32 v6, null, v5 :: v_dual_fmac_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xc0,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v4, v255 :: v_dual_fmamk_f32 v6, v1, 0xaf123456, v255 +// GFX12: v_dual_mul_f32 v255, v4, v255 :: v_dual_fmamk_f32 v6, v1, 0xaf123456, v255 ; encoding: [0x04,0xff,0xc5,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v1, v255 :: v_dual_fmamk_f32 v6, v255, 0xaf123456, v255 +// GFX12: v_dual_mul_f32 v255, v1, v255 :: v_dual_fmamk_f32 v6, v255, 0xaf123456, v255 ; encoding: [0x01,0xff,0xc5,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v255, v255 :: v_dual_fmamk_f32 v6, v2, 0xaf123456, v255 +// GFX12: v_dual_mul_f32 v255, v255, v255 :: v_dual_fmamk_f32 v6, v2, 0xaf123456, v255 ; encoding: [0xff,0xff,0xc5,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v2, v255 :: v_dual_fmamk_f32 v6, v3, 0xaf123456, v255 +// GFX12: v_dual_mul_f32 v255, v2, v255 :: v_dual_fmamk_f32 v6, v3, 0xaf123456, v255 ; encoding: [0x02,0xff,0xc5,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v3, v255 :: v_dual_fmamk_f32 v6, v4, 0xaf123456, v255 +// GFX12: 
v_dual_mul_f32 v255, v3, v255 :: v_dual_fmamk_f32 v6, v4, 0xaf123456, v255 ; encoding: [0x03,0xff,0xc5,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s105, v255 :: v_dual_fmamk_f32 v6, s105, 0xaf123456, v255 +// GFX12: v_dual_mul_f32 v255, s105, v255 :: v_dual_fmamk_f32 v6, s105, 0xaf123456, v255 ; encoding: [0x69,0xfe,0xc5,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s1, v255 :: v_dual_fmamk_f32 v6, s1, 0xaf123456, v255 +// GFX12: v_dual_mul_f32 v255, s1, v255 :: v_dual_fmamk_f32 v6, s1, 0xaf123456, v255 ; encoding: [0x01,0xfe,0xc5,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, ttmp15, v255 :: v_dual_fmamk_f32 v6, ttmp15, 0xaf123456, v255 +// GFX12: v_dual_mul_f32 v255, ttmp15, v255 :: v_dual_fmamk_f32 v6, ttmp15, 0xaf123456, v255 ; encoding: [0x7b,0xfe,0xc5,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_hi, v255 :: v_dual_fmamk_f32 v6, exec_hi, 0xaf123456, v255 +// GFX12: v_dual_mul_f32 v255, exec_hi, v255 :: v_dual_fmamk_f32 v6, exec_hi, 0xaf123456, v255 ; encoding: [0x7f,0xfe,0xc5,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_lo, v255 :: v_dual_fmamk_f32 v6, exec_lo, 0xaf123456, v255 +// GFX12: v_dual_mul_f32 v255, exec_lo, v255 :: v_dual_fmamk_f32 v6, exec_lo, 0xaf123456, v255 ; encoding: [0x7e,0xfe,0xc5,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, m0, v255 :: v_dual_fmamk_f32 v6, m0, 0xaf123456, v255 +// GFX12: v_dual_mul_f32 v255, m0, v255 :: v_dual_fmamk_f32 v6, m0, 0xaf123456, v255 ; encoding: 
[0x7d,0xfe,0xc5,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_hi, v255 :: v_dual_fmamk_f32 v6, vcc_hi, 0xaf123456, v255 +// GFX12: v_dual_mul_f32 v255, vcc_hi, v255 :: v_dual_fmamk_f32 v6, vcc_hi, 0xaf123456, v255 ; encoding: [0x6b,0xfe,0xc5,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_lo, v255 :: v_dual_fmamk_f32 v6, vcc_lo, 0xaf123456, v255 +// GFX12: v_dual_mul_f32 v255, vcc_lo, v255 :: v_dual_fmamk_f32 v6, vcc_lo, 0xaf123456, v255 ; encoding: [0x6a,0xfe,0xc5,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, null, 0xaf123456, v255 +// GFX12: v_dual_mul_f32 v255, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, null, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xc5,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, src_scc, v255 :: v_dual_fmamk_f32 v6, -1, 0xaf123456, v255 +// GFX12: v_dual_mul_f32 v255, src_scc, v255 :: v_dual_fmamk_f32 v6, -1, 0xaf123456, v255 ; encoding: [0xfd,0xfe,0xc5,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0.5, v3 :: v_dual_fmamk_f32 v6, 0.5, 0xaf123456, v255 +// GFX12: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_fmamk_f32 v6, 0.5, 0xaf123456, v255 ; encoding: [0xf0,0x06,0xc4,0xc8,0xf0,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, -1, v4 :: v_dual_fmamk_f32 v6, src_scc, 0xaf123456, v255 +// GFX12: v_dual_mul_f32 v255, -1, v4 :: v_dual_fmamk_f32 v6, src_scc, 0xaf123456, v255 ; encoding: [0xc1,0x08,0xc4,0xc8,0xfd,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v6, null, v5 :: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v4 +// GFX12: v_dual_mul_f32 v6, null, v5 :: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xc4,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v4, v2 :: v_dual_lshlrev_b32 v6, v1, v3 +// GFX12: v_dual_mul_f32 v255, v4, v2 :: v_dual_lshlrev_b32 v6, v1, v3 ; encoding: [0x04,0x05,0xe2,0xc8,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v1, v2 :: v_dual_lshlrev_b32 v6, v255, v3 +// GFX12: v_dual_mul_f32 v255, v1, v2 :: v_dual_lshlrev_b32 v6, v255, v3 ; encoding: [0x01,0x05,0xe2,0xc8,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v255, v2 :: v_dual_lshlrev_b32 v6, v2, v3 +// GFX12: v_dual_mul_f32 v255, v255, v2 :: v_dual_lshlrev_b32 v6, v2, v3 ; encoding: [0xff,0x05,0xe2,0xc8,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v2, v2 :: v_dual_lshlrev_b32 v6, v3, v3 +// GFX12: v_dual_mul_f32 v255, v2, v2 :: v_dual_lshlrev_b32 v6, v3, v3 ; encoding: [0x02,0x05,0xe2,0xc8,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v3, v2 :: v_dual_lshlrev_b32 v6, v4, v3 +// GFX12: v_dual_mul_f32 v255, v3, v2 :: v_dual_lshlrev_b32 v6, v4, v3 ; encoding: [0x03,0x05,0xe2,0xc8,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s105, v2 :: v_dual_lshlrev_b32 v6, s1, v3 +// GFX12: v_dual_mul_f32 v255, s105, v2 :: v_dual_lshlrev_b32 v6, s1, v3 ; encoding: [0x69,0x04,0xe2,0xc8,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s1, v2 :: v_dual_lshlrev_b32 v6, 
s105, v3 +// GFX12: v_dual_mul_f32 v255, s1, v2 :: v_dual_lshlrev_b32 v6, s105, v3 ; encoding: [0x01,0x04,0xe2,0xc8,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v6, vcc_lo, v3 +// GFX12: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xe2,0xc8,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v6, vcc_hi, v3 +// GFX12: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xe2,0xc8,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v6, ttmp15, v3 +// GFX12: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xe2,0xc8,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, m0, v2 :: v_dual_lshlrev_b32 v6, m0, v3 +// GFX12: v_dual_mul_f32 v255, m0, v2 :: v_dual_lshlrev_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0xe2,0xc8,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v6, exec_lo, v3 +// GFX12: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xe2,0xc8,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v6, exec_hi, v3 +// GFX12: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xe2,0xc8,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_lshlrev_b32 v6, null, v3 +// GFX12: v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_lshlrev_b32 v6, 
null, v3 ; encoding: [0xff,0x04,0xe2,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v6, -1, v3 +// GFX12: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0xe2,0xc8,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v6, 0.5, v2 +// GFX12: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xe2,0xc8,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, -1, v4 :: v_dual_lshlrev_b32 v6, src_scc, v5 +// GFX12: v_dual_mul_f32 v255, -1, v4 :: v_dual_lshlrev_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xe2,0xc8,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v6, null, v5 :: v_dual_lshlrev_b32 v255, 0xaf123456, v4 +// GFX12: v_dual_mul_f32 v6, null, v5 :: v_dual_lshlrev_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xe2,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v4, v2 :: v_dual_max_num_f32 v6, v1, v3 +// GFX12: v_dual_mul_f32 v255, v4, v2 :: v_dual_max_num_f32 v6, v1, v3 ; encoding: [0x04,0x05,0xd4,0xc8,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v1, v2 :: v_dual_max_num_f32 v6, v255, v3 +// GFX12: v_dual_mul_f32 v255, v1, v2 :: v_dual_max_num_f32 v6, v255, v3 ; encoding: [0x01,0x05,0xd4,0xc8,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v255, v2 :: v_dual_max_num_f32 v6, v2, v3 +// GFX12: v_dual_mul_f32 v255, v255, v2 :: v_dual_max_num_f32 v6, v2, v3 ; encoding: [0xff,0x05,0xd4,0xc8,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: 
error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v2, v2 :: v_dual_max_num_f32 v6, v3, v3 +// GFX12: v_dual_mul_f32 v255, v2, v2 :: v_dual_max_num_f32 v6, v3, v3 ; encoding: [0x02,0x05,0xd4,0xc8,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v3, v2 :: v_dual_max_num_f32 v6, v4, v3 +// GFX12: v_dual_mul_f32 v255, v3, v2 :: v_dual_max_num_f32 v6, v4, v3 ; encoding: [0x03,0x05,0xd4,0xc8,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s105, v2 :: v_dual_max_num_f32 v6, s1, v3 +// GFX12: v_dual_mul_f32 v255, s105, v2 :: v_dual_max_num_f32 v6, s1, v3 ; encoding: [0x69,0x04,0xd4,0xc8,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s1, v2 :: v_dual_max_num_f32 v6, s105, v3 +// GFX12: v_dual_mul_f32 v255, s1, v2 :: v_dual_max_num_f32 v6, s105, v3 ; encoding: [0x01,0x04,0xd4,0xc8,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_max_num_f32 v6, vcc_lo, v3 +// GFX12: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_max_num_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xd4,0xc8,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_max_num_f32 v6, vcc_hi, v3 +// GFX12: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_max_num_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xd4,0xc8,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_max_num_f32 v6, ttmp15, v3 +// GFX12: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_max_num_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xd4,0xc8,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, m0, v2 :: v_dual_max_num_f32 v6, m0, v3 +// GFX12: 
v_dual_mul_f32 v255, m0, v2 :: v_dual_max_num_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0xd4,0xc8,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v6, exec_lo, v3 +// GFX12: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xd4,0xc8,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v6, exec_hi, v3 +// GFX12: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xd4,0xc8,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_max_num_f32 v6, null, v3 +// GFX12: v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_max_num_f32 v6, null, v3 ; encoding: [0xff,0x04,0xd4,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, src_scc, v2 :: v_dual_max_num_f32 v6, -1, v3 +// GFX12: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_max_num_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0xd4,0xc8,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0.5, v3 :: v_dual_max_num_f32 v6, 0.5, v2 +// GFX12: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_max_num_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xd4,0xc8,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, -1, v4 :: v_dual_max_num_f32 v6, src_scc, v5 +// GFX12: v_dual_mul_f32 v255, -1, v4 :: v_dual_max_num_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xd4,0xc8,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v6, null, v5 :: v_dual_max_num_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_mul_f32 v6, null, v5 :: v_dual_max_num_f32 v255, 0xaf123456, v4 ; 
encoding: [0x7c,0x0a,0xd4,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v4, v2 :: v_dual_min_num_f32 v6, v1, v3 +// GFX12: v_dual_mul_f32 v255, v4, v2 :: v_dual_min_num_f32 v6, v1, v3 ; encoding: [0x04,0x05,0xd6,0xc8,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v1, v2 :: v_dual_min_num_f32 v6, v255, v3 +// GFX12: v_dual_mul_f32 v255, v1, v2 :: v_dual_min_num_f32 v6, v255, v3 ; encoding: [0x01,0x05,0xd6,0xc8,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v255, v2 :: v_dual_min_num_f32 v6, v2, v3 +// GFX12: v_dual_mul_f32 v255, v255, v2 :: v_dual_min_num_f32 v6, v2, v3 ; encoding: [0xff,0x05,0xd6,0xc8,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v2, v2 :: v_dual_min_num_f32 v6, v3, v3 +// GFX12: v_dual_mul_f32 v255, v2, v2 :: v_dual_min_num_f32 v6, v3, v3 ; encoding: [0x02,0x05,0xd6,0xc8,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v3, v2 :: v_dual_min_num_f32 v6, v4, v3 +// GFX12: v_dual_mul_f32 v255, v3, v2 :: v_dual_min_num_f32 v6, v4, v3 ; encoding: [0x03,0x05,0xd6,0xc8,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s105, v2 :: v_dual_min_num_f32 v6, s1, v3 +// GFX12: v_dual_mul_f32 v255, s105, v2 :: v_dual_min_num_f32 v6, s1, v3 ; encoding: [0x69,0x04,0xd6,0xc8,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s1, v2 :: v_dual_min_num_f32 v6, s105, v3 +// GFX12: v_dual_mul_f32 v255, s1, v2 :: v_dual_min_num_f32 v6, s105, v3 ; encoding: [0x01,0x04,0xd6,0xc8,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 
ttmp15, v2 :: v_dual_min_num_f32 v6, vcc_lo, v3 +// GFX12: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_min_num_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xd6,0xc8,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_min_num_f32 v6, vcc_hi, v3 +// GFX12: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_min_num_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xd6,0xc8,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_min_num_f32 v6, ttmp15, v3 +// GFX12: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_min_num_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xd6,0xc8,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, m0, v2 :: v_dual_min_num_f32 v6, m0, v3 +// GFX12: v_dual_mul_f32 v255, m0, v2 :: v_dual_min_num_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0xd6,0xc8,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v6, exec_lo, v3 +// GFX12: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xd6,0xc8,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v6, exec_hi, v3 +// GFX12: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xd6,0xc8,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_min_num_f32 v6, null, v3 +// GFX12: v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_min_num_f32 v6, null, v3 ; encoding: [0xff,0x04,0xd6,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, src_scc, v2 :: v_dual_min_num_f32 v6, -1, v3 +// GFX12: 
v_dual_mul_f32 v255, src_scc, v2 :: v_dual_min_num_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0xd6,0xc8,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0.5, v3 :: v_dual_min_num_f32 v6, 0.5, v2 +// GFX12: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_min_num_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xd6,0xc8,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, -1, v4 :: v_dual_min_num_f32 v6, src_scc, v5 +// GFX12: v_dual_mul_f32 v255, -1, v4 :: v_dual_min_num_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xd6,0xc8,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v6, null, v5 :: v_dual_min_num_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_mul_f32 v6, null, v5 :: v_dual_min_num_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xd6,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v4, v255 :: v_dual_mov_b32 v6, v1 +// GFX12: v_dual_mul_f32 v255, v4, v255 :: v_dual_mov_b32 v6, v1 ; encoding: [0x04,0xff,0xd1,0xc8,0x01,0x01,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v1, v255 :: v_dual_mov_b32 v6, v255 +// GFX12: v_dual_mul_f32 v255, v1, v255 :: v_dual_mov_b32 v6, v255 ; encoding: [0x01,0xff,0xd1,0xc8,0xff,0x01,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v255, v255 :: v_dual_mov_b32 v6, v2 +// GFX12: v_dual_mul_f32 v255, v255, v255 :: v_dual_mov_b32 v6, v2 ; encoding: [0xff,0xff,0xd1,0xc8,0x02,0x01,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v2, v255 :: v_dual_mov_b32 v6, v3 +// GFX12: v_dual_mul_f32 v255, v2, v255 :: v_dual_mov_b32 v6, v3 ; encoding: [0x02,0xff,0xd1,0xc8,0x03,0x01,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction 
requires wavesize=32 + +v_dual_mul_f32 v255, v3, v255 :: v_dual_mov_b32 v6, v4 +// GFX12: v_dual_mul_f32 v255, v3, v255 :: v_dual_mov_b32 v6, v4 ; encoding: [0x03,0xff,0xd1,0xc8,0x04,0x01,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s105, v255 :: v_dual_mov_b32 v6, s1 +// GFX12: v_dual_mul_f32 v255, s105, v255 :: v_dual_mov_b32 v6, s1 ; encoding: [0x69,0xfe,0xd1,0xc8,0x01,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s1, v255 :: v_dual_mov_b32 v6, s105 +// GFX12: v_dual_mul_f32 v255, s1, v255 :: v_dual_mov_b32 v6, s105 ; encoding: [0x01,0xfe,0xd1,0xc8,0x69,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, ttmp15, v255 :: v_dual_mov_b32 v6, vcc_lo +// GFX12: v_dual_mul_f32 v255, ttmp15, v255 :: v_dual_mov_b32 v6, vcc_lo ; encoding: [0x7b,0xfe,0xd1,0xc8,0x6a,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_hi, v255 :: v_dual_mov_b32 v6, vcc_hi +// GFX12: v_dual_mul_f32 v255, exec_hi, v255 :: v_dual_mov_b32 v6, vcc_hi ; encoding: [0x7f,0xfe,0xd1,0xc8,0x6b,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_lo, v255 :: v_dual_mov_b32 v6, ttmp15 +// GFX12: v_dual_mul_f32 v255, exec_lo, v255 :: v_dual_mov_b32 v6, ttmp15 ; encoding: [0x7e,0xfe,0xd1,0xc8,0x7b,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, m0, v255 :: v_dual_mov_b32 v6, m0 +// GFX12: v_dual_mul_f32 v255, m0, v255 :: v_dual_mov_b32 v6, m0 ; encoding: [0x7d,0xfe,0xd1,0xc8,0x7d,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_hi, v255 :: v_dual_mov_b32 v6, exec_lo +// GFX12: v_dual_mul_f32 v255, vcc_hi, v255 :: v_dual_mov_b32 v6, exec_lo ; encoding: 
[0x6b,0xfe,0xd1,0xc8,0x7e,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_lo, v255 :: v_dual_mov_b32 v6, exec_hi +// GFX12: v_dual_mul_f32 v255, vcc_lo, v255 :: v_dual_mov_b32 v6, exec_hi ; encoding: [0x6a,0xfe,0xd1,0xc8,0x7f,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0xaf123456, v255 :: v_dual_mov_b32 v6, null +// GFX12: v_dual_mul_f32 v255, 0xaf123456, v255 :: v_dual_mov_b32 v6, null ; encoding: [0xff,0xfe,0xd1,0xc8,0x7c,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, src_scc, v255 :: v_dual_mov_b32 v6, -1 +// GFX12: v_dual_mul_f32 v255, src_scc, v255 :: v_dual_mov_b32 v6, -1 ; encoding: [0xfd,0xfe,0xd1,0xc8,0xc1,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0.5, v3 :: v_dual_mov_b32 v6, 0.5 +// GFX12: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_mov_b32 v6, 0.5 ; encoding: [0xf0,0x06,0xd0,0xc8,0xf0,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, -1, v4 :: v_dual_mov_b32 v6, src_scc +// GFX12: v_dual_mul_f32 v255, -1, v4 :: v_dual_mov_b32 v6, src_scc ; encoding: [0xc1,0x08,0xd0,0xc8,0xfd,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v6, null, v5 :: v_dual_mov_b32 v255, 0xaf123456 +// GFX12: v_dual_mul_f32 v6, null, v5 :: v_dual_mov_b32 v255, 0xaf123456 ; encoding: [0x7c,0x0a,0xd0,0xc8,0xff,0x00,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v6, v1, v3 +// GFX12: v_dual_mul_f32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v6, v1, v3 ; encoding: [0x04,0x05,0xce,0xc8,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v1, 
v2 :: v_dual_mul_dx9_zero_f32 v6, v255, v3 +// GFX12: v_dual_mul_f32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v6, v255, v3 ; encoding: [0x01,0x05,0xce,0xc8,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v6, v2, v3 +// GFX12: v_dual_mul_f32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v6, v2, v3 ; encoding: [0xff,0x05,0xce,0xc8,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v6, v3, v3 +// GFX12: v_dual_mul_f32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v6, v3, v3 ; encoding: [0x02,0x05,0xce,0xc8,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v6, v4, v3 +// GFX12: v_dual_mul_f32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v6, v4, v3 ; encoding: [0x03,0x05,0xce,0xc8,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v6, s1, v3 +// GFX12: v_dual_mul_f32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v6, s1, v3 ; encoding: [0x69,0x04,0xce,0xc8,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v6, s105, v3 +// GFX12: v_dual_mul_f32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v6, s105, v3 ; encoding: [0x01,0x04,0xce,0xc8,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_lo, v3 +// GFX12: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xce,0xc8,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_hi, v3 +// GFX12: v_dual_mul_f32 v255, 
exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xce,0xc8,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, ttmp15, v3 +// GFX12: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xce,0xc8,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v6, m0, v3 +// GFX12: v_dual_mul_f32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0xce,0xc8,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_lo, v3 +// GFX12: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xce,0xc8,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_hi, v3 +// GFX12: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xce,0xc8,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_mul_dx9_zero_f32 v6, null, v3 +// GFX12: v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_mul_dx9_zero_f32 v6, null, v3 ; encoding: [0xff,0x04,0xce,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v6, -1, v3 +// GFX12: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0xce,0xc8,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v6, 0.5, v2 +// GFX12: v_dual_mul_f32 v255, 
0.5, v3 :: v_dual_mul_dx9_zero_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xce,0xc8,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v6, src_scc, v5 +// GFX12: v_dual_mul_f32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xce,0xc8,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v6, null, v5 :: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_mul_f32 v6, null, v5 :: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xce,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v4, v2 :: v_dual_mul_f32 v6, v1, v3 +// GFX12: v_dual_mul_f32 v255, v4, v2 :: v_dual_mul_f32 v6, v1, v3 ; encoding: [0x04,0x05,0xc6,0xc8,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v1, v2 :: v_dual_mul_f32 v6, v255, v3 +// GFX12: v_dual_mul_f32 v255, v1, v2 :: v_dual_mul_f32 v6, v255, v3 ; encoding: [0x01,0x05,0xc6,0xc8,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v255, v2 :: v_dual_mul_f32 v6, v2, v3 +// GFX12: v_dual_mul_f32 v255, v255, v2 :: v_dual_mul_f32 v6, v2, v3 ; encoding: [0xff,0x05,0xc6,0xc8,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v2, v2 :: v_dual_mul_f32 v6, v3, v3 +// GFX12: v_dual_mul_f32 v255, v2, v2 :: v_dual_mul_f32 v6, v3, v3 ; encoding: [0x02,0x05,0xc6,0xc8,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v3, v2 :: v_dual_mul_f32 v6, v4, v3 +// GFX12: v_dual_mul_f32 v255, v3, v2 :: v_dual_mul_f32 v6, v4, v3 ; encoding: [0x03,0x05,0xc6,0xc8,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction 
requires wavesize=32 + +v_dual_mul_f32 v255, s105, v2 :: v_dual_mul_f32 v6, s1, v3 +// GFX12: v_dual_mul_f32 v255, s105, v2 :: v_dual_mul_f32 v6, s1, v3 ; encoding: [0x69,0x04,0xc6,0xc8,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s1, v2 :: v_dual_mul_f32 v6, s105, v3 +// GFX12: v_dual_mul_f32 v255, s1, v2 :: v_dual_mul_f32 v6, s105, v3 ; encoding: [0x01,0x04,0xc6,0xc8,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_mul_f32 v6, vcc_lo, v3 +// GFX12: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_mul_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xc6,0xc8,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_mul_f32 v6, vcc_hi, v3 +// GFX12: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_mul_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xc6,0xc8,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_mul_f32 v6, ttmp15, v3 +// GFX12: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_mul_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xc6,0xc8,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, m0, v2 :: v_dual_mul_f32 v6, m0, v3 +// GFX12: v_dual_mul_f32 v255, m0, v2 :: v_dual_mul_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0xc6,0xc8,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_mul_f32 v6, exec_lo, v3 +// GFX12: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_mul_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xc6,0xc8,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_mul_f32 v6, exec_hi, v3 +// GFX12: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_mul_f32 v6, exec_hi, 
v3 ; encoding: [0x6a,0x04,0xc6,0xc8,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_mul_f32 v6, null, v3 +// GFX12: v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_mul_f32 v6, null, v3 ; encoding: [0xff,0x04,0xc6,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, src_scc, v2 :: v_dual_mul_f32 v6, -1, v3 +// GFX12: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_mul_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0xc6,0xc8,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0.5, v3 :: v_dual_mul_f32 v6, 0.5, v2 +// GFX12: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_mul_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xc6,0xc8,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, -1, v4 :: v_dual_mul_f32 v6, src_scc, v5 +// GFX12: v_dual_mul_f32 v255, -1, v4 :: v_dual_mul_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xc6,0xc8,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v6, null, v5 :: v_dual_mul_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_mul_f32 v6, null, v5 :: v_dual_mul_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xc6,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v4, v2 :: v_dual_sub_f32 v6, v1, v3 +// GFX12: v_dual_mul_f32 v255, v4, v2 :: v_dual_sub_f32 v6, v1, v3 ; encoding: [0x04,0x05,0xca,0xc8,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v1, v2 :: v_dual_sub_f32 v6, v255, v3 +// GFX12: v_dual_mul_f32 v255, v1, v2 :: v_dual_sub_f32 v6, v255, v3 ; encoding: [0x01,0x05,0xca,0xc8,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_mul_f32 v255, v255, v2 :: v_dual_sub_f32 v6, v2, v3 +// GFX12: v_dual_mul_f32 v255, v255, v2 :: v_dual_sub_f32 v6, v2, v3 ; encoding: [0xff,0x05,0xca,0xc8,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v2, v2 :: v_dual_sub_f32 v6, v3, v3 +// GFX12: v_dual_mul_f32 v255, v2, v2 :: v_dual_sub_f32 v6, v3, v3 ; encoding: [0x02,0x05,0xca,0xc8,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v3, v2 :: v_dual_sub_f32 v6, v4, v3 +// GFX12: v_dual_mul_f32 v255, v3, v2 :: v_dual_sub_f32 v6, v4, v3 ; encoding: [0x03,0x05,0xca,0xc8,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s105, v2 :: v_dual_sub_f32 v6, s1, v3 +// GFX12: v_dual_mul_f32 v255, s105, v2 :: v_dual_sub_f32 v6, s1, v3 ; encoding: [0x69,0x04,0xca,0xc8,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s1, v2 :: v_dual_sub_f32 v6, s105, v3 +// GFX12: v_dual_mul_f32 v255, s1, v2 :: v_dual_sub_f32 v6, s105, v3 ; encoding: [0x01,0x04,0xca,0xc8,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_sub_f32 v6, vcc_lo, v3 +// GFX12: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_sub_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xca,0xc8,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_sub_f32 v6, vcc_hi, v3 +// GFX12: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_sub_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xca,0xc8,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_sub_f32 v6, ttmp15, v3 +// GFX12: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_sub_f32 v6, ttmp15, v3 ; encoding: 
[0x7e,0x04,0xca,0xc8,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, m0, v2 :: v_dual_sub_f32 v6, m0, v3 +// GFX12: v_dual_mul_f32 v255, m0, v2 :: v_dual_sub_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0xca,0xc8,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_sub_f32 v6, exec_lo, v3 +// GFX12: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_sub_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xca,0xc8,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_sub_f32 v6, exec_hi, v3 +// GFX12: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_sub_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xca,0xc8,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_sub_f32 v6, null, v3 +// GFX12: v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_sub_f32 v6, null, v3 ; encoding: [0xff,0x04,0xca,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, src_scc, v2 :: v_dual_sub_f32 v6, -1, v3 +// GFX12: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_sub_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0xca,0xc8,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0.5, v3 :: v_dual_sub_f32 v6, 0.5, v2 +// GFX12: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_sub_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xca,0xc8,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, -1, v4 :: v_dual_sub_f32 v6, src_scc, v5 +// GFX12: v_dual_mul_f32 v255, -1, v4 :: v_dual_sub_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xca,0xc8,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v6, null, v5 
:: v_dual_sub_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_mul_f32 v6, null, v5 :: v_dual_sub_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xca,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v4, v2 :: v_dual_subrev_f32 v6, v1, v3 +// GFX12: v_dual_mul_f32 v255, v4, v2 :: v_dual_subrev_f32 v6, v1, v3 ; encoding: [0x04,0x05,0xcc,0xc8,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v1, v2 :: v_dual_subrev_f32 v6, v255, v3 +// GFX12: v_dual_mul_f32 v255, v1, v2 :: v_dual_subrev_f32 v6, v255, v3 ; encoding: [0x01,0x05,0xcc,0xc8,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v255, v2 :: v_dual_subrev_f32 v6, v2, v3 +// GFX12: v_dual_mul_f32 v255, v255, v2 :: v_dual_subrev_f32 v6, v2, v3 ; encoding: [0xff,0x05,0xcc,0xc8,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v2, v2 :: v_dual_subrev_f32 v6, v3, v3 +// GFX12: v_dual_mul_f32 v255, v2, v2 :: v_dual_subrev_f32 v6, v3, v3 ; encoding: [0x02,0x05,0xcc,0xc8,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v3, v2 :: v_dual_subrev_f32 v6, v4, v3 +// GFX12: v_dual_mul_f32 v255, v3, v2 :: v_dual_subrev_f32 v6, v4, v3 ; encoding: [0x03,0x05,0xcc,0xc8,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s105, v2 :: v_dual_subrev_f32 v6, s1, v3 +// GFX12: v_dual_mul_f32 v255, s105, v2 :: v_dual_subrev_f32 v6, s1, v3 ; encoding: [0x69,0x04,0xcc,0xc8,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s1, v2 :: v_dual_subrev_f32 v6, s105, v3 +// GFX12: v_dual_mul_f32 v255, s1, v2 :: v_dual_subrev_f32 v6, s105, v3 ; encoding: 
[0x01,0x04,0xcc,0xc8,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_subrev_f32 v6, vcc_lo, v3 +// GFX12: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_subrev_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xcc,0xc8,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_subrev_f32 v6, vcc_hi, v3 +// GFX12: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_subrev_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xcc,0xc8,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_subrev_f32 v6, ttmp15, v3 +// GFX12: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_subrev_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xcc,0xc8,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, m0, v2 :: v_dual_subrev_f32 v6, m0, v3 +// GFX12: v_dual_mul_f32 v255, m0, v2 :: v_dual_subrev_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0xcc,0xc8,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v6, exec_lo, v3 +// GFX12: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xcc,0xc8,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v6, exec_hi, v3 +// GFX12: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xcc,0xc8,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_subrev_f32 v6, null, v3 +// GFX12: v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_subrev_f32 v6, null, v3 ; encoding: [0xff,0x04,0xcc,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: 
error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, src_scc, v2 :: v_dual_subrev_f32 v6, -1, v3 +// GFX12: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_subrev_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0xcc,0xc8,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0.5, v3 :: v_dual_subrev_f32 v6, 0.5, v2 +// GFX12: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_subrev_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xcc,0xc8,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, -1, v4 :: v_dual_subrev_f32 v6, src_scc, v5 +// GFX12: v_dual_mul_f32 v255, -1, v4 :: v_dual_subrev_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xcc,0xc8,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v6, null, v5 :: v_dual_subrev_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_mul_f32 v6, null, v5 :: v_dual_subrev_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xcc,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v4, v2 :: v_dual_add_f32 v6, v1, v3 +// GFX12: v_dual_sub_f32 v255, v4, v2 :: v_dual_add_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x48,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v1, v2 :: v_dual_add_f32 v6, v255, v3 +// GFX12: v_dual_sub_f32 v255, v1, v2 :: v_dual_add_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x48,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v255, v2 :: v_dual_add_f32 v6, v2, v3 +// GFX12: v_dual_sub_f32 v255, v255, v2 :: v_dual_add_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x48,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v2, v2 :: v_dual_add_f32 v6, v3, v3 +// GFX12: v_dual_sub_f32 v255, v2, v2 :: 
v_dual_add_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x48,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v3, v2 :: v_dual_add_f32 v6, v4, v3 +// GFX12: v_dual_sub_f32 v255, v3, v2 :: v_dual_add_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x48,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s105, v2 :: v_dual_add_f32 v6, s1, v3 +// GFX12: v_dual_sub_f32 v255, s105, v2 :: v_dual_add_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x48,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s1, v2 :: v_dual_add_f32 v6, s105, v3 +// GFX12: v_dual_sub_f32 v255, s1, v2 :: v_dual_add_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x48,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_add_f32 v6, vcc_lo, v3 +// GFX12: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_add_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x48,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_add_f32 v6, vcc_hi, v3 +// GFX12: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_add_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x48,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_add_f32 v6, ttmp15, v3 +// GFX12: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_add_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x48,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, m0, v2 :: v_dual_add_f32 v6, m0, v3 +// GFX12: v_dual_sub_f32 v255, m0, v2 :: v_dual_add_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x48,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_hi, 
v2 :: v_dual_add_f32 v6, exec_lo, v3 +// GFX12: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_add_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x48,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_add_f32 v6, exec_hi, v3 +// GFX12: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_add_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x48,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0xaf123456, v2 :: v_dual_add_f32 v6, null, v3 +// GFX12: v_dual_sub_f32 v255, 0xaf123456, v2 :: v_dual_add_f32 v6, null, v3 ; encoding: [0xff,0x04,0x48,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, src_scc, v2 :: v_dual_add_f32 v6, -1, v3 +// GFX12: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_add_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x48,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0.5, v3 :: v_dual_add_f32 v6, 0.5, v2 +// GFX12: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_add_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x48,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, -1, v4 :: v_dual_add_f32 v6, src_scc, v5 +// GFX12: v_dual_sub_f32 v255, -1, v4 :: v_dual_add_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x48,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v6, null, v5 :: v_dual_add_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_sub_f32 v6, null, v5 :: v_dual_add_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x48,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v4, v2 :: v_dual_add_nc_u32 v6, v1, v3 +// GFX12: v_dual_sub_f32 v255, v4, v2 :: v_dual_add_nc_u32 v6, v1, v3 
; encoding: [0x04,0x05,0x60,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v1, v2 :: v_dual_add_nc_u32 v6, v255, v3 +// GFX12: v_dual_sub_f32 v255, v1, v2 :: v_dual_add_nc_u32 v6, v255, v3 ; encoding: [0x01,0x05,0x60,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v255, v2 :: v_dual_add_nc_u32 v6, v2, v3 +// GFX12: v_dual_sub_f32 v255, v255, v2 :: v_dual_add_nc_u32 v6, v2, v3 ; encoding: [0xff,0x05,0x60,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v2, v2 :: v_dual_add_nc_u32 v6, v3, v3 +// GFX12: v_dual_sub_f32 v255, v2, v2 :: v_dual_add_nc_u32 v6, v3, v3 ; encoding: [0x02,0x05,0x60,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v3, v2 :: v_dual_add_nc_u32 v6, v4, v3 +// GFX12: v_dual_sub_f32 v255, v3, v2 :: v_dual_add_nc_u32 v6, v4, v3 ; encoding: [0x03,0x05,0x60,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s105, v2 :: v_dual_add_nc_u32 v6, s1, v3 +// GFX12: v_dual_sub_f32 v255, s105, v2 :: v_dual_add_nc_u32 v6, s1, v3 ; encoding: [0x69,0x04,0x60,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s1, v2 :: v_dual_add_nc_u32 v6, s105, v3 +// GFX12: v_dual_sub_f32 v255, s1, v2 :: v_dual_add_nc_u32 v6, s105, v3 ; encoding: [0x01,0x04,0x60,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v6, vcc_lo, v3 +// GFX12: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x60,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_hi, v2 :: 
v_dual_add_nc_u32 v6, vcc_hi, v3 +// GFX12: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x60,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v6, ttmp15, v3 +// GFX12: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x60,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, m0, v2 :: v_dual_add_nc_u32 v6, m0, v3 +// GFX12: v_dual_sub_f32 v255, m0, v2 :: v_dual_add_nc_u32 v6, m0, v3 ; encoding: [0x7d,0x04,0x60,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v6, exec_lo, v3 +// GFX12: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x60,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v6, exec_hi, v3 +// GFX12: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x60,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0xaf123456, v2 :: v_dual_add_nc_u32 v6, null, v3 +// GFX12: v_dual_sub_f32 v255, 0xaf123456, v2 :: v_dual_add_nc_u32 v6, null, v3 ; encoding: [0xff,0x04,0x60,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, src_scc, v2 :: v_dual_add_nc_u32 v6, -1, v3 +// GFX12: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_add_nc_u32 v6, -1, v3 ; encoding: [0xfd,0x04,0x60,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0.5, v3 :: v_dual_add_nc_u32 v6, 0.5, v2 +// GFX12: v_dual_sub_f32 v255, 0.5, v3 :: 
v_dual_add_nc_u32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x60,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, -1, v4 :: v_dual_add_nc_u32 v6, src_scc, v5 +// GFX12: v_dual_sub_f32 v255, -1, v4 :: v_dual_add_nc_u32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x60,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v6, null, v5 :: v_dual_add_nc_u32 v255, 0xaf123456, v4 +// GFX12: v_dual_sub_f32 v6, null, v5 :: v_dual_add_nc_u32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x60,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v4, v2 :: v_dual_cndmask_b32 v6, v1, v3 +// GFX12: v_dual_sub_f32 v255, v4, v2 :: v_dual_cndmask_b32 v6, v1, v3 ; encoding: [0x04,0x05,0x52,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v1, v2 :: v_dual_cndmask_b32 v6, v255, v3 +// GFX12: v_dual_sub_f32 v255, v1, v2 :: v_dual_cndmask_b32 v6, v255, v3 ; encoding: [0x01,0x05,0x52,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v255, v2 :: v_dual_cndmask_b32 v6, v2, v3 +// GFX12: v_dual_sub_f32 v255, v255, v2 :: v_dual_cndmask_b32 v6, v2, v3 ; encoding: [0xff,0x05,0x52,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v2, v2 :: v_dual_cndmask_b32 v6, v3, v3 +// GFX12: v_dual_sub_f32 v255, v2, v2 :: v_dual_cndmask_b32 v6, v3, v3 ; encoding: [0x02,0x05,0x52,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v3, v2 :: v_dual_cndmask_b32 v6, v4, v3 +// GFX12: v_dual_sub_f32 v255, v3, v2 :: v_dual_cndmask_b32 v6, v4, v3 ; encoding: [0x03,0x05,0x52,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction 
requires wavesize=32 + +v_dual_sub_f32 v255, s105, v2 :: v_dual_cndmask_b32 v6, s105, v3 +// GFX12: v_dual_sub_f32 v255, s105, v2 :: v_dual_cndmask_b32 v6, s105, v3 ; encoding: [0x69,0x04,0x52,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s1, v3 +// GFX12: v_dual_sub_f32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s1, v3 ; encoding: [0x01,0x04,0x52,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v6, ttmp15, v3 +// GFX12: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x52,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v6, exec_hi, v3 +// GFX12: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x52,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v6, exec_lo, v3 +// GFX12: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x52,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, m0, v2 :: v_dual_cndmask_b32 v6, m0, v3 +// GFX12: v_dual_sub_f32 v255, m0, v2 :: v_dual_cndmask_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0x52,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v6, vcc_hi, v3 +// GFX12: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x52,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v6, vcc_lo, v3 +// GFX12: 
v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x52,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0xaf123456, v2 :: v_dual_cndmask_b32 v6, null, v3 +// GFX12: v_dual_sub_f32 v255, 0xaf123456, v2 :: v_dual_cndmask_b32 v6, null, v3 ; encoding: [0xff,0x04,0x52,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, src_scc, v2 :: v_dual_cndmask_b32 v6, -1, v3 +// GFX12: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_cndmask_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0x52,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0.5, v3 :: v_dual_cndmask_b32 v6, 0.5, v2 +// GFX12: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_cndmask_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x52,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, -1, v4 :: v_dual_cndmask_b32 v6, src_scc, v5 +// GFX12: v_dual_sub_f32 v255, -1, v4 :: v_dual_cndmask_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x52,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v6, null, v5 :: v_dual_cndmask_b32 v255, 0xaf123456, v4 +// GFX12: v_dual_sub_f32 v6, null, v5 :: v_dual_cndmask_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x52,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v4, v2 :: v_dual_fmaak_f32 v6, v1, v3, 0xaf123456 +// GFX12: v_dual_sub_f32 v255, v4, v2 :: v_dual_fmaak_f32 v6, v1, v3, 0xaf123456 ; encoding: [0x04,0x05,0x42,0xc9,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v1, v2 :: v_dual_fmaak_f32 v6, v255, v3, 0xaf123456 +// GFX12: v_dual_sub_f32 v255, 
v1, v2 :: v_dual_fmaak_f32 v6, v255, v3, 0xaf123456 ; encoding: [0x01,0x05,0x42,0xc9,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v255, v2 :: v_dual_fmaak_f32 v6, v2, v3, 0xaf123456 +// GFX12: v_dual_sub_f32 v255, v255, v2 :: v_dual_fmaak_f32 v6, v2, v3, 0xaf123456 ; encoding: [0xff,0x05,0x42,0xc9,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v2, v2 :: v_dual_fmaak_f32 v6, v3, v3, 0xaf123456 +// GFX12: v_dual_sub_f32 v255, v2, v2 :: v_dual_fmaak_f32 v6, v3, v3, 0xaf123456 ; encoding: [0x02,0x05,0x42,0xc9,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v3, v2 :: v_dual_fmaak_f32 v6, v4, v3, 0xaf123456 +// GFX12: v_dual_sub_f32 v255, v3, v2 :: v_dual_fmaak_f32 v6, v4, v3, 0xaf123456 ; encoding: [0x03,0x05,0x42,0xc9,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s105, v2 :: v_dual_fmaak_f32 v6, s105, v3, 0xaf123456 +// GFX12: v_dual_sub_f32 v255, s105, v2 :: v_dual_fmaak_f32 v6, s105, v3, 0xaf123456 ; encoding: [0x69,0x04,0x42,0xc9,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s1, v2 :: v_dual_fmaak_f32 v6, s1, v3, 0xaf123456 +// GFX12: v_dual_sub_f32 v255, s1, v2 :: v_dual_fmaak_f32 v6, s1, v3, 0xaf123456 ; encoding: [0x01,0x04,0x42,0xc9,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_fmaak_f32 v6, ttmp15, v3, 0xaf123456 +// GFX12: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_fmaak_f32 v6, ttmp15, v3, 0xaf123456 ; encoding: [0x7b,0x04,0x42,0xc9,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction 
requires wavesize=32 + +v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_fmaak_f32 v6, exec_hi, v3, 0xaf123456 +// GFX12: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_fmaak_f32 v6, exec_hi, v3, 0xaf123456 ; encoding: [0x7f,0x04,0x42,0xc9,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_fmaak_f32 v6, exec_lo, v3, 0xaf123456 +// GFX12: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_fmaak_f32 v6, exec_lo, v3, 0xaf123456 ; encoding: [0x7e,0x04,0x42,0xc9,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, m0, v2 :: v_dual_fmaak_f32 v6, m0, v3, 0xaf123456 +// GFX12: v_dual_sub_f32 v255, m0, v2 :: v_dual_fmaak_f32 v6, m0, v3, 0xaf123456 ; encoding: [0x7d,0x04,0x42,0xc9,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_fmaak_f32 v6, vcc_hi, v3, 0xaf123456 +// GFX12: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_fmaak_f32 v6, vcc_hi, v3, 0xaf123456 ; encoding: [0x6b,0x04,0x42,0xc9,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_fmaak_f32 v6, vcc_lo, v3, 0xaf123456 +// GFX12: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_fmaak_f32 v6, vcc_lo, v3, 0xaf123456 ; encoding: [0x6a,0x04,0x42,0xc9,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0xaf123456, v2 :: v_dual_fmaak_f32 v6, null, v3, 0xaf123456 +// GFX12: v_dual_sub_f32 v255, 0xaf123456, v2 :: v_dual_fmaak_f32 v6, null, v3, 0xaf123456 ; encoding: [0xff,0x04,0x42,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, src_scc, v2 :: v_dual_fmaak_f32 v6, -1, v3, 0xaf123456 +// GFX12: 
v_dual_sub_f32 v255, src_scc, v2 :: v_dual_fmaak_f32 v6, -1, v3, 0xaf123456 ; encoding: [0xfd,0x04,0x42,0xc9,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0.5, v3 :: v_dual_fmaak_f32 v6, 0.5, v2, 0xaf123456 +// GFX12: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_fmaak_f32 v6, 0.5, v2, 0xaf123456 ; encoding: [0xf0,0x06,0x42,0xc9,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, -1, v4 :: v_dual_fmaak_f32 v6, src_scc, v5, 0xaf123456 +// GFX12: v_dual_sub_f32 v255, -1, v4 :: v_dual_fmaak_f32 v6, src_scc, v5, 0xaf123456 ; encoding: [0xc1,0x08,0x42,0xc9,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v6, null, v5 :: v_dual_fmaak_f32 v255, 0xaf123456, v4, 0xaf123456 +// GFX12: v_dual_sub_f32 v6, null, v5 :: v_dual_fmaak_f32 v255, 0xaf123456, v4, 0xaf123456 ; encoding: [0x7c,0x0a,0x42,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v4, v2 :: v_dual_fmac_f32 v6, v1, v3 +// GFX12: v_dual_sub_f32 v255, v4, v2 :: v_dual_fmac_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x40,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v1, v2 :: v_dual_fmac_f32 v6, v255, v3 +// GFX12: v_dual_sub_f32 v255, v1, v2 :: v_dual_fmac_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x40,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v255, v2 :: v_dual_fmac_f32 v6, v2, v3 +// GFX12: v_dual_sub_f32 v255, v255, v2 :: v_dual_fmac_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x40,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v2, v2 :: v_dual_fmac_f32 v6, v3, v3 +// GFX12: 
v_dual_sub_f32 v255, v2, v2 :: v_dual_fmac_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x40,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v3, v2 :: v_dual_fmac_f32 v6, v4, v3 +// GFX12: v_dual_sub_f32 v255, v3, v2 :: v_dual_fmac_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x40,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s105, v2 :: v_dual_fmac_f32 v6, s1, v3 +// GFX12: v_dual_sub_f32 v255, s105, v2 :: v_dual_fmac_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x40,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s1, v2 :: v_dual_fmac_f32 v6, s105, v3 +// GFX12: v_dual_sub_f32 v255, s1, v2 :: v_dual_fmac_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x40,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_fmac_f32 v6, vcc_lo, v3 +// GFX12: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_fmac_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x40,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_fmac_f32 v6, vcc_hi, v3 +// GFX12: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_fmac_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x40,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_fmac_f32 v6, ttmp15, v3 +// GFX12: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_fmac_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x40,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, m0, v2 :: v_dual_fmac_f32 v6, m0, v3 +// GFX12: v_dual_sub_f32 v255, m0, v2 :: v_dual_fmac_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x40,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v6, exec_lo, v3 +// GFX12: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x40,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v6, exec_hi, v3 +// GFX12: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x40,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0xaf123456, v2 :: v_dual_fmac_f32 v6, null, v3 +// GFX12: v_dual_sub_f32 v255, 0xaf123456, v2 :: v_dual_fmac_f32 v6, null, v3 ; encoding: [0xff,0x04,0x40,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, src_scc, v2 :: v_dual_fmac_f32 v6, -1, v3 +// GFX12: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_fmac_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x40,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0.5, v3 :: v_dual_fmac_f32 v6, 0.5, v2 +// GFX12: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_fmac_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x40,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, -1, v4 :: v_dual_fmac_f32 v6, src_scc, v5 +// GFX12: v_dual_sub_f32 v255, -1, v4 :: v_dual_fmac_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x40,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v6, null, v5 :: v_dual_fmac_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_sub_f32 v6, null, v5 :: v_dual_fmac_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x40,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v4, v255 :: v_dual_fmamk_f32 v6, v1, 0xaf123456, 
v255 +// GFX12: v_dual_sub_f32 v255, v4, v255 :: v_dual_fmamk_f32 v6, v1, 0xaf123456, v255 ; encoding: [0x04,0xff,0x45,0xc9,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v1, v255 :: v_dual_fmamk_f32 v6, v255, 0xaf123456, v255 +// GFX12: v_dual_sub_f32 v255, v1, v255 :: v_dual_fmamk_f32 v6, v255, 0xaf123456, v255 ; encoding: [0x01,0xff,0x45,0xc9,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v255, v255 :: v_dual_fmamk_f32 v6, v2, 0xaf123456, v255 +// GFX12: v_dual_sub_f32 v255, v255, v255 :: v_dual_fmamk_f32 v6, v2, 0xaf123456, v255 ; encoding: [0xff,0xff,0x45,0xc9,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v2, v255 :: v_dual_fmamk_f32 v6, v3, 0xaf123456, v255 +// GFX12: v_dual_sub_f32 v255, v2, v255 :: v_dual_fmamk_f32 v6, v3, 0xaf123456, v255 ; encoding: [0x02,0xff,0x45,0xc9,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v3, v255 :: v_dual_fmamk_f32 v6, v4, 0xaf123456, v255 +// GFX12: v_dual_sub_f32 v255, v3, v255 :: v_dual_fmamk_f32 v6, v4, 0xaf123456, v255 ; encoding: [0x03,0xff,0x45,0xc9,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s105, v255 :: v_dual_fmamk_f32 v6, s105, 0xaf123456, v255 +// GFX12: v_dual_sub_f32 v255, s105, v255 :: v_dual_fmamk_f32 v6, s105, 0xaf123456, v255 ; encoding: [0x69,0xfe,0x45,0xc9,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s1, v255 :: v_dual_fmamk_f32 v6, s1, 0xaf123456, v255 +// GFX12: v_dual_sub_f32 v255, s1, v255 :: v_dual_fmamk_f32 v6, s1, 0xaf123456, v255 ; encoding: 
[0x01,0xfe,0x45,0xc9,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, ttmp15, v255 :: v_dual_fmamk_f32 v6, ttmp15, 0xaf123456, v255 +// GFX12: v_dual_sub_f32 v255, ttmp15, v255 :: v_dual_fmamk_f32 v6, ttmp15, 0xaf123456, v255 ; encoding: [0x7b,0xfe,0x45,0xc9,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_hi, v255 :: v_dual_fmamk_f32 v6, exec_hi, 0xaf123456, v255 +// GFX12: v_dual_sub_f32 v255, exec_hi, v255 :: v_dual_fmamk_f32 v6, exec_hi, 0xaf123456, v255 ; encoding: [0x7f,0xfe,0x45,0xc9,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_lo, v255 :: v_dual_fmamk_f32 v6, exec_lo, 0xaf123456, v255 +// GFX12: v_dual_sub_f32 v255, exec_lo, v255 :: v_dual_fmamk_f32 v6, exec_lo, 0xaf123456, v255 ; encoding: [0x7e,0xfe,0x45,0xc9,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, m0, v255 :: v_dual_fmamk_f32 v6, m0, 0xaf123456, v255 +// GFX12: v_dual_sub_f32 v255, m0, v255 :: v_dual_fmamk_f32 v6, m0, 0xaf123456, v255 ; encoding: [0x7d,0xfe,0x45,0xc9,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_hi, v255 :: v_dual_fmamk_f32 v6, vcc_hi, 0xaf123456, v255 +// GFX12: v_dual_sub_f32 v255, vcc_hi, v255 :: v_dual_fmamk_f32 v6, vcc_hi, 0xaf123456, v255 ; encoding: [0x6b,0xfe,0x45,0xc9,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_lo, v255 :: v_dual_fmamk_f32 v6, vcc_lo, 0xaf123456, v255 +// GFX12: v_dual_sub_f32 v255, vcc_lo, v255 :: v_dual_fmamk_f32 v6, vcc_lo, 0xaf123456, v255 ; encoding: [0x6a,0xfe,0x45,0xc9,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// 
W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, null, 0xaf123456, v255 +// GFX12: v_dual_sub_f32 v255, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, null, 0xaf123456, v255 ; encoding: [0xff,0xfe,0x45,0xc9,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, src_scc, v255 :: v_dual_fmamk_f32 v6, -1, 0xaf123456, v255 +// GFX12: v_dual_sub_f32 v255, src_scc, v255 :: v_dual_fmamk_f32 v6, -1, 0xaf123456, v255 ; encoding: [0xfd,0xfe,0x45,0xc9,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0.5, v3 :: v_dual_fmamk_f32 v6, 0.5, 0xaf123456, v255 +// GFX12: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_fmamk_f32 v6, 0.5, 0xaf123456, v255 ; encoding: [0xf0,0x06,0x44,0xc9,0xf0,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, -1, v4 :: v_dual_fmamk_f32 v6, src_scc, 0xaf123456, v255 +// GFX12: v_dual_sub_f32 v255, -1, v4 :: v_dual_fmamk_f32 v6, src_scc, 0xaf123456, v255 ; encoding: [0xc1,0x08,0x44,0xc9,0xfd,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v6, null, v5 :: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v4 +// GFX12: v_dual_sub_f32 v6, null, v5 :: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x44,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v4, v2 :: v_dual_lshlrev_b32 v6, v1, v3 +// GFX12: v_dual_sub_f32 v255, v4, v2 :: v_dual_lshlrev_b32 v6, v1, v3 ; encoding: [0x04,0x05,0x62,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v1, v2 :: v_dual_lshlrev_b32 v6, v255, v3 +// GFX12: v_dual_sub_f32 
v255, v1, v2 :: v_dual_lshlrev_b32 v6, v255, v3 ; encoding: [0x01,0x05,0x62,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v255, v2 :: v_dual_lshlrev_b32 v6, v2, v3 +// GFX12: v_dual_sub_f32 v255, v255, v2 :: v_dual_lshlrev_b32 v6, v2, v3 ; encoding: [0xff,0x05,0x62,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v2, v2 :: v_dual_lshlrev_b32 v6, v3, v3 +// GFX12: v_dual_sub_f32 v255, v2, v2 :: v_dual_lshlrev_b32 v6, v3, v3 ; encoding: [0x02,0x05,0x62,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v3, v2 :: v_dual_lshlrev_b32 v6, v4, v3 +// GFX12: v_dual_sub_f32 v255, v3, v2 :: v_dual_lshlrev_b32 v6, v4, v3 ; encoding: [0x03,0x05,0x62,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s105, v2 :: v_dual_lshlrev_b32 v6, s1, v3 +// GFX12: v_dual_sub_f32 v255, s105, v2 :: v_dual_lshlrev_b32 v6, s1, v3 ; encoding: [0x69,0x04,0x62,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s1, v2 :: v_dual_lshlrev_b32 v6, s105, v3 +// GFX12: v_dual_sub_f32 v255, s1, v2 :: v_dual_lshlrev_b32 v6, s105, v3 ; encoding: [0x01,0x04,0x62,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v6, vcc_lo, v3 +// GFX12: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x62,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v6, vcc_hi, v3 +// GFX12: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x62,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v6, ttmp15, v3 +// GFX12: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x62,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, m0, v2 :: v_dual_lshlrev_b32 v6, m0, v3 +// GFX12: v_dual_sub_f32 v255, m0, v2 :: v_dual_lshlrev_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0x62,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v6, exec_lo, v3 +// GFX12: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x62,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v6, exec_hi, v3 +// GFX12: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x62,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0xaf123456, v2 :: v_dual_lshlrev_b32 v6, null, v3 +// GFX12: v_dual_sub_f32 v255, 0xaf123456, v2 :: v_dual_lshlrev_b32 v6, null, v3 ; encoding: [0xff,0x04,0x62,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v6, -1, v3 +// GFX12: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0x62,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v6, 0.5, v2 +// GFX12: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x62,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, -1, v4 :: 
v_dual_lshlrev_b32 v6, src_scc, v5 +// GFX12: v_dual_sub_f32 v255, -1, v4 :: v_dual_lshlrev_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x62,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v6, null, v5 :: v_dual_lshlrev_b32 v255, 0xaf123456, v4 +// GFX12: v_dual_sub_f32 v6, null, v5 :: v_dual_lshlrev_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x62,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v4, v2 :: v_dual_max_num_f32 v6, v1, v3 +// GFX12: v_dual_sub_f32 v255, v4, v2 :: v_dual_max_num_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x54,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v1, v2 :: v_dual_max_num_f32 v6, v255, v3 +// GFX12: v_dual_sub_f32 v255, v1, v2 :: v_dual_max_num_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x54,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v255, v2 :: v_dual_max_num_f32 v6, v2, v3 +// GFX12: v_dual_sub_f32 v255, v255, v2 :: v_dual_max_num_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x54,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v2, v2 :: v_dual_max_num_f32 v6, v3, v3 +// GFX12: v_dual_sub_f32 v255, v2, v2 :: v_dual_max_num_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x54,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v3, v2 :: v_dual_max_num_f32 v6, v4, v3 +// GFX12: v_dual_sub_f32 v255, v3, v2 :: v_dual_max_num_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x54,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s105, v2 :: v_dual_max_num_f32 v6, s1, v3 +// GFX12: v_dual_sub_f32 v255, s105, v2 :: v_dual_max_num_f32 v6, s1, v3 ; encoding: 
[0x69,0x04,0x54,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s1, v2 :: v_dual_max_num_f32 v6, s105, v3 +// GFX12: v_dual_sub_f32 v255, s1, v2 :: v_dual_max_num_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x54,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_max_num_f32 v6, vcc_lo, v3 +// GFX12: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_max_num_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x54,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_max_num_f32 v6, vcc_hi, v3 +// GFX12: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_max_num_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x54,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_max_num_f32 v6, ttmp15, v3 +// GFX12: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_max_num_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x54,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, m0, v2 :: v_dual_max_num_f32 v6, m0, v3 +// GFX12: v_dual_sub_f32 v255, m0, v2 :: v_dual_max_num_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x54,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v6, exec_lo, v3 +// GFX12: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x54,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v6, exec_hi, v3 +// GFX12: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x54,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction 
requires wavesize=32 + +v_dual_sub_f32 v255, 0xaf123456, v2 :: v_dual_max_num_f32 v6, null, v3 +// GFX12: v_dual_sub_f32 v255, 0xaf123456, v2 :: v_dual_max_num_f32 v6, null, v3 ; encoding: [0xff,0x04,0x54,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, src_scc, v2 :: v_dual_max_num_f32 v6, -1, v3 +// GFX12: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_max_num_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x54,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0.5, v3 :: v_dual_max_num_f32 v6, 0.5, v2 +// GFX12: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_max_num_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x54,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, -1, v4 :: v_dual_max_num_f32 v6, src_scc, v5 +// GFX12: v_dual_sub_f32 v255, -1, v4 :: v_dual_max_num_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x54,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v6, null, v5 :: v_dual_max_num_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_sub_f32 v6, null, v5 :: v_dual_max_num_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x54,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v4, v2 :: v_dual_min_num_f32 v6, v1, v3 +// GFX12: v_dual_sub_f32 v255, v4, v2 :: v_dual_min_num_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x56,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v1, v2 :: v_dual_min_num_f32 v6, v255, v3 +// GFX12: v_dual_sub_f32 v255, v1, v2 :: v_dual_min_num_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x56,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v255, v2 :: v_dual_min_num_f32 
v6, v2, v3 +// GFX12: v_dual_sub_f32 v255, v255, v2 :: v_dual_min_num_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x56,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v2, v2 :: v_dual_min_num_f32 v6, v3, v3 +// GFX12: v_dual_sub_f32 v255, v2, v2 :: v_dual_min_num_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x56,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v3, v2 :: v_dual_min_num_f32 v6, v4, v3 +// GFX12: v_dual_sub_f32 v255, v3, v2 :: v_dual_min_num_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x56,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s105, v2 :: v_dual_min_num_f32 v6, s1, v3 +// GFX12: v_dual_sub_f32 v255, s105, v2 :: v_dual_min_num_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x56,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s1, v2 :: v_dual_min_num_f32 v6, s105, v3 +// GFX12: v_dual_sub_f32 v255, s1, v2 :: v_dual_min_num_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x56,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_min_num_f32 v6, vcc_lo, v3 +// GFX12: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_min_num_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x56,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_min_num_f32 v6, vcc_hi, v3 +// GFX12: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_min_num_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x56,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_min_num_f32 v6, ttmp15, v3 +// GFX12: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_min_num_f32 v6, ttmp15, v3 ; encoding: 
[0x7e,0x04,0x56,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, m0, v2 :: v_dual_min_num_f32 v6, m0, v3 +// GFX12: v_dual_sub_f32 v255, m0, v2 :: v_dual_min_num_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x56,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v6, exec_lo, v3 +// GFX12: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x56,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v6, exec_hi, v3 +// GFX12: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x56,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0xaf123456, v2 :: v_dual_min_num_f32 v6, null, v3 +// GFX12: v_dual_sub_f32 v255, 0xaf123456, v2 :: v_dual_min_num_f32 v6, null, v3 ; encoding: [0xff,0x04,0x56,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, src_scc, v2 :: v_dual_min_num_f32 v6, -1, v3 +// GFX12: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_min_num_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x56,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0.5, v3 :: v_dual_min_num_f32 v6, 0.5, v2 +// GFX12: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_min_num_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x56,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, -1, v4 :: v_dual_min_num_f32 v6, src_scc, v5 +// GFX12: v_dual_sub_f32 v255, -1, v4 :: v_dual_min_num_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x56,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_sub_f32 v6, null, v5 :: v_dual_min_num_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_sub_f32 v6, null, v5 :: v_dual_min_num_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x56,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v4, v255 :: v_dual_mov_b32 v6, v1 +// GFX12: v_dual_sub_f32 v255, v4, v255 :: v_dual_mov_b32 v6, v1 ; encoding: [0x04,0xff,0x51,0xc9,0x01,0x01,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v1, v255 :: v_dual_mov_b32 v6, v255 +// GFX12: v_dual_sub_f32 v255, v1, v255 :: v_dual_mov_b32 v6, v255 ; encoding: [0x01,0xff,0x51,0xc9,0xff,0x01,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v255, v255 :: v_dual_mov_b32 v6, v2 +// GFX12: v_dual_sub_f32 v255, v255, v255 :: v_dual_mov_b32 v6, v2 ; encoding: [0xff,0xff,0x51,0xc9,0x02,0x01,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v2, v255 :: v_dual_mov_b32 v6, v3 +// GFX12: v_dual_sub_f32 v255, v2, v255 :: v_dual_mov_b32 v6, v3 ; encoding: [0x02,0xff,0x51,0xc9,0x03,0x01,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v3, v255 :: v_dual_mov_b32 v6, v4 +// GFX12: v_dual_sub_f32 v255, v3, v255 :: v_dual_mov_b32 v6, v4 ; encoding: [0x03,0xff,0x51,0xc9,0x04,0x01,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s105, v255 :: v_dual_mov_b32 v6, s1 +// GFX12: v_dual_sub_f32 v255, s105, v255 :: v_dual_mov_b32 v6, s1 ; encoding: [0x69,0xfe,0x51,0xc9,0x01,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s1, v255 :: v_dual_mov_b32 v6, s105 +// GFX12: v_dual_sub_f32 v255, s1, v255 :: v_dual_mov_b32 v6, s105 ; encoding: 
[0x01,0xfe,0x51,0xc9,0x69,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, ttmp15, v255 :: v_dual_mov_b32 v6, vcc_lo +// GFX12: v_dual_sub_f32 v255, ttmp15, v255 :: v_dual_mov_b32 v6, vcc_lo ; encoding: [0x7b,0xfe,0x51,0xc9,0x6a,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_hi, v255 :: v_dual_mov_b32 v6, vcc_hi +// GFX12: v_dual_sub_f32 v255, exec_hi, v255 :: v_dual_mov_b32 v6, vcc_hi ; encoding: [0x7f,0xfe,0x51,0xc9,0x6b,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_lo, v255 :: v_dual_mov_b32 v6, ttmp15 +// GFX12: v_dual_sub_f32 v255, exec_lo, v255 :: v_dual_mov_b32 v6, ttmp15 ; encoding: [0x7e,0xfe,0x51,0xc9,0x7b,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, m0, v255 :: v_dual_mov_b32 v6, m0 +// GFX12: v_dual_sub_f32 v255, m0, v255 :: v_dual_mov_b32 v6, m0 ; encoding: [0x7d,0xfe,0x51,0xc9,0x7d,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_hi, v255 :: v_dual_mov_b32 v6, exec_lo +// GFX12: v_dual_sub_f32 v255, vcc_hi, v255 :: v_dual_mov_b32 v6, exec_lo ; encoding: [0x6b,0xfe,0x51,0xc9,0x7e,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_lo, v255 :: v_dual_mov_b32 v6, exec_hi +// GFX12: v_dual_sub_f32 v255, vcc_lo, v255 :: v_dual_mov_b32 v6, exec_hi ; encoding: [0x6a,0xfe,0x51,0xc9,0x7f,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0xaf123456, v255 :: v_dual_mov_b32 v6, null +// GFX12: v_dual_sub_f32 v255, 0xaf123456, v255 :: v_dual_mov_b32 v6, null ; encoding: [0xff,0xfe,0x51,0xc9,0x7c,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 
src_scc, v255 :: v_dual_mov_b32 v6, -1 +// GFX12: v_dual_sub_f32 v255, src_scc, v255 :: v_dual_mov_b32 v6, -1 ; encoding: [0xfd,0xfe,0x51,0xc9,0xc1,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0.5, v3 :: v_dual_mov_b32 v6, 0.5 +// GFX12: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_mov_b32 v6, 0.5 ; encoding: [0xf0,0x06,0x50,0xc9,0xf0,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, -1, v4 :: v_dual_mov_b32 v6, src_scc +// GFX12: v_dual_sub_f32 v255, -1, v4 :: v_dual_mov_b32 v6, src_scc ; encoding: [0xc1,0x08,0x50,0xc9,0xfd,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v6, null, v5 :: v_dual_mov_b32 v255, 0xaf123456 +// GFX12: v_dual_sub_f32 v6, null, v5 :: v_dual_mov_b32 v255, 0xaf123456 ; encoding: [0x7c,0x0a,0x50,0xc9,0xff,0x00,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v6, v1, v3 +// GFX12: v_dual_sub_f32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x4e,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v6, v255, v3 +// GFX12: v_dual_sub_f32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x4e,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v6, v2, v3 +// GFX12: v_dual_sub_f32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x4e,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v6, v3, v3 +// GFX12: v_dual_sub_f32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v6, v3, v3 ; encoding: 
[0x02,0x05,0x4e,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v6, v4, v3 +// GFX12: v_dual_sub_f32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x4e,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v6, s1, v3 +// GFX12: v_dual_sub_f32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x4e,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v6, s105, v3 +// GFX12: v_dual_sub_f32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x4e,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_lo, v3 +// GFX12: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x4e,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_hi, v3 +// GFX12: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x4e,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, ttmp15, v3 +// GFX12: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x4e,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v6, m0, v3 +// GFX12: v_dual_sub_f32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x4e,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_lo, v3 +// GFX12: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x4e,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_hi, v3 +// GFX12: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x4e,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0xaf123456, v2 :: v_dual_mul_dx9_zero_f32 v6, null, v3 +// GFX12: v_dual_sub_f32 v255, 0xaf123456, v2 :: v_dual_mul_dx9_zero_f32 v6, null, v3 ; encoding: [0xff,0x04,0x4e,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v6, -1, v3 +// GFX12: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x4e,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v6, 0.5, v2 +// GFX12: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x4e,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v6, src_scc, v5 +// GFX12: v_dual_sub_f32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x4e,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v6, null, v5 :: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_sub_f32 v6, null, v5 :: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v4 ; encoding: 
[0x7c,0x0a,0x4e,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v4, v2 :: v_dual_mul_f32 v6, v1, v3 +// GFX12: v_dual_sub_f32 v255, v4, v2 :: v_dual_mul_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x46,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v1, v2 :: v_dual_mul_f32 v6, v255, v3 +// GFX12: v_dual_sub_f32 v255, v1, v2 :: v_dual_mul_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x46,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v255, v2 :: v_dual_mul_f32 v6, v2, v3 +// GFX12: v_dual_sub_f32 v255, v255, v2 :: v_dual_mul_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x46,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v2, v2 :: v_dual_mul_f32 v6, v3, v3 +// GFX12: v_dual_sub_f32 v255, v2, v2 :: v_dual_mul_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x46,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v3, v2 :: v_dual_mul_f32 v6, v4, v3 +// GFX12: v_dual_sub_f32 v255, v3, v2 :: v_dual_mul_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x46,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s105, v2 :: v_dual_mul_f32 v6, s1, v3 +// GFX12: v_dual_sub_f32 v255, s105, v2 :: v_dual_mul_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x46,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s1, v2 :: v_dual_mul_f32 v6, s105, v3 +// GFX12: v_dual_sub_f32 v255, s1, v2 :: v_dual_mul_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x46,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_mul_f32 v6, vcc_lo, v3 +// GFX12: v_dual_sub_f32 
v255, ttmp15, v2 :: v_dual_mul_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x46,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_mul_f32 v6, vcc_hi, v3 +// GFX12: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_mul_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x46,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_mul_f32 v6, ttmp15, v3 +// GFX12: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_mul_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x46,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, m0, v2 :: v_dual_mul_f32 v6, m0, v3 +// GFX12: v_dual_sub_f32 v255, m0, v2 :: v_dual_mul_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x46,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_mul_f32 v6, exec_lo, v3 +// GFX12: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_mul_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x46,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_mul_f32 v6, exec_hi, v3 +// GFX12: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_mul_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x46,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0xaf123456, v2 :: v_dual_mul_f32 v6, null, v3 +// GFX12: v_dual_sub_f32 v255, 0xaf123456, v2 :: v_dual_mul_f32 v6, null, v3 ; encoding: [0xff,0x04,0x46,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, src_scc, v2 :: v_dual_mul_f32 v6, -1, v3 +// GFX12: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_mul_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x46,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0.5, v3 :: v_dual_mul_f32 v6, 0.5, v2 +// GFX12: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_mul_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x46,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, -1, v4 :: v_dual_mul_f32 v6, src_scc, v5 +// GFX12: v_dual_sub_f32 v255, -1, v4 :: v_dual_mul_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x46,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v6, null, v5 :: v_dual_mul_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_sub_f32 v6, null, v5 :: v_dual_mul_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x46,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v4, v2 :: v_dual_sub_f32 v6, v1, v3 +// GFX12: v_dual_sub_f32 v255, v4, v2 :: v_dual_sub_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x4a,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v1, v2 :: v_dual_sub_f32 v6, v255, v3 +// GFX12: v_dual_sub_f32 v255, v1, v2 :: v_dual_sub_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x4a,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v255, v2 :: v_dual_sub_f32 v6, v2, v3 +// GFX12: v_dual_sub_f32 v255, v255, v2 :: v_dual_sub_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x4a,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v2, v2 :: v_dual_sub_f32 v6, v3, v3 +// GFX12: v_dual_sub_f32 v255, v2, v2 :: v_dual_sub_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x4a,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v3, v2 :: v_dual_sub_f32 v6, v4, v3 +// GFX12: v_dual_sub_f32 v255, v3, v2 :: v_dual_sub_f32 v6, v4, 
v3 ; encoding: [0x03,0x05,0x4a,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s105, v2 :: v_dual_sub_f32 v6, s1, v3 +// GFX12: v_dual_sub_f32 v255, s105, v2 :: v_dual_sub_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x4a,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s1, v2 :: v_dual_sub_f32 v6, s105, v3 +// GFX12: v_dual_sub_f32 v255, s1, v2 :: v_dual_sub_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x4a,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_sub_f32 v6, vcc_lo, v3 +// GFX12: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_sub_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x4a,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_sub_f32 v6, vcc_hi, v3 +// GFX12: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_sub_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x4a,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_sub_f32 v6, ttmp15, v3 +// GFX12: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_sub_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x4a,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, m0, v2 :: v_dual_sub_f32 v6, m0, v3 +// GFX12: v_dual_sub_f32 v255, m0, v2 :: v_dual_sub_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x4a,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_sub_f32 v6, exec_lo, v3 +// GFX12: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_sub_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x4a,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_lo, v2 :: 
v_dual_sub_f32 v6, exec_hi, v3 +// GFX12: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_sub_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x4a,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0xaf123456, v2 :: v_dual_sub_f32 v6, null, v3 +// GFX12: v_dual_sub_f32 v255, 0xaf123456, v2 :: v_dual_sub_f32 v6, null, v3 ; encoding: [0xff,0x04,0x4a,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, src_scc, v2 :: v_dual_sub_f32 v6, -1, v3 +// GFX12: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_sub_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x4a,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0.5, v3 :: v_dual_sub_f32 v6, 0.5, v2 +// GFX12: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_sub_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x4a,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, -1, v4 :: v_dual_sub_f32 v6, src_scc, v5 +// GFX12: v_dual_sub_f32 v255, -1, v4 :: v_dual_sub_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x4a,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v6, null, v5 :: v_dual_sub_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_sub_f32 v6, null, v5 :: v_dual_sub_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x4a,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v4, v2 :: v_dual_subrev_f32 v6, v1, v3 +// GFX12: v_dual_sub_f32 v255, v4, v2 :: v_dual_subrev_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x4c,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v1, v2 :: v_dual_subrev_f32 v6, v255, v3 +// GFX12: v_dual_sub_f32 v255, v1, v2 :: v_dual_subrev_f32 v6, v255, v3 ; encoding: 
[0x01,0x05,0x4c,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v255, v2 :: v_dual_subrev_f32 v6, v2, v3 +// GFX12: v_dual_sub_f32 v255, v255, v2 :: v_dual_subrev_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x4c,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v2, v2 :: v_dual_subrev_f32 v6, v3, v3 +// GFX12: v_dual_sub_f32 v255, v2, v2 :: v_dual_subrev_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x4c,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v3, v2 :: v_dual_subrev_f32 v6, v4, v3 +// GFX12: v_dual_sub_f32 v255, v3, v2 :: v_dual_subrev_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x4c,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s105, v2 :: v_dual_subrev_f32 v6, s1, v3 +// GFX12: v_dual_sub_f32 v255, s105, v2 :: v_dual_subrev_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x4c,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s1, v2 :: v_dual_subrev_f32 v6, s105, v3 +// GFX12: v_dual_sub_f32 v255, s1, v2 :: v_dual_subrev_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x4c,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_subrev_f32 v6, vcc_lo, v3 +// GFX12: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_subrev_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x4c,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_subrev_f32 v6, vcc_hi, v3 +// GFX12: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_subrev_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x4c,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_lo, v2 :: 
v_dual_subrev_f32 v6, ttmp15, v3 +// GFX12: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_subrev_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x4c,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, m0, v2 :: v_dual_subrev_f32 v6, m0, v3 +// GFX12: v_dual_sub_f32 v255, m0, v2 :: v_dual_subrev_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x4c,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v6, exec_lo, v3 +// GFX12: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x4c,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v6, exec_hi, v3 +// GFX12: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x4c,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0xaf123456, v2 :: v_dual_subrev_f32 v6, null, v3 +// GFX12: v_dual_sub_f32 v255, 0xaf123456, v2 :: v_dual_subrev_f32 v6, null, v3 ; encoding: [0xff,0x04,0x4c,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, src_scc, v2 :: v_dual_subrev_f32 v6, -1, v3 +// GFX12: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_subrev_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x4c,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0.5, v3 :: v_dual_subrev_f32 v6, 0.5, v2 +// GFX12: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_subrev_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x4c,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, -1, v4 :: v_dual_subrev_f32 v6, src_scc, v5 +// GFX12: v_dual_sub_f32 v255, -1, v4 :: v_dual_subrev_f32 v6, 
src_scc, v5 ; encoding: [0xc1,0x08,0x4c,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v6, null, v5 :: v_dual_subrev_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_sub_f32 v6, null, v5 :: v_dual_subrev_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x4c,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v4, v2 :: v_dual_add_f32 v6, v1, v3 +// GFX12: v_dual_subrev_f32 v255, v4, v2 :: v_dual_add_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x88,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v1, v2 :: v_dual_add_f32 v6, v255, v3 +// GFX12: v_dual_subrev_f32 v255, v1, v2 :: v_dual_add_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x88,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v255, v2 :: v_dual_add_f32 v6, v2, v3 +// GFX12: v_dual_subrev_f32 v255, v255, v2 :: v_dual_add_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x88,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v2, v2 :: v_dual_add_f32 v6, v3, v3 +// GFX12: v_dual_subrev_f32 v255, v2, v2 :: v_dual_add_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x88,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v3, v2 :: v_dual_add_f32 v6, v4, v3 +// GFX12: v_dual_subrev_f32 v255, v3, v2 :: v_dual_add_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x88,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s105, v2 :: v_dual_add_f32 v6, s1, v3 +// GFX12: v_dual_subrev_f32 v255, s105, v2 :: v_dual_add_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x88,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_subrev_f32 v255, s1, v2 :: v_dual_add_f32 v6, s105, v3 +// GFX12: v_dual_subrev_f32 v255, s1, v2 :: v_dual_add_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x88,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_add_f32 v6, vcc_lo, v3 +// GFX12: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_add_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x88,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_add_f32 v6, vcc_hi, v3 +// GFX12: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_add_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x88,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_add_f32 v6, ttmp15, v3 +// GFX12: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_add_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x88,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, m0, v2 :: v_dual_add_f32 v6, m0, v3 +// GFX12: v_dual_subrev_f32 v255, m0, v2 :: v_dual_add_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x88,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_add_f32 v6, exec_lo, v3 +// GFX12: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_add_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x88,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_add_f32 v6, exec_hi, v3 +// GFX12: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_add_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x88,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0xaf123456, v2 :: v_dual_add_f32 v6, null, v3 +// GFX12: v_dual_subrev_f32 v255, 
0xaf123456, v2 :: v_dual_add_f32 v6, null, v3 ; encoding: [0xff,0x04,0x88,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_add_f32 v6, -1, v3 +// GFX12: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_add_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x88,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_add_f32 v6, 0.5, v2 +// GFX12: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_add_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x88,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, -1, v4 :: v_dual_add_f32 v6, src_scc, v5 +// GFX12: v_dual_subrev_f32 v255, -1, v4 :: v_dual_add_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x88,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v6, null, v5 :: v_dual_add_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_subrev_f32 v6, null, v5 :: v_dual_add_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x88,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v4, v2 :: v_dual_add_nc_u32 v6, v1, v3 +// GFX12: v_dual_subrev_f32 v255, v4, v2 :: v_dual_add_nc_u32 v6, v1, v3 ; encoding: [0x04,0x05,0xa0,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v1, v2 :: v_dual_add_nc_u32 v6, v255, v3 +// GFX12: v_dual_subrev_f32 v255, v1, v2 :: v_dual_add_nc_u32 v6, v255, v3 ; encoding: [0x01,0x05,0xa0,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v255, v2 :: v_dual_add_nc_u32 v6, v2, v3 +// GFX12: v_dual_subrev_f32 v255, v255, v2 :: v_dual_add_nc_u32 v6, v2, v3 ; encoding: 
[0xff,0x05,0xa0,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v2, v2 :: v_dual_add_nc_u32 v6, v3, v3 +// GFX12: v_dual_subrev_f32 v255, v2, v2 :: v_dual_add_nc_u32 v6, v3, v3 ; encoding: [0x02,0x05,0xa0,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v3, v2 :: v_dual_add_nc_u32 v6, v4, v3 +// GFX12: v_dual_subrev_f32 v255, v3, v2 :: v_dual_add_nc_u32 v6, v4, v3 ; encoding: [0x03,0x05,0xa0,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s105, v2 :: v_dual_add_nc_u32 v6, s1, v3 +// GFX12: v_dual_subrev_f32 v255, s105, v2 :: v_dual_add_nc_u32 v6, s1, v3 ; encoding: [0x69,0x04,0xa0,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s1, v2 :: v_dual_add_nc_u32 v6, s105, v3 +// GFX12: v_dual_subrev_f32 v255, s1, v2 :: v_dual_add_nc_u32 v6, s105, v3 ; encoding: [0x01,0x04,0xa0,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v6, vcc_lo, v3 +// GFX12: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xa0,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v6, vcc_hi, v3 +// GFX12: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xa0,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v6, ttmp15, v3 +// GFX12: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xa0,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_subrev_f32 v255, m0, v2 :: v_dual_add_nc_u32 v6, m0, v3 +// GFX12: v_dual_subrev_f32 v255, m0, v2 :: v_dual_add_nc_u32 v6, m0, v3 ; encoding: [0x7d,0x04,0xa0,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v6, exec_lo, v3 +// GFX12: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xa0,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v6, exec_hi, v3 +// GFX12: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xa0,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0xaf123456, v2 :: v_dual_add_nc_u32 v6, null, v3 +// GFX12: v_dual_subrev_f32 v255, 0xaf123456, v2 :: v_dual_add_nc_u32 v6, null, v3 ; encoding: [0xff,0x04,0xa0,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_add_nc_u32 v6, -1, v3 +// GFX12: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_add_nc_u32 v6, -1, v3 ; encoding: [0xfd,0x04,0xa0,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_add_nc_u32 v6, 0.5, v2 +// GFX12: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_add_nc_u32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xa0,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, -1, v4 :: v_dual_add_nc_u32 v6, src_scc, v5 +// GFX12: v_dual_subrev_f32 v255, -1, v4 :: v_dual_add_nc_u32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xa0,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v6, null, v5 :: 
v_dual_add_nc_u32 v255, 0xaf123456, v4 +// GFX12: v_dual_subrev_f32 v6, null, v5 :: v_dual_add_nc_u32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xa0,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v4, v2 :: v_dual_cndmask_b32 v6, v1, v3 +// GFX12: v_dual_subrev_f32 v255, v4, v2 :: v_dual_cndmask_b32 v6, v1, v3 ; encoding: [0x04,0x05,0x92,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v1, v2 :: v_dual_cndmask_b32 v6, v255, v3 +// GFX12: v_dual_subrev_f32 v255, v1, v2 :: v_dual_cndmask_b32 v6, v255, v3 ; encoding: [0x01,0x05,0x92,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v255, v2 :: v_dual_cndmask_b32 v6, v2, v3 +// GFX12: v_dual_subrev_f32 v255, v255, v2 :: v_dual_cndmask_b32 v6, v2, v3 ; encoding: [0xff,0x05,0x92,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v2, v2 :: v_dual_cndmask_b32 v6, v3, v3 +// GFX12: v_dual_subrev_f32 v255, v2, v2 :: v_dual_cndmask_b32 v6, v3, v3 ; encoding: [0x02,0x05,0x92,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v3, v2 :: v_dual_cndmask_b32 v6, v4, v3 +// GFX12: v_dual_subrev_f32 v255, v3, v2 :: v_dual_cndmask_b32 v6, v4, v3 ; encoding: [0x03,0x05,0x92,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s105, v2 :: v_dual_cndmask_b32 v6, s105, v3 +// GFX12: v_dual_subrev_f32 v255, s105, v2 :: v_dual_cndmask_b32 v6, s105, v3 ; encoding: [0x69,0x04,0x92,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s1, v3 +// GFX12: v_dual_subrev_f32 v255, s1, v2 :: 
v_dual_cndmask_b32 v6, s1, v3 ; encoding: [0x01,0x04,0x92,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v6, ttmp15, v3 +// GFX12: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x92,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v6, exec_hi, v3 +// GFX12: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x92,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v6, exec_lo, v3 +// GFX12: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x92,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, m0, v2 :: v_dual_cndmask_b32 v6, m0, v3 +// GFX12: v_dual_subrev_f32 v255, m0, v2 :: v_dual_cndmask_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0x92,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v6, vcc_hi, v3 +// GFX12: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x92,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v6, vcc_lo, v3 +// GFX12: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x92,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0xaf123456, v2 :: v_dual_cndmask_b32 v6, null, v3 +// GFX12: v_dual_subrev_f32 v255, 0xaf123456, v2 :: v_dual_cndmask_b32 v6, null, v3 ; 
encoding: [0xff,0x04,0x92,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_cndmask_b32 v6, -1, v3 +// GFX12: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_cndmask_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0x92,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_cndmask_b32 v6, 0.5, v2 +// GFX12: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_cndmask_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x92,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, -1, v4 :: v_dual_cndmask_b32 v6, src_scc, v5 +// GFX12: v_dual_subrev_f32 v255, -1, v4 :: v_dual_cndmask_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x92,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v6, null, v5 :: v_dual_cndmask_b32 v255, 0xaf123456, v4 +// GFX12: v_dual_subrev_f32 v6, null, v5 :: v_dual_cndmask_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x92,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v4, v2 :: v_dual_fmaak_f32 v6, v1, v3, 0xaf123456 +// GFX12: v_dual_subrev_f32 v255, v4, v2 :: v_dual_fmaak_f32 v6, v1, v3, 0xaf123456 ; encoding: [0x04,0x05,0x82,0xc9,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v1, v2 :: v_dual_fmaak_f32 v6, v255, v3, 0xaf123456 +// GFX12: v_dual_subrev_f32 v255, v1, v2 :: v_dual_fmaak_f32 v6, v255, v3, 0xaf123456 ; encoding: [0x01,0x05,0x82,0xc9,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v255, v2 :: v_dual_fmaak_f32 v6, v2, v3, 0xaf123456 +// GFX12: v_dual_subrev_f32 v255, v255, 
v2 :: v_dual_fmaak_f32 v6, v2, v3, 0xaf123456 ; encoding: [0xff,0x05,0x82,0xc9,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v2, v2 :: v_dual_fmaak_f32 v6, v3, v3, 0xaf123456 +// GFX12: v_dual_subrev_f32 v255, v2, v2 :: v_dual_fmaak_f32 v6, v3, v3, 0xaf123456 ; encoding: [0x02,0x05,0x82,0xc9,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v3, v2 :: v_dual_fmaak_f32 v6, v4, v3, 0xaf123456 +// GFX12: v_dual_subrev_f32 v255, v3, v2 :: v_dual_fmaak_f32 v6, v4, v3, 0xaf123456 ; encoding: [0x03,0x05,0x82,0xc9,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s105, v2 :: v_dual_fmaak_f32 v6, s105, v3, 0xaf123456 +// GFX12: v_dual_subrev_f32 v255, s105, v2 :: v_dual_fmaak_f32 v6, s105, v3, 0xaf123456 ; encoding: [0x69,0x04,0x82,0xc9,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s1, v2 :: v_dual_fmaak_f32 v6, s1, v3, 0xaf123456 +// GFX12: v_dual_subrev_f32 v255, s1, v2 :: v_dual_fmaak_f32 v6, s1, v3, 0xaf123456 ; encoding: [0x01,0x04,0x82,0xc9,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_fmaak_f32 v6, ttmp15, v3, 0xaf123456 +// GFX12: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_fmaak_f32 v6, ttmp15, v3, 0xaf123456 ; encoding: [0x7b,0x04,0x82,0xc9,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_fmaak_f32 v6, exec_hi, v3, 0xaf123456 +// GFX12: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_fmaak_f32 v6, exec_hi, v3, 0xaf123456 ; encoding: [0x7f,0x04,0x82,0xc9,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// 
W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_fmaak_f32 v6, exec_lo, v3, 0xaf123456 +// GFX12: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_fmaak_f32 v6, exec_lo, v3, 0xaf123456 ; encoding: [0x7e,0x04,0x82,0xc9,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, m0, v2 :: v_dual_fmaak_f32 v6, m0, v3, 0xaf123456 +// GFX12: v_dual_subrev_f32 v255, m0, v2 :: v_dual_fmaak_f32 v6, m0, v3, 0xaf123456 ; encoding: [0x7d,0x04,0x82,0xc9,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_fmaak_f32 v6, vcc_hi, v3, 0xaf123456 +// GFX12: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_fmaak_f32 v6, vcc_hi, v3, 0xaf123456 ; encoding: [0x6b,0x04,0x82,0xc9,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_fmaak_f32 v6, vcc_lo, v3, 0xaf123456 +// GFX12: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_fmaak_f32 v6, vcc_lo, v3, 0xaf123456 ; encoding: [0x6a,0x04,0x82,0xc9,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0xaf123456, v2 :: v_dual_fmaak_f32 v6, null, v3, 0xaf123456 +// GFX12: v_dual_subrev_f32 v255, 0xaf123456, v2 :: v_dual_fmaak_f32 v6, null, v3, 0xaf123456 ; encoding: [0xff,0x04,0x82,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_fmaak_f32 v6, -1, v3, 0xaf123456 +// GFX12: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_fmaak_f32 v6, -1, v3, 0xaf123456 ; encoding: [0xfd,0x04,0x82,0xc9,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 
v255, 0.5, v3 :: v_dual_fmaak_f32 v6, 0.5, v2, 0xaf123456 +// GFX12: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_fmaak_f32 v6, 0.5, v2, 0xaf123456 ; encoding: [0xf0,0x06,0x82,0xc9,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, -1, v4 :: v_dual_fmaak_f32 v6, src_scc, v5, 0xaf123456 +// GFX12: v_dual_subrev_f32 v255, -1, v4 :: v_dual_fmaak_f32 v6, src_scc, v5, 0xaf123456 ; encoding: [0xc1,0x08,0x82,0xc9,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v6, null, v5 :: v_dual_fmaak_f32 v255, 0xaf123456, v4, 0xaf123456 +// GFX12: v_dual_subrev_f32 v6, null, v5 :: v_dual_fmaak_f32 v255, 0xaf123456, v4, 0xaf123456 ; encoding: [0x7c,0x0a,0x82,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v4, v2 :: v_dual_fmac_f32 v6, v1, v3 +// GFX12: v_dual_subrev_f32 v255, v4, v2 :: v_dual_fmac_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x80,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v1, v2 :: v_dual_fmac_f32 v6, v255, v3 +// GFX12: v_dual_subrev_f32 v255, v1, v2 :: v_dual_fmac_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x80,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v255, v2 :: v_dual_fmac_f32 v6, v2, v3 +// GFX12: v_dual_subrev_f32 v255, v255, v2 :: v_dual_fmac_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x80,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v2, v2 :: v_dual_fmac_f32 v6, v3, v3 +// GFX12: v_dual_subrev_f32 v255, v2, v2 :: v_dual_fmac_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x80,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 
v3, v2 :: v_dual_fmac_f32 v6, v4, v3 +// GFX12: v_dual_subrev_f32 v255, v3, v2 :: v_dual_fmac_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x80,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s105, v2 :: v_dual_fmac_f32 v6, s1, v3 +// GFX12: v_dual_subrev_f32 v255, s105, v2 :: v_dual_fmac_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x80,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s1, v2 :: v_dual_fmac_f32 v6, s105, v3 +// GFX12: v_dual_subrev_f32 v255, s1, v2 :: v_dual_fmac_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x80,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_fmac_f32 v6, vcc_lo, v3 +// GFX12: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_fmac_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x80,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_fmac_f32 v6, vcc_hi, v3 +// GFX12: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_fmac_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x80,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_fmac_f32 v6, ttmp15, v3 +// GFX12: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_fmac_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x80,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, m0, v2 :: v_dual_fmac_f32 v6, m0, v3 +// GFX12: v_dual_subrev_f32 v255, m0, v2 :: v_dual_fmac_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x80,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v6, exec_lo, v3 +// GFX12: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v6, exec_lo, v3 
; encoding: [0x6b,0x04,0x80,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v6, exec_hi, v3 +// GFX12: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x80,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0xaf123456, v2 :: v_dual_fmac_f32 v6, null, v3 +// GFX12: v_dual_subrev_f32 v255, 0xaf123456, v2 :: v_dual_fmac_f32 v6, null, v3 ; encoding: [0xff,0x04,0x80,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_fmac_f32 v6, -1, v3 +// GFX12: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_fmac_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x80,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_fmac_f32 v6, 0.5, v2 +// GFX12: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_fmac_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x80,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, -1, v4 :: v_dual_fmac_f32 v6, src_scc, v5 +// GFX12: v_dual_subrev_f32 v255, -1, v4 :: v_dual_fmac_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x80,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v6, null, v5 :: v_dual_fmac_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_subrev_f32 v6, null, v5 :: v_dual_fmac_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x80,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v4, v255 :: v_dual_fmamk_f32 v6, v1, 0xaf123456, v255 +// GFX12: v_dual_subrev_f32 v255, v4, v255 :: v_dual_fmamk_f32 v6, v1, 0xaf123456, v255 ; encoding: 
[0x04,0xff,0x85,0xc9,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v1, v255 :: v_dual_fmamk_f32 v6, v255, 0xaf123456, v255 +// GFX12: v_dual_subrev_f32 v255, v1, v255 :: v_dual_fmamk_f32 v6, v255, 0xaf123456, v255 ; encoding: [0x01,0xff,0x85,0xc9,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v255, v255 :: v_dual_fmamk_f32 v6, v2, 0xaf123456, v255 +// GFX12: v_dual_subrev_f32 v255, v255, v255 :: v_dual_fmamk_f32 v6, v2, 0xaf123456, v255 ; encoding: [0xff,0xff,0x85,0xc9,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v2, v255 :: v_dual_fmamk_f32 v6, v3, 0xaf123456, v255 +// GFX12: v_dual_subrev_f32 v255, v2, v255 :: v_dual_fmamk_f32 v6, v3, 0xaf123456, v255 ; encoding: [0x02,0xff,0x85,0xc9,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v3, v255 :: v_dual_fmamk_f32 v6, v4, 0xaf123456, v255 +// GFX12: v_dual_subrev_f32 v255, v3, v255 :: v_dual_fmamk_f32 v6, v4, 0xaf123456, v255 ; encoding: [0x03,0xff,0x85,0xc9,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s105, v255 :: v_dual_fmamk_f32 v6, s105, 0xaf123456, v255 +// GFX12: v_dual_subrev_f32 v255, s105, v255 :: v_dual_fmamk_f32 v6, s105, 0xaf123456, v255 ; encoding: [0x69,0xfe,0x85,0xc9,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s1, v255 :: v_dual_fmamk_f32 v6, s1, 0xaf123456, v255 +// GFX12: v_dual_subrev_f32 v255, s1, v255 :: v_dual_fmamk_f32 v6, s1, 0xaf123456, v255 ; encoding: [0x01,0xfe,0x85,0xc9,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_subrev_f32 v255, ttmp15, v255 :: v_dual_fmamk_f32 v6, ttmp15, 0xaf123456, v255 +// GFX12: v_dual_subrev_f32 v255, ttmp15, v255 :: v_dual_fmamk_f32 v6, ttmp15, 0xaf123456, v255 ; encoding: [0x7b,0xfe,0x85,0xc9,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_hi, v255 :: v_dual_fmamk_f32 v6, exec_hi, 0xaf123456, v255 +// GFX12: v_dual_subrev_f32 v255, exec_hi, v255 :: v_dual_fmamk_f32 v6, exec_hi, 0xaf123456, v255 ; encoding: [0x7f,0xfe,0x85,0xc9,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_lo, v255 :: v_dual_fmamk_f32 v6, exec_lo, 0xaf123456, v255 +// GFX12: v_dual_subrev_f32 v255, exec_lo, v255 :: v_dual_fmamk_f32 v6, exec_lo, 0xaf123456, v255 ; encoding: [0x7e,0xfe,0x85,0xc9,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, m0, v255 :: v_dual_fmamk_f32 v6, m0, 0xaf123456, v255 +// GFX12: v_dual_subrev_f32 v255, m0, v255 :: v_dual_fmamk_f32 v6, m0, 0xaf123456, v255 ; encoding: [0x7d,0xfe,0x85,0xc9,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_hi, v255 :: v_dual_fmamk_f32 v6, vcc_hi, 0xaf123456, v255 +// GFX12: v_dual_subrev_f32 v255, vcc_hi, v255 :: v_dual_fmamk_f32 v6, vcc_hi, 0xaf123456, v255 ; encoding: [0x6b,0xfe,0x85,0xc9,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_lo, v255 :: v_dual_fmamk_f32 v6, vcc_lo, 0xaf123456, v255 +// GFX12: v_dual_subrev_f32 v255, vcc_lo, v255 :: v_dual_fmamk_f32 v6, vcc_lo, 0xaf123456, v255 ; encoding: [0x6a,0xfe,0x85,0xc9,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 
+ +v_dual_subrev_f32 v255, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, null, 0xaf123456, v255 +// GFX12: v_dual_subrev_f32 v255, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, null, 0xaf123456, v255 ; encoding: [0xff,0xfe,0x85,0xc9,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, src_scc, v255 :: v_dual_fmamk_f32 v6, -1, 0xaf123456, v255 +// GFX12: v_dual_subrev_f32 v255, src_scc, v255 :: v_dual_fmamk_f32 v6, -1, 0xaf123456, v255 ; encoding: [0xfd,0xfe,0x85,0xc9,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_fmamk_f32 v6, 0.5, 0xaf123456, v255 +// GFX12: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_fmamk_f32 v6, 0.5, 0xaf123456, v255 ; encoding: [0xf0,0x06,0x84,0xc9,0xf0,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, -1, v4 :: v_dual_fmamk_f32 v6, src_scc, 0xaf123456, v255 +// GFX12: v_dual_subrev_f32 v255, -1, v4 :: v_dual_fmamk_f32 v6, src_scc, 0xaf123456, v255 ; encoding: [0xc1,0x08,0x84,0xc9,0xfd,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v6, null, v5 :: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v4 +// GFX12: v_dual_subrev_f32 v6, null, v5 :: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x84,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v4, v2 :: v_dual_lshlrev_b32 v6, v1, v3 +// GFX12: v_dual_subrev_f32 v255, v4, v2 :: v_dual_lshlrev_b32 v6, v1, v3 ; encoding: [0x04,0x05,0xa2,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v1, v2 :: v_dual_lshlrev_b32 v6, v255, v3 +// GFX12: v_dual_subrev_f32 v255, v1, v2 :: 
v_dual_lshlrev_b32 v6, v255, v3 ; encoding: [0x01,0x05,0xa2,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v255, v2 :: v_dual_lshlrev_b32 v6, v2, v3 +// GFX12: v_dual_subrev_f32 v255, v255, v2 :: v_dual_lshlrev_b32 v6, v2, v3 ; encoding: [0xff,0x05,0xa2,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v2, v2 :: v_dual_lshlrev_b32 v6, v3, v3 +// GFX12: v_dual_subrev_f32 v255, v2, v2 :: v_dual_lshlrev_b32 v6, v3, v3 ; encoding: [0x02,0x05,0xa2,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v3, v2 :: v_dual_lshlrev_b32 v6, v4, v3 +// GFX12: v_dual_subrev_f32 v255, v3, v2 :: v_dual_lshlrev_b32 v6, v4, v3 ; encoding: [0x03,0x05,0xa2,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s105, v2 :: v_dual_lshlrev_b32 v6, s1, v3 +// GFX12: v_dual_subrev_f32 v255, s105, v2 :: v_dual_lshlrev_b32 v6, s1, v3 ; encoding: [0x69,0x04,0xa2,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s1, v2 :: v_dual_lshlrev_b32 v6, s105, v3 +// GFX12: v_dual_subrev_f32 v255, s1, v2 :: v_dual_lshlrev_b32 v6, s105, v3 ; encoding: [0x01,0x04,0xa2,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v6, vcc_lo, v3 +// GFX12: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xa2,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v6, vcc_hi, v3 +// GFX12: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xa2,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v6, ttmp15, v3 +// GFX12: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xa2,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, m0, v2 :: v_dual_lshlrev_b32 v6, m0, v3 +// GFX12: v_dual_subrev_f32 v255, m0, v2 :: v_dual_lshlrev_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0xa2,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v6, exec_lo, v3 +// GFX12: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xa2,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v6, exec_hi, v3 +// GFX12: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xa2,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0xaf123456, v2 :: v_dual_lshlrev_b32 v6, null, v3 +// GFX12: v_dual_subrev_f32 v255, 0xaf123456, v2 :: v_dual_lshlrev_b32 v6, null, v3 ; encoding: [0xff,0x04,0xa2,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v6, -1, v3 +// GFX12: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0xa2,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v6, 0.5, v2 +// GFX12: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xa2,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction 
requires wavesize=32 + +v_dual_subrev_f32 v255, -1, v4 :: v_dual_lshlrev_b32 v6, src_scc, v5 +// GFX12: v_dual_subrev_f32 v255, -1, v4 :: v_dual_lshlrev_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xa2,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v6, null, v5 :: v_dual_lshlrev_b32 v255, 0xaf123456, v4 +// GFX12: v_dual_subrev_f32 v6, null, v5 :: v_dual_lshlrev_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xa2,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v4, v2 :: v_dual_max_num_f32 v6, v1, v3 +// GFX12: v_dual_subrev_f32 v255, v4, v2 :: v_dual_max_num_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x94,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v1, v2 :: v_dual_max_num_f32 v6, v255, v3 +// GFX12: v_dual_subrev_f32 v255, v1, v2 :: v_dual_max_num_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x94,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v255, v2 :: v_dual_max_num_f32 v6, v2, v3 +// GFX12: v_dual_subrev_f32 v255, v255, v2 :: v_dual_max_num_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x94,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v2, v2 :: v_dual_max_num_f32 v6, v3, v3 +// GFX12: v_dual_subrev_f32 v255, v2, v2 :: v_dual_max_num_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x94,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v3, v2 :: v_dual_max_num_f32 v6, v4, v3 +// GFX12: v_dual_subrev_f32 v255, v3, v2 :: v_dual_max_num_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x94,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s105, v2 :: v_dual_max_num_f32 v6, s1, 
v3 +// GFX12: v_dual_subrev_f32 v255, s105, v2 :: v_dual_max_num_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x94,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s1, v2 :: v_dual_max_num_f32 v6, s105, v3 +// GFX12: v_dual_subrev_f32 v255, s1, v2 :: v_dual_max_num_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x94,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_max_num_f32 v6, vcc_lo, v3 +// GFX12: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_max_num_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x94,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_max_num_f32 v6, vcc_hi, v3 +// GFX12: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_max_num_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x94,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_max_num_f32 v6, ttmp15, v3 +// GFX12: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_max_num_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x94,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, m0, v2 :: v_dual_max_num_f32 v6, m0, v3 +// GFX12: v_dual_subrev_f32 v255, m0, v2 :: v_dual_max_num_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x94,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v6, exec_lo, v3 +// GFX12: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x94,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v6, exec_hi, v3 +// GFX12: v_dual_subrev_f32 v255, vcc_lo, v2 :: 
v_dual_max_num_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x94,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0xaf123456, v2 :: v_dual_max_num_f32 v6, null, v3 +// GFX12: v_dual_subrev_f32 v255, 0xaf123456, v2 :: v_dual_max_num_f32 v6, null, v3 ; encoding: [0xff,0x04,0x94,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_max_num_f32 v6, -1, v3 +// GFX12: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_max_num_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x94,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_max_num_f32 v6, 0.5, v2 +// GFX12: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_max_num_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x94,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, -1, v4 :: v_dual_max_num_f32 v6, src_scc, v5 +// GFX12: v_dual_subrev_f32 v255, -1, v4 :: v_dual_max_num_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x94,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v6, null, v5 :: v_dual_max_num_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_subrev_f32 v6, null, v5 :: v_dual_max_num_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x94,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v4, v2 :: v_dual_min_num_f32 v6, v1, v3 +// GFX12: v_dual_subrev_f32 v255, v4, v2 :: v_dual_min_num_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x96,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v1, v2 :: v_dual_min_num_f32 v6, v255, v3 +// GFX12: v_dual_subrev_f32 v255, v1, v2 :: v_dual_min_num_f32 v6, v255, v3 ; 
encoding: [0x01,0x05,0x96,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v255, v2 :: v_dual_min_num_f32 v6, v2, v3 +// GFX12: v_dual_subrev_f32 v255, v255, v2 :: v_dual_min_num_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x96,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v2, v2 :: v_dual_min_num_f32 v6, v3, v3 +// GFX12: v_dual_subrev_f32 v255, v2, v2 :: v_dual_min_num_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x96,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v3, v2 :: v_dual_min_num_f32 v6, v4, v3 +// GFX12: v_dual_subrev_f32 v255, v3, v2 :: v_dual_min_num_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x96,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s105, v2 :: v_dual_min_num_f32 v6, s1, v3 +// GFX12: v_dual_subrev_f32 v255, s105, v2 :: v_dual_min_num_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x96,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s1, v2 :: v_dual_min_num_f32 v6, s105, v3 +// GFX12: v_dual_subrev_f32 v255, s1, v2 :: v_dual_min_num_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x96,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_min_num_f32 v6, vcc_lo, v3 +// GFX12: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_min_num_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x96,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_min_num_f32 v6, vcc_hi, v3 +// GFX12: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_min_num_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x96,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_min_num_f32 v6, ttmp15, v3 +// GFX12: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_min_num_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x96,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, m0, v2 :: v_dual_min_num_f32 v6, m0, v3 +// GFX12: v_dual_subrev_f32 v255, m0, v2 :: v_dual_min_num_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x96,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v6, exec_lo, v3 +// GFX12: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x96,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v6, exec_hi, v3 +// GFX12: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x96,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0xaf123456, v2 :: v_dual_min_num_f32 v6, null, v3 +// GFX12: v_dual_subrev_f32 v255, 0xaf123456, v2 :: v_dual_min_num_f32 v6, null, v3 ; encoding: [0xff,0x04,0x96,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_min_num_f32 v6, -1, v3 +// GFX12: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_min_num_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x96,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_min_num_f32 v6, 0.5, v2 +// GFX12: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_min_num_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x96,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_subrev_f32 v255, -1, v4 :: v_dual_min_num_f32 v6, src_scc, v5 +// GFX12: v_dual_subrev_f32 v255, -1, v4 :: v_dual_min_num_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x96,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v6, null, v5 :: v_dual_min_num_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_subrev_f32 v6, null, v5 :: v_dual_min_num_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x96,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v4, v255 :: v_dual_mov_b32 v6, v1 +// GFX12: v_dual_subrev_f32 v255, v4, v255 :: v_dual_mov_b32 v6, v1 ; encoding: [0x04,0xff,0x91,0xc9,0x01,0x01,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v1, v255 :: v_dual_mov_b32 v6, v255 +// GFX12: v_dual_subrev_f32 v255, v1, v255 :: v_dual_mov_b32 v6, v255 ; encoding: [0x01,0xff,0x91,0xc9,0xff,0x01,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v255, v255 :: v_dual_mov_b32 v6, v2 +// GFX12: v_dual_subrev_f32 v255, v255, v255 :: v_dual_mov_b32 v6, v2 ; encoding: [0xff,0xff,0x91,0xc9,0x02,0x01,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v2, v255 :: v_dual_mov_b32 v6, v3 +// GFX12: v_dual_subrev_f32 v255, v2, v255 :: v_dual_mov_b32 v6, v3 ; encoding: [0x02,0xff,0x91,0xc9,0x03,0x01,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v3, v255 :: v_dual_mov_b32 v6, v4 +// GFX12: v_dual_subrev_f32 v255, v3, v255 :: v_dual_mov_b32 v6, v4 ; encoding: [0x03,0xff,0x91,0xc9,0x04,0x01,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s105, v255 :: v_dual_mov_b32 v6, s1 +// GFX12: v_dual_subrev_f32 v255, s105, v255 :: v_dual_mov_b32 v6, s1 ; encoding: 
[0x69,0xfe,0x91,0xc9,0x01,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s1, v255 :: v_dual_mov_b32 v6, s105 +// GFX12: v_dual_subrev_f32 v255, s1, v255 :: v_dual_mov_b32 v6, s105 ; encoding: [0x01,0xfe,0x91,0xc9,0x69,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, ttmp15, v255 :: v_dual_mov_b32 v6, vcc_lo +// GFX12: v_dual_subrev_f32 v255, ttmp15, v255 :: v_dual_mov_b32 v6, vcc_lo ; encoding: [0x7b,0xfe,0x91,0xc9,0x6a,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_hi, v255 :: v_dual_mov_b32 v6, vcc_hi +// GFX12: v_dual_subrev_f32 v255, exec_hi, v255 :: v_dual_mov_b32 v6, vcc_hi ; encoding: [0x7f,0xfe,0x91,0xc9,0x6b,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_lo, v255 :: v_dual_mov_b32 v6, ttmp15 +// GFX12: v_dual_subrev_f32 v255, exec_lo, v255 :: v_dual_mov_b32 v6, ttmp15 ; encoding: [0x7e,0xfe,0x91,0xc9,0x7b,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, m0, v255 :: v_dual_mov_b32 v6, m0 +// GFX12: v_dual_subrev_f32 v255, m0, v255 :: v_dual_mov_b32 v6, m0 ; encoding: [0x7d,0xfe,0x91,0xc9,0x7d,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_hi, v255 :: v_dual_mov_b32 v6, exec_lo +// GFX12: v_dual_subrev_f32 v255, vcc_hi, v255 :: v_dual_mov_b32 v6, exec_lo ; encoding: [0x6b,0xfe,0x91,0xc9,0x7e,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_lo, v255 :: v_dual_mov_b32 v6, exec_hi +// GFX12: v_dual_subrev_f32 v255, vcc_lo, v255 :: v_dual_mov_b32 v6, exec_hi ; encoding: [0x6a,0xfe,0x91,0xc9,0x7f,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 
v255, 0xaf123456, v255 :: v_dual_mov_b32 v6, null +// GFX12: v_dual_subrev_f32 v255, 0xaf123456, v255 :: v_dual_mov_b32 v6, null ; encoding: [0xff,0xfe,0x91,0xc9,0x7c,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, src_scc, v255 :: v_dual_mov_b32 v6, -1 +// GFX12: v_dual_subrev_f32 v255, src_scc, v255 :: v_dual_mov_b32 v6, -1 ; encoding: [0xfd,0xfe,0x91,0xc9,0xc1,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_mov_b32 v6, 0.5 +// GFX12: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_mov_b32 v6, 0.5 ; encoding: [0xf0,0x06,0x90,0xc9,0xf0,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, -1, v4 :: v_dual_mov_b32 v6, src_scc +// GFX12: v_dual_subrev_f32 v255, -1, v4 :: v_dual_mov_b32 v6, src_scc ; encoding: [0xc1,0x08,0x90,0xc9,0xfd,0x00,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v6, null, v5 :: v_dual_mov_b32 v255, 0xaf123456 +// GFX12: v_dual_subrev_f32 v6, null, v5 :: v_dual_mov_b32 v255, 0xaf123456 ; encoding: [0x7c,0x0a,0x90,0xc9,0xff,0x00,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v6, v1, v3 +// GFX12: v_dual_subrev_f32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x8e,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v6, v255, v3 +// GFX12: v_dual_subrev_f32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x8e,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v6, v2, v3 +// GFX12: v_dual_subrev_f32 v255, 
v255, v2 :: v_dual_mul_dx9_zero_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x8e,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v6, v3, v3 +// GFX12: v_dual_subrev_f32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x8e,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v6, v4, v3 +// GFX12: v_dual_subrev_f32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x8e,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v6, s1, v3 +// GFX12: v_dual_subrev_f32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x8e,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v6, s105, v3 +// GFX12: v_dual_subrev_f32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x8e,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_lo, v3 +// GFX12: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x8e,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_hi, v3 +// GFX12: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x8e,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, ttmp15, v3 +// GFX12: v_dual_subrev_f32 v255, exec_lo, v2 :: 
v_dual_mul_dx9_zero_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x8e,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v6, m0, v3 +// GFX12: v_dual_subrev_f32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x8e,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_lo, v3 +// GFX12: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x8e,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_hi, v3 +// GFX12: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x8e,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0xaf123456, v2 :: v_dual_mul_dx9_zero_f32 v6, null, v3 +// GFX12: v_dual_subrev_f32 v255, 0xaf123456, v2 :: v_dual_mul_dx9_zero_f32 v6, null, v3 ; encoding: [0xff,0x04,0x8e,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v6, -1, v3 +// GFX12: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x8e,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v6, 0.5, v2 +// GFX12: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x8e,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v6, src_scc, v5 +// GFX12: 
v_dual_subrev_f32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x8e,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v6, null, v5 :: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_subrev_f32 v6, null, v5 :: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x8e,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v4, v2 :: v_dual_mul_f32 v6, v1, v3 +// GFX12: v_dual_subrev_f32 v255, v4, v2 :: v_dual_mul_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x86,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v1, v2 :: v_dual_mul_f32 v6, v255, v3 +// GFX12: v_dual_subrev_f32 v255, v1, v2 :: v_dual_mul_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x86,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v255, v2 :: v_dual_mul_f32 v6, v2, v3 +// GFX12: v_dual_subrev_f32 v255, v255, v2 :: v_dual_mul_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x86,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v2, v2 :: v_dual_mul_f32 v6, v3, v3 +// GFX12: v_dual_subrev_f32 v255, v2, v2 :: v_dual_mul_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x86,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v3, v2 :: v_dual_mul_f32 v6, v4, v3 +// GFX12: v_dual_subrev_f32 v255, v3, v2 :: v_dual_mul_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x86,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s105, v2 :: v_dual_mul_f32 v6, s1, v3 +// GFX12: v_dual_subrev_f32 v255, s105, v2 :: v_dual_mul_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x86,0xc9,0x01,0x06,0x06,0xff] 
+// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s1, v2 :: v_dual_mul_f32 v6, s105, v3 +// GFX12: v_dual_subrev_f32 v255, s1, v2 :: v_dual_mul_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x86,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_mul_f32 v6, vcc_lo, v3 +// GFX12: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_mul_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x86,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_mul_f32 v6, vcc_hi, v3 +// GFX12: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_mul_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x86,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_mul_f32 v6, ttmp15, v3 +// GFX12: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_mul_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x86,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, m0, v2 :: v_dual_mul_f32 v6, m0, v3 +// GFX12: v_dual_subrev_f32 v255, m0, v2 :: v_dual_mul_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x86,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_mul_f32 v6, exec_lo, v3 +// GFX12: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_mul_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x86,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_mul_f32 v6, exec_hi, v3 +// GFX12: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_mul_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x86,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0xaf123456, 
v2 :: v_dual_mul_f32 v6, null, v3 +// GFX12: v_dual_subrev_f32 v255, 0xaf123456, v2 :: v_dual_mul_f32 v6, null, v3 ; encoding: [0xff,0x04,0x86,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_mul_f32 v6, -1, v3 +// GFX12: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_mul_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x86,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_mul_f32 v6, 0.5, v2 +// GFX12: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_mul_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x86,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, -1, v4 :: v_dual_mul_f32 v6, src_scc, v5 +// GFX12: v_dual_subrev_f32 v255, -1, v4 :: v_dual_mul_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x86,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v6, null, v5 :: v_dual_mul_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_subrev_f32 v6, null, v5 :: v_dual_mul_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x86,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v4, v2 :: v_dual_sub_f32 v6, v1, v3 +// GFX12: v_dual_subrev_f32 v255, v4, v2 :: v_dual_sub_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x8a,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v1, v2 :: v_dual_sub_f32 v6, v255, v3 +// GFX12: v_dual_subrev_f32 v255, v1, v2 :: v_dual_sub_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x8a,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v255, v2 :: v_dual_sub_f32 v6, v2, v3 +// GFX12: v_dual_subrev_f32 v255, v255, v2 :: v_dual_sub_f32 v6, 
v2, v3 ; encoding: [0xff,0x05,0x8a,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v2, v2 :: v_dual_sub_f32 v6, v3, v3 +// GFX12: v_dual_subrev_f32 v255, v2, v2 :: v_dual_sub_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x8a,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v3, v2 :: v_dual_sub_f32 v6, v4, v3 +// GFX12: v_dual_subrev_f32 v255, v3, v2 :: v_dual_sub_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x8a,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s105, v2 :: v_dual_sub_f32 v6, s1, v3 +// GFX12: v_dual_subrev_f32 v255, s105, v2 :: v_dual_sub_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x8a,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s1, v2 :: v_dual_sub_f32 v6, s105, v3 +// GFX12: v_dual_subrev_f32 v255, s1, v2 :: v_dual_sub_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x8a,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_sub_f32 v6, vcc_lo, v3 +// GFX12: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_sub_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x8a,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_sub_f32 v6, vcc_hi, v3 +// GFX12: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_sub_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x8a,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_sub_f32 v6, ttmp15, v3 +// GFX12: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_sub_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x8a,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_subrev_f32 v255, m0, v2 :: v_dual_sub_f32 v6, m0, v3 +// GFX12: v_dual_subrev_f32 v255, m0, v2 :: v_dual_sub_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x8a,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_sub_f32 v6, exec_lo, v3 +// GFX12: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_sub_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x8a,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_sub_f32 v6, exec_hi, v3 +// GFX12: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_sub_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x8a,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0xaf123456, v2 :: v_dual_sub_f32 v6, null, v3 +// GFX12: v_dual_subrev_f32 v255, 0xaf123456, v2 :: v_dual_sub_f32 v6, null, v3 ; encoding: [0xff,0x04,0x8a,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_sub_f32 v6, -1, v3 +// GFX12: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_sub_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x8a,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_sub_f32 v6, 0.5, v2 +// GFX12: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_sub_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x8a,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, -1, v4 :: v_dual_sub_f32 v6, src_scc, v5 +// GFX12: v_dual_subrev_f32 v255, -1, v4 :: v_dual_sub_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x8a,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v6, null, v5 :: v_dual_sub_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_subrev_f32 
v6, null, v5 :: v_dual_sub_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x8a,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v4, v2 :: v_dual_subrev_f32 v6, v1, v3 +// GFX12: v_dual_subrev_f32 v255, v4, v2 :: v_dual_subrev_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x8c,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v1, v2 :: v_dual_subrev_f32 v6, v255, v3 +// GFX12: v_dual_subrev_f32 v255, v1, v2 :: v_dual_subrev_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x8c,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v255, v2 :: v_dual_subrev_f32 v6, v2, v3 +// GFX12: v_dual_subrev_f32 v255, v255, v2 :: v_dual_subrev_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x8c,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v2, v2 :: v_dual_subrev_f32 v6, v3, v3 +// GFX12: v_dual_subrev_f32 v255, v2, v2 :: v_dual_subrev_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x8c,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v3, v2 :: v_dual_subrev_f32 v6, v4, v3 +// GFX12: v_dual_subrev_f32 v255, v3, v2 :: v_dual_subrev_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x8c,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s105, v2 :: v_dual_subrev_f32 v6, s1, v3 +// GFX12: v_dual_subrev_f32 v255, s105, v2 :: v_dual_subrev_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x8c,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s1, v2 :: v_dual_subrev_f32 v6, s105, v3 +// GFX12: v_dual_subrev_f32 v255, s1, v2 :: v_dual_subrev_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x8c,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_subrev_f32 v6, vcc_lo, v3 +// GFX12: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_subrev_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x8c,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_subrev_f32 v6, vcc_hi, v3 +// GFX12: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_subrev_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x8c,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_subrev_f32 v6, ttmp15, v3 +// GFX12: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_subrev_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x8c,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, m0, v2 :: v_dual_subrev_f32 v6, m0, v3 +// GFX12: v_dual_subrev_f32 v255, m0, v2 :: v_dual_subrev_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x8c,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v6, exec_lo, v3 +// GFX12: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x8c,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v6, exec_hi, v3 +// GFX12: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x8c,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0xaf123456, v2 :: v_dual_subrev_f32 v6, null, v3 +// GFX12: v_dual_subrev_f32 v255, 0xaf123456, v2 :: v_dual_subrev_f32 v6, null, v3 ; encoding: [0xff,0x04,0x8c,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_subrev_f32 v6, -1, v3 +// GFX12: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_subrev_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x8c,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_subrev_f32 v6, 0.5, v2 +// GFX12: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_subrev_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x8c,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, -1, v4 :: v_dual_subrev_f32 v6, src_scc, v5 +// GFX12: v_dual_subrev_f32 v255, -1, v4 :: v_dual_subrev_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x8c,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v6, null, v5 :: v_dual_subrev_f32 v255, 0xaf123456, v4 +// GFX12: v_dual_subrev_f32 v6, null, v5 :: v_dual_subrev_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x8c,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v4, v2 :: v_dual_max_i32 v6, v1, v3 +// GFX12: v_dual_add_f32 v255, v4, v2 :: v_dual_max_i32 v6, v1, v3 ; encoding: [0x04,0x05,0x2e,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v1, v2 :: v_dual_max_i32 v6, v255, v3 +// GFX12: v_dual_add_f32 v255, v1, v2 :: v_dual_max_i32 v6, v255, v3 ; encoding: [0x01,0x05,0x2e,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v255, v2 :: v_dual_max_i32 v6, v2, v3 +// GFX12: v_dual_add_f32 v255, v255, v2 :: v_dual_max_i32 v6, v2, v3 ; encoding: [0xff,0x05,0x2e,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v2, v2 :: v_dual_max_i32 v6, v3, v3 +// GFX12: v_dual_add_f32 
v255, v2, v2 :: v_dual_max_i32 v6, v3, v3 ; encoding: [0x02,0x05,0x2e,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v3, v2 :: v_dual_max_i32 v6, v4, v3 +// GFX12: v_dual_add_f32 v255, v3, v2 :: v_dual_max_i32 v6, v4, v3 ; encoding: [0x03,0x05,0x2e,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s105, v2 :: v_dual_max_i32 v6, s1, v3 +// GFX12: v_dual_add_f32 v255, s105, v2 :: v_dual_max_i32 v6, s1, v3 ; encoding: [0x69,0x04,0x2e,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s1, v2 :: v_dual_max_i32 v6, s105, v3 +// GFX12: v_dual_add_f32 v255, s1, v2 :: v_dual_max_i32 v6, s105, v3 ; encoding: [0x01,0x04,0x2e,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, ttmp15, v2 :: v_dual_max_i32 v6, vcc_lo, v3 +// GFX12: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_max_i32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x2e,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_hi, v2 :: v_dual_max_i32 v6, vcc_hi, v3 +// GFX12: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_max_i32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x2e,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_lo, v2 :: v_dual_max_i32 v6, ttmp15, v3 +// GFX12: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_max_i32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x2e,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, m0, v2 :: v_dual_max_i32 v6, m0, v3 +// GFX12: v_dual_add_f32 v255, m0, v2 :: v_dual_max_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0x2e,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 
v255, vcc_hi, v2 :: v_dual_max_i32 v6, exec_lo, v3 +// GFX12: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_max_i32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x2e,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_max_i32 v6, exec_hi, v3 +// GFX12: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_max_i32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x2e,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_max_i32 v6, null, v3 +// GFX12: v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_max_i32 v6, null, v3 ; encoding: [0xff,0x04,0x2e,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, src_scc, v2 :: v_dual_max_i32 v6, -1, v3 +// GFX12: v_dual_add_f32 v255, src_scc, v2 :: v_dual_max_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0x2e,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0.5, v3 :: v_dual_max_i32 v6, 0.5, v2 +// GFX12: v_dual_add_f32 v255, 0.5, v3 :: v_dual_max_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x2e,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, -1, v4 :: v_dual_max_i32 v6, src_scc, v5 +// GFX12: v_dual_add_f32 v255, -1, v4 :: v_dual_max_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x2e,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v6, null, v5 :: v_dual_max_i32 v255, 0xaf123456, v4 +// GFX12: v_dual_add_f32 v6, null, v5 :: v_dual_max_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x2e,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2 :: v_dual_max_i32 v6, v1, v3 +// GFX12: v_dual_cndmask_b32 v255, v4, v2 :: 
v_dual_max_i32 v6, v1, v3 ; encoding: [0x04,0x05,0x6e,0xca,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v1, v2 :: v_dual_max_i32 v6, v255, v3 +// GFX12: v_dual_cndmask_b32 v255, v1, v2 :: v_dual_max_i32 v6, v255, v3 ; encoding: [0x01,0x05,0x6e,0xca,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v255, v2 :: v_dual_max_i32 v6, v2, v3 +// GFX12: v_dual_cndmask_b32 v255, v255, v2 :: v_dual_max_i32 v6, v2, v3 ; encoding: [0xff,0x05,0x6e,0xca,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v2, v2 :: v_dual_max_i32 v6, v3, v3 +// GFX12: v_dual_cndmask_b32 v255, v2, v2 :: v_dual_max_i32 v6, v3, v3 ; encoding: [0x02,0x05,0x6e,0xca,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v3, v2 :: v_dual_max_i32 v6, v4, v3 +// GFX12: v_dual_cndmask_b32 v255, v3, v2 :: v_dual_max_i32 v6, v4, v3 ; encoding: [0x03,0x05,0x6e,0xca,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s105, v2 :: v_dual_max_i32 v6, s105, v3 +// GFX12: v_dual_cndmask_b32 v255, s105, v2 :: v_dual_max_i32 v6, s105, v3 ; encoding: [0x69,0x04,0x6e,0xca,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s1, v2 :: v_dual_max_i32 v6, s1, v3 +// GFX12: v_dual_cndmask_b32 v255, s1, v2 :: v_dual_max_i32 v6, s1, v3 ; encoding: [0x01,0x04,0x6e,0xca,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, ttmp15, v2 :: v_dual_max_i32 v6, ttmp15, v3 +// GFX12: v_dual_cndmask_b32 v255, ttmp15, v2 :: v_dual_max_i32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x6e,0xca,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_cndmask_b32 v255, exec_hi, v2 :: v_dual_max_i32 v6, exec_hi, v3 +// GFX12: v_dual_cndmask_b32 v255, exec_hi, v2 :: v_dual_max_i32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x6e,0xca,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_lo, v2 :: v_dual_max_i32 v6, exec_lo, v3 +// GFX12: v_dual_cndmask_b32 v255, exec_lo, v2 :: v_dual_max_i32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x6e,0xca,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, m0, v2 :: v_dual_max_i32 v6, m0, v3 +// GFX12: v_dual_cndmask_b32 v255, m0, v2 :: v_dual_max_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0x6e,0xca,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_hi, v2 :: v_dual_max_i32 v6, vcc_hi, v3 +// GFX12: v_dual_cndmask_b32 v255, vcc_hi, v2 :: v_dual_max_i32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x6e,0xca,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_lo, v2 :: v_dual_max_i32 v6, vcc_lo, v3 +// GFX12: v_dual_cndmask_b32 v255, vcc_lo, v2 :: v_dual_max_i32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x6e,0xca,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_max_i32 v6, null, v3 +// GFX12: v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_max_i32 v6, null, v3 ; encoding: [0xff,0x04,0x6e,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, src_scc, v2 :: v_dual_max_i32 v6, -1, v3 +// GFX12: v_dual_cndmask_b32 v255, src_scc, v2 :: v_dual_max_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0x6e,0xca,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_max_i32 v6, 0.5, v2 
+// GFX12: v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_max_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x6e,0xca,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, -1, v4 :: v_dual_max_i32 v6, src_scc, v5 +// GFX12: v_dual_cndmask_b32 v255, -1, v4 :: v_dual_max_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x6e,0xca,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v6, null, v5 :: v_dual_max_i32 v255, 0xaf123456, v4 +// GFX12: v_dual_cndmask_b32 v6, null, v5 :: v_dual_max_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x6e,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_max_i32 v6, v1, v3 +// GFX12: v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_max_i32 v6, v1, v3 ; encoding: [0x04,0x05,0x6e,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: v_dual_max_i32 v6, v255, v3 +// GFX12: v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: v_dual_max_i32 v6, v255, v3 ; encoding: [0x01,0x05,0x6e,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_max_i32 v6, v2, v3 +// GFX12: v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_max_i32 v6, v2, v3 ; encoding: [0xff,0x05,0x6e,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_max_i32 v6, v3, v3 +// GFX12: v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_max_i32 v6, v3, v3 ; encoding: [0x02,0x05,0x6e,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v3, 
v2, 0xaf123456 :: v_dual_max_i32 v6, v4, v3 +// GFX12: v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_max_i32 v6, v4, v3 ; encoding: [0x03,0x05,0x6e,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, s105, v2, 0xaf123456 :: v_dual_max_i32 v6, s105, v3 +// GFX12: v_dual_fmaak_f32 v255, s105, v2, 0xaf123456 :: v_dual_max_i32 v6, s105, v3 ; encoding: [0x69,0x04,0x6e,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, s1, v2, 0xaf123456 :: v_dual_max_i32 v6, s1, v3 +// GFX12: v_dual_fmaak_f32 v255, s1, v2, 0xaf123456 :: v_dual_max_i32 v6, s1, v3 ; encoding: [0x01,0x04,0x6e,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, ttmp15, v2, 0xaf123456 :: v_dual_max_i32 v6, ttmp15, v3 +// GFX12: v_dual_fmaak_f32 v255, ttmp15, v2, 0xaf123456 :: v_dual_max_i32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x6e,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, exec_hi, v2, 0xaf123456 :: v_dual_max_i32 v6, exec_hi, v3 +// GFX12: v_dual_fmaak_f32 v255, exec_hi, v2, 0xaf123456 :: v_dual_max_i32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x6e,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, exec_lo, v2, 0xaf123456 :: v_dual_max_i32 v6, exec_lo, v3 +// GFX12: v_dual_fmaak_f32 v255, exec_lo, v2, 0xaf123456 :: v_dual_max_i32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x6e,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, m0, v2, 0xaf123456 :: v_dual_max_i32 v6, m0, v3 +// GFX12: v_dual_fmaak_f32 v255, m0, v2, 0xaf123456 :: v_dual_max_i32 v6, m0, v3 ; encoding: 
[0x7d,0x04,0x6e,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, vcc_hi, v2, 0xaf123456 :: v_dual_max_i32 v6, vcc_hi, v3 +// GFX12: v_dual_fmaak_f32 v255, vcc_hi, v2, 0xaf123456 :: v_dual_max_i32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x6e,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, vcc_lo, v2, 0xaf123456 :: v_dual_max_i32 v6, vcc_lo, v3 +// GFX12: v_dual_fmaak_f32 v255, vcc_lo, v2, 0xaf123456 :: v_dual_max_i32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x6e,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_max_i32 v6, null, v3 +// GFX12: v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_max_i32 v6, null, v3 ; encoding: [0xff,0x04,0x6e,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, src_scc, v2, 0xaf123456 :: v_dual_max_i32 v6, -1, v3 +// GFX12: v_dual_fmaak_f32 v255, src_scc, v2, 0xaf123456 :: v_dual_max_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0x6e,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_max_i32 v6, 0.5, v2 +// GFX12: v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_max_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x6e,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_max_i32 v6, src_scc, v5 +// GFX12: v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_max_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x6e,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_max_i32 v255, 0xaf123456, v4 +// GFX12: v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_max_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x6e,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v4, v2 :: v_dual_max_i32 v6, v1, v3 +// GFX12: v_dual_fmac_f32 v255, v4, v2 :: v_dual_max_i32 v6, v1, v3 ; encoding: [0x04,0x05,0x2e,0xc8,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v1, v2 :: v_dual_max_i32 v6, v255, v3 +// GFX12: v_dual_fmac_f32 v255, v1, v2 :: v_dual_max_i32 v6, v255, v3 ; encoding: [0x01,0x05,0x2e,0xc8,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v255, v2 :: v_dual_max_i32 v6, v2, v3 +// GFX12: v_dual_fmac_f32 v255, v255, v2 :: v_dual_max_i32 v6, v2, v3 ; encoding: [0xff,0x05,0x2e,0xc8,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v2, v2 :: v_dual_max_i32 v6, v3, v3 +// GFX12: v_dual_fmac_f32 v255, v2, v2 :: v_dual_max_i32 v6, v3, v3 ; encoding: [0x02,0x05,0x2e,0xc8,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v3, v2 :: v_dual_max_i32 v6, v4, v3 +// GFX12: v_dual_fmac_f32 v255, v3, v2 :: v_dual_max_i32 v6, v4, v3 ; encoding: [0x03,0x05,0x2e,0xc8,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s105, v2 :: v_dual_max_i32 v6, s1, v3 +// GFX12: v_dual_fmac_f32 v255, s105, v2 :: v_dual_max_i32 v6, s1, v3 ; encoding: [0x69,0x04,0x2e,0xc8,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s1, v2 :: v_dual_max_i32 v6, s105, v3 +// GFX12: v_dual_fmac_f32 v255, s1, v2 :: v_dual_max_i32 v6, s105, v3 ; 
encoding: [0x01,0x04,0x2e,0xc8,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_max_i32 v6, vcc_lo, v3 +// GFX12: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_max_i32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x2e,0xc8,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_max_i32 v6, vcc_hi, v3 +// GFX12: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_max_i32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x2e,0xc8,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_max_i32 v6, ttmp15, v3 +// GFX12: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_max_i32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x2e,0xc8,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, m0, v2 :: v_dual_max_i32 v6, m0, v3 +// GFX12: v_dual_fmac_f32 v255, m0, v2 :: v_dual_max_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0x2e,0xc8,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_max_i32 v6, exec_lo, v3 +// GFX12: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_max_i32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x2e,0xc8,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_max_i32 v6, exec_hi, v3 +// GFX12: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_max_i32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x2e,0xc8,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_max_i32 v6, null, v3 +// GFX12: v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_max_i32 v6, null, v3 ; encoding: [0xff,0x04,0x2e,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction 
requires wavesize=32 + +v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_max_i32 v6, -1, v3 +// GFX12: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_max_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0x2e,0xc8,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_max_i32 v6, 0.5, v2 +// GFX12: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_max_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x2e,0xc8,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, -1, v4 :: v_dual_max_i32 v6, src_scc, v5 +// GFX12: v_dual_fmac_f32 v255, -1, v4 :: v_dual_max_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x2e,0xc8,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v6, null, v5 :: v_dual_max_i32 v255, 0xaf123456, v4 +// GFX12: v_dual_fmac_f32 v6, null, v5 :: v_dual_max_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x2e,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_max_i32 v6, v1, v255 +// GFX12: v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_max_i32 v6, v1, v255 ; encoding: [0x04,0xff,0xaf,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_max_i32 v6, v255, v255 +// GFX12: v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_max_i32 v6, v255, v255 ; encoding: [0x01,0xff,0xaf,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_max_i32 v6, v2, v255 +// GFX12: v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_max_i32 v6, v2, v255 ; encoding: [0xff,0xff,0xaf,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_max_i32 v6, v3, v255 +// GFX12: v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_max_i32 v6, v3, v255 ; encoding: [0x02,0xff,0xaf,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_max_i32 v6, v4, v255 +// GFX12: v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_max_i32 v6, v4, v255 ; encoding: [0x03,0xff,0xaf,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_max_i32 v6, s105, v255 +// GFX12: v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_max_i32 v6, s105, v255 ; encoding: [0x69,0xfe,0xaf,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_max_i32 v6, s1, v255 +// GFX12: v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_max_i32 v6, s1, v255 ; encoding: [0x01,0xfe,0xaf,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_max_i32 v6, ttmp15, v255 +// GFX12: v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_max_i32 v6, ttmp15, v255 ; encoding: [0x7b,0xfe,0xaf,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_max_i32 v6, exec_hi, v255 +// GFX12: v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_max_i32 v6, exec_hi, v255 ; encoding: [0x7f,0xfe,0xaf,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_max_i32 v6, 
exec_lo, v255 +// GFX12: v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_max_i32 v6, exec_lo, v255 ; encoding: [0x7e,0xfe,0xaf,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_max_i32 v6, m0, v255 +// GFX12: v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_max_i32 v6, m0, v255 ; encoding: [0x7d,0xfe,0xaf,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_max_i32 v6, vcc_hi, v255 +// GFX12: v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_max_i32 v6, vcc_hi, v255 ; encoding: [0x6b,0xfe,0xaf,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_max_i32 v6, vcc_lo, v255 +// GFX12: v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_max_i32 v6, vcc_lo, v255 ; encoding: [0x6a,0xfe,0xaf,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_max_i32 v6, null, v255 +// GFX12: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_max_i32 v6, null, v255 ; encoding: [0xff,0xfe,0xaf,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_max_i32 v6, -1, v255 +// GFX12: v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_max_i32 v6, -1, v255 ; encoding: [0xfd,0xfe,0xaf,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_max_i32 v6, 0.5, v3 +// GFX12: v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: 
v_dual_max_i32 v6, 0.5, v3 ; encoding: [0xf0,0xfe,0xaf,0xc8,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_max_i32 v6, src_scc, v4 +// GFX12: v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_max_i32 v6, src_scc, v4 ; encoding: [0xc1,0xfe,0xaf,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_max_i32 v255, 0xaf123456, v5 +// GFX12: v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_max_i32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x08,0xae,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v4, v2 :: v_dual_max_i32 v6, v1, v3 +// GFX12: v_dual_max_num_f32 v255, v4, v2 :: v_dual_max_i32 v6, v1, v3 ; encoding: [0x04,0x05,0xae,0xca,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v1, v2 :: v_dual_max_i32 v6, v255, v3 +// GFX12: v_dual_max_num_f32 v255, v1, v2 :: v_dual_max_i32 v6, v255, v3 ; encoding: [0x01,0x05,0xae,0xca,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v255, v2 :: v_dual_max_i32 v6, v2, v3 +// GFX12: v_dual_max_num_f32 v255, v255, v2 :: v_dual_max_i32 v6, v2, v3 ; encoding: [0xff,0x05,0xae,0xca,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v2, v2 :: v_dual_max_i32 v6, v3, v3 +// GFX12: v_dual_max_num_f32 v255, v2, v2 :: v_dual_max_i32 v6, v3, v3 ; encoding: [0x02,0x05,0xae,0xca,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v3, v2 :: v_dual_max_i32 v6, v4, v3 +// GFX12: v_dual_max_num_f32 v255, v3, v2 :: v_dual_max_i32 v6, v4, v3 ; encoding: 
[0x03,0x05,0xae,0xca,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s105, v2 :: v_dual_max_i32 v6, s1, v3 +// GFX12: v_dual_max_num_f32 v255, s105, v2 :: v_dual_max_i32 v6, s1, v3 ; encoding: [0x69,0x04,0xae,0xca,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s1, v2 :: v_dual_max_i32 v6, s105, v3 +// GFX12: v_dual_max_num_f32 v255, s1, v2 :: v_dual_max_i32 v6, s105, v3 ; encoding: [0x01,0x04,0xae,0xca,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_max_i32 v6, vcc_lo, v3 +// GFX12: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_max_i32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xae,0xca,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_max_i32 v6, vcc_hi, v3 +// GFX12: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_max_i32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xae,0xca,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_max_i32 v6, ttmp15, v3 +// GFX12: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_max_i32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xae,0xca,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, m0, v2 :: v_dual_max_i32 v6, m0, v3 +// GFX12: v_dual_max_num_f32 v255, m0, v2 :: v_dual_max_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0xae,0xca,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_max_i32 v6, exec_lo, v3 +// GFX12: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_max_i32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xae,0xca,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_max_i32 v6, exec_hi, v3 +// GFX12: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_max_i32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xae,0xca,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0xaf123456, v2 :: v_dual_max_i32 v6, null, v3 +// GFX12: v_dual_max_num_f32 v255, 0xaf123456, v2 :: v_dual_max_i32 v6, null, v3 ; encoding: [0xff,0x04,0xae,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_max_i32 v6, -1, v3 +// GFX12: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_max_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0xae,0xca,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_max_i32 v6, 0.5, v2 +// GFX12: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_max_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xae,0xca,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, -1, v4 :: v_dual_max_i32 v6, src_scc, v5 +// GFX12: v_dual_max_num_f32 v255, -1, v4 :: v_dual_max_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xae,0xca,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v6, null, v5 :: v_dual_max_i32 v255, 0xaf123456, v4 +// GFX12: v_dual_max_num_f32 v6, null, v5 :: v_dual_max_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xae,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v4, v2 :: v_dual_max_i32 v6, v1, v3 +// GFX12: v_dual_min_num_f32 v255, v4, v2 :: v_dual_max_i32 v6, v1, v3 ; encoding: [0x04,0x05,0xee,0xca,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v1, v2 :: v_dual_max_i32 
v6, v255, v3 +// GFX12: v_dual_min_num_f32 v255, v1, v2 :: v_dual_max_i32 v6, v255, v3 ; encoding: [0x01,0x05,0xee,0xca,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v255, v2 :: v_dual_max_i32 v6, v2, v3 +// GFX12: v_dual_min_num_f32 v255, v255, v2 :: v_dual_max_i32 v6, v2, v3 ; encoding: [0xff,0x05,0xee,0xca,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v2, v2 :: v_dual_max_i32 v6, v3, v3 +// GFX12: v_dual_min_num_f32 v255, v2, v2 :: v_dual_max_i32 v6, v3, v3 ; encoding: [0x02,0x05,0xee,0xca,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v3, v2 :: v_dual_max_i32 v6, v4, v3 +// GFX12: v_dual_min_num_f32 v255, v3, v2 :: v_dual_max_i32 v6, v4, v3 ; encoding: [0x03,0x05,0xee,0xca,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s105, v2 :: v_dual_max_i32 v6, s1, v3 +// GFX12: v_dual_min_num_f32 v255, s105, v2 :: v_dual_max_i32 v6, s1, v3 ; encoding: [0x69,0x04,0xee,0xca,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s1, v2 :: v_dual_max_i32 v6, s105, v3 +// GFX12: v_dual_min_num_f32 v255, s1, v2 :: v_dual_max_i32 v6, s105, v3 ; encoding: [0x01,0x04,0xee,0xca,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_max_i32 v6, vcc_lo, v3 +// GFX12: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_max_i32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xee,0xca,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_max_i32 v6, vcc_hi, v3 +// GFX12: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_max_i32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xee,0xca,0x6b,0x06,0x06,0xff] 
+// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_max_i32 v6, ttmp15, v3 +// GFX12: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_max_i32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xee,0xca,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, m0, v2 :: v_dual_max_i32 v6, m0, v3 +// GFX12: v_dual_min_num_f32 v255, m0, v2 :: v_dual_max_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0xee,0xca,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_max_i32 v6, exec_lo, v3 +// GFX12: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_max_i32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xee,0xca,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_max_i32 v6, exec_hi, v3 +// GFX12: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_max_i32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xee,0xca,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_max_i32 v6, null, v3 +// GFX12: v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_max_i32 v6, null, v3 ; encoding: [0xff,0x04,0xee,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_max_i32 v6, -1, v3 +// GFX12: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_max_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0xee,0xca,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_max_i32 v6, 0.5, v2 +// GFX12: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_max_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xee,0xca,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_min_num_f32 v255, -1, v4 :: v_dual_max_i32 v6, src_scc, v5 +// GFX12: v_dual_min_num_f32 v255, -1, v4 :: v_dual_max_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xee,0xca,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v6, null, v5 :: v_dual_max_i32 v255, 0xaf123456, v4 +// GFX12: v_dual_min_num_f32 v6, null, v5 :: v_dual_max_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xee,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v4 :: v_dual_max_i32 v6, v1, v255 +// GFX12: v_dual_mov_b32 v255, v4 :: v_dual_max_i32 v6, v1, v255 ; encoding: [0x04,0x01,0x2e,0xca,0x01,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v1 :: v_dual_max_i32 v6, v255, v255 +// GFX12: v_dual_mov_b32 v255, v1 :: v_dual_max_i32 v6, v255, v255 ; encoding: [0x01,0x01,0x2e,0xca,0xff,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v255 :: v_dual_max_i32 v6, v2, v255 +// GFX12: v_dual_mov_b32 v255, v255 :: v_dual_max_i32 v6, v2, v255 ; encoding: [0xff,0x01,0x2e,0xca,0x02,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v2 :: v_dual_max_i32 v6, v3, v255 +// GFX12: v_dual_mov_b32 v255, v2 :: v_dual_max_i32 v6, v3, v255 ; encoding: [0x02,0x01,0x2e,0xca,0x03,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v3 :: v_dual_max_i32 v6, v4, v255 +// GFX12: v_dual_mov_b32 v255, v3 :: v_dual_max_i32 v6, v4, v255 ; encoding: [0x03,0x01,0x2e,0xca,0x04,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s105 :: v_dual_max_i32 v6, s1, v255 +// GFX12: v_dual_mov_b32 v255, s105 :: v_dual_max_i32 v6, s1, v255 ; encoding: [0x69,0x00,0x2e,0xca,0x01,0xfe,0x07,0xff] +// 
W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s1 :: v_dual_max_i32 v6, s105, v255 +// GFX12: v_dual_mov_b32 v255, s1 :: v_dual_max_i32 v6, s105, v255 ; encoding: [0x01,0x00,0x2e,0xca,0x69,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, ttmp15 :: v_dual_max_i32 v6, vcc_lo, v255 +// GFX12: v_dual_mov_b32 v255, ttmp15 :: v_dual_max_i32 v6, vcc_lo, v255 ; encoding: [0x7b,0x00,0x2e,0xca,0x6a,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_hi :: v_dual_max_i32 v6, vcc_hi, v255 +// GFX12: v_dual_mov_b32 v255, exec_hi :: v_dual_max_i32 v6, vcc_hi, v255 ; encoding: [0x7f,0x00,0x2e,0xca,0x6b,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_lo :: v_dual_max_i32 v6, ttmp15, v255 +// GFX12: v_dual_mov_b32 v255, exec_lo :: v_dual_max_i32 v6, ttmp15, v255 ; encoding: [0x7e,0x00,0x2e,0xca,0x7b,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, m0 :: v_dual_max_i32 v6, m0, v255 +// GFX12: v_dual_mov_b32 v255, m0 :: v_dual_max_i32 v6, m0, v255 ; encoding: [0x7d,0x00,0x2e,0xca,0x7d,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_hi :: v_dual_max_i32 v6, exec_lo, v255 +// GFX12: v_dual_mov_b32 v255, vcc_hi :: v_dual_max_i32 v6, exec_lo, v255 ; encoding: [0x6b,0x00,0x2e,0xca,0x7e,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_lo :: v_dual_max_i32 v6, exec_hi, v255 +// GFX12: v_dual_mov_b32 v255, vcc_lo :: v_dual_max_i32 v6, exec_hi, v255 ; encoding: [0x6a,0x00,0x2e,0xca,0x7f,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0xaf123456 :: v_dual_max_i32 v6, null, v255 +// GFX12: v_dual_mov_b32 v255, 0xaf123456 :: 
v_dual_max_i32 v6, null, v255 ; encoding: [0xff,0x00,0x2e,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, src_scc :: v_dual_max_i32 v6, -1, v255 +// GFX12: v_dual_mov_b32 v255, src_scc :: v_dual_max_i32 v6, -1, v255 ; encoding: [0xfd,0x00,0x2e,0xca,0xc1,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0.5 :: v_dual_max_i32 v6, 0.5, v3 +// GFX12: v_dual_mov_b32 v255, 0.5 :: v_dual_max_i32 v6, 0.5, v3 ; encoding: [0xf0,0x00,0x2e,0xca,0xf0,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, -1 :: v_dual_max_i32 v6, src_scc, v4 +// GFX12: v_dual_mov_b32 v255, -1 :: v_dual_max_i32 v6, src_scc, v4 ; encoding: [0xc1,0x00,0x2e,0xca,0xfd,0x08,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v6, null :: v_dual_max_i32 v255, 0xaf123456, v5 +// GFX12: v_dual_mov_b32 v6, null :: v_dual_max_i32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x00,0x2e,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_max_i32 v6, v1, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_max_i32 v6, v1, v3 ; encoding: [0x04,0x05,0xee,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_max_i32 v6, v255, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_max_i32 v6, v255, v3 ; encoding: [0x01,0x05,0xee,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_max_i32 v6, v2, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_max_i32 v6, v2, v3 ; encoding: [0xff,0x05,0xee,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_max_i32 v6, v3, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_max_i32 v6, v3, v3 ; encoding: [0x02,0x05,0xee,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_max_i32 v6, v4, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_max_i32 v6, v4, v3 ; encoding: [0x03,0x05,0xee,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_max_i32 v6, s1, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_max_i32 v6, s1, v3 ; encoding: [0x69,0x04,0xee,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_max_i32 v6, s105, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_max_i32 v6, s105, v3 ; encoding: [0x01,0x04,0xee,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_max_i32 v6, vcc_lo, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_max_i32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xee,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_max_i32 v6, vcc_hi, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_max_i32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xee,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_max_i32 v6, ttmp15, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_max_i32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xee,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_max_i32 v6, m0, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_max_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0xee,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_max_i32 v6, exec_lo, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_max_i32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xee,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_max_i32 v6, exec_hi, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_max_i32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xee,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_max_i32 v6, null, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_max_i32 v6, null, v3 ; encoding: [0xff,0x04,0xee,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_max_i32 v6, -1, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_max_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0xee,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_max_i32 v6, 0.5, v2 +// GFX12: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_max_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xee,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_max_i32 v6, src_scc, v5 +// GFX12: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_max_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xee,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_max_i32 v255, 0xaf123456, v4 +// GFX12: v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_max_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xee,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v4, v2 :: v_dual_max_i32 v6, v1, v3 +// GFX12: v_dual_mul_f32 v255, v4, v2 :: v_dual_max_i32 v6, v1, v3 ; encoding: [0x04,0x05,0xee,0xc8,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v1, v2 :: v_dual_max_i32 v6, v255, v3 +// GFX12: v_dual_mul_f32 v255, v1, v2 :: v_dual_max_i32 v6, v255, v3 ; encoding: [0x01,0x05,0xee,0xc8,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v255, v2 :: v_dual_max_i32 v6, v2, v3 +// GFX12: v_dual_mul_f32 v255, v255, v2 :: v_dual_max_i32 v6, v2, v3 ; encoding: [0xff,0x05,0xee,0xc8,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v2, v2 :: v_dual_max_i32 v6, v3, v3 +// GFX12: v_dual_mul_f32 v255, v2, v2 :: v_dual_max_i32 v6, v3, v3 ; encoding: [0x02,0x05,0xee,0xc8,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v3, v2 :: v_dual_max_i32 v6, v4, v3 +// GFX12: v_dual_mul_f32 v255, v3, v2 :: v_dual_max_i32 v6, v4, v3 ; encoding: [0x03,0x05,0xee,0xc8,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s105, v2 :: v_dual_max_i32 v6, s1, v3 +// GFX12: v_dual_mul_f32 v255, s105, v2 :: v_dual_max_i32 v6, s1, v3 ; encoding: [0x69,0x04,0xee,0xc8,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s1, v2 :: v_dual_max_i32 v6, s105, v3 +// GFX12: v_dual_mul_f32 v255, s1, v2 :: v_dual_max_i32 v6, s105, v3 ; encoding: 
[0x01,0x04,0xee,0xc8,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_max_i32 v6, vcc_lo, v3 +// GFX12: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_max_i32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xee,0xc8,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_max_i32 v6, vcc_hi, v3 +// GFX12: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_max_i32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xee,0xc8,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_max_i32 v6, ttmp15, v3 +// GFX12: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_max_i32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xee,0xc8,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, m0, v2 :: v_dual_max_i32 v6, m0, v3 +// GFX12: v_dual_mul_f32 v255, m0, v2 :: v_dual_max_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0xee,0xc8,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_max_i32 v6, exec_lo, v3 +// GFX12: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_max_i32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xee,0xc8,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_max_i32 v6, exec_hi, v3 +// GFX12: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_max_i32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xee,0xc8,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_max_i32 v6, null, v3 +// GFX12: v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_max_i32 v6, null, v3 ; encoding: [0xff,0x04,0xee,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_mul_f32 v255, src_scc, v2 :: v_dual_max_i32 v6, -1, v3 +// GFX12: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_max_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0xee,0xc8,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0.5, v3 :: v_dual_max_i32 v6, 0.5, v2 +// GFX12: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_max_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xee,0xc8,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, -1, v4 :: v_dual_max_i32 v6, src_scc, v5 +// GFX12: v_dual_mul_f32 v255, -1, v4 :: v_dual_max_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xee,0xc8,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v6, null, v5 :: v_dual_max_i32 v255, 0xaf123456, v4 +// GFX12: v_dual_mul_f32 v6, null, v5 :: v_dual_max_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xee,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v4, v2 :: v_dual_max_i32 v6, v1, v3 +// GFX12: v_dual_sub_f32 v255, v4, v2 :: v_dual_max_i32 v6, v1, v3 ; encoding: [0x04,0x05,0x6e,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v1, v2 :: v_dual_max_i32 v6, v255, v3 +// GFX12: v_dual_sub_f32 v255, v1, v2 :: v_dual_max_i32 v6, v255, v3 ; encoding: [0x01,0x05,0x6e,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v255, v2 :: v_dual_max_i32 v6, v2, v3 +// GFX12: v_dual_sub_f32 v255, v255, v2 :: v_dual_max_i32 v6, v2, v3 ; encoding: [0xff,0x05,0x6e,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v2, v2 :: v_dual_max_i32 v6, v3, v3 +// GFX12: v_dual_sub_f32 v255, v2, v2 :: v_dual_max_i32 v6, v3, v3 ; encoding: 
[0x02,0x05,0x6e,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v3, v2 :: v_dual_max_i32 v6, v4, v3 +// GFX12: v_dual_sub_f32 v255, v3, v2 :: v_dual_max_i32 v6, v4, v3 ; encoding: [0x03,0x05,0x6e,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s105, v2 :: v_dual_max_i32 v6, s1, v3 +// GFX12: v_dual_sub_f32 v255, s105, v2 :: v_dual_max_i32 v6, s1, v3 ; encoding: [0x69,0x04,0x6e,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s1, v2 :: v_dual_max_i32 v6, s105, v3 +// GFX12: v_dual_sub_f32 v255, s1, v2 :: v_dual_max_i32 v6, s105, v3 ; encoding: [0x01,0x04,0x6e,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_max_i32 v6, vcc_lo, v3 +// GFX12: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_max_i32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x6e,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_max_i32 v6, vcc_hi, v3 +// GFX12: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_max_i32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x6e,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_max_i32 v6, ttmp15, v3 +// GFX12: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_max_i32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x6e,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, m0, v2 :: v_dual_max_i32 v6, m0, v3 +// GFX12: v_dual_sub_f32 v255, m0, v2 :: v_dual_max_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0x6e,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_max_i32 v6, exec_lo, v3 
+// GFX12: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_max_i32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x6e,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_max_i32 v6, exec_hi, v3 +// GFX12: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_max_i32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x6e,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0xaf123456, v2 :: v_dual_max_i32 v6, null, v3 +// GFX12: v_dual_sub_f32 v255, 0xaf123456, v2 :: v_dual_max_i32 v6, null, v3 ; encoding: [0xff,0x04,0x6e,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, src_scc, v2 :: v_dual_max_i32 v6, -1, v3 +// GFX12: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_max_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0x6e,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0.5, v3 :: v_dual_max_i32 v6, 0.5, v2 +// GFX12: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_max_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x6e,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, -1, v4 :: v_dual_max_i32 v6, src_scc, v5 +// GFX12: v_dual_sub_f32 v255, -1, v4 :: v_dual_max_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x6e,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v6, null, v5 :: v_dual_max_i32 v255, 0xaf123456, v4 +// GFX12: v_dual_sub_f32 v6, null, v5 :: v_dual_max_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x6e,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v4, v2 :: v_dual_max_i32 v6, v1, v3 +// GFX12: v_dual_subrev_f32 v255, v4, v2 :: v_dual_max_i32 v6, v1, v3 ; encoding: 
[0x04,0x05,0xae,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v1, v2 :: v_dual_max_i32 v6, v255, v3 +// GFX12: v_dual_subrev_f32 v255, v1, v2 :: v_dual_max_i32 v6, v255, v3 ; encoding: [0x01,0x05,0xae,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v255, v2 :: v_dual_max_i32 v6, v2, v3 +// GFX12: v_dual_subrev_f32 v255, v255, v2 :: v_dual_max_i32 v6, v2, v3 ; encoding: [0xff,0x05,0xae,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v2, v2 :: v_dual_max_i32 v6, v3, v3 +// GFX12: v_dual_subrev_f32 v255, v2, v2 :: v_dual_max_i32 v6, v3, v3 ; encoding: [0x02,0x05,0xae,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v3, v2 :: v_dual_max_i32 v6, v4, v3 +// GFX12: v_dual_subrev_f32 v255, v3, v2 :: v_dual_max_i32 v6, v4, v3 ; encoding: [0x03,0x05,0xae,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s105, v2 :: v_dual_max_i32 v6, s1, v3 +// GFX12: v_dual_subrev_f32 v255, s105, v2 :: v_dual_max_i32 v6, s1, v3 ; encoding: [0x69,0x04,0xae,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s1, v2 :: v_dual_max_i32 v6, s105, v3 +// GFX12: v_dual_subrev_f32 v255, s1, v2 :: v_dual_max_i32 v6, s105, v3 ; encoding: [0x01,0x04,0xae,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_max_i32 v6, vcc_lo, v3 +// GFX12: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_max_i32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xae,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_max_i32 
v6, vcc_hi, v3 +// GFX12: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_max_i32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xae,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_max_i32 v6, ttmp15, v3 +// GFX12: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_max_i32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xae,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, m0, v2 :: v_dual_max_i32 v6, m0, v3 +// GFX12: v_dual_subrev_f32 v255, m0, v2 :: v_dual_max_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0xae,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_max_i32 v6, exec_lo, v3 +// GFX12: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_max_i32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xae,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_max_i32 v6, exec_hi, v3 +// GFX12: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_max_i32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xae,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0xaf123456, v2 :: v_dual_max_i32 v6, null, v3 +// GFX12: v_dual_subrev_f32 v255, 0xaf123456, v2 :: v_dual_max_i32 v6, null, v3 ; encoding: [0xff,0x04,0xae,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_max_i32 v6, -1, v3 +// GFX12: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_max_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0xae,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_max_i32 v6, 0.5, v2 +// GFX12: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_max_i32 v6, 0.5, 
v2 ; encoding: [0xf0,0x06,0xae,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, -1, v4 :: v_dual_max_i32 v6, src_scc, v5 +// GFX12: v_dual_subrev_f32 v255, -1, v4 :: v_dual_max_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xae,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v6, null, v5 :: v_dual_max_i32 v255, 0xaf123456, v4 +// GFX12: v_dual_subrev_f32 v6, null, v5 :: v_dual_max_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xae,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v4, v2 :: v_dual_min_i32 v6, v1, v3 +// GFX12: v_dual_add_f32 v255, v4, v2 :: v_dual_min_i32 v6, v1, v3 ; encoding: [0x04,0x05,0x30,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v1, v2 :: v_dual_min_i32 v6, v255, v3 +// GFX12: v_dual_add_f32 v255, v1, v2 :: v_dual_min_i32 v6, v255, v3 ; encoding: [0x01,0x05,0x30,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v255, v2 :: v_dual_min_i32 v6, v2, v3 +// GFX12: v_dual_add_f32 v255, v255, v2 :: v_dual_min_i32 v6, v2, v3 ; encoding: [0xff,0x05,0x30,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v2, v2 :: v_dual_min_i32 v6, v3, v3 +// GFX12: v_dual_add_f32 v255, v2, v2 :: v_dual_min_i32 v6, v3, v3 ; encoding: [0x02,0x05,0x30,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v3, v2 :: v_dual_min_i32 v6, v4, v3 +// GFX12: v_dual_add_f32 v255, v3, v2 :: v_dual_min_i32 v6, v4, v3 ; encoding: [0x03,0x05,0x30,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s105, v2 :: 
v_dual_min_i32 v6, s1, v3 +// GFX12: v_dual_add_f32 v255, s105, v2 :: v_dual_min_i32 v6, s1, v3 ; encoding: [0x69,0x04,0x30,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s1, v2 :: v_dual_min_i32 v6, s105, v3 +// GFX12: v_dual_add_f32 v255, s1, v2 :: v_dual_min_i32 v6, s105, v3 ; encoding: [0x01,0x04,0x30,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, ttmp15, v2 :: v_dual_min_i32 v6, vcc_lo, v3 +// GFX12: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_min_i32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x30,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_hi, v2 :: v_dual_min_i32 v6, vcc_hi, v3 +// GFX12: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_min_i32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x30,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_lo, v2 :: v_dual_min_i32 v6, ttmp15, v3 +// GFX12: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_min_i32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x30,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, m0, v2 :: v_dual_min_i32 v6, m0, v3 +// GFX12: v_dual_add_f32 v255, m0, v2 :: v_dual_min_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0x30,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_min_i32 v6, exec_lo, v3 +// GFX12: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_min_i32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x30,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_min_i32 v6, exec_hi, v3 +// GFX12: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_min_i32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x30,0xc9,0x7f,0x06,0x06,0xff] 
+// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_min_i32 v6, null, v3 +// GFX12: v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_min_i32 v6, null, v3 ; encoding: [0xff,0x04,0x30,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, src_scc, v2 :: v_dual_min_i32 v6, -1, v3 +// GFX12: v_dual_add_f32 v255, src_scc, v2 :: v_dual_min_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0x30,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0.5, v3 :: v_dual_min_i32 v6, 0.5, v2 +// GFX12: v_dual_add_f32 v255, 0.5, v3 :: v_dual_min_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x30,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, -1, v4 :: v_dual_min_i32 v6, src_scc, v5 +// GFX12: v_dual_add_f32 v255, -1, v4 :: v_dual_min_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x30,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v6, null, v5 :: v_dual_min_i32 v255, 0xaf123456, v4 +// GFX12: v_dual_add_f32 v6, null, v5 :: v_dual_min_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x30,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2 :: v_dual_min_i32 v6, v1, v3 +// GFX12: v_dual_cndmask_b32 v255, v4, v2 :: v_dual_min_i32 v6, v1, v3 ; encoding: [0x04,0x05,0x70,0xca,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v1, v2 :: v_dual_min_i32 v6, v255, v3 +// GFX12: v_dual_cndmask_b32 v255, v1, v2 :: v_dual_min_i32 v6, v255, v3 ; encoding: [0x01,0x05,0x70,0xca,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v255, v2 :: 
v_dual_min_i32 v6, v2, v3 +// GFX12: v_dual_cndmask_b32 v255, v255, v2 :: v_dual_min_i32 v6, v2, v3 ; encoding: [0xff,0x05,0x70,0xca,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v2, v2 :: v_dual_min_i32 v6, v3, v3 +// GFX12: v_dual_cndmask_b32 v255, v2, v2 :: v_dual_min_i32 v6, v3, v3 ; encoding: [0x02,0x05,0x70,0xca,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v3, v2 :: v_dual_min_i32 v6, v4, v3 +// GFX12: v_dual_cndmask_b32 v255, v3, v2 :: v_dual_min_i32 v6, v4, v3 ; encoding: [0x03,0x05,0x70,0xca,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s105, v2 :: v_dual_min_i32 v6, s105, v3 +// GFX12: v_dual_cndmask_b32 v255, s105, v2 :: v_dual_min_i32 v6, s105, v3 ; encoding: [0x69,0x04,0x70,0xca,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s1, v2 :: v_dual_min_i32 v6, s1, v3 +// GFX12: v_dual_cndmask_b32 v255, s1, v2 :: v_dual_min_i32 v6, s1, v3 ; encoding: [0x01,0x04,0x70,0xca,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, ttmp15, v2 :: v_dual_min_i32 v6, ttmp15, v3 +// GFX12: v_dual_cndmask_b32 v255, ttmp15, v2 :: v_dual_min_i32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x70,0xca,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_hi, v2 :: v_dual_min_i32 v6, exec_hi, v3 +// GFX12: v_dual_cndmask_b32 v255, exec_hi, v2 :: v_dual_min_i32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x70,0xca,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_lo, v2 :: v_dual_min_i32 v6, exec_lo, v3 +// GFX12: v_dual_cndmask_b32 v255, exec_lo, v2 :: v_dual_min_i32 v6, exec_lo, v3 ; encoding: 
[0x7e,0x04,0x70,0xca,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, m0, v2 :: v_dual_min_i32 v6, m0, v3 +// GFX12: v_dual_cndmask_b32 v255, m0, v2 :: v_dual_min_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0x70,0xca,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_hi, v2 :: v_dual_min_i32 v6, vcc_hi, v3 +// GFX12: v_dual_cndmask_b32 v255, vcc_hi, v2 :: v_dual_min_i32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x70,0xca,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_lo, v2 :: v_dual_min_i32 v6, vcc_lo, v3 +// GFX12: v_dual_cndmask_b32 v255, vcc_lo, v2 :: v_dual_min_i32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x70,0xca,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_min_i32 v6, null, v3 +// GFX12: v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_min_i32 v6, null, v3 ; encoding: [0xff,0x04,0x70,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, src_scc, v2 :: v_dual_min_i32 v6, -1, v3 +// GFX12: v_dual_cndmask_b32 v255, src_scc, v2 :: v_dual_min_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0x70,0xca,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_min_i32 v6, 0.5, v2 +// GFX12: v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_min_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x70,0xca,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, -1, v4 :: v_dual_min_i32 v6, src_scc, v5 +// GFX12: v_dual_cndmask_b32 v255, -1, v4 :: v_dual_min_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x70,0xca,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction 
requires wavesize=32 + +v_dual_cndmask_b32 v6, null, v5 :: v_dual_min_i32 v255, 0xaf123456, v4 +// GFX12: v_dual_cndmask_b32 v6, null, v5 :: v_dual_min_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x70,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_min_i32 v6, v1, v3 +// GFX12: v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_min_i32 v6, v1, v3 ; encoding: [0x04,0x05,0x70,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: v_dual_min_i32 v6, v255, v3 +// GFX12: v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: v_dual_min_i32 v6, v255, v3 ; encoding: [0x01,0x05,0x70,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_min_i32 v6, v2, v3 +// GFX12: v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_min_i32 v6, v2, v3 ; encoding: [0xff,0x05,0x70,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_min_i32 v6, v3, v3 +// GFX12: v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_min_i32 v6, v3, v3 ; encoding: [0x02,0x05,0x70,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_min_i32 v6, v4, v3 +// GFX12: v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_min_i32 v6, v4, v3 ; encoding: [0x03,0x05,0x70,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, s105, v2, 0xaf123456 :: v_dual_min_i32 v6, s105, v3 +// GFX12: v_dual_fmaak_f32 v255, s105, v2, 0xaf123456 :: v_dual_min_i32 v6, s105, v3 ; encoding: 
[0x69,0x04,0x70,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, s1, v2, 0xaf123456 :: v_dual_min_i32 v6, s1, v3 +// GFX12: v_dual_fmaak_f32 v255, s1, v2, 0xaf123456 :: v_dual_min_i32 v6, s1, v3 ; encoding: [0x01,0x04,0x70,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, ttmp15, v2, 0xaf123456 :: v_dual_min_i32 v6, ttmp15, v3 +// GFX12: v_dual_fmaak_f32 v255, ttmp15, v2, 0xaf123456 :: v_dual_min_i32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x70,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, exec_hi, v2, 0xaf123456 :: v_dual_min_i32 v6, exec_hi, v3 +// GFX12: v_dual_fmaak_f32 v255, exec_hi, v2, 0xaf123456 :: v_dual_min_i32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x70,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, exec_lo, v2, 0xaf123456 :: v_dual_min_i32 v6, exec_lo, v3 +// GFX12: v_dual_fmaak_f32 v255, exec_lo, v2, 0xaf123456 :: v_dual_min_i32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x70,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, m0, v2, 0xaf123456 :: v_dual_min_i32 v6, m0, v3 +// GFX12: v_dual_fmaak_f32 v255, m0, v2, 0xaf123456 :: v_dual_min_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0x70,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, vcc_hi, v2, 0xaf123456 :: v_dual_min_i32 v6, vcc_hi, v3 +// GFX12: v_dual_fmaak_f32 v255, vcc_hi, v2, 0xaf123456 :: v_dual_min_i32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x70,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_fmaak_f32 v255, vcc_lo, v2, 0xaf123456 :: v_dual_min_i32 v6, vcc_lo, v3 +// GFX12: v_dual_fmaak_f32 v255, vcc_lo, v2, 0xaf123456 :: v_dual_min_i32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x70,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_min_i32 v6, null, v3 +// GFX12: v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_min_i32 v6, null, v3 ; encoding: [0xff,0x04,0x70,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, src_scc, v2, 0xaf123456 :: v_dual_min_i32 v6, -1, v3 +// GFX12: v_dual_fmaak_f32 v255, src_scc, v2, 0xaf123456 :: v_dual_min_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0x70,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_min_i32 v6, 0.5, v2 +// GFX12: v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_min_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x70,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_min_i32 v6, src_scc, v5 +// GFX12: v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_min_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x70,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_min_i32 v255, 0xaf123456, v4 +// GFX12: v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_min_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x70,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v4, v2 :: v_dual_min_i32 v6, v1, v3 +// GFX12: v_dual_fmac_f32 v255, v4, v2 :: v_dual_min_i32 v6, v1, 
v3 ; encoding: [0x04,0x05,0x30,0xc8,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v1, v2 :: v_dual_min_i32 v6, v255, v3 +// GFX12: v_dual_fmac_f32 v255, v1, v2 :: v_dual_min_i32 v6, v255, v3 ; encoding: [0x01,0x05,0x30,0xc8,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v255, v2 :: v_dual_min_i32 v6, v2, v3 +// GFX12: v_dual_fmac_f32 v255, v255, v2 :: v_dual_min_i32 v6, v2, v3 ; encoding: [0xff,0x05,0x30,0xc8,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v2, v2 :: v_dual_min_i32 v6, v3, v3 +// GFX12: v_dual_fmac_f32 v255, v2, v2 :: v_dual_min_i32 v6, v3, v3 ; encoding: [0x02,0x05,0x30,0xc8,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v3, v2 :: v_dual_min_i32 v6, v4, v3 +// GFX12: v_dual_fmac_f32 v255, v3, v2 :: v_dual_min_i32 v6, v4, v3 ; encoding: [0x03,0x05,0x30,0xc8,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s105, v2 :: v_dual_min_i32 v6, s1, v3 +// GFX12: v_dual_fmac_f32 v255, s105, v2 :: v_dual_min_i32 v6, s1, v3 ; encoding: [0x69,0x04,0x30,0xc8,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s1, v2 :: v_dual_min_i32 v6, s105, v3 +// GFX12: v_dual_fmac_f32 v255, s1, v2 :: v_dual_min_i32 v6, s105, v3 ; encoding: [0x01,0x04,0x30,0xc8,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_min_i32 v6, vcc_lo, v3 +// GFX12: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_min_i32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x30,0xc8,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_min_i32 v6, vcc_hi, v3 
+// GFX12: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_min_i32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x30,0xc8,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_min_i32 v6, ttmp15, v3 +// GFX12: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_min_i32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x30,0xc8,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, m0, v2 :: v_dual_min_i32 v6, m0, v3 +// GFX12: v_dual_fmac_f32 v255, m0, v2 :: v_dual_min_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0x30,0xc8,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_min_i32 v6, exec_lo, v3 +// GFX12: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_min_i32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x30,0xc8,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_min_i32 v6, exec_hi, v3 +// GFX12: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_min_i32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x30,0xc8,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_min_i32 v6, null, v3 +// GFX12: v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_min_i32 v6, null, v3 ; encoding: [0xff,0x04,0x30,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_min_i32 v6, -1, v3 +// GFX12: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_min_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0x30,0xc8,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_min_i32 v6, 0.5, v2 +// GFX12: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_min_i32 v6, 0.5, v2 ; encoding: 
[0xf0,0x06,0x30,0xc8,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, -1, v4 :: v_dual_min_i32 v6, src_scc, v5 +// GFX12: v_dual_fmac_f32 v255, -1, v4 :: v_dual_min_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x30,0xc8,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v6, null, v5 :: v_dual_min_i32 v255, 0xaf123456, v4 +// GFX12: v_dual_fmac_f32 v6, null, v5 :: v_dual_min_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x30,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_min_i32 v6, v1, v255 +// GFX12: v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_min_i32 v6, v1, v255 ; encoding: [0x04,0xff,0xb1,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_min_i32 v6, v255, v255 +// GFX12: v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_min_i32 v6, v255, v255 ; encoding: [0x01,0xff,0xb1,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_min_i32 v6, v2, v255 +// GFX12: v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_min_i32 v6, v2, v255 ; encoding: [0xff,0xff,0xb1,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_min_i32 v6, v3, v255 +// GFX12: v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_min_i32 v6, v3, v255 ; encoding: [0x02,0xff,0xb1,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_min_i32 v6, v4, v255 +// GFX12: 
v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_min_i32 v6, v4, v255 ; encoding: [0x03,0xff,0xb1,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_min_i32 v6, s105, v255 +// GFX12: v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_min_i32 v6, s105, v255 ; encoding: [0x69,0xfe,0xb1,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_min_i32 v6, s1, v255 +// GFX12: v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_min_i32 v6, s1, v255 ; encoding: [0x01,0xfe,0xb1,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_min_i32 v6, ttmp15, v255 +// GFX12: v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_min_i32 v6, ttmp15, v255 ; encoding: [0x7b,0xfe,0xb1,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_min_i32 v6, exec_hi, v255 +// GFX12: v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_min_i32 v6, exec_hi, v255 ; encoding: [0x7f,0xfe,0xb1,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_min_i32 v6, exec_lo, v255 +// GFX12: v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_min_i32 v6, exec_lo, v255 ; encoding: [0x7e,0xfe,0xb1,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_min_i32 v6, m0, v255 +// GFX12: v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_min_i32 v6, m0, v255 ; encoding: 
[0x7d,0xfe,0xb1,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_min_i32 v6, vcc_hi, v255 +// GFX12: v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_min_i32 v6, vcc_hi, v255 ; encoding: [0x6b,0xfe,0xb1,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_min_i32 v6, vcc_lo, v255 +// GFX12: v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_min_i32 v6, vcc_lo, v255 ; encoding: [0x6a,0xfe,0xb1,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_min_i32 v6, null, v255 +// GFX12: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_min_i32 v6, null, v255 ; encoding: [0xff,0xfe,0xb1,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_min_i32 v6, -1, v255 +// GFX12: v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_min_i32 v6, -1, v255 ; encoding: [0xfd,0xfe,0xb1,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_min_i32 v6, 0.5, v3 +// GFX12: v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_min_i32 v6, 0.5, v3 ; encoding: [0xf0,0xfe,0xb1,0xc8,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_min_i32 v6, src_scc, v4 +// GFX12: v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_min_i32 v6, src_scc, v4 ; encoding: [0xc1,0xfe,0xb1,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_min_i32 v255, 0xaf123456, v5 +// GFX12: v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_min_i32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x08,0xb0,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v4, v2 :: v_dual_min_i32 v6, v1, v3 +// GFX12: v_dual_max_num_f32 v255, v4, v2 :: v_dual_min_i32 v6, v1, v3 ; encoding: [0x04,0x05,0xb0,0xca,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v1, v2 :: v_dual_min_i32 v6, v255, v3 +// GFX12: v_dual_max_num_f32 v255, v1, v2 :: v_dual_min_i32 v6, v255, v3 ; encoding: [0x01,0x05,0xb0,0xca,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v255, v2 :: v_dual_min_i32 v6, v2, v3 +// GFX12: v_dual_max_num_f32 v255, v255, v2 :: v_dual_min_i32 v6, v2, v3 ; encoding: [0xff,0x05,0xb0,0xca,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v2, v2 :: v_dual_min_i32 v6, v3, v3 +// GFX12: v_dual_max_num_f32 v255, v2, v2 :: v_dual_min_i32 v6, v3, v3 ; encoding: [0x02,0x05,0xb0,0xca,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v3, v2 :: v_dual_min_i32 v6, v4, v3 +// GFX12: v_dual_max_num_f32 v255, v3, v2 :: v_dual_min_i32 v6, v4, v3 ; encoding: [0x03,0x05,0xb0,0xca,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s105, v2 :: v_dual_min_i32 v6, s1, v3 +// GFX12: v_dual_max_num_f32 v255, s105, v2 :: v_dual_min_i32 v6, s1, v3 ; encoding: [0x69,0x04,0xb0,0xca,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s1, v2 :: v_dual_min_i32 v6, 
s105, v3 +// GFX12: v_dual_max_num_f32 v255, s1, v2 :: v_dual_min_i32 v6, s105, v3 ; encoding: [0x01,0x04,0xb0,0xca,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_min_i32 v6, vcc_lo, v3 +// GFX12: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_min_i32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xb0,0xca,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_min_i32 v6, vcc_hi, v3 +// GFX12: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_min_i32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xb0,0xca,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_min_i32 v6, ttmp15, v3 +// GFX12: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_min_i32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xb0,0xca,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, m0, v2 :: v_dual_min_i32 v6, m0, v3 +// GFX12: v_dual_max_num_f32 v255, m0, v2 :: v_dual_min_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0xb0,0xca,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_min_i32 v6, exec_lo, v3 +// GFX12: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_min_i32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xb0,0xca,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_min_i32 v6, exec_hi, v3 +// GFX12: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_min_i32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xb0,0xca,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0xaf123456, v2 :: v_dual_min_i32 v6, null, v3 +// GFX12: v_dual_max_num_f32 v255, 0xaf123456, v2 :: v_dual_min_i32 v6, 
null, v3 ; encoding: [0xff,0x04,0xb0,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_min_i32 v6, -1, v3 +// GFX12: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_min_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0xb0,0xca,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_min_i32 v6, 0.5, v2 +// GFX12: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_min_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xb0,0xca,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, -1, v4 :: v_dual_min_i32 v6, src_scc, v5 +// GFX12: v_dual_max_num_f32 v255, -1, v4 :: v_dual_min_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xb0,0xca,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v6, null, v5 :: v_dual_min_i32 v255, 0xaf123456, v4 +// GFX12: v_dual_max_num_f32 v6, null, v5 :: v_dual_min_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xb0,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v4, v2 :: v_dual_min_i32 v6, v1, v3 +// GFX12: v_dual_min_num_f32 v255, v4, v2 :: v_dual_min_i32 v6, v1, v3 ; encoding: [0x04,0x05,0xf0,0xca,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v1, v2 :: v_dual_min_i32 v6, v255, v3 +// GFX12: v_dual_min_num_f32 v255, v1, v2 :: v_dual_min_i32 v6, v255, v3 ; encoding: [0x01,0x05,0xf0,0xca,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v255, v2 :: v_dual_min_i32 v6, v2, v3 +// GFX12: v_dual_min_num_f32 v255, v255, v2 :: v_dual_min_i32 v6, v2, v3 ; encoding: [0xff,0x05,0xf0,0xca,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: 
error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v2, v2 :: v_dual_min_i32 v6, v3, v3 +// GFX12: v_dual_min_num_f32 v255, v2, v2 :: v_dual_min_i32 v6, v3, v3 ; encoding: [0x02,0x05,0xf0,0xca,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v3, v2 :: v_dual_min_i32 v6, v4, v3 +// GFX12: v_dual_min_num_f32 v255, v3, v2 :: v_dual_min_i32 v6, v4, v3 ; encoding: [0x03,0x05,0xf0,0xca,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s105, v2 :: v_dual_min_i32 v6, s1, v3 +// GFX12: v_dual_min_num_f32 v255, s105, v2 :: v_dual_min_i32 v6, s1, v3 ; encoding: [0x69,0x04,0xf0,0xca,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s1, v2 :: v_dual_min_i32 v6, s105, v3 +// GFX12: v_dual_min_num_f32 v255, s1, v2 :: v_dual_min_i32 v6, s105, v3 ; encoding: [0x01,0x04,0xf0,0xca,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_min_i32 v6, vcc_lo, v3 +// GFX12: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_min_i32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xf0,0xca,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_min_i32 v6, vcc_hi, v3 +// GFX12: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_min_i32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xf0,0xca,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_min_i32 v6, ttmp15, v3 +// GFX12: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_min_i32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xf0,0xca,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, m0, v2 :: v_dual_min_i32 v6, m0, v3 +// GFX12: 
v_dual_min_num_f32 v255, m0, v2 :: v_dual_min_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0xf0,0xca,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_min_i32 v6, exec_lo, v3 +// GFX12: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_min_i32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xf0,0xca,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_min_i32 v6, exec_hi, v3 +// GFX12: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_min_i32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xf0,0xca,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_min_i32 v6, null, v3 +// GFX12: v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_min_i32 v6, null, v3 ; encoding: [0xff,0x04,0xf0,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_min_i32 v6, -1, v3 +// GFX12: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_min_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0xf0,0xca,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_min_i32 v6, 0.5, v2 +// GFX12: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_min_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xf0,0xca,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, -1, v4 :: v_dual_min_i32 v6, src_scc, v5 +// GFX12: v_dual_min_num_f32 v255, -1, v4 :: v_dual_min_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xf0,0xca,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v6, null, v5 :: v_dual_min_i32 v255, 0xaf123456, v4 +// GFX12: v_dual_min_num_f32 v6, null, v5 :: v_dual_min_i32 v255, 0xaf123456, v4 ; 
encoding: [0x7c,0x0a,0xf0,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v4 :: v_dual_min_i32 v6, v1, v255 +// GFX12: v_dual_mov_b32 v255, v4 :: v_dual_min_i32 v6, v1, v255 ; encoding: [0x04,0x01,0x30,0xca,0x01,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v1 :: v_dual_min_i32 v6, v255, v255 +// GFX12: v_dual_mov_b32 v255, v1 :: v_dual_min_i32 v6, v255, v255 ; encoding: [0x01,0x01,0x30,0xca,0xff,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v255 :: v_dual_min_i32 v6, v2, v255 +// GFX12: v_dual_mov_b32 v255, v255 :: v_dual_min_i32 v6, v2, v255 ; encoding: [0xff,0x01,0x30,0xca,0x02,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v2 :: v_dual_min_i32 v6, v3, v255 +// GFX12: v_dual_mov_b32 v255, v2 :: v_dual_min_i32 v6, v3, v255 ; encoding: [0x02,0x01,0x30,0xca,0x03,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v3 :: v_dual_min_i32 v6, v4, v255 +// GFX12: v_dual_mov_b32 v255, v3 :: v_dual_min_i32 v6, v4, v255 ; encoding: [0x03,0x01,0x30,0xca,0x04,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s105 :: v_dual_min_i32 v6, s1, v255 +// GFX12: v_dual_mov_b32 v255, s105 :: v_dual_min_i32 v6, s1, v255 ; encoding: [0x69,0x00,0x30,0xca,0x01,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s1 :: v_dual_min_i32 v6, s105, v255 +// GFX12: v_dual_mov_b32 v255, s1 :: v_dual_min_i32 v6, s105, v255 ; encoding: [0x01,0x00,0x30,0xca,0x69,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, ttmp15 :: v_dual_min_i32 v6, vcc_lo, v255 +// GFX12: v_dual_mov_b32 v255, ttmp15 :: 
v_dual_min_i32 v6, vcc_lo, v255 ; encoding: [0x7b,0x00,0x30,0xca,0x6a,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_hi :: v_dual_min_i32 v6, vcc_hi, v255 +// GFX12: v_dual_mov_b32 v255, exec_hi :: v_dual_min_i32 v6, vcc_hi, v255 ; encoding: [0x7f,0x00,0x30,0xca,0x6b,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_lo :: v_dual_min_i32 v6, ttmp15, v255 +// GFX12: v_dual_mov_b32 v255, exec_lo :: v_dual_min_i32 v6, ttmp15, v255 ; encoding: [0x7e,0x00,0x30,0xca,0x7b,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, m0 :: v_dual_min_i32 v6, m0, v255 +// GFX12: v_dual_mov_b32 v255, m0 :: v_dual_min_i32 v6, m0, v255 ; encoding: [0x7d,0x00,0x30,0xca,0x7d,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_hi :: v_dual_min_i32 v6, exec_lo, v255 +// GFX12: v_dual_mov_b32 v255, vcc_hi :: v_dual_min_i32 v6, exec_lo, v255 ; encoding: [0x6b,0x00,0x30,0xca,0x7e,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_lo :: v_dual_min_i32 v6, exec_hi, v255 +// GFX12: v_dual_mov_b32 v255, vcc_lo :: v_dual_min_i32 v6, exec_hi, v255 ; encoding: [0x6a,0x00,0x30,0xca,0x7f,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0xaf123456 :: v_dual_min_i32 v6, null, v255 +// GFX12: v_dual_mov_b32 v255, 0xaf123456 :: v_dual_min_i32 v6, null, v255 ; encoding: [0xff,0x00,0x30,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, src_scc :: v_dual_min_i32 v6, -1, v255 +// GFX12: v_dual_mov_b32 v255, src_scc :: v_dual_min_i32 v6, -1, v255 ; encoding: [0xfd,0x00,0x30,0xca,0xc1,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_mov_b32 v255, 0.5 :: v_dual_min_i32 v6, 0.5, v3 +// GFX12: v_dual_mov_b32 v255, 0.5 :: v_dual_min_i32 v6, 0.5, v3 ; encoding: [0xf0,0x00,0x30,0xca,0xf0,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, -1 :: v_dual_min_i32 v6, src_scc, v4 +// GFX12: v_dual_mov_b32 v255, -1 :: v_dual_min_i32 v6, src_scc, v4 ; encoding: [0xc1,0x00,0x30,0xca,0xfd,0x08,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v6, null :: v_dual_min_i32 v255, 0xaf123456, v5 +// GFX12: v_dual_mov_b32 v6, null :: v_dual_min_i32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x00,0x30,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_min_i32 v6, v1, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_min_i32 v6, v1, v3 ; encoding: [0x04,0x05,0xf0,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_min_i32 v6, v255, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_min_i32 v6, v255, v3 ; encoding: [0x01,0x05,0xf0,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_min_i32 v6, v2, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_min_i32 v6, v2, v3 ; encoding: [0xff,0x05,0xf0,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_min_i32 v6, v3, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_min_i32 v6, v3, v3 ; encoding: [0x02,0x05,0xf0,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_min_i32 v6, v4, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: 
v_dual_min_i32 v6, v4, v3 ; encoding: [0x03,0x05,0xf0,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_min_i32 v6, s1, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_min_i32 v6, s1, v3 ; encoding: [0x69,0x04,0xf0,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_min_i32 v6, s105, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_min_i32 v6, s105, v3 ; encoding: [0x01,0x04,0xf0,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_min_i32 v6, vcc_lo, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_min_i32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xf0,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_min_i32 v6, vcc_hi, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_min_i32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xf0,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_min_i32 v6, ttmp15, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_min_i32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xf0,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_min_i32 v6, m0, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_min_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0xf0,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_min_i32 v6, exec_lo, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_min_i32 v6, exec_lo, v3 ; encoding: 
[0x6b,0x04,0xf0,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_min_i32 v6, exec_hi, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_min_i32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xf0,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_min_i32 v6, null, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_min_i32 v6, null, v3 ; encoding: [0xff,0x04,0xf0,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_min_i32 v6, -1, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_min_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0xf0,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_min_i32 v6, 0.5, v2 +// GFX12: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_min_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xf0,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_min_i32 v6, src_scc, v5 +// GFX12: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_min_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xf0,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_min_i32 v255, 0xaf123456, v4 +// GFX12: v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_min_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xf0,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v4, v2 :: v_dual_min_i32 v6, v1, v3 +// GFX12: v_dual_mul_f32 v255, v4, v2 :: v_dual_min_i32 v6, v1, v3 ; encoding: 
[0x04,0x05,0xf0,0xc8,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v1, v2 :: v_dual_min_i32 v6, v255, v3 +// GFX12: v_dual_mul_f32 v255, v1, v2 :: v_dual_min_i32 v6, v255, v3 ; encoding: [0x01,0x05,0xf0,0xc8,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v255, v2 :: v_dual_min_i32 v6, v2, v3 +// GFX12: v_dual_mul_f32 v255, v255, v2 :: v_dual_min_i32 v6, v2, v3 ; encoding: [0xff,0x05,0xf0,0xc8,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v2, v2 :: v_dual_min_i32 v6, v3, v3 +// GFX12: v_dual_mul_f32 v255, v2, v2 :: v_dual_min_i32 v6, v3, v3 ; encoding: [0x02,0x05,0xf0,0xc8,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v3, v2 :: v_dual_min_i32 v6, v4, v3 +// GFX12: v_dual_mul_f32 v255, v3, v2 :: v_dual_min_i32 v6, v4, v3 ; encoding: [0x03,0x05,0xf0,0xc8,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s105, v2 :: v_dual_min_i32 v6, s1, v3 +// GFX12: v_dual_mul_f32 v255, s105, v2 :: v_dual_min_i32 v6, s1, v3 ; encoding: [0x69,0x04,0xf0,0xc8,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s1, v2 :: v_dual_min_i32 v6, s105, v3 +// GFX12: v_dual_mul_f32 v255, s1, v2 :: v_dual_min_i32 v6, s105, v3 ; encoding: [0x01,0x04,0xf0,0xc8,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_min_i32 v6, vcc_lo, v3 +// GFX12: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_min_i32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xf0,0xc8,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_min_i32 v6, vcc_hi, v3 +// GFX12: v_dual_mul_f32 
v255, exec_hi, v2 :: v_dual_min_i32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xf0,0xc8,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_min_i32 v6, ttmp15, v3 +// GFX12: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_min_i32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xf0,0xc8,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, m0, v2 :: v_dual_min_i32 v6, m0, v3 +// GFX12: v_dual_mul_f32 v255, m0, v2 :: v_dual_min_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0xf0,0xc8,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_min_i32 v6, exec_lo, v3 +// GFX12: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_min_i32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xf0,0xc8,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_min_i32 v6, exec_hi, v3 +// GFX12: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_min_i32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xf0,0xc8,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_min_i32 v6, null, v3 +// GFX12: v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_min_i32 v6, null, v3 ; encoding: [0xff,0x04,0xf0,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, src_scc, v2 :: v_dual_min_i32 v6, -1, v3 +// GFX12: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_min_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0xf0,0xc8,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0.5, v3 :: v_dual_min_i32 v6, 0.5, v2 +// GFX12: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_min_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xf0,0xc8,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: 
error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, -1, v4 :: v_dual_min_i32 v6, src_scc, v5 +// GFX12: v_dual_mul_f32 v255, -1, v4 :: v_dual_min_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xf0,0xc8,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v6, null, v5 :: v_dual_min_i32 v255, 0xaf123456, v4 +// GFX12: v_dual_mul_f32 v6, null, v5 :: v_dual_min_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xf0,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v4, v2 :: v_dual_min_i32 v6, v1, v3 +// GFX12: v_dual_sub_f32 v255, v4, v2 :: v_dual_min_i32 v6, v1, v3 ; encoding: [0x04,0x05,0x70,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v1, v2 :: v_dual_min_i32 v6, v255, v3 +// GFX12: v_dual_sub_f32 v255, v1, v2 :: v_dual_min_i32 v6, v255, v3 ; encoding: [0x01,0x05,0x70,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v255, v2 :: v_dual_min_i32 v6, v2, v3 +// GFX12: v_dual_sub_f32 v255, v255, v2 :: v_dual_min_i32 v6, v2, v3 ; encoding: [0xff,0x05,0x70,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v2, v2 :: v_dual_min_i32 v6, v3, v3 +// GFX12: v_dual_sub_f32 v255, v2, v2 :: v_dual_min_i32 v6, v3, v3 ; encoding: [0x02,0x05,0x70,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v3, v2 :: v_dual_min_i32 v6, v4, v3 +// GFX12: v_dual_sub_f32 v255, v3, v2 :: v_dual_min_i32 v6, v4, v3 ; encoding: [0x03,0x05,0x70,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s105, v2 :: v_dual_min_i32 v6, s1, v3 +// GFX12: v_dual_sub_f32 v255, s105, v2 :: v_dual_min_i32 v6, s1, v3 ; encoding: 
[0x69,0x04,0x70,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s1, v2 :: v_dual_min_i32 v6, s105, v3 +// GFX12: v_dual_sub_f32 v255, s1, v2 :: v_dual_min_i32 v6, s105, v3 ; encoding: [0x01,0x04,0x70,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_min_i32 v6, vcc_lo, v3 +// GFX12: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_min_i32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x70,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_min_i32 v6, vcc_hi, v3 +// GFX12: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_min_i32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x70,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_min_i32 v6, ttmp15, v3 +// GFX12: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_min_i32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x70,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, m0, v2 :: v_dual_min_i32 v6, m0, v3 +// GFX12: v_dual_sub_f32 v255, m0, v2 :: v_dual_min_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0x70,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_min_i32 v6, exec_lo, v3 +// GFX12: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_min_i32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x70,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_min_i32 v6, exec_hi, v3 +// GFX12: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_min_i32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x70,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0xaf123456, v2 
:: v_dual_min_i32 v6, null, v3 +// GFX12: v_dual_sub_f32 v255, 0xaf123456, v2 :: v_dual_min_i32 v6, null, v3 ; encoding: [0xff,0x04,0x70,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, src_scc, v2 :: v_dual_min_i32 v6, -1, v3 +// GFX12: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_min_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0x70,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0.5, v3 :: v_dual_min_i32 v6, 0.5, v2 +// GFX12: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_min_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x70,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, -1, v4 :: v_dual_min_i32 v6, src_scc, v5 +// GFX12: v_dual_sub_f32 v255, -1, v4 :: v_dual_min_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x70,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v6, null, v5 :: v_dual_min_i32 v255, 0xaf123456, v4 +// GFX12: v_dual_sub_f32 v6, null, v5 :: v_dual_min_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x70,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v4, v2 :: v_dual_min_i32 v6, v1, v3 +// GFX12: v_dual_subrev_f32 v255, v4, v2 :: v_dual_min_i32 v6, v1, v3 ; encoding: [0x04,0x05,0xb0,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v1, v2 :: v_dual_min_i32 v6, v255, v3 +// GFX12: v_dual_subrev_f32 v255, v1, v2 :: v_dual_min_i32 v6, v255, v3 ; encoding: [0x01,0x05,0xb0,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v255, v2 :: v_dual_min_i32 v6, v2, v3 +// GFX12: v_dual_subrev_f32 v255, v255, v2 :: v_dual_min_i32 v6, v2, v3 ; encoding: 
[0xff,0x05,0xb0,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v2, v2 :: v_dual_min_i32 v6, v3, v3 +// GFX12: v_dual_subrev_f32 v255, v2, v2 :: v_dual_min_i32 v6, v3, v3 ; encoding: [0x02,0x05,0xb0,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v3, v2 :: v_dual_min_i32 v6, v4, v3 +// GFX12: v_dual_subrev_f32 v255, v3, v2 :: v_dual_min_i32 v6, v4, v3 ; encoding: [0x03,0x05,0xb0,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s105, v2 :: v_dual_min_i32 v6, s1, v3 +// GFX12: v_dual_subrev_f32 v255, s105, v2 :: v_dual_min_i32 v6, s1, v3 ; encoding: [0x69,0x04,0xb0,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s1, v2 :: v_dual_min_i32 v6, s105, v3 +// GFX12: v_dual_subrev_f32 v255, s1, v2 :: v_dual_min_i32 v6, s105, v3 ; encoding: [0x01,0x04,0xb0,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_min_i32 v6, vcc_lo, v3 +// GFX12: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_min_i32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xb0,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_min_i32 v6, vcc_hi, v3 +// GFX12: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_min_i32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xb0,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_min_i32 v6, ttmp15, v3 +// GFX12: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_min_i32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xb0,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 
m0, v2 :: v_dual_min_i32 v6, m0, v3 +// GFX12: v_dual_subrev_f32 v255, m0, v2 :: v_dual_min_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0xb0,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_min_i32 v6, exec_lo, v3 +// GFX12: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_min_i32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xb0,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_min_i32 v6, exec_hi, v3 +// GFX12: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_min_i32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xb0,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0xaf123456, v2 :: v_dual_min_i32 v6, null, v3 +// GFX12: v_dual_subrev_f32 v255, 0xaf123456, v2 :: v_dual_min_i32 v6, null, v3 ; encoding: [0xff,0x04,0xb0,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_min_i32 v6, -1, v3 +// GFX12: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_min_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0xb0,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_min_i32 v6, 0.5, v2 +// GFX12: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_min_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xb0,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, -1, v4 :: v_dual_min_i32 v6, src_scc, v5 +// GFX12: v_dual_subrev_f32 v255, -1, v4 :: v_dual_min_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xb0,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v6, null, v5 :: v_dual_min_i32 v255, 0xaf123456, v4 +// GFX12: v_dual_subrev_f32 v6, null, v5 :: 
v_dual_min_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xb0,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v4, v2 :: v_dual_sub_nc_u32 v6, v1, v3 +// GFX12: v_dual_add_f32 v255, v4, v2 :: v_dual_sub_nc_u32 v6, v1, v3 ; encoding: [0x04,0x05,0x28,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v1, v2 :: v_dual_sub_nc_u32 v6, v255, v3 +// GFX12: v_dual_add_f32 v255, v1, v2 :: v_dual_sub_nc_u32 v6, v255, v3 ; encoding: [0x01,0x05,0x28,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v255, v2 :: v_dual_sub_nc_u32 v6, v2, v3 +// GFX12: v_dual_add_f32 v255, v255, v2 :: v_dual_sub_nc_u32 v6, v2, v3 ; encoding: [0xff,0x05,0x28,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v2, v2 :: v_dual_sub_nc_u32 v6, v3, v3 +// GFX12: v_dual_add_f32 v255, v2, v2 :: v_dual_sub_nc_u32 v6, v3, v3 ; encoding: [0x02,0x05,0x28,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v3, v2 :: v_dual_sub_nc_u32 v6, v4, v3 +// GFX12: v_dual_add_f32 v255, v3, v2 :: v_dual_sub_nc_u32 v6, v4, v3 ; encoding: [0x03,0x05,0x28,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s105, v2 :: v_dual_sub_nc_u32 v6, s1, v3 +// GFX12: v_dual_add_f32 v255, s105, v2 :: v_dual_sub_nc_u32 v6, s1, v3 ; encoding: [0x69,0x04,0x28,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s1, v2 :: v_dual_sub_nc_u32 v6, s105, v3 +// GFX12: v_dual_add_f32 v255, s1, v2 :: v_dual_sub_nc_u32 v6, s105, v3 ; encoding: [0x01,0x04,0x28,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_add_f32 v255, ttmp15, v2 :: v_dual_sub_nc_u32 v6, vcc_lo, v3 +// GFX12: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_sub_nc_u32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x28,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v6, vcc_hi, v3 +// GFX12: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x28,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_lo, v2 :: v_dual_sub_nc_u32 v6, ttmp15, v3 +// GFX12: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_sub_nc_u32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x28,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, m0, v2 :: v_dual_sub_nc_u32 v6, m0, v3 +// GFX12: v_dual_add_f32 v255, m0, v2 :: v_dual_sub_nc_u32 v6, m0, v3 ; encoding: [0x7d,0x04,0x28,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v6, exec_lo, v3 +// GFX12: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x28,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v6, exec_hi, v3 +// GFX12: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x28,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_sub_nc_u32 v6, null, v3 +// GFX12: v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_sub_nc_u32 v6, null, v3 ; encoding: [0xff,0x04,0x28,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v6, -1, v3 +// 
GFX12: v_dual_add_f32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v6, -1, v3 ; encoding: [0xfd,0x04,0x28,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v6, 0.5, v2 +// GFX12: v_dual_add_f32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x28,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, -1, v4 :: v_dual_sub_nc_u32 v6, src_scc, v5 +// GFX12: v_dual_add_f32 v255, -1, v4 :: v_dual_sub_nc_u32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x28,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v6, null, v5 :: v_dual_sub_nc_u32 v255, 0xaf123456, v4 +// GFX12: v_dual_add_f32 v6, null, v5 :: v_dual_sub_nc_u32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x28,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2 :: v_dual_sub_nc_u32 v6, v1, v3 +// GFX12: v_dual_cndmask_b32 v255, v4, v2 :: v_dual_sub_nc_u32 v6, v1, v3 ; encoding: [0x04,0x05,0x68,0xca,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v1, v2 :: v_dual_sub_nc_u32 v6, v255, v3 +// GFX12: v_dual_cndmask_b32 v255, v1, v2 :: v_dual_sub_nc_u32 v6, v255, v3 ; encoding: [0x01,0x05,0x68,0xca,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v255, v2 :: v_dual_sub_nc_u32 v6, v2, v3 +// GFX12: v_dual_cndmask_b32 v255, v255, v2 :: v_dual_sub_nc_u32 v6, v2, v3 ; encoding: [0xff,0x05,0x68,0xca,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v2, v2 :: v_dual_sub_nc_u32 v6, v3, v3 +// GFX12: v_dual_cndmask_b32 v255, v2, v2 :: v_dual_sub_nc_u32 v6, v3, v3 ; encoding: 
[0x02,0x05,0x68,0xca,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v3, v2 :: v_dual_sub_nc_u32 v6, v4, v3 +// GFX12: v_dual_cndmask_b32 v255, v3, v2 :: v_dual_sub_nc_u32 v6, v4, v3 ; encoding: [0x03,0x05,0x68,0xca,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s105, v2 :: v_dual_sub_nc_u32 v6, s105, v3 +// GFX12: v_dual_cndmask_b32 v255, s105, v2 :: v_dual_sub_nc_u32 v6, s105, v3 ; encoding: [0x69,0x04,0x68,0xca,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s1, v2 :: v_dual_sub_nc_u32 v6, s1, v3 +// GFX12: v_dual_cndmask_b32 v255, s1, v2 :: v_dual_sub_nc_u32 v6, s1, v3 ; encoding: [0x01,0x04,0x68,0xca,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, ttmp15, v2 :: v_dual_sub_nc_u32 v6, ttmp15, v3 +// GFX12: v_dual_cndmask_b32 v255, ttmp15, v2 :: v_dual_sub_nc_u32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x68,0xca,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v6, exec_hi, v3 +// GFX12: v_dual_cndmask_b32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x68,0xca,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_lo, v2 :: v_dual_sub_nc_u32 v6, exec_lo, v3 +// GFX12: v_dual_cndmask_b32 v255, exec_lo, v2 :: v_dual_sub_nc_u32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x68,0xca,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, m0, v2 :: v_dual_sub_nc_u32 v6, m0, v3 +// GFX12: v_dual_cndmask_b32 v255, m0, v2 :: v_dual_sub_nc_u32 v6, m0, v3 ; encoding: [0x7d,0x04,0x68,0xca,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v6, vcc_hi, v3 +// GFX12: v_dual_cndmask_b32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x68,0xca,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v6, vcc_lo, v3 +// GFX12: v_dual_cndmask_b32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x68,0xca,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_sub_nc_u32 v6, null, v3 +// GFX12: v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_sub_nc_u32 v6, null, v3 ; encoding: [0xff,0x04,0x68,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v6, -1, v3 +// GFX12: v_dual_cndmask_b32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v6, -1, v3 ; encoding: [0xfd,0x04,0x68,0xca,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v6, 0.5, v2 +// GFX12: v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x68,0xca,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, -1, v4 :: v_dual_sub_nc_u32 v6, src_scc, v5 +// GFX12: v_dual_cndmask_b32 v255, -1, v4 :: v_dual_sub_nc_u32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x68,0xca,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v6, null, v5 :: v_dual_sub_nc_u32 v255, 0xaf123456, v4 +// GFX12: v_dual_cndmask_b32 v6, null, v5 :: v_dual_sub_nc_u32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x68,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_sub_nc_u32 v6, v1, v3 +// GFX12: v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_sub_nc_u32 v6, v1, v3 ; encoding: [0x04,0x05,0x68,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: v_dual_sub_nc_u32 v6, v255, v3 +// GFX12: v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: v_dual_sub_nc_u32 v6, v255, v3 ; encoding: [0x01,0x05,0x68,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_sub_nc_u32 v6, v2, v3 +// GFX12: v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_sub_nc_u32 v6, v2, v3 ; encoding: [0xff,0x05,0x68,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_sub_nc_u32 v6, v3, v3 +// GFX12: v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_sub_nc_u32 v6, v3, v3 ; encoding: [0x02,0x05,0x68,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_sub_nc_u32 v6, v4, v3 +// GFX12: v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_sub_nc_u32 v6, v4, v3 ; encoding: [0x03,0x05,0x68,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, s105, v2, 0xaf123456 :: v_dual_sub_nc_u32 v6, s105, v3 +// GFX12: v_dual_fmaak_f32 v255, s105, v2, 0xaf123456 :: v_dual_sub_nc_u32 v6, s105, v3 ; encoding: [0x69,0x04,0x68,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, s1, v2, 0xaf123456 :: v_dual_sub_nc_u32 v6, s1, v3 +// GFX12: v_dual_fmaak_f32 v255, s1, v2, 
0xaf123456 :: v_dual_sub_nc_u32 v6, s1, v3 ; encoding: [0x01,0x04,0x68,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, ttmp15, v2, 0xaf123456 :: v_dual_sub_nc_u32 v6, ttmp15, v3 +// GFX12: v_dual_fmaak_f32 v255, ttmp15, v2, 0xaf123456 :: v_dual_sub_nc_u32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x68,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, exec_hi, v2, 0xaf123456 :: v_dual_sub_nc_u32 v6, exec_hi, v3 +// GFX12: v_dual_fmaak_f32 v255, exec_hi, v2, 0xaf123456 :: v_dual_sub_nc_u32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x68,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, exec_lo, v2, 0xaf123456 :: v_dual_sub_nc_u32 v6, exec_lo, v3 +// GFX12: v_dual_fmaak_f32 v255, exec_lo, v2, 0xaf123456 :: v_dual_sub_nc_u32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x68,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, m0, v2, 0xaf123456 :: v_dual_sub_nc_u32 v6, m0, v3 +// GFX12: v_dual_fmaak_f32 v255, m0, v2, 0xaf123456 :: v_dual_sub_nc_u32 v6, m0, v3 ; encoding: [0x7d,0x04,0x68,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, vcc_hi, v2, 0xaf123456 :: v_dual_sub_nc_u32 v6, vcc_hi, v3 +// GFX12: v_dual_fmaak_f32 v255, vcc_hi, v2, 0xaf123456 :: v_dual_sub_nc_u32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x68,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, vcc_lo, v2, 0xaf123456 :: v_dual_sub_nc_u32 v6, vcc_lo, v3 +// GFX12: v_dual_fmaak_f32 v255, vcc_lo, v2, 0xaf123456 :: v_dual_sub_nc_u32 v6, vcc_lo, v3 ; encoding: 
[0x6a,0x04,0x68,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_sub_nc_u32 v6, null, v3 +// GFX12: v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_sub_nc_u32 v6, null, v3 ; encoding: [0xff,0x04,0x68,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, src_scc, v2, 0xaf123456 :: v_dual_sub_nc_u32 v6, -1, v3 +// GFX12: v_dual_fmaak_f32 v255, src_scc, v2, 0xaf123456 :: v_dual_sub_nc_u32 v6, -1, v3 ; encoding: [0xfd,0x04,0x68,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_sub_nc_u32 v6, 0.5, v2 +// GFX12: v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_sub_nc_u32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x68,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_sub_nc_u32 v6, src_scc, v5 +// GFX12: v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_sub_nc_u32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x68,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_sub_nc_u32 v255, 0xaf123456, v4 +// GFX12: v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_sub_nc_u32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x68,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v4, v2 :: v_dual_sub_nc_u32 v6, v1, v3 +// GFX12: v_dual_fmac_f32 v255, v4, v2 :: v_dual_sub_nc_u32 v6, v1, v3 ; encoding: [0x04,0x05,0x28,0xc8,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 
v255, v1, v2 :: v_dual_sub_nc_u32 v6, v255, v3 +// GFX12: v_dual_fmac_f32 v255, v1, v2 :: v_dual_sub_nc_u32 v6, v255, v3 ; encoding: [0x01,0x05,0x28,0xc8,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v255, v2 :: v_dual_sub_nc_u32 v6, v2, v3 +// GFX12: v_dual_fmac_f32 v255, v255, v2 :: v_dual_sub_nc_u32 v6, v2, v3 ; encoding: [0xff,0x05,0x28,0xc8,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v2, v2 :: v_dual_sub_nc_u32 v6, v3, v3 +// GFX12: v_dual_fmac_f32 v255, v2, v2 :: v_dual_sub_nc_u32 v6, v3, v3 ; encoding: [0x02,0x05,0x28,0xc8,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v3, v2 :: v_dual_sub_nc_u32 v6, v4, v3 +// GFX12: v_dual_fmac_f32 v255, v3, v2 :: v_dual_sub_nc_u32 v6, v4, v3 ; encoding: [0x03,0x05,0x28,0xc8,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s105, v2 :: v_dual_sub_nc_u32 v6, s1, v3 +// GFX12: v_dual_fmac_f32 v255, s105, v2 :: v_dual_sub_nc_u32 v6, s1, v3 ; encoding: [0x69,0x04,0x28,0xc8,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s1, v2 :: v_dual_sub_nc_u32 v6, s105, v3 +// GFX12: v_dual_fmac_f32 v255, s1, v2 :: v_dual_sub_nc_u32 v6, s105, v3 ; encoding: [0x01,0x04,0x28,0xc8,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_sub_nc_u32 v6, vcc_lo, v3 +// GFX12: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_sub_nc_u32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x28,0xc8,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v6, vcc_hi, v3 +// GFX12: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v6, vcc_hi, v3 ; encoding: 
[0x7f,0x04,0x28,0xc8,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_sub_nc_u32 v6, ttmp15, v3 +// GFX12: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_sub_nc_u32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x28,0xc8,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, m0, v2 :: v_dual_sub_nc_u32 v6, m0, v3 +// GFX12: v_dual_fmac_f32 v255, m0, v2 :: v_dual_sub_nc_u32 v6, m0, v3 ; encoding: [0x7d,0x04,0x28,0xc8,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v6, exec_lo, v3 +// GFX12: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x28,0xc8,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v6, exec_hi, v3 +// GFX12: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x28,0xc8,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_sub_nc_u32 v6, null, v3 +// GFX12: v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_sub_nc_u32 v6, null, v3 ; encoding: [0xff,0x04,0x28,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v6, -1, v3 +// GFX12: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v6, -1, v3 ; encoding: [0xfd,0x04,0x28,0xc8,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v6, 0.5, v2 +// GFX12: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x28,0xc8,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_fmac_f32 v255, -1, v4 :: v_dual_sub_nc_u32 v6, src_scc, v5 +// GFX12: v_dual_fmac_f32 v255, -1, v4 :: v_dual_sub_nc_u32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x28,0xc8,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v6, null, v5 :: v_dual_sub_nc_u32 v255, 0xaf123456, v4 +// GFX12: v_dual_fmac_f32 v6, null, v5 :: v_dual_sub_nc_u32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x28,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, v1, v255 +// GFX12: v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, v1, v255 ; encoding: [0x04,0xff,0xa9,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, v255, v255 +// GFX12: v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, v255, v255 ; encoding: [0x01,0xff,0xa9,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, v2, v255 +// GFX12: v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, v2, v255 ; encoding: [0xff,0xff,0xa9,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, v3, v255 +// GFX12: v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, v3, v255 ; encoding: [0x02,0xff,0xa9,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, v4, v255 +// GFX12: v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: 
v_dual_sub_nc_u32 v6, v4, v255 ; encoding: [0x03,0xff,0xa9,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, s105, v255 +// GFX12: v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, s105, v255 ; encoding: [0x69,0xfe,0xa9,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, s1, v255 +// GFX12: v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, s1, v255 ; encoding: [0x01,0xfe,0xa9,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, ttmp15, v255 +// GFX12: v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, ttmp15, v255 ; encoding: [0x7b,0xfe,0xa9,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, exec_hi, v255 +// GFX12: v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, exec_hi, v255 ; encoding: [0x7f,0xfe,0xa9,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, exec_lo, v255 +// GFX12: v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, exec_lo, v255 ; encoding: [0x7e,0xfe,0xa9,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, m0, v255 +// GFX12: v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, m0, v255 ; encoding: 
[0x7d,0xfe,0xa9,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, vcc_hi, v255 +// GFX12: v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, vcc_hi, v255 ; encoding: [0x6b,0xfe,0xa9,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, vcc_lo, v255 +// GFX12: v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, vcc_lo, v255 ; encoding: [0x6a,0xfe,0xa9,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, null, v255 +// GFX12: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, null, v255 ; encoding: [0xff,0xfe,0xa9,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, -1, v255 +// GFX12: v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, -1, v255 ; encoding: [0xfd,0xfe,0xa9,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, 0.5, v3 +// GFX12: v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, 0.5, v3 ; encoding: [0xf0,0xfe,0xa9,0xc8,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, src_scc, v4 +// GFX12: v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, src_scc, v4 ; encoding: 
[0xc1,0xfe,0xa9,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_sub_nc_u32 v255, 0xaf123456, v5 +// GFX12: v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_sub_nc_u32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x08,0xa8,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v4, v2 :: v_dual_sub_nc_u32 v6, v1, v3 +// GFX12: v_dual_max_num_f32 v255, v4, v2 :: v_dual_sub_nc_u32 v6, v1, v3 ; encoding: [0x04,0x05,0xa8,0xca,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v1, v2 :: v_dual_sub_nc_u32 v6, v255, v3 +// GFX12: v_dual_max_num_f32 v255, v1, v2 :: v_dual_sub_nc_u32 v6, v255, v3 ; encoding: [0x01,0x05,0xa8,0xca,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v255, v2 :: v_dual_sub_nc_u32 v6, v2, v3 +// GFX12: v_dual_max_num_f32 v255, v255, v2 :: v_dual_sub_nc_u32 v6, v2, v3 ; encoding: [0xff,0x05,0xa8,0xca,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v2, v2 :: v_dual_sub_nc_u32 v6, v3, v3 +// GFX12: v_dual_max_num_f32 v255, v2, v2 :: v_dual_sub_nc_u32 v6, v3, v3 ; encoding: [0x02,0x05,0xa8,0xca,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v3, v2 :: v_dual_sub_nc_u32 v6, v4, v3 +// GFX12: v_dual_max_num_f32 v255, v3, v2 :: v_dual_sub_nc_u32 v6, v4, v3 ; encoding: [0x03,0x05,0xa8,0xca,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s105, v2 :: v_dual_sub_nc_u32 v6, s1, v3 +// GFX12: v_dual_max_num_f32 v255, s105, v2 :: v_dual_sub_nc_u32 v6, s1, v3 ; encoding: [0x69,0x04,0xa8,0xca,0x01,0x06,0x06,0xff] +// 
W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s1, v2 :: v_dual_sub_nc_u32 v6, s105, v3 +// GFX12: v_dual_max_num_f32 v255, s1, v2 :: v_dual_sub_nc_u32 v6, s105, v3 ; encoding: [0x01,0x04,0xa8,0xca,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_sub_nc_u32 v6, vcc_lo, v3 +// GFX12: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_sub_nc_u32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xa8,0xca,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v6, vcc_hi, v3 +// GFX12: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xa8,0xca,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_sub_nc_u32 v6, ttmp15, v3 +// GFX12: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_sub_nc_u32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xa8,0xca,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, m0, v2 :: v_dual_sub_nc_u32 v6, m0, v3 +// GFX12: v_dual_max_num_f32 v255, m0, v2 :: v_dual_sub_nc_u32 v6, m0, v3 ; encoding: [0x7d,0x04,0xa8,0xca,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v6, exec_lo, v3 +// GFX12: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xa8,0xca,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v6, exec_hi, v3 +// GFX12: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xa8,0xca,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_max_num_f32 v255, 0xaf123456, v2 :: v_dual_sub_nc_u32 v6, null, v3 +// GFX12: v_dual_max_num_f32 v255, 0xaf123456, v2 :: v_dual_sub_nc_u32 v6, null, v3 ; encoding: [0xff,0x04,0xa8,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v6, -1, v3 +// GFX12: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v6, -1, v3 ; encoding: [0xfd,0x04,0xa8,0xca,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v6, 0.5, v2 +// GFX12: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xa8,0xca,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, -1, v4 :: v_dual_sub_nc_u32 v6, src_scc, v5 +// GFX12: v_dual_max_num_f32 v255, -1, v4 :: v_dual_sub_nc_u32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xa8,0xca,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v6, null, v5 :: v_dual_sub_nc_u32 v255, 0xaf123456, v4 +// GFX12: v_dual_max_num_f32 v6, null, v5 :: v_dual_sub_nc_u32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xa8,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v4, v2 :: v_dual_sub_nc_u32 v6, v1, v3 +// GFX12: v_dual_min_num_f32 v255, v4, v2 :: v_dual_sub_nc_u32 v6, v1, v3 ; encoding: [0x04,0x05,0xe8,0xca,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v1, v2 :: v_dual_sub_nc_u32 v6, v255, v3 +// GFX12: v_dual_min_num_f32 v255, v1, v2 :: v_dual_sub_nc_u32 v6, v255, v3 ; encoding: [0x01,0x05,0xe8,0xca,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 
v255, v255, v2 :: v_dual_sub_nc_u32 v6, v2, v3 +// GFX12: v_dual_min_num_f32 v255, v255, v2 :: v_dual_sub_nc_u32 v6, v2, v3 ; encoding: [0xff,0x05,0xe8,0xca,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v2, v2 :: v_dual_sub_nc_u32 v6, v3, v3 +// GFX12: v_dual_min_num_f32 v255, v2, v2 :: v_dual_sub_nc_u32 v6, v3, v3 ; encoding: [0x02,0x05,0xe8,0xca,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v3, v2 :: v_dual_sub_nc_u32 v6, v4, v3 +// GFX12: v_dual_min_num_f32 v255, v3, v2 :: v_dual_sub_nc_u32 v6, v4, v3 ; encoding: [0x03,0x05,0xe8,0xca,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s105, v2 :: v_dual_sub_nc_u32 v6, s1, v3 +// GFX12: v_dual_min_num_f32 v255, s105, v2 :: v_dual_sub_nc_u32 v6, s1, v3 ; encoding: [0x69,0x04,0xe8,0xca,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s1, v2 :: v_dual_sub_nc_u32 v6, s105, v3 +// GFX12: v_dual_min_num_f32 v255, s1, v2 :: v_dual_sub_nc_u32 v6, s105, v3 ; encoding: [0x01,0x04,0xe8,0xca,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_sub_nc_u32 v6, vcc_lo, v3 +// GFX12: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_sub_nc_u32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xe8,0xca,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v6, vcc_hi, v3 +// GFX12: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xe8,0xca,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_sub_nc_u32 v6, ttmp15, v3 +// GFX12: v_dual_min_num_f32 v255, exec_lo, 
v2 :: v_dual_sub_nc_u32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xe8,0xca,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, m0, v2 :: v_dual_sub_nc_u32 v6, m0, v3 +// GFX12: v_dual_min_num_f32 v255, m0, v2 :: v_dual_sub_nc_u32 v6, m0, v3 ; encoding: [0x7d,0x04,0xe8,0xca,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v6, exec_lo, v3 +// GFX12: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xe8,0xca,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v6, exec_hi, v3 +// GFX12: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xe8,0xca,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_sub_nc_u32 v6, null, v3 +// GFX12: v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_sub_nc_u32 v6, null, v3 ; encoding: [0xff,0x04,0xe8,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v6, -1, v3 +// GFX12: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v6, -1, v3 ; encoding: [0xfd,0x04,0xe8,0xca,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v6, 0.5, v2 +// GFX12: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xe8,0xca,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, -1, v4 :: v_dual_sub_nc_u32 v6, src_scc, v5 +// GFX12: v_dual_min_num_f32 v255, -1, v4 :: v_dual_sub_nc_u32 v6, src_scc, v5 ; 
encoding: [0xc1,0x08,0xe8,0xca,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v6, null, v5 :: v_dual_sub_nc_u32 v255, 0xaf123456, v4 +// GFX12: v_dual_min_num_f32 v6, null, v5 :: v_dual_sub_nc_u32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xe8,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v4 :: v_dual_sub_nc_u32 v6, v1, v255 +// GFX12: v_dual_mov_b32 v255, v4 :: v_dual_sub_nc_u32 v6, v1, v255 ; encoding: [0x04,0x01,0x28,0xca,0x01,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v1 :: v_dual_sub_nc_u32 v6, v255, v255 +// GFX12: v_dual_mov_b32 v255, v1 :: v_dual_sub_nc_u32 v6, v255, v255 ; encoding: [0x01,0x01,0x28,0xca,0xff,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v255 :: v_dual_sub_nc_u32 v6, v2, v255 +// GFX12: v_dual_mov_b32 v255, v255 :: v_dual_sub_nc_u32 v6, v2, v255 ; encoding: [0xff,0x01,0x28,0xca,0x02,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v2 :: v_dual_sub_nc_u32 v6, v3, v255 +// GFX12: v_dual_mov_b32 v255, v2 :: v_dual_sub_nc_u32 v6, v3, v255 ; encoding: [0x02,0x01,0x28,0xca,0x03,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v3 :: v_dual_sub_nc_u32 v6, v4, v255 +// GFX12: v_dual_mov_b32 v255, v3 :: v_dual_sub_nc_u32 v6, v4, v255 ; encoding: [0x03,0x01,0x28,0xca,0x04,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s105 :: v_dual_sub_nc_u32 v6, s1, v255 +// GFX12: v_dual_mov_b32 v255, s105 :: v_dual_sub_nc_u32 v6, s1, v255 ; encoding: [0x69,0x00,0x28,0xca,0x01,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s1 :: 
v_dual_sub_nc_u32 v6, s105, v255 +// GFX12: v_dual_mov_b32 v255, s1 :: v_dual_sub_nc_u32 v6, s105, v255 ; encoding: [0x01,0x00,0x28,0xca,0x69,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, ttmp15 :: v_dual_sub_nc_u32 v6, vcc_lo, v255 +// GFX12: v_dual_mov_b32 v255, ttmp15 :: v_dual_sub_nc_u32 v6, vcc_lo, v255 ; encoding: [0x7b,0x00,0x28,0xca,0x6a,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_hi :: v_dual_sub_nc_u32 v6, vcc_hi, v255 +// GFX12: v_dual_mov_b32 v255, exec_hi :: v_dual_sub_nc_u32 v6, vcc_hi, v255 ; encoding: [0x7f,0x00,0x28,0xca,0x6b,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_lo :: v_dual_sub_nc_u32 v6, ttmp15, v255 +// GFX12: v_dual_mov_b32 v255, exec_lo :: v_dual_sub_nc_u32 v6, ttmp15, v255 ; encoding: [0x7e,0x00,0x28,0xca,0x7b,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, m0 :: v_dual_sub_nc_u32 v6, m0, v255 +// GFX12: v_dual_mov_b32 v255, m0 :: v_dual_sub_nc_u32 v6, m0, v255 ; encoding: [0x7d,0x00,0x28,0xca,0x7d,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_hi :: v_dual_sub_nc_u32 v6, exec_lo, v255 +// GFX12: v_dual_mov_b32 v255, vcc_hi :: v_dual_sub_nc_u32 v6, exec_lo, v255 ; encoding: [0x6b,0x00,0x28,0xca,0x7e,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_lo :: v_dual_sub_nc_u32 v6, exec_hi, v255 +// GFX12: v_dual_mov_b32 v255, vcc_lo :: v_dual_sub_nc_u32 v6, exec_hi, v255 ; encoding: [0x6a,0x00,0x28,0xca,0x7f,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0xaf123456 :: v_dual_sub_nc_u32 v6, null, v255 +// GFX12: v_dual_mov_b32 v255, 0xaf123456 :: v_dual_sub_nc_u32 v6, null, v255 ; encoding: 
[0xff,0x00,0x28,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, src_scc :: v_dual_sub_nc_u32 v6, -1, v255 +// GFX12: v_dual_mov_b32 v255, src_scc :: v_dual_sub_nc_u32 v6, -1, v255 ; encoding: [0xfd,0x00,0x28,0xca,0xc1,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0.5 :: v_dual_sub_nc_u32 v6, 0.5, v3 +// GFX12: v_dual_mov_b32 v255, 0.5 :: v_dual_sub_nc_u32 v6, 0.5, v3 ; encoding: [0xf0,0x00,0x28,0xca,0xf0,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, -1 :: v_dual_sub_nc_u32 v6, src_scc, v4 +// GFX12: v_dual_mov_b32 v255, -1 :: v_dual_sub_nc_u32 v6, src_scc, v4 ; encoding: [0xc1,0x00,0x28,0xca,0xfd,0x08,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v6, null :: v_dual_sub_nc_u32 v255, 0xaf123456, v5 +// GFX12: v_dual_mov_b32 v6, null :: v_dual_sub_nc_u32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x00,0x28,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_sub_nc_u32 v6, v1, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_sub_nc_u32 v6, v1, v3 ; encoding: [0x04,0x05,0xe8,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_sub_nc_u32 v6, v255, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_sub_nc_u32 v6, v255, v3 ; encoding: [0x01,0x05,0xe8,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_sub_nc_u32 v6, v2, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_sub_nc_u32 v6, v2, v3 ; encoding: [0xff,0x05,0xe8,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_sub_nc_u32 v6, v3, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_sub_nc_u32 v6, v3, v3 ; encoding: [0x02,0x05,0xe8,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_sub_nc_u32 v6, v4, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_sub_nc_u32 v6, v4, v3 ; encoding: [0x03,0x05,0xe8,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_sub_nc_u32 v6, s1, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_sub_nc_u32 v6, s1, v3 ; encoding: [0x69,0x04,0xe8,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_sub_nc_u32 v6, s105, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_sub_nc_u32 v6, s105, v3 ; encoding: [0x01,0x04,0xe8,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_sub_nc_u32 v6, vcc_lo, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_sub_nc_u32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xe8,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v6, vcc_hi, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xe8,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_sub_nc_u32 v6, ttmp15, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_sub_nc_u32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xe8,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction 
requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_sub_nc_u32 v6, m0, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_sub_nc_u32 v6, m0, v3 ; encoding: [0x7d,0x04,0xe8,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v6, exec_lo, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xe8,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v6, exec_hi, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xe8,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_sub_nc_u32 v6, null, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_sub_nc_u32 v6, null, v3 ; encoding: [0xff,0x04,0xe8,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v6, -1, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v6, -1, v3 ; encoding: [0xfd,0x04,0xe8,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v6, 0.5, v2 +// GFX12: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xe8,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_sub_nc_u32 v6, src_scc, v5 +// GFX12: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_sub_nc_u32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xe8,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_sub_nc_u32 v255, 0xaf123456, v4 +// GFX12: v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_sub_nc_u32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xe8,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v4, v2 :: v_dual_sub_nc_u32 v6, v1, v3 +// GFX12: v_dual_mul_f32 v255, v4, v2 :: v_dual_sub_nc_u32 v6, v1, v3 ; encoding: [0x04,0x05,0xe8,0xc8,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v1, v2 :: v_dual_sub_nc_u32 v6, v255, v3 +// GFX12: v_dual_mul_f32 v255, v1, v2 :: v_dual_sub_nc_u32 v6, v255, v3 ; encoding: [0x01,0x05,0xe8,0xc8,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v255, v2 :: v_dual_sub_nc_u32 v6, v2, v3 +// GFX12: v_dual_mul_f32 v255, v255, v2 :: v_dual_sub_nc_u32 v6, v2, v3 ; encoding: [0xff,0x05,0xe8,0xc8,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v2, v2 :: v_dual_sub_nc_u32 v6, v3, v3 +// GFX12: v_dual_mul_f32 v255, v2, v2 :: v_dual_sub_nc_u32 v6, v3, v3 ; encoding: [0x02,0x05,0xe8,0xc8,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v3, v2 :: v_dual_sub_nc_u32 v6, v4, v3 +// GFX12: v_dual_mul_f32 v255, v3, v2 :: v_dual_sub_nc_u32 v6, v4, v3 ; encoding: [0x03,0x05,0xe8,0xc8,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s105, v2 :: v_dual_sub_nc_u32 v6, s1, v3 +// GFX12: v_dual_mul_f32 v255, s105, v2 :: v_dual_sub_nc_u32 v6, s1, v3 ; encoding: [0x69,0x04,0xe8,0xc8,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s1, v2 :: v_dual_sub_nc_u32 v6, s105, v3 +// 
GFX12: v_dual_mul_f32 v255, s1, v2 :: v_dual_sub_nc_u32 v6, s105, v3 ; encoding: [0x01,0x04,0xe8,0xc8,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_sub_nc_u32 v6, vcc_lo, v3 +// GFX12: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_sub_nc_u32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xe8,0xc8,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v6, vcc_hi, v3 +// GFX12: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xe8,0xc8,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_sub_nc_u32 v6, ttmp15, v3 +// GFX12: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_sub_nc_u32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xe8,0xc8,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, m0, v2 :: v_dual_sub_nc_u32 v6, m0, v3 +// GFX12: v_dual_mul_f32 v255, m0, v2 :: v_dual_sub_nc_u32 v6, m0, v3 ; encoding: [0x7d,0x04,0xe8,0xc8,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v6, exec_lo, v3 +// GFX12: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xe8,0xc8,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v6, exec_hi, v3 +// GFX12: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xe8,0xc8,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_sub_nc_u32 v6, null, v3 +// GFX12: v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_sub_nc_u32 v6, null, v3 ; encoding: 
[0xff,0x04,0xe8,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v6, -1, v3 +// GFX12: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v6, -1, v3 ; encoding: [0xfd,0x04,0xe8,0xc8,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v6, 0.5, v2 +// GFX12: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xe8,0xc8,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, -1, v4 :: v_dual_sub_nc_u32 v6, src_scc, v5 +// GFX12: v_dual_mul_f32 v255, -1, v4 :: v_dual_sub_nc_u32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xe8,0xc8,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v6, null, v5 :: v_dual_sub_nc_u32 v255, 0xaf123456, v4 +// GFX12: v_dual_mul_f32 v6, null, v5 :: v_dual_sub_nc_u32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xe8,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v4, v2 :: v_dual_sub_nc_u32 v6, v1, v3 +// GFX12: v_dual_sub_f32 v255, v4, v2 :: v_dual_sub_nc_u32 v6, v1, v3 ; encoding: [0x04,0x05,0x68,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v1, v2 :: v_dual_sub_nc_u32 v6, v255, v3 +// GFX12: v_dual_sub_f32 v255, v1, v2 :: v_dual_sub_nc_u32 v6, v255, v3 ; encoding: [0x01,0x05,0x68,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v255, v2 :: v_dual_sub_nc_u32 v6, v2, v3 +// GFX12: v_dual_sub_f32 v255, v255, v2 :: v_dual_sub_nc_u32 v6, v2, v3 ; encoding: [0xff,0x05,0x68,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_sub_f32 v255, v2, v2 :: v_dual_sub_nc_u32 v6, v3, v3 +// GFX12: v_dual_sub_f32 v255, v2, v2 :: v_dual_sub_nc_u32 v6, v3, v3 ; encoding: [0x02,0x05,0x68,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v3, v2 :: v_dual_sub_nc_u32 v6, v4, v3 +// GFX12: v_dual_sub_f32 v255, v3, v2 :: v_dual_sub_nc_u32 v6, v4, v3 ; encoding: [0x03,0x05,0x68,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s105, v2 :: v_dual_sub_nc_u32 v6, s1, v3 +// GFX12: v_dual_sub_f32 v255, s105, v2 :: v_dual_sub_nc_u32 v6, s1, v3 ; encoding: [0x69,0x04,0x68,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s1, v2 :: v_dual_sub_nc_u32 v6, s105, v3 +// GFX12: v_dual_sub_f32 v255, s1, v2 :: v_dual_sub_nc_u32 v6, s105, v3 ; encoding: [0x01,0x04,0x68,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_sub_nc_u32 v6, vcc_lo, v3 +// GFX12: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_sub_nc_u32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x68,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v6, vcc_hi, v3 +// GFX12: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x68,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_sub_nc_u32 v6, ttmp15, v3 +// GFX12: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_sub_nc_u32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x68,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, m0, v2 :: v_dual_sub_nc_u32 v6, m0, v3 +// GFX12: v_dual_sub_f32 v255, m0, v2 :: v_dual_sub_nc_u32 v6, 
m0, v3 ; encoding: [0x7d,0x04,0x68,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v6, exec_lo, v3 +// GFX12: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x68,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v6, exec_hi, v3 +// GFX12: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x68,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0xaf123456, v2 :: v_dual_sub_nc_u32 v6, null, v3 +// GFX12: v_dual_sub_f32 v255, 0xaf123456, v2 :: v_dual_sub_nc_u32 v6, null, v3 ; encoding: [0xff,0x04,0x68,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v6, -1, v3 +// GFX12: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v6, -1, v3 ; encoding: [0xfd,0x04,0x68,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v6, 0.5, v2 +// GFX12: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x68,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, -1, v4 :: v_dual_sub_nc_u32 v6, src_scc, v5 +// GFX12: v_dual_sub_f32 v255, -1, v4 :: v_dual_sub_nc_u32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x68,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v6, null, v5 :: v_dual_sub_nc_u32 v255, 0xaf123456, v4 +// GFX12: v_dual_sub_f32 v6, null, v5 :: v_dual_sub_nc_u32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x68,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] 
+// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v4, v2 :: v_dual_sub_nc_u32 v6, v1, v3 +// GFX12: v_dual_subrev_f32 v255, v4, v2 :: v_dual_sub_nc_u32 v6, v1, v3 ; encoding: [0x04,0x05,0xa8,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v1, v2 :: v_dual_sub_nc_u32 v6, v255, v3 +// GFX12: v_dual_subrev_f32 v255, v1, v2 :: v_dual_sub_nc_u32 v6, v255, v3 ; encoding: [0x01,0x05,0xa8,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v255, v2 :: v_dual_sub_nc_u32 v6, v2, v3 +// GFX12: v_dual_subrev_f32 v255, v255, v2 :: v_dual_sub_nc_u32 v6, v2, v3 ; encoding: [0xff,0x05,0xa8,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v2, v2 :: v_dual_sub_nc_u32 v6, v3, v3 +// GFX12: v_dual_subrev_f32 v255, v2, v2 :: v_dual_sub_nc_u32 v6, v3, v3 ; encoding: [0x02,0x05,0xa8,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v3, v2 :: v_dual_sub_nc_u32 v6, v4, v3 +// GFX12: v_dual_subrev_f32 v255, v3, v2 :: v_dual_sub_nc_u32 v6, v4, v3 ; encoding: [0x03,0x05,0xa8,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s105, v2 :: v_dual_sub_nc_u32 v6, s1, v3 +// GFX12: v_dual_subrev_f32 v255, s105, v2 :: v_dual_sub_nc_u32 v6, s1, v3 ; encoding: [0x69,0x04,0xa8,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s1, v2 :: v_dual_sub_nc_u32 v6, s105, v3 +// GFX12: v_dual_subrev_f32 v255, s1, v2 :: v_dual_sub_nc_u32 v6, s105, v3 ; encoding: [0x01,0x04,0xa8,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_sub_nc_u32 v6, vcc_lo, v3 
+// GFX12: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_sub_nc_u32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xa8,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v6, vcc_hi, v3 +// GFX12: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xa8,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_sub_nc_u32 v6, ttmp15, v3 +// GFX12: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_sub_nc_u32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xa8,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, m0, v2 :: v_dual_sub_nc_u32 v6, m0, v3 +// GFX12: v_dual_subrev_f32 v255, m0, v2 :: v_dual_sub_nc_u32 v6, m0, v3 ; encoding: [0x7d,0x04,0xa8,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v6, exec_lo, v3 +// GFX12: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xa8,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v6, exec_hi, v3 +// GFX12: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xa8,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0xaf123456, v2 :: v_dual_sub_nc_u32 v6, null, v3 +// GFX12: v_dual_subrev_f32 v255, 0xaf123456, v2 :: v_dual_sub_nc_u32 v6, null, v3 ; encoding: [0xff,0x04,0xa8,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v6, -1, v3 +// GFX12: v_dual_subrev_f32 
v255, src_scc, v2 :: v_dual_sub_nc_u32 v6, -1, v3 ; encoding: [0xfd,0x04,0xa8,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v6, 0.5, v2 +// GFX12: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xa8,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, -1, v4 :: v_dual_sub_nc_u32 v6, src_scc, v5 +// GFX12: v_dual_subrev_f32 v255, -1, v4 :: v_dual_sub_nc_u32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xa8,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v6, null, v5 :: v_dual_sub_nc_u32 v255, 0xaf123456, v4 +// GFX12: v_dual_subrev_f32 v6, null, v5 :: v_dual_sub_nc_u32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xa8,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v4, v2 :: v_dual_lshrrev_b32 v6, v1, v3 +// GFX12: v_dual_add_f32 v255, v4, v2 :: v_dual_lshrrev_b32 v6, v1, v3 ; encoding: [0x04,0x05,0x2a,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v1, v2 :: v_dual_lshrrev_b32 v6, v255, v3 +// GFX12: v_dual_add_f32 v255, v1, v2 :: v_dual_lshrrev_b32 v6, v255, v3 ; encoding: [0x01,0x05,0x2a,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v255, v2 :: v_dual_lshrrev_b32 v6, v2, v3 +// GFX12: v_dual_add_f32 v255, v255, v2 :: v_dual_lshrrev_b32 v6, v2, v3 ; encoding: [0xff,0x05,0x2a,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v2, v2 :: v_dual_lshrrev_b32 v6, v3, v3 +// GFX12: v_dual_add_f32 v255, v2, v2 :: v_dual_lshrrev_b32 v6, v3, v3 ; encoding: [0x02,0x05,0x2a,0xc9,0x03,0x07,0x06,0xff] +// 
W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v3, v2 :: v_dual_lshrrev_b32 v6, v4, v3 +// GFX12: v_dual_add_f32 v255, v3, v2 :: v_dual_lshrrev_b32 v6, v4, v3 ; encoding: [0x03,0x05,0x2a,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s105, v2 :: v_dual_lshrrev_b32 v6, s1, v3 +// GFX12: v_dual_add_f32 v255, s105, v2 :: v_dual_lshrrev_b32 v6, s1, v3 ; encoding: [0x69,0x04,0x2a,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s1, v2 :: v_dual_lshrrev_b32 v6, s105, v3 +// GFX12: v_dual_add_f32 v255, s1, v2 :: v_dual_lshrrev_b32 v6, s105, v3 ; encoding: [0x01,0x04,0x2a,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v6, vcc_lo, v3 +// GFX12: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x2a,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v6, vcc_hi, v3 +// GFX12: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x2a,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_lo, v2 :: v_dual_lshrrev_b32 v6, ttmp15, v3 +// GFX12: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_lshrrev_b32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x2a,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, m0, v2 :: v_dual_lshrrev_b32 v6, m0, v3 +// GFX12: v_dual_add_f32 v255, m0, v2 :: v_dual_lshrrev_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0x2a,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v6, 
exec_lo, v3 +// GFX12: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x2a,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v6, exec_hi, v3 +// GFX12: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x2a,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_lshrrev_b32 v6, null, v3 +// GFX12: v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_lshrrev_b32 v6, null, v3 ; encoding: [0xff,0x04,0x2a,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v6, -1, v3 +// GFX12: v_dual_add_f32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0x2a,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v6, 0.5, v2 +// GFX12: v_dual_add_f32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x2a,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, -1, v4 :: v_dual_lshrrev_b32 v6, src_scc, v5 +// GFX12: v_dual_add_f32 v255, -1, v4 :: v_dual_lshrrev_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x2a,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v6, null, v5 :: v_dual_lshrrev_b32 v255, 0xaf123456, v4 +// GFX12: v_dual_add_f32 v6, null, v5 :: v_dual_lshrrev_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x2a,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2 :: v_dual_lshrrev_b32 v6, v1, v3 +// GFX12: v_dual_cndmask_b32 v255, v4, 
v2 :: v_dual_lshrrev_b32 v6, v1, v3 ; encoding: [0x04,0x05,0x6a,0xca,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v1, v2 :: v_dual_lshrrev_b32 v6, v255, v3 +// GFX12: v_dual_cndmask_b32 v255, v1, v2 :: v_dual_lshrrev_b32 v6, v255, v3 ; encoding: [0x01,0x05,0x6a,0xca,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v255, v2 :: v_dual_lshrrev_b32 v6, v2, v3 +// GFX12: v_dual_cndmask_b32 v255, v255, v2 :: v_dual_lshrrev_b32 v6, v2, v3 ; encoding: [0xff,0x05,0x6a,0xca,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v2, v2 :: v_dual_lshrrev_b32 v6, v3, v3 +// GFX12: v_dual_cndmask_b32 v255, v2, v2 :: v_dual_lshrrev_b32 v6, v3, v3 ; encoding: [0x02,0x05,0x6a,0xca,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v3, v2 :: v_dual_lshrrev_b32 v6, v4, v3 +// GFX12: v_dual_cndmask_b32 v255, v3, v2 :: v_dual_lshrrev_b32 v6, v4, v3 ; encoding: [0x03,0x05,0x6a,0xca,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s105, v2 :: v_dual_lshrrev_b32 v6, s105, v3 +// GFX12: v_dual_cndmask_b32 v255, s105, v2 :: v_dual_lshrrev_b32 v6, s105, v3 ; encoding: [0x69,0x04,0x6a,0xca,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s1, v2 :: v_dual_lshrrev_b32 v6, s1, v3 +// GFX12: v_dual_cndmask_b32 v255, s1, v2 :: v_dual_lshrrev_b32 v6, s1, v3 ; encoding: [0x01,0x04,0x6a,0xca,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v6, ttmp15, v3 +// GFX12: v_dual_cndmask_b32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x6a,0xca,0x7b,0x06,0x06,0xff] +// 
W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v6, exec_hi, v3 +// GFX12: v_dual_cndmask_b32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x6a,0xca,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_lo, v2 :: v_dual_lshrrev_b32 v6, exec_lo, v3 +// GFX12: v_dual_cndmask_b32 v255, exec_lo, v2 :: v_dual_lshrrev_b32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x6a,0xca,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, m0, v2 :: v_dual_lshrrev_b32 v6, m0, v3 +// GFX12: v_dual_cndmask_b32 v255, m0, v2 :: v_dual_lshrrev_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0x6a,0xca,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v6, vcc_hi, v3 +// GFX12: v_dual_cndmask_b32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x6a,0xca,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v6, vcc_lo, v3 +// GFX12: v_dual_cndmask_b32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x6a,0xca,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_lshrrev_b32 v6, null, v3 +// GFX12: v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_lshrrev_b32 v6, null, v3 ; encoding: [0xff,0x04,0x6a,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v6, -1, v3 +// GFX12: v_dual_cndmask_b32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0x6a,0xca,0xc1,0x06,0x06,0xff] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v6, 0.5, v2 +// GFX12: v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x6a,0xca,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, -1, v4 :: v_dual_lshrrev_b32 v6, src_scc, v5 +// GFX12: v_dual_cndmask_b32 v255, -1, v4 :: v_dual_lshrrev_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x6a,0xca,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v6, null, v5 :: v_dual_lshrrev_b32 v255, 0xaf123456, v4 +// GFX12: v_dual_cndmask_b32 v6, null, v5 :: v_dual_lshrrev_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x6a,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_lshrrev_b32 v6, v1, v3 +// GFX12: v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_lshrrev_b32 v6, v1, v3 ; encoding: [0x04,0x05,0x6a,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: v_dual_lshrrev_b32 v6, v255, v3 +// GFX12: v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: v_dual_lshrrev_b32 v6, v255, v3 ; encoding: [0x01,0x05,0x6a,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_lshrrev_b32 v6, v2, v3 +// GFX12: v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_lshrrev_b32 v6, v2, v3 ; encoding: [0xff,0x05,0x6a,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_lshrrev_b32 v6, v3, v3 +// GFX12: v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_lshrrev_b32 v6, 
v3, v3 ; encoding: [0x02,0x05,0x6a,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_lshrrev_b32 v6, v4, v3 +// GFX12: v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_lshrrev_b32 v6, v4, v3 ; encoding: [0x03,0x05,0x6a,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, s105, v2, 0xaf123456 :: v_dual_lshrrev_b32 v6, s105, v3 +// GFX12: v_dual_fmaak_f32 v255, s105, v2, 0xaf123456 :: v_dual_lshrrev_b32 v6, s105, v3 ; encoding: [0x69,0x04,0x6a,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, s1, v2, 0xaf123456 :: v_dual_lshrrev_b32 v6, s1, v3 +// GFX12: v_dual_fmaak_f32 v255, s1, v2, 0xaf123456 :: v_dual_lshrrev_b32 v6, s1, v3 ; encoding: [0x01,0x04,0x6a,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, ttmp15, v2, 0xaf123456 :: v_dual_lshrrev_b32 v6, ttmp15, v3 +// GFX12: v_dual_fmaak_f32 v255, ttmp15, v2, 0xaf123456 :: v_dual_lshrrev_b32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x6a,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, exec_hi, v2, 0xaf123456 :: v_dual_lshrrev_b32 v6, exec_hi, v3 +// GFX12: v_dual_fmaak_f32 v255, exec_hi, v2, 0xaf123456 :: v_dual_lshrrev_b32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x6a,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, exec_lo, v2, 0xaf123456 :: v_dual_lshrrev_b32 v6, exec_lo, v3 +// GFX12: v_dual_fmaak_f32 v255, exec_lo, v2, 0xaf123456 :: v_dual_lshrrev_b32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x6a,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, m0, v2, 0xaf123456 :: v_dual_lshrrev_b32 v6, m0, v3 +// GFX12: v_dual_fmaak_f32 v255, m0, v2, 0xaf123456 :: v_dual_lshrrev_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0x6a,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, vcc_hi, v2, 0xaf123456 :: v_dual_lshrrev_b32 v6, vcc_hi, v3 +// GFX12: v_dual_fmaak_f32 v255, vcc_hi, v2, 0xaf123456 :: v_dual_lshrrev_b32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x6a,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, vcc_lo, v2, 0xaf123456 :: v_dual_lshrrev_b32 v6, vcc_lo, v3 +// GFX12: v_dual_fmaak_f32 v255, vcc_lo, v2, 0xaf123456 :: v_dual_lshrrev_b32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x6a,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_lshrrev_b32 v6, null, v3 +// GFX12: v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_lshrrev_b32 v6, null, v3 ; encoding: [0xff,0x04,0x6a,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, src_scc, v2, 0xaf123456 :: v_dual_lshrrev_b32 v6, -1, v3 +// GFX12: v_dual_fmaak_f32 v255, src_scc, v2, 0xaf123456 :: v_dual_lshrrev_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0x6a,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_lshrrev_b32 v6, 0.5, v2 +// GFX12: v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_lshrrev_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x6a,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, -1, v4, 
0xaf123456 :: v_dual_lshrrev_b32 v6, src_scc, v5 +// GFX12: v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_lshrrev_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x6a,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_lshrrev_b32 v255, 0xaf123456, v4 +// GFX12: v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_lshrrev_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x6a,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v4, v2 :: v_dual_lshrrev_b32 v6, v1, v3 +// GFX12: v_dual_fmac_f32 v255, v4, v2 :: v_dual_lshrrev_b32 v6, v1, v3 ; encoding: [0x04,0x05,0x2a,0xc8,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v1, v2 :: v_dual_lshrrev_b32 v6, v255, v3 +// GFX12: v_dual_fmac_f32 v255, v1, v2 :: v_dual_lshrrev_b32 v6, v255, v3 ; encoding: [0x01,0x05,0x2a,0xc8,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v255, v2 :: v_dual_lshrrev_b32 v6, v2, v3 +// GFX12: v_dual_fmac_f32 v255, v255, v2 :: v_dual_lshrrev_b32 v6, v2, v3 ; encoding: [0xff,0x05,0x2a,0xc8,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v2, v2 :: v_dual_lshrrev_b32 v6, v3, v3 +// GFX12: v_dual_fmac_f32 v255, v2, v2 :: v_dual_lshrrev_b32 v6, v3, v3 ; encoding: [0x02,0x05,0x2a,0xc8,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v3, v2 :: v_dual_lshrrev_b32 v6, v4, v3 +// GFX12: v_dual_fmac_f32 v255, v3, v2 :: v_dual_lshrrev_b32 v6, v4, v3 ; encoding: [0x03,0x05,0x2a,0xc8,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s105, v2 :: v_dual_lshrrev_b32 v6, s1, v3 +// 
GFX12: v_dual_fmac_f32 v255, s105, v2 :: v_dual_lshrrev_b32 v6, s1, v3 ; encoding: [0x69,0x04,0x2a,0xc8,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s1, v2 :: v_dual_lshrrev_b32 v6, s105, v3 +// GFX12: v_dual_fmac_f32 v255, s1, v2 :: v_dual_lshrrev_b32 v6, s105, v3 ; encoding: [0x01,0x04,0x2a,0xc8,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v6, vcc_lo, v3 +// GFX12: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x2a,0xc8,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v6, vcc_hi, v3 +// GFX12: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x2a,0xc8,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_lshrrev_b32 v6, ttmp15, v3 +// GFX12: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_lshrrev_b32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x2a,0xc8,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, m0, v2 :: v_dual_lshrrev_b32 v6, m0, v3 +// GFX12: v_dual_fmac_f32 v255, m0, v2 :: v_dual_lshrrev_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0x2a,0xc8,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v6, exec_lo, v3 +// GFX12: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x2a,0xc8,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v6, exec_hi, v3 +// GFX12: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v6, exec_hi, v3 ; 
encoding: [0x6a,0x04,0x2a,0xc8,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_lshrrev_b32 v6, null, v3 +// GFX12: v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_lshrrev_b32 v6, null, v3 ; encoding: [0xff,0x04,0x2a,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v6, -1, v3 +// GFX12: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0x2a,0xc8,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v6, 0.5, v2 +// GFX12: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x2a,0xc8,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, -1, v4 :: v_dual_lshrrev_b32 v6, src_scc, v5 +// GFX12: v_dual_fmac_f32 v255, -1, v4 :: v_dual_lshrrev_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x2a,0xc8,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v6, null, v5 :: v_dual_lshrrev_b32 v255, 0xaf123456, v4 +// GFX12: v_dual_fmac_f32 v6, null, v5 :: v_dual_lshrrev_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x2a,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_lshrrev_b32 v6, v1, v255 +// GFX12: v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_lshrrev_b32 v6, v1, v255 ; encoding: [0x04,0xff,0xab,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_lshrrev_b32 v6, v255, v255 +// GFX12: v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: 
v_dual_lshrrev_b32 v6, v255, v255 ; encoding: [0x01,0xff,0xab,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_lshrrev_b32 v6, v2, v255 +// GFX12: v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_lshrrev_b32 v6, v2, v255 ; encoding: [0xff,0xff,0xab,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_lshrrev_b32 v6, v3, v255 +// GFX12: v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_lshrrev_b32 v6, v3, v255 ; encoding: [0x02,0xff,0xab,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_lshrrev_b32 v6, v4, v255 +// GFX12: v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_lshrrev_b32 v6, v4, v255 ; encoding: [0x03,0xff,0xab,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_lshrrev_b32 v6, s105, v255 +// GFX12: v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_lshrrev_b32 v6, s105, v255 ; encoding: [0x69,0xfe,0xab,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_lshrrev_b32 v6, s1, v255 +// GFX12: v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_lshrrev_b32 v6, s1, v255 ; encoding: [0x01,0xfe,0xab,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_lshrrev_b32 v6, ttmp15, v255 +// GFX12: v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_lshrrev_b32 v6, ttmp15, v255 ; encoding: 
[0x7b,0xfe,0xab,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_lshrrev_b32 v6, exec_hi, v255 +// GFX12: v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_lshrrev_b32 v6, exec_hi, v255 ; encoding: [0x7f,0xfe,0xab,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_lshrrev_b32 v6, exec_lo, v255 +// GFX12: v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_lshrrev_b32 v6, exec_lo, v255 ; encoding: [0x7e,0xfe,0xab,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_lshrrev_b32 v6, m0, v255 +// GFX12: v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_lshrrev_b32 v6, m0, v255 ; encoding: [0x7d,0xfe,0xab,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_lshrrev_b32 v6, vcc_hi, v255 +// GFX12: v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_lshrrev_b32 v6, vcc_hi, v255 ; encoding: [0x6b,0xfe,0xab,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_lshrrev_b32 v6, vcc_lo, v255 +// GFX12: v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_lshrrev_b32 v6, vcc_lo, v255 ; encoding: [0x6a,0xfe,0xab,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_lshrrev_b32 v6, null, v255 +// GFX12: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_lshrrev_b32 v6, null, v255 ; encoding: 
[0xff,0xfe,0xab,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_lshrrev_b32 v6, -1, v255 +// GFX12: v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_lshrrev_b32 v6, -1, v255 ; encoding: [0xfd,0xfe,0xab,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_lshrrev_b32 v6, 0.5, v3 +// GFX12: v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_lshrrev_b32 v6, 0.5, v3 ; encoding: [0xf0,0xfe,0xab,0xc8,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_lshrrev_b32 v6, src_scc, v4 +// GFX12: v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_lshrrev_b32 v6, src_scc, v4 ; encoding: [0xc1,0xfe,0xab,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_lshrrev_b32 v255, 0xaf123456, v5 +// GFX12: v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_lshrrev_b32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x08,0xaa,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v4, v2 :: v_dual_lshrrev_b32 v6, v1, v3 +// GFX12: v_dual_max_num_f32 v255, v4, v2 :: v_dual_lshrrev_b32 v6, v1, v3 ; encoding: [0x04,0x05,0xaa,0xca,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v1, v2 :: v_dual_lshrrev_b32 v6, v255, v3 +// GFX12: v_dual_max_num_f32 v255, v1, v2 :: v_dual_lshrrev_b32 v6, v255, v3 ; encoding: [0x01,0x05,0xaa,0xca,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v255, v2 :: 
v_dual_lshrrev_b32 v6, v2, v3 +// GFX12: v_dual_max_num_f32 v255, v255, v2 :: v_dual_lshrrev_b32 v6, v2, v3 ; encoding: [0xff,0x05,0xaa,0xca,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v2, v2 :: v_dual_lshrrev_b32 v6, v3, v3 +// GFX12: v_dual_max_num_f32 v255, v2, v2 :: v_dual_lshrrev_b32 v6, v3, v3 ; encoding: [0x02,0x05,0xaa,0xca,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v3, v2 :: v_dual_lshrrev_b32 v6, v4, v3 +// GFX12: v_dual_max_num_f32 v255, v3, v2 :: v_dual_lshrrev_b32 v6, v4, v3 ; encoding: [0x03,0x05,0xaa,0xca,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s105, v2 :: v_dual_lshrrev_b32 v6, s1, v3 +// GFX12: v_dual_max_num_f32 v255, s105, v2 :: v_dual_lshrrev_b32 v6, s1, v3 ; encoding: [0x69,0x04,0xaa,0xca,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s1, v2 :: v_dual_lshrrev_b32 v6, s105, v3 +// GFX12: v_dual_max_num_f32 v255, s1, v2 :: v_dual_lshrrev_b32 v6, s105, v3 ; encoding: [0x01,0x04,0xaa,0xca,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v6, vcc_lo, v3 +// GFX12: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xaa,0xca,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v6, vcc_hi, v3 +// GFX12: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xaa,0xca,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_lshrrev_b32 v6, ttmp15, v3 +// GFX12: v_dual_max_num_f32 v255, exec_lo, v2 
:: v_dual_lshrrev_b32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xaa,0xca,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, m0, v2 :: v_dual_lshrrev_b32 v6, m0, v3 +// GFX12: v_dual_max_num_f32 v255, m0, v2 :: v_dual_lshrrev_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0xaa,0xca,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v6, exec_lo, v3 +// GFX12: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xaa,0xca,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v6, exec_hi, v3 +// GFX12: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xaa,0xca,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0xaf123456, v2 :: v_dual_lshrrev_b32 v6, null, v3 +// GFX12: v_dual_max_num_f32 v255, 0xaf123456, v2 :: v_dual_lshrrev_b32 v6, null, v3 ; encoding: [0xff,0x04,0xaa,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v6, -1, v3 +// GFX12: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0xaa,0xca,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v6, 0.5, v2 +// GFX12: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xaa,0xca,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, -1, v4 :: v_dual_lshrrev_b32 v6, src_scc, v5 +// GFX12: v_dual_max_num_f32 v255, -1, v4 :: v_dual_lshrrev_b32 v6, 
src_scc, v5 ; encoding: [0xc1,0x08,0xaa,0xca,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v6, null, v5 :: v_dual_lshrrev_b32 v255, 0xaf123456, v4 +// GFX12: v_dual_max_num_f32 v6, null, v5 :: v_dual_lshrrev_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xaa,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v4, v2 :: v_dual_lshrrev_b32 v6, v1, v3 +// GFX12: v_dual_min_num_f32 v255, v4, v2 :: v_dual_lshrrev_b32 v6, v1, v3 ; encoding: [0x04,0x05,0xea,0xca,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v1, v2 :: v_dual_lshrrev_b32 v6, v255, v3 +// GFX12: v_dual_min_num_f32 v255, v1, v2 :: v_dual_lshrrev_b32 v6, v255, v3 ; encoding: [0x01,0x05,0xea,0xca,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v255, v2 :: v_dual_lshrrev_b32 v6, v2, v3 +// GFX12: v_dual_min_num_f32 v255, v255, v2 :: v_dual_lshrrev_b32 v6, v2, v3 ; encoding: [0xff,0x05,0xea,0xca,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v2, v2 :: v_dual_lshrrev_b32 v6, v3, v3 +// GFX12: v_dual_min_num_f32 v255, v2, v2 :: v_dual_lshrrev_b32 v6, v3, v3 ; encoding: [0x02,0x05,0xea,0xca,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v3, v2 :: v_dual_lshrrev_b32 v6, v4, v3 +// GFX12: v_dual_min_num_f32 v255, v3, v2 :: v_dual_lshrrev_b32 v6, v4, v3 ; encoding: [0x03,0x05,0xea,0xca,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s105, v2 :: v_dual_lshrrev_b32 v6, s1, v3 +// GFX12: v_dual_min_num_f32 v255, s105, v2 :: v_dual_lshrrev_b32 v6, s1, v3 ; encoding: [0x69,0x04,0xea,0xca,0x01,0x06,0x06,0xff] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s1, v2 :: v_dual_lshrrev_b32 v6, s105, v3 +// GFX12: v_dual_min_num_f32 v255, s1, v2 :: v_dual_lshrrev_b32 v6, s105, v3 ; encoding: [0x01,0x04,0xea,0xca,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v6, vcc_lo, v3 +// GFX12: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xea,0xca,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v6, vcc_hi, v3 +// GFX12: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xea,0xca,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_lshrrev_b32 v6, ttmp15, v3 +// GFX12: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_lshrrev_b32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xea,0xca,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, m0, v2 :: v_dual_lshrrev_b32 v6, m0, v3 +// GFX12: v_dual_min_num_f32 v255, m0, v2 :: v_dual_lshrrev_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0xea,0xca,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v6, exec_lo, v3 +// GFX12: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xea,0xca,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v6, exec_hi, v3 +// GFX12: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xea,0xca,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction 
requires wavesize=32 + +v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_lshrrev_b32 v6, null, v3 +// GFX12: v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_lshrrev_b32 v6, null, v3 ; encoding: [0xff,0x04,0xea,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v6, -1, v3 +// GFX12: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0xea,0xca,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v6, 0.5, v2 +// GFX12: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xea,0xca,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, -1, v4 :: v_dual_lshrrev_b32 v6, src_scc, v5 +// GFX12: v_dual_min_num_f32 v255, -1, v4 :: v_dual_lshrrev_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xea,0xca,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v6, null, v5 :: v_dual_lshrrev_b32 v255, 0xaf123456, v4 +// GFX12: v_dual_min_num_f32 v6, null, v5 :: v_dual_lshrrev_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xea,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v4 :: v_dual_lshrrev_b32 v6, v1, v255 +// GFX12: v_dual_mov_b32 v255, v4 :: v_dual_lshrrev_b32 v6, v1, v255 ; encoding: [0x04,0x01,0x2a,0xca,0x01,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v1 :: v_dual_lshrrev_b32 v6, v255, v255 +// GFX12: v_dual_mov_b32 v255, v1 :: v_dual_lshrrev_b32 v6, v255, v255 ; encoding: [0x01,0x01,0x2a,0xca,0xff,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 
v255 :: v_dual_lshrrev_b32 v6, v2, v255 +// GFX12: v_dual_mov_b32 v255, v255 :: v_dual_lshrrev_b32 v6, v2, v255 ; encoding: [0xff,0x01,0x2a,0xca,0x02,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v2 :: v_dual_lshrrev_b32 v6, v3, v255 +// GFX12: v_dual_mov_b32 v255, v2 :: v_dual_lshrrev_b32 v6, v3, v255 ; encoding: [0x02,0x01,0x2a,0xca,0x03,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v3 :: v_dual_lshrrev_b32 v6, v4, v255 +// GFX12: v_dual_mov_b32 v255, v3 :: v_dual_lshrrev_b32 v6, v4, v255 ; encoding: [0x03,0x01,0x2a,0xca,0x04,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s105 :: v_dual_lshrrev_b32 v6, s1, v255 +// GFX12: v_dual_mov_b32 v255, s105 :: v_dual_lshrrev_b32 v6, s1, v255 ; encoding: [0x69,0x00,0x2a,0xca,0x01,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s1 :: v_dual_lshrrev_b32 v6, s105, v255 +// GFX12: v_dual_mov_b32 v255, s1 :: v_dual_lshrrev_b32 v6, s105, v255 ; encoding: [0x01,0x00,0x2a,0xca,0x69,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, ttmp15 :: v_dual_lshrrev_b32 v6, vcc_lo, v255 +// GFX12: v_dual_mov_b32 v255, ttmp15 :: v_dual_lshrrev_b32 v6, vcc_lo, v255 ; encoding: [0x7b,0x00,0x2a,0xca,0x6a,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_hi :: v_dual_lshrrev_b32 v6, vcc_hi, v255 +// GFX12: v_dual_mov_b32 v255, exec_hi :: v_dual_lshrrev_b32 v6, vcc_hi, v255 ; encoding: [0x7f,0x00,0x2a,0xca,0x6b,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_lo :: v_dual_lshrrev_b32 v6, ttmp15, v255 +// GFX12: v_dual_mov_b32 v255, exec_lo :: v_dual_lshrrev_b32 v6, ttmp15, v255 ; encoding: 
[0x7e,0x00,0x2a,0xca,0x7b,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, m0 :: v_dual_lshrrev_b32 v6, m0, v255 +// GFX12: v_dual_mov_b32 v255, m0 :: v_dual_lshrrev_b32 v6, m0, v255 ; encoding: [0x7d,0x00,0x2a,0xca,0x7d,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_hi :: v_dual_lshrrev_b32 v6, exec_lo, v255 +// GFX12: v_dual_mov_b32 v255, vcc_hi :: v_dual_lshrrev_b32 v6, exec_lo, v255 ; encoding: [0x6b,0x00,0x2a,0xca,0x7e,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_lo :: v_dual_lshrrev_b32 v6, exec_hi, v255 +// GFX12: v_dual_mov_b32 v255, vcc_lo :: v_dual_lshrrev_b32 v6, exec_hi, v255 ; encoding: [0x6a,0x00,0x2a,0xca,0x7f,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0xaf123456 :: v_dual_lshrrev_b32 v6, null, v255 +// GFX12: v_dual_mov_b32 v255, 0xaf123456 :: v_dual_lshrrev_b32 v6, null, v255 ; encoding: [0xff,0x00,0x2a,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, src_scc :: v_dual_lshrrev_b32 v6, -1, v255 +// GFX12: v_dual_mov_b32 v255, src_scc :: v_dual_lshrrev_b32 v6, -1, v255 ; encoding: [0xfd,0x00,0x2a,0xca,0xc1,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0.5 :: v_dual_lshrrev_b32 v6, 0.5, v3 +// GFX12: v_dual_mov_b32 v255, 0.5 :: v_dual_lshrrev_b32 v6, 0.5, v3 ; encoding: [0xf0,0x00,0x2a,0xca,0xf0,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, -1 :: v_dual_lshrrev_b32 v6, src_scc, v4 +// GFX12: v_dual_mov_b32 v255, -1 :: v_dual_lshrrev_b32 v6, src_scc, v4 ; encoding: [0xc1,0x00,0x2a,0xca,0xfd,0x08,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_mov_b32 v6, null :: v_dual_lshrrev_b32 v255, 0xaf123456, v5 +// GFX12: v_dual_mov_b32 v6, null :: v_dual_lshrrev_b32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x00,0x2a,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_lshrrev_b32 v6, v1, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_lshrrev_b32 v6, v1, v3 ; encoding: [0x04,0x05,0xea,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_lshrrev_b32 v6, v255, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_lshrrev_b32 v6, v255, v3 ; encoding: [0x01,0x05,0xea,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_lshrrev_b32 v6, v2, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_lshrrev_b32 v6, v2, v3 ; encoding: [0xff,0x05,0xea,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_lshrrev_b32 v6, v3, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_lshrrev_b32 v6, v3, v3 ; encoding: [0x02,0x05,0xea,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_lshrrev_b32 v6, v4, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_lshrrev_b32 v6, v4, v3 ; encoding: [0x03,0x05,0xea,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_lshrrev_b32 v6, s1, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_lshrrev_b32 v6, s1, v3 ; encoding: [0x69,0x04,0xea,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s1, 
v2 :: v_dual_lshrrev_b32 v6, s105, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_lshrrev_b32 v6, s105, v3 ; encoding: [0x01,0x04,0xea,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v6, vcc_lo, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xea,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v6, vcc_hi, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xea,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_lshrrev_b32 v6, ttmp15, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_lshrrev_b32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xea,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_lshrrev_b32 v6, m0, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_lshrrev_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0xea,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v6, exec_lo, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xea,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v6, exec_hi, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xea,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_lshrrev_b32 v6, null, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_lshrrev_b32 v6, null, v3 ; encoding: [0xff,0x04,0xea,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v6, -1, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0xea,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v6, 0.5, v2 +// GFX12: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xea,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_lshrrev_b32 v6, src_scc, v5 +// GFX12: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_lshrrev_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xea,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_lshrrev_b32 v255, 0xaf123456, v4 +// GFX12: v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_lshrrev_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xea,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v4, v2 :: v_dual_lshrrev_b32 v6, v1, v3 +// GFX12: v_dual_mul_f32 v255, v4, v2 :: v_dual_lshrrev_b32 v6, v1, v3 ; encoding: [0x04,0x05,0xea,0xc8,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v1, v2 :: v_dual_lshrrev_b32 v6, v255, v3 +// GFX12: v_dual_mul_f32 v255, v1, v2 :: v_dual_lshrrev_b32 v6, v255, v3 ; encoding: [0x01,0x05,0xea,0xc8,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_mul_f32 v255, v255, v2 :: v_dual_lshrrev_b32 v6, v2, v3 +// GFX12: v_dual_mul_f32 v255, v255, v2 :: v_dual_lshrrev_b32 v6, v2, v3 ; encoding: [0xff,0x05,0xea,0xc8,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v2, v2 :: v_dual_lshrrev_b32 v6, v3, v3 +// GFX12: v_dual_mul_f32 v255, v2, v2 :: v_dual_lshrrev_b32 v6, v3, v3 ; encoding: [0x02,0x05,0xea,0xc8,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v3, v2 :: v_dual_lshrrev_b32 v6, v4, v3 +// GFX12: v_dual_mul_f32 v255, v3, v2 :: v_dual_lshrrev_b32 v6, v4, v3 ; encoding: [0x03,0x05,0xea,0xc8,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s105, v2 :: v_dual_lshrrev_b32 v6, s1, v3 +// GFX12: v_dual_mul_f32 v255, s105, v2 :: v_dual_lshrrev_b32 v6, s1, v3 ; encoding: [0x69,0x04,0xea,0xc8,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s1, v2 :: v_dual_lshrrev_b32 v6, s105, v3 +// GFX12: v_dual_mul_f32 v255, s1, v2 :: v_dual_lshrrev_b32 v6, s105, v3 ; encoding: [0x01,0x04,0xea,0xc8,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v6, vcc_lo, v3 +// GFX12: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xea,0xc8,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v6, vcc_hi, v3 +// GFX12: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xea,0xc8,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_lshrrev_b32 v6, ttmp15, v3 +// GFX12: v_dual_mul_f32 v255, exec_lo, v2 :: 
v_dual_lshrrev_b32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xea,0xc8,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, m0, v2 :: v_dual_lshrrev_b32 v6, m0, v3 +// GFX12: v_dual_mul_f32 v255, m0, v2 :: v_dual_lshrrev_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0xea,0xc8,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v6, exec_lo, v3 +// GFX12: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xea,0xc8,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v6, exec_hi, v3 +// GFX12: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xea,0xc8,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_lshrrev_b32 v6, null, v3 +// GFX12: v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_lshrrev_b32 v6, null, v3 ; encoding: [0xff,0x04,0xea,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v6, -1, v3 +// GFX12: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0xea,0xc8,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v6, 0.5, v2 +// GFX12: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xea,0xc8,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, -1, v4 :: v_dual_lshrrev_b32 v6, src_scc, v5 +// GFX12: v_dual_mul_f32 v255, -1, v4 :: v_dual_lshrrev_b32 v6, src_scc, v5 ; encoding: 
[0xc1,0x08,0xea,0xc8,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v6, null, v5 :: v_dual_lshrrev_b32 v255, 0xaf123456, v4 +// GFX12: v_dual_mul_f32 v6, null, v5 :: v_dual_lshrrev_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xea,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v4, v2 :: v_dual_lshrrev_b32 v6, v1, v3 +// GFX12: v_dual_sub_f32 v255, v4, v2 :: v_dual_lshrrev_b32 v6, v1, v3 ; encoding: [0x04,0x05,0x6a,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v1, v2 :: v_dual_lshrrev_b32 v6, v255, v3 +// GFX12: v_dual_sub_f32 v255, v1, v2 :: v_dual_lshrrev_b32 v6, v255, v3 ; encoding: [0x01,0x05,0x6a,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v255, v2 :: v_dual_lshrrev_b32 v6, v2, v3 +// GFX12: v_dual_sub_f32 v255, v255, v2 :: v_dual_lshrrev_b32 v6, v2, v3 ; encoding: [0xff,0x05,0x6a,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v2, v2 :: v_dual_lshrrev_b32 v6, v3, v3 +// GFX12: v_dual_sub_f32 v255, v2, v2 :: v_dual_lshrrev_b32 v6, v3, v3 ; encoding: [0x02,0x05,0x6a,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v3, v2 :: v_dual_lshrrev_b32 v6, v4, v3 +// GFX12: v_dual_sub_f32 v255, v3, v2 :: v_dual_lshrrev_b32 v6, v4, v3 ; encoding: [0x03,0x05,0x6a,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s105, v2 :: v_dual_lshrrev_b32 v6, s1, v3 +// GFX12: v_dual_sub_f32 v255, s105, v2 :: v_dual_lshrrev_b32 v6, s1, v3 ; encoding: [0x69,0x04,0x6a,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 
s1, v2 :: v_dual_lshrrev_b32 v6, s105, v3 +// GFX12: v_dual_sub_f32 v255, s1, v2 :: v_dual_lshrrev_b32 v6, s105, v3 ; encoding: [0x01,0x04,0x6a,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v6, vcc_lo, v3 +// GFX12: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x6a,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v6, vcc_hi, v3 +// GFX12: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x6a,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_lshrrev_b32 v6, ttmp15, v3 +// GFX12: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_lshrrev_b32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x6a,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, m0, v2 :: v_dual_lshrrev_b32 v6, m0, v3 +// GFX12: v_dual_sub_f32 v255, m0, v2 :: v_dual_lshrrev_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0x6a,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v6, exec_lo, v3 +// GFX12: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x6a,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v6, exec_hi, v3 +// GFX12: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x6a,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0xaf123456, v2 :: v_dual_lshrrev_b32 v6, null, v3 +// GFX12: v_dual_sub_f32 v255, 0xaf123456, 
v2 :: v_dual_lshrrev_b32 v6, null, v3 ; encoding: [0xff,0x04,0x6a,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v6, -1, v3 +// GFX12: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0x6a,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v6, 0.5, v2 +// GFX12: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x6a,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, -1, v4 :: v_dual_lshrrev_b32 v6, src_scc, v5 +// GFX12: v_dual_sub_f32 v255, -1, v4 :: v_dual_lshrrev_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x6a,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v6, null, v5 :: v_dual_lshrrev_b32 v255, 0xaf123456, v4 +// GFX12: v_dual_sub_f32 v6, null, v5 :: v_dual_lshrrev_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x6a,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v4, v2 :: v_dual_lshrrev_b32 v6, v1, v3 +// GFX12: v_dual_subrev_f32 v255, v4, v2 :: v_dual_lshrrev_b32 v6, v1, v3 ; encoding: [0x04,0x05,0xaa,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v1, v2 :: v_dual_lshrrev_b32 v6, v255, v3 +// GFX12: v_dual_subrev_f32 v255, v1, v2 :: v_dual_lshrrev_b32 v6, v255, v3 ; encoding: [0x01,0x05,0xaa,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v255, v2 :: v_dual_lshrrev_b32 v6, v2, v3 +// GFX12: v_dual_subrev_f32 v255, v255, v2 :: v_dual_lshrrev_b32 v6, v2, v3 ; encoding: 
[0xff,0x05,0xaa,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v2, v2 :: v_dual_lshrrev_b32 v6, v3, v3 +// GFX12: v_dual_subrev_f32 v255, v2, v2 :: v_dual_lshrrev_b32 v6, v3, v3 ; encoding: [0x02,0x05,0xaa,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v3, v2 :: v_dual_lshrrev_b32 v6, v4, v3 +// GFX12: v_dual_subrev_f32 v255, v3, v2 :: v_dual_lshrrev_b32 v6, v4, v3 ; encoding: [0x03,0x05,0xaa,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s105, v2 :: v_dual_lshrrev_b32 v6, s1, v3 +// GFX12: v_dual_subrev_f32 v255, s105, v2 :: v_dual_lshrrev_b32 v6, s1, v3 ; encoding: [0x69,0x04,0xaa,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s1, v2 :: v_dual_lshrrev_b32 v6, s105, v3 +// GFX12: v_dual_subrev_f32 v255, s1, v2 :: v_dual_lshrrev_b32 v6, s105, v3 ; encoding: [0x01,0x04,0xaa,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v6, vcc_lo, v3 +// GFX12: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xaa,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v6, vcc_hi, v3 +// GFX12: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xaa,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_lshrrev_b32 v6, ttmp15, v3 +// GFX12: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_lshrrev_b32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xaa,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_subrev_f32 v255, m0, v2 :: v_dual_lshrrev_b32 v6, m0, v3 +// GFX12: v_dual_subrev_f32 v255, m0, v2 :: v_dual_lshrrev_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0xaa,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v6, exec_lo, v3 +// GFX12: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xaa,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v6, exec_hi, v3 +// GFX12: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xaa,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0xaf123456, v2 :: v_dual_lshrrev_b32 v6, null, v3 +// GFX12: v_dual_subrev_f32 v255, 0xaf123456, v2 :: v_dual_lshrrev_b32 v6, null, v3 ; encoding: [0xff,0x04,0xaa,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v6, -1, v3 +// GFX12: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0xaa,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v6, 0.5, v2 +// GFX12: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xaa,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, -1, v4 :: v_dual_lshrrev_b32 v6, src_scc, v5 +// GFX12: v_dual_subrev_f32 v255, -1, v4 :: v_dual_lshrrev_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xaa,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_subrev_f32 v6, null, v5 :: v_dual_lshrrev_b32 v255, 0xaf123456, v4 +// GFX12: v_dual_subrev_f32 v6, null, v5 :: v_dual_lshrrev_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xaa,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v4, v2 :: v_dual_ashrrev_i32 v6, v1, v3 +// GFX12: v_dual_add_f32 v255, v4, v2 :: v_dual_ashrrev_i32 v6, v1, v3 ; encoding: [0x04,0x05,0x2c,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v1, v2 :: v_dual_ashrrev_i32 v6, v255, v3 +// GFX12: v_dual_add_f32 v255, v1, v2 :: v_dual_ashrrev_i32 v6, v255, v3 ; encoding: [0x01,0x05,0x2c,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v255, v2 :: v_dual_ashrrev_i32 v6, v2, v3 +// GFX12: v_dual_add_f32 v255, v255, v2 :: v_dual_ashrrev_i32 v6, v2, v3 ; encoding: [0xff,0x05,0x2c,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v2, v2 :: v_dual_ashrrev_i32 v6, v3, v3 +// GFX12: v_dual_add_f32 v255, v2, v2 :: v_dual_ashrrev_i32 v6, v3, v3 ; encoding: [0x02,0x05,0x2c,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v3, v2 :: v_dual_ashrrev_i32 v6, v4, v3 +// GFX12: v_dual_add_f32 v255, v3, v2 :: v_dual_ashrrev_i32 v6, v4, v3 ; encoding: [0x03,0x05,0x2c,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s105, v2 :: v_dual_ashrrev_i32 v6, s1, v3 +// GFX12: v_dual_add_f32 v255, s105, v2 :: v_dual_ashrrev_i32 v6, s1, v3 ; encoding: [0x69,0x04,0x2c,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s1, v2 :: v_dual_ashrrev_i32 v6, s105, v3 +// GFX12: v_dual_add_f32 v255, s1, v2 :: v_dual_ashrrev_i32 v6, 
s105, v3 ; encoding: [0x01,0x04,0x2c,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v6, vcc_lo, v3 +// GFX12: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x2c,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v6, vcc_hi, v3 +// GFX12: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x2c,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v6, ttmp15, v3 +// GFX12: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x2c,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, m0, v2 :: v_dual_ashrrev_i32 v6, m0, v3 +// GFX12: v_dual_add_f32 v255, m0, v2 :: v_dual_ashrrev_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0x2c,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v6, exec_lo, v3 +// GFX12: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x2c,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 v6, exec_hi, v3 +// GFX12: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x2c,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_ashrrev_i32 v6, null, v3 +// GFX12: v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_ashrrev_i32 v6, null, v3 ; encoding: 
[0xff,0x04,0x2c,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v6, -1, v3 +// GFX12: v_dual_add_f32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0x2c,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v6, 0.5, v2 +// GFX12: v_dual_add_f32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x2c,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, -1, v4 :: v_dual_ashrrev_i32 v6, src_scc, v5 +// GFX12: v_dual_add_f32 v255, -1, v4 :: v_dual_ashrrev_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x2c,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v6, null, v5 :: v_dual_ashrrev_i32 v255, 0xaf123456, v4 +// GFX12: v_dual_add_f32 v6, null, v5 :: v_dual_ashrrev_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x2c,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2 :: v_dual_ashrrev_i32 v6, v1, v3 +// GFX12: v_dual_cndmask_b32 v255, v4, v2 :: v_dual_ashrrev_i32 v6, v1, v3 ; encoding: [0x04,0x05,0x6c,0xca,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v1, v2 :: v_dual_ashrrev_i32 v6, v255, v3 +// GFX12: v_dual_cndmask_b32 v255, v1, v2 :: v_dual_ashrrev_i32 v6, v255, v3 ; encoding: [0x01,0x05,0x6c,0xca,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v255, v2 :: v_dual_ashrrev_i32 v6, v2, v3 +// GFX12: v_dual_cndmask_b32 v255, v255, v2 :: v_dual_ashrrev_i32 v6, v2, v3 ; encoding: [0xff,0x05,0x6c,0xca,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: 
error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v2, v2 :: v_dual_ashrrev_i32 v6, v3, v3 +// GFX12: v_dual_cndmask_b32 v255, v2, v2 :: v_dual_ashrrev_i32 v6, v3, v3 ; encoding: [0x02,0x05,0x6c,0xca,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v3, v2 :: v_dual_ashrrev_i32 v6, v4, v3 +// GFX12: v_dual_cndmask_b32 v255, v3, v2 :: v_dual_ashrrev_i32 v6, v4, v3 ; encoding: [0x03,0x05,0x6c,0xca,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s105, v2 :: v_dual_ashrrev_i32 v6, s105, v3 +// GFX12: v_dual_cndmask_b32 v255, s105, v2 :: v_dual_ashrrev_i32 v6, s105, v3 ; encoding: [0x69,0x04,0x6c,0xca,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s1, v2 :: v_dual_ashrrev_i32 v6, s1, v3 +// GFX12: v_dual_cndmask_b32 v255, s1, v2 :: v_dual_ashrrev_i32 v6, s1, v3 ; encoding: [0x01,0x04,0x6c,0xca,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v6, ttmp15, v3 +// GFX12: v_dual_cndmask_b32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x6c,0xca,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v6, exec_hi, v3 +// GFX12: v_dual_cndmask_b32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x6c,0xca,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v6, exec_lo, v3 +// GFX12: v_dual_cndmask_b32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x6c,0xca,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 
v255, m0, v2 :: v_dual_ashrrev_i32 v6, m0, v3 +// GFX12: v_dual_cndmask_b32 v255, m0, v2 :: v_dual_ashrrev_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0x6c,0xca,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v6, vcc_hi, v3 +// GFX12: v_dual_cndmask_b32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x6c,0xca,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 v6, vcc_lo, v3 +// GFX12: v_dual_cndmask_b32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x6c,0xca,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_ashrrev_i32 v6, null, v3 +// GFX12: v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_ashrrev_i32 v6, null, v3 ; encoding: [0xff,0x04,0x6c,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v6, -1, v3 +// GFX12: v_dual_cndmask_b32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0x6c,0xca,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v6, 0.5, v2 +// GFX12: v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x6c,0xca,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, -1, v4 :: v_dual_ashrrev_i32 v6, src_scc, v5 +// GFX12: v_dual_cndmask_b32 v255, -1, v4 :: v_dual_ashrrev_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x6c,0xca,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v6, null, v5 :: v_dual_ashrrev_i32 
v255, 0xaf123456, v4 +// GFX12: v_dual_cndmask_b32 v6, null, v5 :: v_dual_ashrrev_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x6c,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_ashrrev_i32 v6, v1, v3 +// GFX12: v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_ashrrev_i32 v6, v1, v3 ; encoding: [0x04,0x05,0x6c,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: v_dual_ashrrev_i32 v6, v255, v3 +// GFX12: v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: v_dual_ashrrev_i32 v6, v255, v3 ; encoding: [0x01,0x05,0x6c,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_ashrrev_i32 v6, v2, v3 +// GFX12: v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_ashrrev_i32 v6, v2, v3 ; encoding: [0xff,0x05,0x6c,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_ashrrev_i32 v6, v3, v3 +// GFX12: v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_ashrrev_i32 v6, v3, v3 ; encoding: [0x02,0x05,0x6c,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_ashrrev_i32 v6, v4, v3 +// GFX12: v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_ashrrev_i32 v6, v4, v3 ; encoding: [0x03,0x05,0x6c,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, s105, v2, 0xaf123456 :: v_dual_ashrrev_i32 v6, s105, v3 +// GFX12: v_dual_fmaak_f32 v255, s105, v2, 0xaf123456 :: v_dual_ashrrev_i32 v6, s105, v3 ; encoding: 
[0x69,0x04,0x6c,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, s1, v2, 0xaf123456 :: v_dual_ashrrev_i32 v6, s1, v3 +// GFX12: v_dual_fmaak_f32 v255, s1, v2, 0xaf123456 :: v_dual_ashrrev_i32 v6, s1, v3 ; encoding: [0x01,0x04,0x6c,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, ttmp15, v2, 0xaf123456 :: v_dual_ashrrev_i32 v6, ttmp15, v3 +// GFX12: v_dual_fmaak_f32 v255, ttmp15, v2, 0xaf123456 :: v_dual_ashrrev_i32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x6c,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, exec_hi, v2, 0xaf123456 :: v_dual_ashrrev_i32 v6, exec_hi, v3 +// GFX12: v_dual_fmaak_f32 v255, exec_hi, v2, 0xaf123456 :: v_dual_ashrrev_i32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x6c,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, exec_lo, v2, 0xaf123456 :: v_dual_ashrrev_i32 v6, exec_lo, v3 +// GFX12: v_dual_fmaak_f32 v255, exec_lo, v2, 0xaf123456 :: v_dual_ashrrev_i32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x6c,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, m0, v2, 0xaf123456 :: v_dual_ashrrev_i32 v6, m0, v3 +// GFX12: v_dual_fmaak_f32 v255, m0, v2, 0xaf123456 :: v_dual_ashrrev_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0x6c,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, vcc_hi, v2, 0xaf123456 :: v_dual_ashrrev_i32 v6, vcc_hi, v3 +// GFX12: v_dual_fmaak_f32 v255, vcc_hi, v2, 0xaf123456 :: v_dual_ashrrev_i32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x6c,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, vcc_lo, v2, 0xaf123456 :: v_dual_ashrrev_i32 v6, vcc_lo, v3 +// GFX12: v_dual_fmaak_f32 v255, vcc_lo, v2, 0xaf123456 :: v_dual_ashrrev_i32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x6c,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_ashrrev_i32 v6, null, v3 +// GFX12: v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_ashrrev_i32 v6, null, v3 ; encoding: [0xff,0x04,0x6c,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, src_scc, v2, 0xaf123456 :: v_dual_ashrrev_i32 v6, -1, v3 +// GFX12: v_dual_fmaak_f32 v255, src_scc, v2, 0xaf123456 :: v_dual_ashrrev_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0x6c,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_ashrrev_i32 v6, 0.5, v2 +// GFX12: v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_ashrrev_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x6c,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_ashrrev_i32 v6, src_scc, v5 +// GFX12: v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_ashrrev_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x6c,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_ashrrev_i32 v255, 0xaf123456, v4 +// GFX12: v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_ashrrev_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x6c,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 
v255, v4, v2 :: v_dual_ashrrev_i32 v6, v1, v3 +// GFX12: v_dual_fmac_f32 v255, v4, v2 :: v_dual_ashrrev_i32 v6, v1, v3 ; encoding: [0x04,0x05,0x2c,0xc8,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v1, v2 :: v_dual_ashrrev_i32 v6, v255, v3 +// GFX12: v_dual_fmac_f32 v255, v1, v2 :: v_dual_ashrrev_i32 v6, v255, v3 ; encoding: [0x01,0x05,0x2c,0xc8,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v255, v2 :: v_dual_ashrrev_i32 v6, v2, v3 +// GFX12: v_dual_fmac_f32 v255, v255, v2 :: v_dual_ashrrev_i32 v6, v2, v3 ; encoding: [0xff,0x05,0x2c,0xc8,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v2, v2 :: v_dual_ashrrev_i32 v6, v3, v3 +// GFX12: v_dual_fmac_f32 v255, v2, v2 :: v_dual_ashrrev_i32 v6, v3, v3 ; encoding: [0x02,0x05,0x2c,0xc8,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v3, v2 :: v_dual_ashrrev_i32 v6, v4, v3 +// GFX12: v_dual_fmac_f32 v255, v3, v2 :: v_dual_ashrrev_i32 v6, v4, v3 ; encoding: [0x03,0x05,0x2c,0xc8,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s105, v2 :: v_dual_ashrrev_i32 v6, s1, v3 +// GFX12: v_dual_fmac_f32 v255, s105, v2 :: v_dual_ashrrev_i32 v6, s1, v3 ; encoding: [0x69,0x04,0x2c,0xc8,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s1, v2 :: v_dual_ashrrev_i32 v6, s105, v3 +// GFX12: v_dual_fmac_f32 v255, s1, v2 :: v_dual_ashrrev_i32 v6, s105, v3 ; encoding: [0x01,0x04,0x2c,0xc8,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v6, vcc_lo, v3 +// GFX12: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v6, vcc_lo, v3 ; encoding: 
[0x7b,0x04,0x2c,0xc8,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v6, vcc_hi, v3 +// GFX12: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x2c,0xc8,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v6, ttmp15, v3 +// GFX12: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x2c,0xc8,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, m0, v2 :: v_dual_ashrrev_i32 v6, m0, v3 +// GFX12: v_dual_fmac_f32 v255, m0, v2 :: v_dual_ashrrev_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0x2c,0xc8,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v6, exec_lo, v3 +// GFX12: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x2c,0xc8,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 v6, exec_hi, v3 +// GFX12: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x2c,0xc8,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_ashrrev_i32 v6, null, v3 +// GFX12: v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_ashrrev_i32 v6, null, v3 ; encoding: [0xff,0x04,0x2c,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v6, -1, v3 +// GFX12: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0x2c,0xc8,0xc1,0x06,0x06,0xff] +// 
W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v6, 0.5, v2 +// GFX12: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x2c,0xc8,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, -1, v4 :: v_dual_ashrrev_i32 v6, src_scc, v5 +// GFX12: v_dual_fmac_f32 v255, -1, v4 :: v_dual_ashrrev_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x2c,0xc8,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v6, null, v5 :: v_dual_ashrrev_i32 v255, 0xaf123456, v4 +// GFX12: v_dual_fmac_f32 v6, null, v5 :: v_dual_ashrrev_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x2c,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, v1, v255 +// GFX12: v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, v1, v255 ; encoding: [0x04,0xff,0xad,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, v255, v255 +// GFX12: v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, v255, v255 ; encoding: [0x01,0xff,0xad,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, v2, v255 +// GFX12: v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, v2, v255 ; encoding: [0xff,0xff,0xad,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, v3, v255 +// GFX12: v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: 
v_dual_ashrrev_i32 v6, v3, v255 ; encoding: [0x02,0xff,0xad,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, v4, v255 +// GFX12: v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, v4, v255 ; encoding: [0x03,0xff,0xad,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, s105, v255 +// GFX12: v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, s105, v255 ; encoding: [0x69,0xfe,0xad,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, s1, v255 +// GFX12: v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, s1, v255 ; encoding: [0x01,0xfe,0xad,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, ttmp15, v255 +// GFX12: v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, ttmp15, v255 ; encoding: [0x7b,0xfe,0xad,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, exec_hi, v255 +// GFX12: v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, exec_hi, v255 ; encoding: [0x7f,0xfe,0xad,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, exec_lo, v255 +// GFX12: v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, exec_lo, v255 ; encoding: 
[0x7e,0xfe,0xad,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, m0, v255 +// GFX12: v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, m0, v255 ; encoding: [0x7d,0xfe,0xad,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, vcc_hi, v255 +// GFX12: v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, vcc_hi, v255 ; encoding: [0x6b,0xfe,0xad,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, vcc_lo, v255 +// GFX12: v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, vcc_lo, v255 ; encoding: [0x6a,0xfe,0xad,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, null, v255 +// GFX12: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, null, v255 ; encoding: [0xff,0xfe,0xad,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, -1, v255 +// GFX12: v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, -1, v255 ; encoding: [0xfd,0xfe,0xad,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, 0.5, v3 +// GFX12: v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, 0.5, v3 ; encoding: 
[0xf0,0xfe,0xad,0xc8,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, src_scc, v4 +// GFX12: v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, src_scc, v4 ; encoding: [0xc1,0xfe,0xad,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_ashrrev_i32 v255, 0xaf123456, v5 +// GFX12: v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_ashrrev_i32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x08,0xac,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v4, v2 :: v_dual_ashrrev_i32 v6, v1, v3 +// GFX12: v_dual_max_num_f32 v255, v4, v2 :: v_dual_ashrrev_i32 v6, v1, v3 ; encoding: [0x04,0x05,0xac,0xca,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v1, v2 :: v_dual_ashrrev_i32 v6, v255, v3 +// GFX12: v_dual_max_num_f32 v255, v1, v2 :: v_dual_ashrrev_i32 v6, v255, v3 ; encoding: [0x01,0x05,0xac,0xca,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v255, v2 :: v_dual_ashrrev_i32 v6, v2, v3 +// GFX12: v_dual_max_num_f32 v255, v255, v2 :: v_dual_ashrrev_i32 v6, v2, v3 ; encoding: [0xff,0x05,0xac,0xca,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v2, v2 :: v_dual_ashrrev_i32 v6, v3, v3 +// GFX12: v_dual_max_num_f32 v255, v2, v2 :: v_dual_ashrrev_i32 v6, v3, v3 ; encoding: [0x02,0x05,0xac,0xca,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v3, v2 :: v_dual_ashrrev_i32 v6, v4, v3 +// GFX12: v_dual_max_num_f32 v255, v3, v2 :: v_dual_ashrrev_i32 v6, v4, v3 
; encoding: [0x03,0x05,0xac,0xca,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s105, v2 :: v_dual_ashrrev_i32 v6, s1, v3 +// GFX12: v_dual_max_num_f32 v255, s105, v2 :: v_dual_ashrrev_i32 v6, s1, v3 ; encoding: [0x69,0x04,0xac,0xca,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s1, v2 :: v_dual_ashrrev_i32 v6, s105, v3 +// GFX12: v_dual_max_num_f32 v255, s1, v2 :: v_dual_ashrrev_i32 v6, s105, v3 ; encoding: [0x01,0x04,0xac,0xca,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v6, vcc_lo, v3 +// GFX12: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xac,0xca,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v6, vcc_hi, v3 +// GFX12: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xac,0xca,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v6, ttmp15, v3 +// GFX12: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xac,0xca,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, m0, v2 :: v_dual_ashrrev_i32 v6, m0, v3 +// GFX12: v_dual_max_num_f32 v255, m0, v2 :: v_dual_ashrrev_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0xac,0xca,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v6, exec_lo, v3 +// GFX12: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v6, exec_lo, v3 ; encoding: 
[0x6b,0x04,0xac,0xca,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 v6, exec_hi, v3 +// GFX12: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xac,0xca,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0xaf123456, v2 :: v_dual_ashrrev_i32 v6, null, v3 +// GFX12: v_dual_max_num_f32 v255, 0xaf123456, v2 :: v_dual_ashrrev_i32 v6, null, v3 ; encoding: [0xff,0x04,0xac,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v6, -1, v3 +// GFX12: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0xac,0xca,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v6, 0.5, v2 +// GFX12: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xac,0xca,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, -1, v4 :: v_dual_ashrrev_i32 v6, src_scc, v5 +// GFX12: v_dual_max_num_f32 v255, -1, v4 :: v_dual_ashrrev_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xac,0xca,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v6, null, v5 :: v_dual_ashrrev_i32 v255, 0xaf123456, v4 +// GFX12: v_dual_max_num_f32 v6, null, v5 :: v_dual_ashrrev_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xac,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v4, v2 :: v_dual_ashrrev_i32 v6, v1, v3 +// GFX12: v_dual_min_num_f32 v255, v4, v2 :: v_dual_ashrrev_i32 v6, v1, v3 ; encoding: 
[0x04,0x05,0xec,0xca,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v1, v2 :: v_dual_ashrrev_i32 v6, v255, v3 +// GFX12: v_dual_min_num_f32 v255, v1, v2 :: v_dual_ashrrev_i32 v6, v255, v3 ; encoding: [0x01,0x05,0xec,0xca,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v255, v2 :: v_dual_ashrrev_i32 v6, v2, v3 +// GFX12: v_dual_min_num_f32 v255, v255, v2 :: v_dual_ashrrev_i32 v6, v2, v3 ; encoding: [0xff,0x05,0xec,0xca,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v2, v2 :: v_dual_ashrrev_i32 v6, v3, v3 +// GFX12: v_dual_min_num_f32 v255, v2, v2 :: v_dual_ashrrev_i32 v6, v3, v3 ; encoding: [0x02,0x05,0xec,0xca,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v3, v2 :: v_dual_ashrrev_i32 v6, v4, v3 +// GFX12: v_dual_min_num_f32 v255, v3, v2 :: v_dual_ashrrev_i32 v6, v4, v3 ; encoding: [0x03,0x05,0xec,0xca,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s105, v2 :: v_dual_ashrrev_i32 v6, s1, v3 +// GFX12: v_dual_min_num_f32 v255, s105, v2 :: v_dual_ashrrev_i32 v6, s1, v3 ; encoding: [0x69,0x04,0xec,0xca,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s1, v2 :: v_dual_ashrrev_i32 v6, s105, v3 +// GFX12: v_dual_min_num_f32 v255, s1, v2 :: v_dual_ashrrev_i32 v6, s105, v3 ; encoding: [0x01,0x04,0xec,0xca,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v6, vcc_lo, v3 +// GFX12: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xec,0xca,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v6, vcc_hi, v3 +// GFX12: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xec,0xca,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v6, ttmp15, v3 +// GFX12: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xec,0xca,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, m0, v2 :: v_dual_ashrrev_i32 v6, m0, v3 +// GFX12: v_dual_min_num_f32 v255, m0, v2 :: v_dual_ashrrev_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0xec,0xca,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v6, exec_lo, v3 +// GFX12: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xec,0xca,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 v6, exec_hi, v3 +// GFX12: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xec,0xca,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_ashrrev_i32 v6, null, v3 +// GFX12: v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_ashrrev_i32 v6, null, v3 ; encoding: [0xff,0x04,0xec,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v6, -1, v3 +// GFX12: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0xec,0xca,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v6, 0.5, v2 +// GFX12: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xec,0xca,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, -1, v4 :: v_dual_ashrrev_i32 v6, src_scc, v5 +// GFX12: v_dual_min_num_f32 v255, -1, v4 :: v_dual_ashrrev_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xec,0xca,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v6, null, v5 :: v_dual_ashrrev_i32 v255, 0xaf123456, v4 +// GFX12: v_dual_min_num_f32 v6, null, v5 :: v_dual_ashrrev_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xec,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v4 :: v_dual_ashrrev_i32 v6, v1, v255 +// GFX12: v_dual_mov_b32 v255, v4 :: v_dual_ashrrev_i32 v6, v1, v255 ; encoding: [0x04,0x01,0x2c,0xca,0x01,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v1 :: v_dual_ashrrev_i32 v6, v255, v255 +// GFX12: v_dual_mov_b32 v255, v1 :: v_dual_ashrrev_i32 v6, v255, v255 ; encoding: [0x01,0x01,0x2c,0xca,0xff,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v255 :: v_dual_ashrrev_i32 v6, v2, v255 +// GFX12: v_dual_mov_b32 v255, v255 :: v_dual_ashrrev_i32 v6, v2, v255 ; encoding: [0xff,0x01,0x2c,0xca,0x02,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v2 :: v_dual_ashrrev_i32 v6, v3, v255 +// GFX12: v_dual_mov_b32 v255, v2 :: v_dual_ashrrev_i32 v6, v3, v255 ; encoding: [0x02,0x01,0x2c,0xca,0x03,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v3 :: v_dual_ashrrev_i32 v6, v4, v255 +// GFX12: v_dual_mov_b32 v255, v3 :: 
v_dual_ashrrev_i32 v6, v4, v255 ; encoding: [0x03,0x01,0x2c,0xca,0x04,0xff,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s105 :: v_dual_ashrrev_i32 v6, s1, v255 +// GFX12: v_dual_mov_b32 v255, s105 :: v_dual_ashrrev_i32 v6, s1, v255 ; encoding: [0x69,0x00,0x2c,0xca,0x01,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s1 :: v_dual_ashrrev_i32 v6, s105, v255 +// GFX12: v_dual_mov_b32 v255, s1 :: v_dual_ashrrev_i32 v6, s105, v255 ; encoding: [0x01,0x00,0x2c,0xca,0x69,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, ttmp15 :: v_dual_ashrrev_i32 v6, vcc_lo, v255 +// GFX12: v_dual_mov_b32 v255, ttmp15 :: v_dual_ashrrev_i32 v6, vcc_lo, v255 ; encoding: [0x7b,0x00,0x2c,0xca,0x6a,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_hi :: v_dual_ashrrev_i32 v6, vcc_hi, v255 +// GFX12: v_dual_mov_b32 v255, exec_hi :: v_dual_ashrrev_i32 v6, vcc_hi, v255 ; encoding: [0x7f,0x00,0x2c,0xca,0x6b,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_lo :: v_dual_ashrrev_i32 v6, ttmp15, v255 +// GFX12: v_dual_mov_b32 v255, exec_lo :: v_dual_ashrrev_i32 v6, ttmp15, v255 ; encoding: [0x7e,0x00,0x2c,0xca,0x7b,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, m0 :: v_dual_ashrrev_i32 v6, m0, v255 +// GFX12: v_dual_mov_b32 v255, m0 :: v_dual_ashrrev_i32 v6, m0, v255 ; encoding: [0x7d,0x00,0x2c,0xca,0x7d,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_hi :: v_dual_ashrrev_i32 v6, exec_lo, v255 +// GFX12: v_dual_mov_b32 v255, vcc_hi :: v_dual_ashrrev_i32 v6, exec_lo, v255 ; encoding: [0x6b,0x00,0x2c,0xca,0x7e,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction 
requires wavesize=32 + +v_dual_mov_b32 v255, vcc_lo :: v_dual_ashrrev_i32 v6, exec_hi, v255 +// GFX12: v_dual_mov_b32 v255, vcc_lo :: v_dual_ashrrev_i32 v6, exec_hi, v255 ; encoding: [0x6a,0x00,0x2c,0xca,0x7f,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0xaf123456 :: v_dual_ashrrev_i32 v6, null, v255 +// GFX12: v_dual_mov_b32 v255, 0xaf123456 :: v_dual_ashrrev_i32 v6, null, v255 ; encoding: [0xff,0x00,0x2c,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, src_scc :: v_dual_ashrrev_i32 v6, -1, v255 +// GFX12: v_dual_mov_b32 v255, src_scc :: v_dual_ashrrev_i32 v6, -1, v255 ; encoding: [0xfd,0x00,0x2c,0xca,0xc1,0xfe,0x07,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0.5 :: v_dual_ashrrev_i32 v6, 0.5, v3 +// GFX12: v_dual_mov_b32 v255, 0.5 :: v_dual_ashrrev_i32 v6, 0.5, v3 ; encoding: [0xf0,0x00,0x2c,0xca,0xf0,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, -1 :: v_dual_ashrrev_i32 v6, src_scc, v4 +// GFX12: v_dual_mov_b32 v255, -1 :: v_dual_ashrrev_i32 v6, src_scc, v4 ; encoding: [0xc1,0x00,0x2c,0xca,0xfd,0x08,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v6, null :: v_dual_ashrrev_i32 v255, 0xaf123456, v5 +// GFX12: v_dual_mov_b32 v6, null :: v_dual_ashrrev_i32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x00,0x2c,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_ashrrev_i32 v6, v1, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_ashrrev_i32 v6, v1, v3 ; encoding: [0x04,0x05,0xec,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_ashrrev_i32 
v6, v255, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_ashrrev_i32 v6, v255, v3 ; encoding: [0x01,0x05,0xec,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_ashrrev_i32 v6, v2, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_ashrrev_i32 v6, v2, v3 ; encoding: [0xff,0x05,0xec,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_ashrrev_i32 v6, v3, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_ashrrev_i32 v6, v3, v3 ; encoding: [0x02,0x05,0xec,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_ashrrev_i32 v6, v4, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_ashrrev_i32 v6, v4, v3 ; encoding: [0x03,0x05,0xec,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_ashrrev_i32 v6, s1, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_ashrrev_i32 v6, s1, v3 ; encoding: [0x69,0x04,0xec,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_ashrrev_i32 v6, s105, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_ashrrev_i32 v6, s105, v3 ; encoding: [0x01,0x04,0xec,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v6, vcc_lo, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xec,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v6, vcc_hi, v3 +// 
GFX12: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xec,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v6, ttmp15, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xec,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_ashrrev_i32 v6, m0, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_ashrrev_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0xec,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v6, exec_lo, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xec,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 v6, exec_hi, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xec,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_ashrrev_i32 v6, null, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_ashrrev_i32 v6, null, v3 ; encoding: [0xff,0x04,0xec,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v6, -1, v3 +// GFX12: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0xec,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v6, 0.5, v2 +// GFX12: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xec,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_ashrrev_i32 v6, src_scc, v5 +// GFX12: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_ashrrev_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xec,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_ashrrev_i32 v255, 0xaf123456, v4 +// GFX12: v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_ashrrev_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xec,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v4, v2 :: v_dual_ashrrev_i32 v6, v1, v3 +// GFX12: v_dual_mul_f32 v255, v4, v2 :: v_dual_ashrrev_i32 v6, v1, v3 ; encoding: [0x04,0x05,0xec,0xc8,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v1, v2 :: v_dual_ashrrev_i32 v6, v255, v3 +// GFX12: v_dual_mul_f32 v255, v1, v2 :: v_dual_ashrrev_i32 v6, v255, v3 ; encoding: [0x01,0x05,0xec,0xc8,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v255, v2 :: v_dual_ashrrev_i32 v6, v2, v3 +// GFX12: v_dual_mul_f32 v255, v255, v2 :: v_dual_ashrrev_i32 v6, v2, v3 ; encoding: [0xff,0x05,0xec,0xc8,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v2, v2 :: v_dual_ashrrev_i32 v6, v3, v3 +// GFX12: v_dual_mul_f32 v255, v2, v2 :: v_dual_ashrrev_i32 v6, v3, v3 ; encoding: [0x02,0x05,0xec,0xc8,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v3, v2 :: v_dual_ashrrev_i32 v6, v4, v3 +// 
GFX12: v_dual_mul_f32 v255, v3, v2 :: v_dual_ashrrev_i32 v6, v4, v3 ; encoding: [0x03,0x05,0xec,0xc8,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s105, v2 :: v_dual_ashrrev_i32 v6, s1, v3 +// GFX12: v_dual_mul_f32 v255, s105, v2 :: v_dual_ashrrev_i32 v6, s1, v3 ; encoding: [0x69,0x04,0xec,0xc8,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s1, v2 :: v_dual_ashrrev_i32 v6, s105, v3 +// GFX12: v_dual_mul_f32 v255, s1, v2 :: v_dual_ashrrev_i32 v6, s105, v3 ; encoding: [0x01,0x04,0xec,0xc8,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v6, vcc_lo, v3 +// GFX12: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xec,0xc8,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v6, vcc_hi, v3 +// GFX12: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xec,0xc8,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v6, ttmp15, v3 +// GFX12: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xec,0xc8,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, m0, v2 :: v_dual_ashrrev_i32 v6, m0, v3 +// GFX12: v_dual_mul_f32 v255, m0, v2 :: v_dual_ashrrev_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0xec,0xc8,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v6, exec_lo, v3 +// GFX12: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v6, exec_lo, v3 ; encoding: 
[0x6b,0x04,0xec,0xc8,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 v6, exec_hi, v3 +// GFX12: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xec,0xc8,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_ashrrev_i32 v6, null, v3 +// GFX12: v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_ashrrev_i32 v6, null, v3 ; encoding: [0xff,0x04,0xec,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v6, -1, v3 +// GFX12: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0xec,0xc8,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v6, 0.5, v2 +// GFX12: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xec,0xc8,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, -1, v4 :: v_dual_ashrrev_i32 v6, src_scc, v5 +// GFX12: v_dual_mul_f32 v255, -1, v4 :: v_dual_ashrrev_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xec,0xc8,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v6, null, v5 :: v_dual_ashrrev_i32 v255, 0xaf123456, v4 +// GFX12: v_dual_mul_f32 v6, null, v5 :: v_dual_ashrrev_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xec,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v4, v2 :: v_dual_ashrrev_i32 v6, v1, v3 +// GFX12: v_dual_sub_f32 v255, v4, v2 :: v_dual_ashrrev_i32 v6, v1, v3 ; encoding: [0x04,0x05,0x6c,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v1, v2 :: v_dual_ashrrev_i32 v6, v255, v3 +// GFX12: v_dual_sub_f32 v255, v1, v2 :: v_dual_ashrrev_i32 v6, v255, v3 ; encoding: [0x01,0x05,0x6c,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v255, v2 :: v_dual_ashrrev_i32 v6, v2, v3 +// GFX12: v_dual_sub_f32 v255, v255, v2 :: v_dual_ashrrev_i32 v6, v2, v3 ; encoding: [0xff,0x05,0x6c,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v2, v2 :: v_dual_ashrrev_i32 v6, v3, v3 +// GFX12: v_dual_sub_f32 v255, v2, v2 :: v_dual_ashrrev_i32 v6, v3, v3 ; encoding: [0x02,0x05,0x6c,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v3, v2 :: v_dual_ashrrev_i32 v6, v4, v3 +// GFX12: v_dual_sub_f32 v255, v3, v2 :: v_dual_ashrrev_i32 v6, v4, v3 ; encoding: [0x03,0x05,0x6c,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s105, v2 :: v_dual_ashrrev_i32 v6, s1, v3 +// GFX12: v_dual_sub_f32 v255, s105, v2 :: v_dual_ashrrev_i32 v6, s1, v3 ; encoding: [0x69,0x04,0x6c,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s1, v2 :: v_dual_ashrrev_i32 v6, s105, v3 +// GFX12: v_dual_sub_f32 v255, s1, v2 :: v_dual_ashrrev_i32 v6, s105, v3 ; encoding: [0x01,0x04,0x6c,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v6, vcc_lo, v3 +// GFX12: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x6c,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v6, vcc_hi, v3 +// GFX12: v_dual_sub_f32 
v255, exec_hi, v2 :: v_dual_ashrrev_i32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x6c,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v6, ttmp15, v3 +// GFX12: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x6c,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, m0, v2 :: v_dual_ashrrev_i32 v6, m0, v3 +// GFX12: v_dual_sub_f32 v255, m0, v2 :: v_dual_ashrrev_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0x6c,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v6, exec_lo, v3 +// GFX12: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x6c,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 v6, exec_hi, v3 +// GFX12: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x6c,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0xaf123456, v2 :: v_dual_ashrrev_i32 v6, null, v3 +// GFX12: v_dual_sub_f32 v255, 0xaf123456, v2 :: v_dual_ashrrev_i32 v6, null, v3 ; encoding: [0xff,0x04,0x6c,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v6, -1, v3 +// GFX12: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0x6c,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v6, 0.5, v2 +// GFX12: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v6, 0.5, v2 ; encoding: 
[0xf0,0x06,0x6c,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, -1, v4 :: v_dual_ashrrev_i32 v6, src_scc, v5 +// GFX12: v_dual_sub_f32 v255, -1, v4 :: v_dual_ashrrev_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x6c,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v6, null, v5 :: v_dual_ashrrev_i32 v255, 0xaf123456, v4 +// GFX12: v_dual_sub_f32 v6, null, v5 :: v_dual_ashrrev_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x6c,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v4, v2 :: v_dual_ashrrev_i32 v6, v1, v3 +// GFX12: v_dual_subrev_f32 v255, v4, v2 :: v_dual_ashrrev_i32 v6, v1, v3 ; encoding: [0x04,0x05,0xac,0xc9,0x01,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v1, v2 :: v_dual_ashrrev_i32 v6, v255, v3 +// GFX12: v_dual_subrev_f32 v255, v1, v2 :: v_dual_ashrrev_i32 v6, v255, v3 ; encoding: [0x01,0x05,0xac,0xc9,0xff,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v255, v2 :: v_dual_ashrrev_i32 v6, v2, v3 +// GFX12: v_dual_subrev_f32 v255, v255, v2 :: v_dual_ashrrev_i32 v6, v2, v3 ; encoding: [0xff,0x05,0xac,0xc9,0x02,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v2, v2 :: v_dual_ashrrev_i32 v6, v3, v3 +// GFX12: v_dual_subrev_f32 v255, v2, v2 :: v_dual_ashrrev_i32 v6, v3, v3 ; encoding: [0x02,0x05,0xac,0xc9,0x03,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v3, v2 :: v_dual_ashrrev_i32 v6, v4, v3 +// GFX12: v_dual_subrev_f32 v255, v3, v2 :: v_dual_ashrrev_i32 v6, v4, v3 ; encoding: [0x03,0x05,0xac,0xc9,0x04,0x07,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_subrev_f32 v255, s105, v2 :: v_dual_ashrrev_i32 v6, s1, v3 +// GFX12: v_dual_subrev_f32 v255, s105, v2 :: v_dual_ashrrev_i32 v6, s1, v3 ; encoding: [0x69,0x04,0xac,0xc9,0x01,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s1, v2 :: v_dual_ashrrev_i32 v6, s105, v3 +// GFX12: v_dual_subrev_f32 v255, s1, v2 :: v_dual_ashrrev_i32 v6, s105, v3 ; encoding: [0x01,0x04,0xac,0xc9,0x69,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v6, vcc_lo, v3 +// GFX12: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xac,0xc9,0x6a,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v6, vcc_hi, v3 +// GFX12: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xac,0xc9,0x6b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v6, ttmp15, v3 +// GFX12: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xac,0xc9,0x7b,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, m0, v2 :: v_dual_ashrrev_i32 v6, m0, v3 +// GFX12: v_dual_subrev_f32 v255, m0, v2 :: v_dual_ashrrev_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0xac,0xc9,0x7d,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v6, exec_lo, v3 +// GFX12: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xac,0xc9,0x7e,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_lo, v2 :: 
v_dual_ashrrev_i32 v6, exec_hi, v3 +// GFX12: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xac,0xc9,0x7f,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0xaf123456, v2 :: v_dual_ashrrev_i32 v6, null, v3 +// GFX12: v_dual_subrev_f32 v255, 0xaf123456, v2 :: v_dual_ashrrev_i32 v6, null, v3 ; encoding: [0xff,0x04,0xac,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v6, -1, v3 +// GFX12: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0xac,0xc9,0xc1,0x06,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v6, 0.5, v2 +// GFX12: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xac,0xc9,0xf0,0x04,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, -1, v4 :: v_dual_ashrrev_i32 v6, src_scc, v5 +// GFX12: v_dual_subrev_f32 v255, -1, v4 :: v_dual_ashrrev_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xac,0xc9,0xfd,0x0a,0x06,0xff] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v6, null, v5 :: v_dual_ashrrev_i32 v255, 0xaf123456, v4 +// GFX12: v_dual_subrev_f32 v6, null, v5 :: v_dual_ashrrev_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xac,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vopd3.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vopd3.s new file mode 100644 index 0000000000000..1b7699a579c9b --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vopd3.s @@ -0,0 +1,19064 @@ +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: 
--version 5 +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck --check-prefixes=GFX1250 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck --check-prefixes=GFX1250 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefixes=W64-ERR --implicit-check-not=error: %s + +v_dual_add_f32 v255, v4, v2 :: v_dual_add_f32 v7, v1, v3 +// GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_add_f32 v7, v1, v3 ; encoding: [0x04,0x41,0x10,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v1, v2 :: v_dual_add_f32 v7, v255, v3 +// GFX1250: v_dual_add_f32 v255, v1, v2 :: v_dual_add_f32 v7, v255, v3 ; encoding: [0x01,0x41,0x10,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v255, v2 :: v_dual_add_f32 v7, v2, v3 +// GFX1250: v_dual_add_f32 v255, v255, v2 :: v_dual_add_f32 v7, v2, v3 ; encoding: [0xff,0x41,0x10,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v2, v2 :: v_dual_add_f32 v7, v3, v3 +// GFX1250: v_dual_add_f32 v255, v2, v2 :: v_dual_add_f32 v7, v3, v3 ; encoding: [0x02,0x41,0x10,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v3, v2 :: v_dual_add_f32 v7, v4, v3 +// GFX1250: v_dual_add_f32 v255, v3, v2 :: v_dual_add_f32 v7, v4, v3 ; encoding: [0x03,0x41,0x10,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s105, v2 :: v_dual_add_f32 v7, s1, v3 +// GFX1250: v_dual_add_f32 v255, s105, v2 :: v_dual_add_f32 v7, s1, v3 ; encoding: [0x69,0x40,0x10,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s1, v2 :: v_dual_add_f32 v7, s105, v3 +// GFX1250: v_dual_add_f32 v255, s1, v2 :: v_dual_add_f32 v7, s105, v3 ; encoding: [0x01,0x40,0x10,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, ttmp15, v2 :: v_dual_add_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_add_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x40,0x10,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_hi, v2 :: v_dual_add_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_add_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x40,0x10,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_lo, v2 :: v_dual_add_f32 v7, ttmp15, v3 +// GFX1250: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_add_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x40,0x10,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, m0, v2 :: v_dual_add_f32 v7, m0, v3 +// GFX1250: v_dual_add_f32 v255, m0, v2 :: v_dual_add_f32 v7, m0, v3 ; encoding: [0x7d,0x40,0x10,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_add_f32 v7, exec_lo, v3 +// GFX1250: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_add_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x40,0x10,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_add_f32 v7, exec_hi, v3 +// GFX1250: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_add_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x10,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// 
W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, src_scc, v2 :: v_dual_add_f32 v7, -1, v3 +// GFX1250: v_dual_add_f32 v255, src_scc, v2 :: v_dual_add_f32 v7, -1, v3 ; encoding: [0xfd,0x40,0x10,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0.5, v3 :: v_dual_add_f32 v7, 0.5, v2 +// GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_add_f32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x10,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, -1, v4 :: v_dual_add_f32 v7, src_scc, v5 +// GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_add_f32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x10,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v4, v2 :: v_dual_add_nc_u32 v7, v1, v3 +// GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_add_nc_u32 v7, v1, v3 ; encoding: [0x04,0x01,0x11,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v1, v2 :: v_dual_add_nc_u32 v7, v255, v3 +// GFX1250: v_dual_add_f32 v255, v1, v2 :: v_dual_add_nc_u32 v7, v255, v3 ; encoding: [0x01,0x01,0x11,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v255, v2 :: v_dual_add_nc_u32 v7, v2, v3 +// GFX1250: v_dual_add_f32 v255, v255, v2 :: v_dual_add_nc_u32 v7, v2, v3 ; encoding: [0xff,0x01,0x11,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v2, v2 :: v_dual_add_nc_u32 v7, v3, v3 +// GFX1250: v_dual_add_f32 v255, v2, v2 :: v_dual_add_nc_u32 v7, v3, v3 ; encoding: [0x02,0x01,0x11,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_add_f32 v255, v3, v2 :: v_dual_add_nc_u32 v7, v4, v3 +// GFX1250: v_dual_add_f32 v255, v3, v2 :: v_dual_add_nc_u32 v7, v4, v3 ; encoding: [0x03,0x01,0x11,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s105, v2 :: v_dual_add_nc_u32 v7, s1, v3 +// GFX1250: v_dual_add_f32 v255, s105, v2 :: v_dual_add_nc_u32 v7, s1, v3 ; encoding: [0x69,0x00,0x11,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s1, v2 :: v_dual_add_nc_u32 v7, s105, v3 +// GFX1250: v_dual_add_f32 v255, s1, v2 :: v_dual_add_nc_u32 v7, s105, v3 ; encoding: [0x01,0x00,0x11,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v7, vcc_lo, v3 +// GFX1250: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x11,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v7, vcc_hi, v3 +// GFX1250: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x11,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v7, ttmp15, v3 +// GFX1250: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x11,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, m0, v2 :: v_dual_add_nc_u32 v7, m0, v3 +// GFX1250: v_dual_add_f32 v255, m0, v2 :: v_dual_add_nc_u32 v7, m0, v3 ; encoding: [0x7d,0x00,0x11,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v7, exec_lo, v3 +// GFX1250: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x11,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v7, exec_hi, v3 +// GFX1250: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x11,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, src_scc, v2 :: v_dual_add_nc_u32 v7, -1, v3 +// GFX1250: v_dual_add_f32 v255, src_scc, v2 :: v_dual_add_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x00,0x11,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0.5, v3 :: v_dual_add_nc_u32 v7, 0.5, v2 +// GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_add_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x11,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, -1, v4 :: v_dual_add_nc_u32 v7, src_scc, v5 +// GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_add_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x11,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo +// GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo ; encoding: [0x04,0x91,0x10,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v1, v2 :: v_dual_cndmask_b32 v7, v255, v3, vcc_lo +// GFX1250: v_dual_add_f32 v255, v1, v2 :: v_dual_cndmask_b32 v7, v255, v3, vcc_lo ; encoding: 
[0x01,0x91,0x10,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v255, v2 :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo +// GFX1250: v_dual_add_f32 v255, v255, v2 :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo ; encoding: [0xff,0x91,0x10,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v2, v2 :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo +// GFX1250: v_dual_add_f32 v255, v2, v2 :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo ; encoding: [0x02,0x91,0x10,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v3, v2 :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo +// GFX1250: v_dual_add_f32 v255, v3, v2 :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo ; encoding: [0x03,0x91,0x10,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s105, v2 :: v_dual_cndmask_b32 v7, s105, v3, vcc_lo +// GFX1250: v_dual_add_f32 v255, s105, v2 :: v_dual_cndmask_b32 v7, s105, v3, vcc_lo ; encoding: [0x69,0x90,0x10,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s1, v2 :: v_dual_cndmask_b32 v7, s1, v3, vcc_lo +// GFX1250: v_dual_add_f32 v255, s1, v2 :: v_dual_cndmask_b32 v7, s1, v3, vcc_lo ; encoding: [0x01,0x90,0x10,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v7, ttmp15, v3, vcc_lo +// GFX1250: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v7, ttmp15, v3, vcc_lo ; encoding: [0x7b,0x90,0x10,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_hi, v2 :: v_dual_cndmask_b32 
v7, exec_hi, v3, vcc_lo +// GFX1250: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v7, exec_hi, v3, vcc_lo ; encoding: [0x7f,0x90,0x10,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v7, exec_lo, v3, vcc_lo +// GFX1250: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v7, exec_lo, v3, vcc_lo ; encoding: [0x7e,0x90,0x10,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, m0, v2 :: v_dual_cndmask_b32 v7, m0, v3, vcc_lo +// GFX1250: v_dual_add_f32 v255, m0, v2 :: v_dual_cndmask_b32 v7, m0, v3, vcc_lo ; encoding: [0x7d,0x90,0x10,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v7, vcc_hi, v3, vcc_lo +// GFX1250: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v7, vcc_hi, v3, vcc_lo ; encoding: [0x6b,0x90,0x10,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo +// GFX1250: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo ; encoding: [0x6a,0x90,0x10,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, src_scc, v2 :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo +// GFX1250: v_dual_add_f32 v255, src_scc, v2 :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo ; encoding: [0xfd,0x90,0x10,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0.5, v3 :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo +// GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo ; encoding: 
[0xf0,0x90,0x10,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, -1, v4 :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo +// GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo ; encoding: [0xc1,0x90,0x10,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v4, v2 :: v_dual_fmac_f32 v7, v1, v3 +// GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_fmac_f32 v7, v1, v3 ; encoding: [0x04,0x01,0x10,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v1, v2 :: v_dual_fmac_f32 v7, v255, v3 +// GFX1250: v_dual_add_f32 v255, v1, v2 :: v_dual_fmac_f32 v7, v255, v3 ; encoding: [0x01,0x01,0x10,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v255, v2 :: v_dual_fmac_f32 v7, v2, v3 +// GFX1250: v_dual_add_f32 v255, v255, v2 :: v_dual_fmac_f32 v7, v2, v3 ; encoding: [0xff,0x01,0x10,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v2, v2 :: v_dual_fmac_f32 v7, v3, v3 +// GFX1250: v_dual_add_f32 v255, v2, v2 :: v_dual_fmac_f32 v7, v3, v3 ; encoding: [0x02,0x01,0x10,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v3, v2 :: v_dual_fmac_f32 v7, v4, v3 +// GFX1250: v_dual_add_f32 v255, v3, v2 :: v_dual_fmac_f32 v7, v4, v3 ; encoding: [0x03,0x01,0x10,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s105, v2 :: v_dual_fmac_f32 v7, s1, v3 +// GFX1250: v_dual_add_f32 v255, s105, v2 :: v_dual_fmac_f32 v7, s1, v3 ; encoding: 
[0x69,0x00,0x10,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s1, v2 :: v_dual_fmac_f32 v7, s105, v3 +// GFX1250: v_dual_add_f32 v255, s1, v2 :: v_dual_fmac_f32 v7, s105, v3 ; encoding: [0x01,0x00,0x10,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, ttmp15, v2 :: v_dual_fmac_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_fmac_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x10,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_hi, v2 :: v_dual_fmac_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_fmac_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x10,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_lo, v2 :: v_dual_fmac_f32 v7, ttmp15, v3 +// GFX1250: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_fmac_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x10,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, m0, v2 :: v_dual_fmac_f32 v7, m0, v3 +// GFX1250: v_dual_add_f32 v255, m0, v2 :: v_dual_fmac_f32 v7, m0, v3 ; encoding: [0x7d,0x00,0x10,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v7, exec_lo, v3 +// GFX1250: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x10,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v7, exec_hi, v3 +// GFX1250: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v7, 
exec_hi, v3 ; encoding: [0x6a,0x00,0x10,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, src_scc, v2 :: v_dual_fmac_f32 v7, -1, v3 +// GFX1250: v_dual_add_f32 v255, src_scc, v2 :: v_dual_fmac_f32 v7, -1, v3 ; encoding: [0xfd,0x00,0x10,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0.5, v3 :: v_dual_fmac_f32 v7, 0.5, v2 +// GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_fmac_f32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x10,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, -1, v4 :: v_dual_fmac_f32 v7, src_scc, v5 +// GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_fmac_f32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x10,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v4, v2 :: v_dual_lshlrev_b32 v7, v1, v3 +// GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_lshlrev_b32 v7, v1, v3 ; encoding: [0x04,0x11,0x11,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v1, v2 :: v_dual_lshlrev_b32 v7, v255, v3 +// GFX1250: v_dual_add_f32 v255, v1, v2 :: v_dual_lshlrev_b32 v7, v255, v3 ; encoding: [0x01,0x11,0x11,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v255, v2 :: v_dual_lshlrev_b32 v7, v2, v3 +// GFX1250: v_dual_add_f32 v255, v255, v2 :: v_dual_lshlrev_b32 v7, v2, v3 ; encoding: [0xff,0x11,0x11,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v2, v2 :: v_dual_lshlrev_b32 v7, v3, v3 +// GFX1250: v_dual_add_f32 v255, v2, v2 :: v_dual_lshlrev_b32 v7, v3, v3 ; 
encoding: [0x02,0x11,0x11,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v3, v2 :: v_dual_lshlrev_b32 v7, v4, v3 +// GFX1250: v_dual_add_f32 v255, v3, v2 :: v_dual_lshlrev_b32 v7, v4, v3 ; encoding: [0x03,0x11,0x11,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s105, v2 :: v_dual_lshlrev_b32 v7, s1, v3 +// GFX1250: v_dual_add_f32 v255, s105, v2 :: v_dual_lshlrev_b32 v7, s1, v3 ; encoding: [0x69,0x10,0x11,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s1, v2 :: v_dual_lshlrev_b32 v7, s105, v3 +// GFX1250: v_dual_add_f32 v255, s1, v2 :: v_dual_lshlrev_b32 v7, s105, v3 ; encoding: [0x01,0x10,0x11,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v7, vcc_lo, v3 +// GFX1250: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v7, vcc_lo, v3 ; encoding: [0x7b,0x10,0x11,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v7, vcc_hi, v3 +// GFX1250: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v7, vcc_hi, v3 ; encoding: [0x7f,0x10,0x11,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v7, ttmp15, v3 +// GFX1250: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v7, ttmp15, v3 ; encoding: [0x7e,0x10,0x11,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, m0, v2 :: v_dual_lshlrev_b32 v7, m0, v3 +// GFX1250: v_dual_add_f32 v255, m0, v2 :: 
v_dual_lshlrev_b32 v7, m0, v3 ; encoding: [0x7d,0x10,0x11,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v7, exec_lo, v3 +// GFX1250: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v7, exec_lo, v3 ; encoding: [0x6b,0x10,0x11,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v7, exec_hi, v3 +// GFX1250: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x10,0x11,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v7, -1, v3 +// GFX1250: v_dual_add_f32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v7, -1, v3 ; encoding: [0xfd,0x10,0x11,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v7, 0.5, v2 +// GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x10,0x11,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, -1, v4 :: v_dual_lshlrev_b32 v7, src_scc, v5 +// GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_lshlrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x10,0x11,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v4, v2 :: v_dual_max_num_f32 v7, v1, v3 +// GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_max_num_f32 v7, v1, v3 ; encoding: [0x04,0xa1,0x10,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v1, v2 :: v_dual_max_num_f32 v7, v255, v3 +// GFX1250: 
v_dual_add_f32 v255, v1, v2 :: v_dual_max_num_f32 v7, v255, v3 ; encoding: [0x01,0xa1,0x10,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v255, v2 :: v_dual_max_num_f32 v7, v2, v3 +// GFX1250: v_dual_add_f32 v255, v255, v2 :: v_dual_max_num_f32 v7, v2, v3 ; encoding: [0xff,0xa1,0x10,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v2, v2 :: v_dual_max_num_f32 v7, v3, v3 +// GFX1250: v_dual_add_f32 v255, v2, v2 :: v_dual_max_num_f32 v7, v3, v3 ; encoding: [0x02,0xa1,0x10,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v3, v2 :: v_dual_max_num_f32 v7, v4, v3 +// GFX1250: v_dual_add_f32 v255, v3, v2 :: v_dual_max_num_f32 v7, v4, v3 ; encoding: [0x03,0xa1,0x10,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s105, v2 :: v_dual_max_num_f32 v7, s1, v3 +// GFX1250: v_dual_add_f32 v255, s105, v2 :: v_dual_max_num_f32 v7, s1, v3 ; encoding: [0x69,0xa0,0x10,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s1, v2 :: v_dual_max_num_f32 v7, s105, v3 +// GFX1250: v_dual_add_f32 v255, s1, v2 :: v_dual_max_num_f32 v7, s105, v3 ; encoding: [0x01,0xa0,0x10,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, ttmp15, v2 :: v_dual_max_num_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_max_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xa0,0x10,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_hi, v2 :: v_dual_max_num_f32 v7, vcc_hi, v3 +// 
GFX1250: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_max_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xa0,0x10,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_lo, v2 :: v_dual_max_num_f32 v7, ttmp15, v3 +// GFX1250: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_max_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xa0,0x10,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, m0, v2 :: v_dual_max_num_f32 v7, m0, v3 +// GFX1250: v_dual_add_f32 v255, m0, v2 :: v_dual_max_num_f32 v7, m0, v3 ; encoding: [0x7d,0xa0,0x10,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v7, exec_lo, v3 +// GFX1250: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xa0,0x10,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v7, exec_hi, v3 +// GFX1250: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xa0,0x10,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, src_scc, v2 :: v_dual_max_num_f32 v7, -1, v3 +// GFX1250: v_dual_add_f32 v255, src_scc, v2 :: v_dual_max_num_f32 v7, -1, v3 ; encoding: [0xfd,0xa0,0x10,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0.5, v3 :: v_dual_max_num_f32 v7, 0.5, v2 +// GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_max_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xa0,0x10,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 
-1, v4 :: v_dual_max_num_f32 v7, src_scc, v5 +// GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_max_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xa0,0x10,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v4, v2 :: v_dual_min_num_f32 v7, v1, v3 +// GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_min_num_f32 v7, v1, v3 ; encoding: [0x04,0xb1,0x10,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v1, v2 :: v_dual_min_num_f32 v7, v255, v3 +// GFX1250: v_dual_add_f32 v255, v1, v2 :: v_dual_min_num_f32 v7, v255, v3 ; encoding: [0x01,0xb1,0x10,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v255, v2 :: v_dual_min_num_f32 v7, v2, v3 +// GFX1250: v_dual_add_f32 v255, v255, v2 :: v_dual_min_num_f32 v7, v2, v3 ; encoding: [0xff,0xb1,0x10,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v2, v2 :: v_dual_min_num_f32 v7, v3, v3 +// GFX1250: v_dual_add_f32 v255, v2, v2 :: v_dual_min_num_f32 v7, v3, v3 ; encoding: [0x02,0xb1,0x10,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v3, v2 :: v_dual_min_num_f32 v7, v4, v3 +// GFX1250: v_dual_add_f32 v255, v3, v2 :: v_dual_min_num_f32 v7, v4, v3 ; encoding: [0x03,0xb1,0x10,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s105, v2 :: v_dual_min_num_f32 v7, s1, v3 +// GFX1250: v_dual_add_f32 v255, s105, v2 :: v_dual_min_num_f32 v7, s1, v3 ; encoding: [0x69,0xb0,0x10,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s1, v2 :: 
v_dual_min_num_f32 v7, s105, v3 +// GFX1250: v_dual_add_f32 v255, s1, v2 :: v_dual_min_num_f32 v7, s105, v3 ; encoding: [0x01,0xb0,0x10,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, ttmp15, v2 :: v_dual_min_num_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_min_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xb0,0x10,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_hi, v2 :: v_dual_min_num_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_min_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xb0,0x10,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_lo, v2 :: v_dual_min_num_f32 v7, ttmp15, v3 +// GFX1250: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_min_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xb0,0x10,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, m0, v2 :: v_dual_min_num_f32 v7, m0, v3 +// GFX1250: v_dual_add_f32 v255, m0, v2 :: v_dual_min_num_f32 v7, m0, v3 ; encoding: [0x7d,0xb0,0x10,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v7, exec_lo, v3 +// GFX1250: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xb0,0x10,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v7, exec_hi, v3 +// GFX1250: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xb0,0x10,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_add_f32 v255, src_scc, v2 :: v_dual_min_num_f32 v7, -1, v3 +// GFX1250: v_dual_add_f32 v255, src_scc, v2 :: v_dual_min_num_f32 v7, -1, v3 ; encoding: [0xfd,0xb0,0x10,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0.5, v3 :: v_dual_min_num_f32 v7, 0.5, v2 +// GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_min_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xb0,0x10,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, -1, v4 :: v_dual_min_num_f32 v7, src_scc, v5 +// GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_min_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xb0,0x10,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v4, v255 :: v_dual_mov_b32 v7, v1 +// GFX1250: v_dual_add_f32 v255, v4, v255 :: v_dual_mov_b32 v7, v1 ; encoding: [0x04,0x81,0x10,0xcf,0x01,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v1, v255 :: v_dual_mov_b32 v7, v255 +// GFX1250: v_dual_add_f32 v255, v1, v255 :: v_dual_mov_b32 v7, v255 ; encoding: [0x01,0x81,0x10,0xcf,0xff,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v255, v255 :: v_dual_mov_b32 v7, v2 +// GFX1250: v_dual_add_f32 v255, v255, v255 :: v_dual_mov_b32 v7, v2 ; encoding: [0xff,0x81,0x10,0xcf,0x02,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v2, v255 :: v_dual_mov_b32 v7, v3 +// GFX1250: v_dual_add_f32 v255, v2, v255 :: v_dual_mov_b32 v7, v3 ; encoding: [0x02,0x81,0x10,0xcf,0x03,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_add_f32 v255, v3, v255 :: v_dual_mov_b32 v7, v4 +// GFX1250: v_dual_add_f32 v255, v3, v255 :: v_dual_mov_b32 v7, v4 ; encoding: [0x03,0x81,0x10,0xcf,0x04,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s105, v255 :: v_dual_mov_b32 v7, s1 +// GFX1250: v_dual_add_f32 v255, s105, v255 :: v_dual_mov_b32 v7, s1 ; encoding: [0x69,0x80,0x10,0xcf,0x01,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s1, v255 :: v_dual_mov_b32 v7, s105 +// GFX1250: v_dual_add_f32 v255, s1, v255 :: v_dual_mov_b32 v7, s105 ; encoding: [0x01,0x80,0x10,0xcf,0x69,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, ttmp15, v255 :: v_dual_mov_b32 v7, vcc_lo +// GFX1250: v_dual_add_f32 v255, ttmp15, v255 :: v_dual_mov_b32 v7, vcc_lo ; encoding: [0x7b,0x80,0x10,0xcf,0x6a,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_hi, v255 :: v_dual_mov_b32 v7, vcc_hi +// GFX1250: v_dual_add_f32 v255, exec_hi, v255 :: v_dual_mov_b32 v7, vcc_hi ; encoding: [0x7f,0x80,0x10,0xcf,0x6b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_lo, v255 :: v_dual_mov_b32 v7, ttmp15 +// GFX1250: v_dual_add_f32 v255, exec_lo, v255 :: v_dual_mov_b32 v7, ttmp15 ; encoding: [0x7e,0x80,0x10,0xcf,0x7b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, m0, v255 :: v_dual_mov_b32 v7, m0 +// GFX1250: v_dual_add_f32 v255, m0, v255 :: v_dual_mov_b32 v7, m0 ; encoding: [0x7d,0x80,0x10,0xcf,0x7d,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_hi, v255 :: v_dual_mov_b32 v7, 
exec_lo +// GFX1250: v_dual_add_f32 v255, vcc_hi, v255 :: v_dual_mov_b32 v7, exec_lo ; encoding: [0x6b,0x80,0x10,0xcf,0x7e,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_lo, v255 :: v_dual_mov_b32 v7, exec_hi +// GFX1250: v_dual_add_f32 v255, vcc_lo, v255 :: v_dual_mov_b32 v7, exec_hi ; encoding: [0x6a,0x80,0x10,0xcf,0x7f,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, src_scc, v255 :: v_dual_mov_b32 v7, -1 +// GFX1250: v_dual_add_f32 v255, src_scc, v255 :: v_dual_mov_b32 v7, -1 ; encoding: [0xfd,0x80,0x10,0xcf,0xc1,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0.5, v3 :: v_dual_mov_b32 v7, 0.5 +// GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_mov_b32 v7, 0.5 ; encoding: [0xf0,0x80,0x10,0xcf,0xf0,0x00,0x03,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, -1, v4 :: v_dual_mov_b32 v7, src_scc +// GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_mov_b32 v7, src_scc ; encoding: [0xc1,0x80,0x10,0xcf,0xfd,0x00,0x04,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v7, v1, v3 +// GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v7, v1, v3 ; encoding: [0x04,0x71,0x10,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v7, v255, v3 +// GFX1250: v_dual_add_f32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v7, v255, v3 ; encoding: [0x01,0x71,0x10,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v7, v2, v3 +// 
GFX1250: v_dual_add_f32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v7, v2, v3 ; encoding: [0xff,0x71,0x10,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v7, v3, v3 +// GFX1250: v_dual_add_f32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v7, v3, v3 ; encoding: [0x02,0x71,0x10,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v7, v4, v3 +// GFX1250: v_dual_add_f32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v7, v4, v3 ; encoding: [0x03,0x71,0x10,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v7, s1, v3 +// GFX1250: v_dual_add_f32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v7, s1, v3 ; encoding: [0x69,0x70,0x10,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v7, s105, v3 +// GFX1250: v_dual_add_f32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v7, s105, v3 ; encoding: [0x01,0x70,0x10,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x70,0x10,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x70,0x10,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_add_f32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, ttmp15, v3 +// GFX1250: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x70,0x10,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v7, m0, v3 +// GFX1250: v_dual_add_f32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v7, m0, v3 ; encoding: [0x7d,0x70,0x10,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_lo, v3 +// GFX1250: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x70,0x10,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 +// GFX1250: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x10,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v7, -1, v3 +// GFX1250: v_dual_add_f32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v7, -1, v3 ; encoding: [0xfd,0x70,0x10,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 +// GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x10,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 +// GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 ; encoding: 
[0xc1,0x70,0x10,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v4, v2 :: v_dual_mul_f32 v7, v1, v3 +// GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_mul_f32 v7, v1, v3 ; encoding: [0x04,0x31,0x10,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v1, v2 :: v_dual_mul_f32 v7, v255, v3 +// GFX1250: v_dual_add_f32 v255, v1, v2 :: v_dual_mul_f32 v7, v255, v3 ; encoding: [0x01,0x31,0x10,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v255, v2 :: v_dual_mul_f32 v7, v2, v3 +// GFX1250: v_dual_add_f32 v255, v255, v2 :: v_dual_mul_f32 v7, v2, v3 ; encoding: [0xff,0x31,0x10,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v2, v2 :: v_dual_mul_f32 v7, v3, v3 +// GFX1250: v_dual_add_f32 v255, v2, v2 :: v_dual_mul_f32 v7, v3, v3 ; encoding: [0x02,0x31,0x10,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v3, v2 :: v_dual_mul_f32 v7, v4, v3 +// GFX1250: v_dual_add_f32 v255, v3, v2 :: v_dual_mul_f32 v7, v4, v3 ; encoding: [0x03,0x31,0x10,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s105, v2 :: v_dual_mul_f32 v7, s1, v3 +// GFX1250: v_dual_add_f32 v255, s105, v2 :: v_dual_mul_f32 v7, s1, v3 ; encoding: [0x69,0x30,0x10,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s1, v2 :: v_dual_mul_f32 v7, s105, v3 +// GFX1250: v_dual_add_f32 v255, s1, v2 :: v_dual_mul_f32 v7, s105, v3 ; encoding: [0x01,0x30,0x10,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// 
W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, ttmp15, v2 :: v_dual_mul_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_mul_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x30,0x10,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_hi, v2 :: v_dual_mul_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_mul_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x30,0x10,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_lo, v2 :: v_dual_mul_f32 v7, ttmp15, v3 +// GFX1250: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_mul_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x30,0x10,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, m0, v2 :: v_dual_mul_f32 v7, m0, v3 +// GFX1250: v_dual_add_f32 v255, m0, v2 :: v_dual_mul_f32 v7, m0, v3 ; encoding: [0x7d,0x30,0x10,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_mul_f32 v7, exec_lo, v3 +// GFX1250: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_mul_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x30,0x10,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_mul_f32 v7, exec_hi, v3 +// GFX1250: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_mul_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x30,0x10,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, src_scc, v2 :: v_dual_mul_f32 v7, -1, v3 +// GFX1250: v_dual_add_f32 v255, src_scc, v2 :: v_dual_mul_f32 v7, -1, v3 ; encoding: 
[0xfd,0x30,0x10,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0.5, v3 :: v_dual_mul_f32 v7, 0.5, v2 +// GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_mul_f32 v7, 0.5, v2 ; encoding: [0xf0,0x30,0x10,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, -1, v4 :: v_dual_mul_f32 v7, src_scc, v5 +// GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_mul_f32 v7, src_scc, v5 ; encoding: [0xc1,0x30,0x10,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v4, v2 :: v_dual_sub_f32 v7, v1, v3 +// GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_sub_f32 v7, v1, v3 ; encoding: [0x04,0x51,0x10,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v1, v2 :: v_dual_sub_f32 v7, v255, v3 +// GFX1250: v_dual_add_f32 v255, v1, v2 :: v_dual_sub_f32 v7, v255, v3 ; encoding: [0x01,0x51,0x10,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v255, v2 :: v_dual_sub_f32 v7, v2, v3 +// GFX1250: v_dual_add_f32 v255, v255, v2 :: v_dual_sub_f32 v7, v2, v3 ; encoding: [0xff,0x51,0x10,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v2, v2 :: v_dual_sub_f32 v7, v3, v3 +// GFX1250: v_dual_add_f32 v255, v2, v2 :: v_dual_sub_f32 v7, v3, v3 ; encoding: [0x02,0x51,0x10,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v3, v2 :: v_dual_sub_f32 v7, v4, v3 +// GFX1250: v_dual_add_f32 v255, v3, v2 :: v_dual_sub_f32 v7, v4, v3 ; encoding: [0x03,0x51,0x10,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// 
W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s105, v2 :: v_dual_sub_f32 v7, s1, v3 +// GFX1250: v_dual_add_f32 v255, s105, v2 :: v_dual_sub_f32 v7, s1, v3 ; encoding: [0x69,0x50,0x10,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s1, v2 :: v_dual_sub_f32 v7, s105, v3 +// GFX1250: v_dual_add_f32 v255, s1, v2 :: v_dual_sub_f32 v7, s105, v3 ; encoding: [0x01,0x50,0x10,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, ttmp15, v2 :: v_dual_sub_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_sub_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x50,0x10,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_hi, v2 :: v_dual_sub_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_sub_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x50,0x10,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_lo, v2 :: v_dual_sub_f32 v7, ttmp15, v3 +// GFX1250: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_sub_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x50,0x10,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, m0, v2 :: v_dual_sub_f32 v7, m0, v3 +// GFX1250: v_dual_add_f32 v255, m0, v2 :: v_dual_sub_f32 v7, m0, v3 ; encoding: [0x7d,0x50,0x10,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_sub_f32 v7, exec_lo, v3 +// GFX1250: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_sub_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x50,0x10,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_sub_f32 v7, exec_hi, v3 +// GFX1250: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_sub_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x10,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, src_scc, v2 :: v_dual_sub_f32 v7, -1, v3 +// GFX1250: v_dual_add_f32 v255, src_scc, v2 :: v_dual_sub_f32 v7, -1, v3 ; encoding: [0xfd,0x50,0x10,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0.5, v3 :: v_dual_sub_f32 v7, 0.5, v2 +// GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_sub_f32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x10,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, -1, v4 :: v_dual_sub_f32 v7, src_scc, v5 +// GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_sub_f32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x10,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v4, v2 :: v_dual_subrev_f32 v7, v1, v3 +// GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_subrev_f32 v7, v1, v3 ; encoding: [0x04,0x61,0x10,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v1, v2 :: v_dual_subrev_f32 v7, v255, v3 +// GFX1250: v_dual_add_f32 v255, v1, v2 :: v_dual_subrev_f32 v7, v255, v3 ; encoding: [0x01,0x61,0x10,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v255, v2 :: v_dual_subrev_f32 v7, v2, v3 +// GFX1250: v_dual_add_f32 v255, v255, v2 :: v_dual_subrev_f32 v7, v2, v3 ; encoding: [0xff,0x61,0x10,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: 
error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v2, v2 :: v_dual_subrev_f32 v7, v3, v3 +// GFX1250: v_dual_add_f32 v255, v2, v2 :: v_dual_subrev_f32 v7, v3, v3 ; encoding: [0x02,0x61,0x10,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v3, v2 :: v_dual_subrev_f32 v7, v4, v3 +// GFX1250: v_dual_add_f32 v255, v3, v2 :: v_dual_subrev_f32 v7, v4, v3 ; encoding: [0x03,0x61,0x10,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s105, v2 :: v_dual_subrev_f32 v7, s1, v3 +// GFX1250: v_dual_add_f32 v255, s105, v2 :: v_dual_subrev_f32 v7, s1, v3 ; encoding: [0x69,0x60,0x10,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s1, v2 :: v_dual_subrev_f32 v7, s105, v3 +// GFX1250: v_dual_add_f32 v255, s1, v2 :: v_dual_subrev_f32 v7, s105, v3 ; encoding: [0x01,0x60,0x10,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, ttmp15, v2 :: v_dual_subrev_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_subrev_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x60,0x10,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_hi, v2 :: v_dual_subrev_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_subrev_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x60,0x10,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_lo, v2 :: v_dual_subrev_f32 v7, ttmp15, v3 +// GFX1250: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_subrev_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x60,0x10,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, m0, v2 :: v_dual_subrev_f32 v7, m0, v3 +// GFX1250: v_dual_add_f32 v255, m0, v2 :: v_dual_subrev_f32 v7, m0, v3 ; encoding: [0x7d,0x60,0x10,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v7, exec_lo, v3 +// GFX1250: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x60,0x10,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v7, exec_hi, v3 +// GFX1250: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x10,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, src_scc, v2 :: v_dual_subrev_f32 v7, -1, v3 +// GFX1250: v_dual_add_f32 v255, src_scc, v2 :: v_dual_subrev_f32 v7, -1, v3 ; encoding: [0xfd,0x60,0x10,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0.5, v3 :: v_dual_subrev_f32 v7, 0.5, v2 +// GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_subrev_f32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x10,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, -1, v4 :: v_dual_subrev_f32 v7, src_scc, v5 +// GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_subrev_f32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x10,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v4, v2 :: v_dual_fma_f32 v7, v1, v3, v4 +// GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_fma_f32 v7, v1, v3, v4 ; encoding: 
[0x04,0x31,0x11,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x04,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v4, v2 :: v_dual_bitop2_b32 v7, v1, v3 +// GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_bitop2_b32 v7, v1, v3 ; encoding: [0x04,0x21,0x11,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_add_f32 v7, v1, v3 +// GFX1250: v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_add_f32 v7, v1, v3 ; encoding: [0x04,0x41,0x24,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v1, v2, vcc_lo :: v_dual_add_f32 v7, v255, v3 +// GFX1250: v_dual_cndmask_b32 v255, v1, v2, vcc_lo :: v_dual_add_f32 v7, v255, v3 ; encoding: [0x01,0x41,0x24,0xcf,0xff,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v255, v2, vcc_lo :: v_dual_add_f32 v7, v2, v3 +// GFX1250: v_dual_cndmask_b32 v255, v255, v2, vcc_lo :: v_dual_add_f32 v7, v2, v3 ; encoding: [0xff,0x41,0x24,0xcf,0x02,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v2, v2, vcc_lo :: v_dual_add_f32 v7, v3, v3 +// GFX1250: v_dual_cndmask_b32 v255, v2, v2, vcc_lo :: v_dual_add_f32 v7, v3, v3 ; encoding: [0x02,0x41,0x24,0xcf,0x03,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v3, v2, vcc_lo :: v_dual_add_f32 v7, v4, v3 +// GFX1250: v_dual_cndmask_b32 v255, v3, v2, vcc_lo :: v_dual_add_f32 v7, v4, v3 ; encoding: [0x03,0x41,0x24,0xcf,0x04,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s105, v2, vcc_lo :: v_dual_add_f32 v7, s105, v3 +// GFX1250: 
v_dual_cndmask_b32 v255, s105, v2, vcc_lo :: v_dual_add_f32 v7, s105, v3 ; encoding: [0x69,0x40,0x24,0xcf,0x69,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s1, v2, vcc_lo :: v_dual_add_f32 v7, s1, v3 +// GFX1250: v_dual_cndmask_b32 v255, s1, v2, vcc_lo :: v_dual_add_f32 v7, s1, v3 ; encoding: [0x01,0x40,0x24,0xcf,0x01,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, ttmp15, v2, vcc_lo :: v_dual_add_f32 v7, ttmp15, v3 +// GFX1250: v_dual_cndmask_b32 v255, ttmp15, v2, vcc_lo :: v_dual_add_f32 v7, ttmp15, v3 ; encoding: [0x7b,0x40,0x24,0xcf,0x7b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_hi, v2, vcc_lo :: v_dual_add_f32 v7, exec_hi, v3 +// GFX1250: v_dual_cndmask_b32 v255, exec_hi, v2, vcc_lo :: v_dual_add_f32 v7, exec_hi, v3 ; encoding: [0x7f,0x40,0x24,0xcf,0x7f,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_lo, v2, vcc_lo :: v_dual_add_f32 v7, exec_lo, v3 +// GFX1250: v_dual_cndmask_b32 v255, exec_lo, v2, vcc_lo :: v_dual_add_f32 v7, exec_lo, v3 ; encoding: [0x7e,0x40,0x24,0xcf,0x7e,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, m0, v2, vcc_lo :: v_dual_add_f32 v7, m0, v3 +// GFX1250: v_dual_cndmask_b32 v255, m0, v2, vcc_lo :: v_dual_add_f32 v7, m0, v3 ; encoding: [0x7d,0x40,0x24,0xcf,0x7d,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_hi, v2, vcc_lo :: v_dual_add_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v2, vcc_lo :: v_dual_add_f32 v7, vcc_hi, v3 ; encoding: 
[0x6b,0x40,0x24,0xcf,0x6b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_lo, v2, vcc_lo :: v_dual_add_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_cndmask_b32 v255, vcc_lo, v2, vcc_lo :: v_dual_add_f32 v7, vcc_lo, v3 ; encoding: [0x6a,0x40,0x24,0xcf,0x6a,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, src_scc, v2, vcc_lo :: v_dual_add_f32 v7, -1, v3 +// GFX1250: v_dual_cndmask_b32 v255, src_scc, v2, vcc_lo :: v_dual_add_f32 v7, -1, v3 ; encoding: [0xfd,0x40,0x24,0xcf,0xc1,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_add_f32 v7, 0.5, v2 +// GFX1250: v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_add_f32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x24,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_add_f32 v7, src_scc, v5 +// GFX1250: v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_add_f32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x24,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_add_nc_u32 v7, v1, v3 +// GFX1250: v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_add_nc_u32 v7, v1, v3 ; encoding: [0x04,0x01,0x25,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v1, v2, vcc_lo :: v_dual_add_nc_u32 v7, v255, v3 +// GFX1250: v_dual_cndmask_b32 v255, v1, v2, vcc_lo :: v_dual_add_nc_u32 v7, v255, v3 ; encoding: [0x01,0x01,0x25,0xcf,0xff,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 
v255, v2, vcc_lo :: v_dual_add_nc_u32 v7, v2, v3 +// GFX1250: v_dual_cndmask_b32 v255, v255, v2, vcc_lo :: v_dual_add_nc_u32 v7, v2, v3 ; encoding: [0xff,0x01,0x25,0xcf,0x02,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v2, v2, vcc_lo :: v_dual_add_nc_u32 v7, v3, v3 +// GFX1250: v_dual_cndmask_b32 v255, v2, v2, vcc_lo :: v_dual_add_nc_u32 v7, v3, v3 ; encoding: [0x02,0x01,0x25,0xcf,0x03,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v3, v2, vcc_lo :: v_dual_add_nc_u32 v7, v4, v3 +// GFX1250: v_dual_cndmask_b32 v255, v3, v2, vcc_lo :: v_dual_add_nc_u32 v7, v4, v3 ; encoding: [0x03,0x01,0x25,0xcf,0x04,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s105, v2, vcc_lo :: v_dual_add_nc_u32 v7, s105, v3 +// GFX1250: v_dual_cndmask_b32 v255, s105, v2, vcc_lo :: v_dual_add_nc_u32 v7, s105, v3 ; encoding: [0x69,0x00,0x25,0xcf,0x69,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s1, v2, vcc_lo :: v_dual_add_nc_u32 v7, s1, v3 +// GFX1250: v_dual_cndmask_b32 v255, s1, v2, vcc_lo :: v_dual_add_nc_u32 v7, s1, v3 ; encoding: [0x01,0x00,0x25,0xcf,0x01,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, ttmp15, v2, vcc_lo :: v_dual_add_nc_u32 v7, ttmp15, v3 +// GFX1250: v_dual_cndmask_b32 v255, ttmp15, v2, vcc_lo :: v_dual_add_nc_u32 v7, ttmp15, v3 ; encoding: [0x7b,0x00,0x25,0xcf,0x7b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_hi, v2, vcc_lo :: v_dual_add_nc_u32 v7, exec_hi, v3 +// GFX1250: v_dual_cndmask_b32 v255, exec_hi, v2, vcc_lo :: v_dual_add_nc_u32 v7, exec_hi, v3 ; 
encoding: [0x7f,0x00,0x25,0xcf,0x7f,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_lo, v2, vcc_lo :: v_dual_add_nc_u32 v7, exec_lo, v3 +// GFX1250: v_dual_cndmask_b32 v255, exec_lo, v2, vcc_lo :: v_dual_add_nc_u32 v7, exec_lo, v3 ; encoding: [0x7e,0x00,0x25,0xcf,0x7e,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, m0, v2, vcc_lo :: v_dual_add_nc_u32 v7, m0, v3 +// GFX1250: v_dual_cndmask_b32 v255, m0, v2, vcc_lo :: v_dual_add_nc_u32 v7, m0, v3 ; encoding: [0x7d,0x00,0x25,0xcf,0x7d,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_hi, v2, vcc_lo :: v_dual_add_nc_u32 v7, vcc_hi, v3 +// GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v2, vcc_lo :: v_dual_add_nc_u32 v7, vcc_hi, v3 ; encoding: [0x6b,0x00,0x25,0xcf,0x6b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_lo, v2, vcc_lo :: v_dual_add_nc_u32 v7, vcc_lo, v3 +// GFX1250: v_dual_cndmask_b32 v255, vcc_lo, v2, vcc_lo :: v_dual_add_nc_u32 v7, vcc_lo, v3 ; encoding: [0x6a,0x00,0x25,0xcf,0x6a,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, src_scc, v2, vcc_lo :: v_dual_add_nc_u32 v7, -1, v3 +// GFX1250: v_dual_cndmask_b32 v255, src_scc, v2, vcc_lo :: v_dual_add_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x00,0x25,0xcf,0xc1,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_add_nc_u32 v7, 0.5, v2 +// GFX1250: v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_add_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x25,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_add_nc_u32 v7, src_scc, v5 +// GFX1250: v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_add_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x25,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo +// GFX1250: v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo ; encoding: [0x04,0x91,0x24,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v1, v2, vcc_lo :: v_dual_cndmask_b32 v7, v255, v3, vcc_lo +// GFX1250: v_dual_cndmask_b32 v255, v1, v2, vcc_lo :: v_dual_cndmask_b32 v7, v255, v3, vcc_lo ; encoding: [0x01,0x91,0x24,0xcf,0xff,0x01,0x02,0x6a,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v255, v2, vcc_lo :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo +// GFX1250: v_dual_cndmask_b32 v255, v255, v2, vcc_lo :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo ; encoding: [0xff,0x91,0x24,0xcf,0x02,0x01,0x02,0x6a,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v2, v2, vcc_lo :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo +// GFX1250: v_dual_cndmask_b32 v255, v2, v2, vcc_lo :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo ; encoding: [0x02,0x91,0x24,0xcf,0x03,0x01,0x02,0x6a,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v3, v2, vcc_lo :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo +// GFX1250: v_dual_cndmask_b32 v255, v3, v2, vcc_lo :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo ; encoding: [0x03,0x91,0x24,0xcf,0x04,0x01,0x02,0x6a,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s105, v2, 
vcc_lo :: v_dual_cndmask_b32 v7, s105, v3, vcc_lo +// GFX1250: v_dual_cndmask_b32 v255, s105, v2, vcc_lo :: v_dual_cndmask_b32 v7, s105, v3, vcc_lo ; encoding: [0x69,0x90,0x24,0xcf,0x69,0x00,0x02,0x6a,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s1, v2, vcc_lo :: v_dual_cndmask_b32 v7, s1, v3, vcc_lo +// GFX1250: v_dual_cndmask_b32 v255, s1, v2, vcc_lo :: v_dual_cndmask_b32 v7, s1, v3, vcc_lo ; encoding: [0x01,0x90,0x24,0xcf,0x01,0x00,0x02,0x6a,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, ttmp15, v2, vcc_lo :: v_dual_cndmask_b32 v7, ttmp15, v3, vcc_lo +// GFX1250: v_dual_cndmask_b32 v255, ttmp15, v2, vcc_lo :: v_dual_cndmask_b32 v7, ttmp15, v3, vcc_lo ; encoding: [0x7b,0x90,0x24,0xcf,0x7b,0x00,0x02,0x6a,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_hi, v2, vcc_lo :: v_dual_cndmask_b32 v7, exec_hi, v3, vcc_lo +// GFX1250: v_dual_cndmask_b32 v255, exec_hi, v2, vcc_lo :: v_dual_cndmask_b32 v7, exec_hi, v3, vcc_lo ; encoding: [0x7f,0x90,0x24,0xcf,0x7f,0x00,0x02,0x6a,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_lo, v2, vcc_lo :: v_dual_cndmask_b32 v7, exec_lo, v3, vcc_lo +// GFX1250: v_dual_cndmask_b32 v255, exec_lo, v2, vcc_lo :: v_dual_cndmask_b32 v7, exec_lo, v3, vcc_lo ; encoding: [0x7e,0x90,0x24,0xcf,0x7e,0x00,0x02,0x6a,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, m0, v2, vcc_lo :: v_dual_cndmask_b32 v7, m0, v3, vcc_lo +// GFX1250: v_dual_cndmask_b32 v255, m0, v2, vcc_lo :: v_dual_cndmask_b32 v7, m0, v3, vcc_lo ; encoding: [0x7d,0x90,0x24,0xcf,0x7d,0x00,0x02,0x6a,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_hi, v2, vcc_lo 
:: v_dual_cndmask_b32 v7, vcc_hi, v3, vcc_lo +// GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v2, vcc_lo :: v_dual_cndmask_b32 v7, vcc_hi, v3, vcc_lo ; encoding: [0x6b,0x90,0x24,0xcf,0x6b,0x00,0x02,0x6a,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_lo, v2, vcc_lo :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo +// GFX1250: v_dual_cndmask_b32 v255, vcc_lo, v2, vcc_lo :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo ; encoding: [0x6a,0x90,0x24,0xcf,0x6a,0x00,0x02,0x6a,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, src_scc, v2, vcc_lo :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo +// GFX1250: v_dual_cndmask_b32 v255, src_scc, v2, vcc_lo :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo ; encoding: [0xfd,0x90,0x24,0xcf,0xc1,0x00,0x02,0x6a,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo +// GFX1250: v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo ; encoding: [0xf0,0x90,0x24,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x02,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo +// GFX1250: v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo ; encoding: [0xc1,0x90,0x24,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x05,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v28, -v15, v15, s46 :: v_dual_cndmask_b32 v29, -v13, -v13, s46 +// GFX1250: v_dual_cndmask_b32 v28, -v15, v15, s46 :: v_dual_cndmask_b32 v29, -v13, -v13, s46 ; encoding: [0x0f,0x91,0x24,0xcf,0x0d,0x33,0x0f,0x2e,0x1c,0x0d,0x2e,0x1d] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_fmac_f32 
v7, v1, v3 +// GFX1250: v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_fmac_f32 v7, v1, v3 ; encoding: [0x04,0x01,0x24,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v1, v2, vcc_lo :: v_dual_fmac_f32 v7, v255, v3 +// GFX1250: v_dual_cndmask_b32 v255, v1, v2, vcc_lo :: v_dual_fmac_f32 v7, v255, v3 ; encoding: [0x01,0x01,0x24,0xcf,0xff,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v255, v2, vcc_lo :: v_dual_fmac_f32 v7, v2, v3 +// GFX1250: v_dual_cndmask_b32 v255, v255, v2, vcc_lo :: v_dual_fmac_f32 v7, v2, v3 ; encoding: [0xff,0x01,0x24,0xcf,0x02,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v2, v2, vcc_lo :: v_dual_fmac_f32 v7, v3, v3 +// GFX1250: v_dual_cndmask_b32 v255, v2, v2, vcc_lo :: v_dual_fmac_f32 v7, v3, v3 ; encoding: [0x02,0x01,0x24,0xcf,0x03,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v3, v2, vcc_lo :: v_dual_fmac_f32 v7, v4, v3 +// GFX1250: v_dual_cndmask_b32 v255, v3, v2, vcc_lo :: v_dual_fmac_f32 v7, v4, v3 ; encoding: [0x03,0x01,0x24,0xcf,0x04,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s105, v2, vcc_lo :: v_dual_fmac_f32 v7, s105, v3 +// GFX1250: v_dual_cndmask_b32 v255, s105, v2, vcc_lo :: v_dual_fmac_f32 v7, s105, v3 ; encoding: [0x69,0x00,0x24,0xcf,0x69,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s1, v2, vcc_lo :: v_dual_fmac_f32 v7, s1, v3 +// GFX1250: v_dual_cndmask_b32 v255, s1, v2, vcc_lo :: v_dual_fmac_f32 v7, s1, v3 ; encoding: [0x01,0x00,0x24,0xcf,0x01,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, ttmp15, v2, vcc_lo :: v_dual_fmac_f32 v7, ttmp15, v3 +// GFX1250: v_dual_cndmask_b32 v255, ttmp15, v2, vcc_lo :: v_dual_fmac_f32 v7, ttmp15, v3 ; encoding: [0x7b,0x00,0x24,0xcf,0x7b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_hi, v2, vcc_lo :: v_dual_fmac_f32 v7, exec_hi, v3 +// GFX1250: v_dual_cndmask_b32 v255, exec_hi, v2, vcc_lo :: v_dual_fmac_f32 v7, exec_hi, v3 ; encoding: [0x7f,0x00,0x24,0xcf,0x7f,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_lo, v2, vcc_lo :: v_dual_fmac_f32 v7, exec_lo, v3 +// GFX1250: v_dual_cndmask_b32 v255, exec_lo, v2, vcc_lo :: v_dual_fmac_f32 v7, exec_lo, v3 ; encoding: [0x7e,0x00,0x24,0xcf,0x7e,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, m0, v2, vcc_lo :: v_dual_fmac_f32 v7, m0, v3 +// GFX1250: v_dual_cndmask_b32 v255, m0, v2, vcc_lo :: v_dual_fmac_f32 v7, m0, v3 ; encoding: [0x7d,0x00,0x24,0xcf,0x7d,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_hi, v2, vcc_lo :: v_dual_fmac_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v2, vcc_lo :: v_dual_fmac_f32 v7, vcc_hi, v3 ; encoding: [0x6b,0x00,0x24,0xcf,0x6b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_lo, v2, vcc_lo :: v_dual_fmac_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_cndmask_b32 v255, vcc_lo, v2, vcc_lo :: v_dual_fmac_f32 v7, vcc_lo, v3 ; encoding: [0x6a,0x00,0x24,0xcf,0x6a,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, src_scc, v2, vcc_lo :: 
v_dual_fmac_f32 v7, -1, v3 +// GFX1250: v_dual_cndmask_b32 v255, src_scc, v2, vcc_lo :: v_dual_fmac_f32 v7, -1, v3 ; encoding: [0xfd,0x00,0x24,0xcf,0xc1,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_fmac_f32 v7, 0.5, v2 +// GFX1250: v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_fmac_f32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x24,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_fmac_f32 v7, src_scc, v5 +// GFX1250: v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_fmac_f32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x24,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_lshlrev_b32 v7, v1, v3 +// GFX1250: v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_lshlrev_b32 v7, v1, v3 ; encoding: [0x04,0x11,0x25,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v1, v2, vcc_lo :: v_dual_lshlrev_b32 v7, v255, v3 +// GFX1250: v_dual_cndmask_b32 v255, v1, v2, vcc_lo :: v_dual_lshlrev_b32 v7, v255, v3 ; encoding: [0x01,0x11,0x25,0xcf,0xff,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v255, v2, vcc_lo :: v_dual_lshlrev_b32 v7, v2, v3 +// GFX1250: v_dual_cndmask_b32 v255, v255, v2, vcc_lo :: v_dual_lshlrev_b32 v7, v2, v3 ; encoding: [0xff,0x11,0x25,0xcf,0x02,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v2, v2, vcc_lo :: v_dual_lshlrev_b32 v7, v3, v3 +// GFX1250: v_dual_cndmask_b32 v255, v2, v2, vcc_lo :: v_dual_lshlrev_b32 v7, v3, v3 ; encoding: 
[0x02,0x11,0x25,0xcf,0x03,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v3, v2, vcc_lo :: v_dual_lshlrev_b32 v7, v4, v3 +// GFX1250: v_dual_cndmask_b32 v255, v3, v2, vcc_lo :: v_dual_lshlrev_b32 v7, v4, v3 ; encoding: [0x03,0x11,0x25,0xcf,0x04,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s105, v2, vcc_lo :: v_dual_lshlrev_b32 v7, s105, v3 +// GFX1250: v_dual_cndmask_b32 v255, s105, v2, vcc_lo :: v_dual_lshlrev_b32 v7, s105, v3 ; encoding: [0x69,0x10,0x25,0xcf,0x69,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s1, v2, vcc_lo :: v_dual_lshlrev_b32 v7, s1, v3 +// GFX1250: v_dual_cndmask_b32 v255, s1, v2, vcc_lo :: v_dual_lshlrev_b32 v7, s1, v3 ; encoding: [0x01,0x10,0x25,0xcf,0x01,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, ttmp15, v2, vcc_lo :: v_dual_lshlrev_b32 v7, ttmp15, v3 +// GFX1250: v_dual_cndmask_b32 v255, ttmp15, v2, vcc_lo :: v_dual_lshlrev_b32 v7, ttmp15, v3 ; encoding: [0x7b,0x10,0x25,0xcf,0x7b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_hi, v2, vcc_lo :: v_dual_lshlrev_b32 v7, exec_hi, v3 +// GFX1250: v_dual_cndmask_b32 v255, exec_hi, v2, vcc_lo :: v_dual_lshlrev_b32 v7, exec_hi, v3 ; encoding: [0x7f,0x10,0x25,0xcf,0x7f,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_lo, v2, vcc_lo :: v_dual_lshlrev_b32 v7, exec_lo, v3 +// GFX1250: v_dual_cndmask_b32 v255, exec_lo, v2, vcc_lo :: v_dual_lshlrev_b32 v7, exec_lo, v3 ; encoding: [0x7e,0x10,0x25,0xcf,0x7e,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, m0, v2, vcc_lo :: v_dual_lshlrev_b32 v7, m0, v3 +// GFX1250: v_dual_cndmask_b32 v255, m0, v2, vcc_lo :: v_dual_lshlrev_b32 v7, m0, v3 ; encoding: [0x7d,0x10,0x25,0xcf,0x7d,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_hi, v2, vcc_lo :: v_dual_lshlrev_b32 v7, vcc_hi, v3 +// GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v2, vcc_lo :: v_dual_lshlrev_b32 v7, vcc_hi, v3 ; encoding: [0x6b,0x10,0x25,0xcf,0x6b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_lo, v2, vcc_lo :: v_dual_lshlrev_b32 v7, vcc_lo, v3 +// GFX1250: v_dual_cndmask_b32 v255, vcc_lo, v2, vcc_lo :: v_dual_lshlrev_b32 v7, vcc_lo, v3 ; encoding: [0x6a,0x10,0x25,0xcf,0x6a,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, src_scc, v2, vcc_lo :: v_dual_lshlrev_b32 v7, -1, v3 +// GFX1250: v_dual_cndmask_b32 v255, src_scc, v2, vcc_lo :: v_dual_lshlrev_b32 v7, -1, v3 ; encoding: [0xfd,0x10,0x25,0xcf,0xc1,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_lshlrev_b32 v7, 0.5, v2 +// GFX1250: v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_lshlrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x10,0x25,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_lshlrev_b32 v7, src_scc, v5 +// GFX1250: v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_lshlrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x10,0x25,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_max_num_f32 v7, v1, v3 +// 
GFX1250: v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_max_num_f32 v7, v1, v3 ; encoding: [0x04,0xa1,0x24,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v1, v2, vcc_lo :: v_dual_max_num_f32 v7, v255, v3 +// GFX1250: v_dual_cndmask_b32 v255, v1, v2, vcc_lo :: v_dual_max_num_f32 v7, v255, v3 ; encoding: [0x01,0xa1,0x24,0xcf,0xff,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v255, v2, vcc_lo :: v_dual_max_num_f32 v7, v2, v3 +// GFX1250: v_dual_cndmask_b32 v255, v255, v2, vcc_lo :: v_dual_max_num_f32 v7, v2, v3 ; encoding: [0xff,0xa1,0x24,0xcf,0x02,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v2, v2, vcc_lo :: v_dual_max_num_f32 v7, v3, v3 +// GFX1250: v_dual_cndmask_b32 v255, v2, v2, vcc_lo :: v_dual_max_num_f32 v7, v3, v3 ; encoding: [0x02,0xa1,0x24,0xcf,0x03,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v3, v2, vcc_lo :: v_dual_max_num_f32 v7, v4, v3 +// GFX1250: v_dual_cndmask_b32 v255, v3, v2, vcc_lo :: v_dual_max_num_f32 v7, v4, v3 ; encoding: [0x03,0xa1,0x24,0xcf,0x04,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s105, v2, vcc_lo :: v_dual_max_num_f32 v7, s105, v3 +// GFX1250: v_dual_cndmask_b32 v255, s105, v2, vcc_lo :: v_dual_max_num_f32 v7, s105, v3 ; encoding: [0x69,0xa0,0x24,0xcf,0x69,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s1, v2, vcc_lo :: v_dual_max_num_f32 v7, s1, v3 +// GFX1250: v_dual_cndmask_b32 v255, s1, v2, vcc_lo :: v_dual_max_num_f32 v7, s1, v3 ; encoding: 
[0x01,0xa0,0x24,0xcf,0x01,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, ttmp15, v2, vcc_lo :: v_dual_max_num_f32 v7, ttmp15, v3 +// GFX1250: v_dual_cndmask_b32 v255, ttmp15, v2, vcc_lo :: v_dual_max_num_f32 v7, ttmp15, v3 ; encoding: [0x7b,0xa0,0x24,0xcf,0x7b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_hi, v2, vcc_lo :: v_dual_max_num_f32 v7, exec_hi, v3 +// GFX1250: v_dual_cndmask_b32 v255, exec_hi, v2, vcc_lo :: v_dual_max_num_f32 v7, exec_hi, v3 ; encoding: [0x7f,0xa0,0x24,0xcf,0x7f,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_lo, v2, vcc_lo :: v_dual_max_num_f32 v7, exec_lo, v3 +// GFX1250: v_dual_cndmask_b32 v255, exec_lo, v2, vcc_lo :: v_dual_max_num_f32 v7, exec_lo, v3 ; encoding: [0x7e,0xa0,0x24,0xcf,0x7e,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, m0, v2, vcc_lo :: v_dual_max_num_f32 v7, m0, v3 +// GFX1250: v_dual_cndmask_b32 v255, m0, v2, vcc_lo :: v_dual_max_num_f32 v7, m0, v3 ; encoding: [0x7d,0xa0,0x24,0xcf,0x7d,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_hi, v2, vcc_lo :: v_dual_max_num_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v2, vcc_lo :: v_dual_max_num_f32 v7, vcc_hi, v3 ; encoding: [0x6b,0xa0,0x24,0xcf,0x6b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_lo, v2, vcc_lo :: v_dual_max_num_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_cndmask_b32 v255, vcc_lo, v2, vcc_lo :: v_dual_max_num_f32 v7, vcc_lo, v3 ; encoding: [0x6a,0xa0,0x24,0xcf,0x6a,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, src_scc, v2, vcc_lo :: v_dual_max_num_f32 v7, -1, v3 +// GFX1250: v_dual_cndmask_b32 v255, src_scc, v2, vcc_lo :: v_dual_max_num_f32 v7, -1, v3 ; encoding: [0xfd,0xa0,0x24,0xcf,0xc1,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_max_num_f32 v7, 0.5, v2 +// GFX1250: v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_max_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xa0,0x24,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_max_num_f32 v7, src_scc, v5 +// GFX1250: v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_max_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xa0,0x24,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_min_num_f32 v7, v1, v3 +// GFX1250: v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_min_num_f32 v7, v1, v3 ; encoding: [0x04,0xb1,0x24,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v1, v2, vcc_lo :: v_dual_min_num_f32 v7, v255, v3 +// GFX1250: v_dual_cndmask_b32 v255, v1, v2, vcc_lo :: v_dual_min_num_f32 v7, v255, v3 ; encoding: [0x01,0xb1,0x24,0xcf,0xff,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v255, v2, vcc_lo :: v_dual_min_num_f32 v7, v2, v3 +// GFX1250: v_dual_cndmask_b32 v255, v255, v2, vcc_lo :: v_dual_min_num_f32 v7, v2, v3 ; encoding: [0xff,0xb1,0x24,0xcf,0x02,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v2, v2, vcc_lo :: v_dual_min_num_f32 v7, v3, v3 +// 
GFX1250: v_dual_cndmask_b32 v255, v2, v2, vcc_lo :: v_dual_min_num_f32 v7, v3, v3 ; encoding: [0x02,0xb1,0x24,0xcf,0x03,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v3, v2, vcc_lo :: v_dual_min_num_f32 v7, v4, v3 +// GFX1250: v_dual_cndmask_b32 v255, v3, v2, vcc_lo :: v_dual_min_num_f32 v7, v4, v3 ; encoding: [0x03,0xb1,0x24,0xcf,0x04,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s105, v2, vcc_lo :: v_dual_min_num_f32 v7, s105, v3 +// GFX1250: v_dual_cndmask_b32 v255, s105, v2, vcc_lo :: v_dual_min_num_f32 v7, s105, v3 ; encoding: [0x69,0xb0,0x24,0xcf,0x69,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s1, v2, vcc_lo :: v_dual_min_num_f32 v7, s1, v3 +// GFX1250: v_dual_cndmask_b32 v255, s1, v2, vcc_lo :: v_dual_min_num_f32 v7, s1, v3 ; encoding: [0x01,0xb0,0x24,0xcf,0x01,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, ttmp15, v2, vcc_lo :: v_dual_min_num_f32 v7, ttmp15, v3 +// GFX1250: v_dual_cndmask_b32 v255, ttmp15, v2, vcc_lo :: v_dual_min_num_f32 v7, ttmp15, v3 ; encoding: [0x7b,0xb0,0x24,0xcf,0x7b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_hi, v2, vcc_lo :: v_dual_min_num_f32 v7, exec_hi, v3 +// GFX1250: v_dual_cndmask_b32 v255, exec_hi, v2, vcc_lo :: v_dual_min_num_f32 v7, exec_hi, v3 ; encoding: [0x7f,0xb0,0x24,0xcf,0x7f,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_lo, v2, vcc_lo :: v_dual_min_num_f32 v7, exec_lo, v3 +// GFX1250: v_dual_cndmask_b32 v255, exec_lo, v2, vcc_lo :: v_dual_min_num_f32 v7, exec_lo, v3 ; encoding: 
[0x7e,0xb0,0x24,0xcf,0x7e,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, m0, v2, vcc_lo :: v_dual_min_num_f32 v7, m0, v3 +// GFX1250: v_dual_cndmask_b32 v255, m0, v2, vcc_lo :: v_dual_min_num_f32 v7, m0, v3 ; encoding: [0x7d,0xb0,0x24,0xcf,0x7d,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_hi, v2, vcc_lo :: v_dual_min_num_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v2, vcc_lo :: v_dual_min_num_f32 v7, vcc_hi, v3 ; encoding: [0x6b,0xb0,0x24,0xcf,0x6b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_lo, v2, vcc_lo :: v_dual_min_num_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_cndmask_b32 v255, vcc_lo, v2, vcc_lo :: v_dual_min_num_f32 v7, vcc_lo, v3 ; encoding: [0x6a,0xb0,0x24,0xcf,0x6a,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, src_scc, v2, vcc_lo :: v_dual_min_num_f32 v7, -1, v3 +// GFX1250: v_dual_cndmask_b32 v255, src_scc, v2, vcc_lo :: v_dual_min_num_f32 v7, -1, v3 ; encoding: [0xfd,0xb0,0x24,0xcf,0xc1,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_min_num_f32 v7, 0.5, v2 +// GFX1250: v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_min_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xb0,0x24,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_min_num_f32 v7, src_scc, v5 +// GFX1250: v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_min_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xb0,0x24,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction 
requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v255, vcc_lo :: v_dual_mov_b32 v7, v1 +// GFX1250: v_dual_cndmask_b32 v255, v4, v255, vcc_lo :: v_dual_mov_b32 v7, v1 ; encoding: [0x04,0x81,0x24,0xcf,0x01,0x01,0xff,0x6a,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v1, v255, vcc_lo :: v_dual_mov_b32 v7, v255 +// GFX1250: v_dual_cndmask_b32 v255, v1, v255, vcc_lo :: v_dual_mov_b32 v7, v255 ; encoding: [0x01,0x81,0x24,0xcf,0xff,0x01,0xff,0x6a,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v255, v255, vcc_lo :: v_dual_mov_b32 v7, v2 +// GFX1250: v_dual_cndmask_b32 v255, v255, v255, vcc_lo :: v_dual_mov_b32 v7, v2 ; encoding: [0xff,0x81,0x24,0xcf,0x02,0x01,0xff,0x6a,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v2, v255, vcc_lo :: v_dual_mov_b32 v7, v3 +// GFX1250: v_dual_cndmask_b32 v255, v2, v255, vcc_lo :: v_dual_mov_b32 v7, v3 ; encoding: [0x02,0x81,0x24,0xcf,0x03,0x01,0xff,0x6a,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v3, v255, vcc_lo :: v_dual_mov_b32 v7, v4 +// GFX1250: v_dual_cndmask_b32 v255, v3, v255, vcc_lo :: v_dual_mov_b32 v7, v4 ; encoding: [0x03,0x81,0x24,0xcf,0x04,0x01,0xff,0x6a,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s105, v255, vcc_lo :: v_dual_mov_b32 v7, s105 +// GFX1250: v_dual_cndmask_b32 v255, s105, v255, vcc_lo :: v_dual_mov_b32 v7, s105 ; encoding: [0x69,0x80,0x24,0xcf,0x69,0x00,0xff,0x6a,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s1, v255, vcc_lo :: v_dual_mov_b32 v7, s1 +// GFX1250: v_dual_cndmask_b32 v255, s1, v255, vcc_lo :: v_dual_mov_b32 v7, s1 ; encoding: 
[0x01,0x80,0x24,0xcf,0x01,0x00,0xff,0x6a,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, ttmp15, v255, vcc_lo :: v_dual_mov_b32 v7, ttmp15 +// GFX1250: v_dual_cndmask_b32 v255, ttmp15, v255, vcc_lo :: v_dual_mov_b32 v7, ttmp15 ; encoding: [0x7b,0x80,0x24,0xcf,0x7b,0x00,0xff,0x6a,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_hi, v255, vcc_lo :: v_dual_mov_b32 v7, exec_hi +// GFX1250: v_dual_cndmask_b32 v255, exec_hi, v255, vcc_lo :: v_dual_mov_b32 v7, exec_hi ; encoding: [0x7f,0x80,0x24,0xcf,0x7f,0x00,0xff,0x6a,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_lo, v255, vcc_lo :: v_dual_mov_b32 v7, exec_lo +// GFX1250: v_dual_cndmask_b32 v255, exec_lo, v255, vcc_lo :: v_dual_mov_b32 v7, exec_lo ; encoding: [0x7e,0x80,0x24,0xcf,0x7e,0x00,0xff,0x6a,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, m0, v255, vcc_lo :: v_dual_mov_b32 v7, m0 +// GFX1250: v_dual_cndmask_b32 v255, m0, v255, vcc_lo :: v_dual_mov_b32 v7, m0 ; encoding: [0x7d,0x80,0x24,0xcf,0x7d,0x00,0xff,0x6a,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_hi, v255, vcc_lo :: v_dual_mov_b32 v7, vcc_hi +// GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v255, vcc_lo :: v_dual_mov_b32 v7, vcc_hi ; encoding: [0x6b,0x80,0x24,0xcf,0x6b,0x00,0xff,0x6a,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_lo, v255, vcc_lo :: v_dual_mov_b32 v7, vcc_lo +// GFX1250: v_dual_cndmask_b32 v255, vcc_lo, v255, vcc_lo :: v_dual_mov_b32 v7, vcc_lo ; encoding: [0x6a,0x80,0x24,0xcf,0x6a,0x00,0xff,0x6a,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 
v255, src_scc, v255, vcc_lo :: v_dual_mov_b32 v7, -1 +// GFX1250: v_dual_cndmask_b32 v255, src_scc, v255, vcc_lo :: v_dual_mov_b32 v7, -1 ; encoding: [0xfd,0x80,0x24,0xcf,0xc1,0x00,0xff,0x6a,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_mov_b32 v7, 0.5 +// GFX1250: v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_mov_b32 v7, 0.5 ; encoding: [0xf0,0x80,0x24,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_mov_b32 v7, src_scc +// GFX1250: v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_mov_b32 v7, src_scc ; encoding: [0xc1,0x80,0x24,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, v1, v3 +// GFX1250: v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, v1, v3 ; encoding: [0x04,0x71,0x24,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v1, v2, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, v255, v3 +// GFX1250: v_dual_cndmask_b32 v255, v1, v2, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, v255, v3 ; encoding: [0x01,0x71,0x24,0xcf,0xff,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v255, v2, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, v2, v3 +// GFX1250: v_dual_cndmask_b32 v255, v255, v2, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, v2, v3 ; encoding: [0xff,0x71,0x24,0xcf,0x02,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v2, v2, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, v3, v3 +// GFX1250: v_dual_cndmask_b32 v255, v2, v2, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, v3, v3 ; 
encoding: [0x02,0x71,0x24,0xcf,0x03,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v3, v2, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, v4, v3 +// GFX1250: v_dual_cndmask_b32 v255, v3, v2, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, v4, v3 ; encoding: [0x03,0x71,0x24,0xcf,0x04,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s105, v2, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, s105, v3 +// GFX1250: v_dual_cndmask_b32 v255, s105, v2, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, s105, v3 ; encoding: [0x69,0x70,0x24,0xcf,0x69,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s1, v2, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, s1, v3 +// GFX1250: v_dual_cndmask_b32 v255, s1, v2, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, s1, v3 ; encoding: [0x01,0x70,0x24,0xcf,0x01,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, ttmp15, v2, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, ttmp15, v3 +// GFX1250: v_dual_cndmask_b32 v255, ttmp15, v2, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, ttmp15, v3 ; encoding: [0x7b,0x70,0x24,0xcf,0x7b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_hi, v2, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 +// GFX1250: v_dual_cndmask_b32 v255, exec_hi, v2, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 ; encoding: [0x7f,0x70,0x24,0xcf,0x7f,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_lo, v2, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, exec_lo, v3 +// GFX1250: v_dual_cndmask_b32 v255, exec_lo, v2, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, exec_lo, v3 ; encoding: 
[0x7e,0x70,0x24,0xcf,0x7e,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, m0, v2, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, m0, v3 +// GFX1250: v_dual_cndmask_b32 v255, m0, v2, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, m0, v3 ; encoding: [0x7d,0x70,0x24,0xcf,0x7d,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_hi, v2, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v2, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, vcc_hi, v3 ; encoding: [0x6b,0x70,0x24,0xcf,0x6b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_lo, v2, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_cndmask_b32 v255, vcc_lo, v2, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 ; encoding: [0x6a,0x70,0x24,0xcf,0x6a,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, src_scc, v2, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, -1, v3 +// GFX1250: v_dual_cndmask_b32 v255, src_scc, v2, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, -1, v3 ; encoding: [0xfd,0x70,0x24,0xcf,0xc1,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 +// GFX1250: v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x24,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 +// GFX1250: v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 ; encoding: 
[0xc1,0x70,0x24,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_mul_f32 v7, v1, v3 +// GFX1250: v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_mul_f32 v7, v1, v3 ; encoding: [0x04,0x31,0x24,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v1, v2, vcc_lo :: v_dual_mul_f32 v7, v255, v3 +// GFX1250: v_dual_cndmask_b32 v255, v1, v2, vcc_lo :: v_dual_mul_f32 v7, v255, v3 ; encoding: [0x01,0x31,0x24,0xcf,0xff,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v255, v2, vcc_lo :: v_dual_mul_f32 v7, v2, v3 +// GFX1250: v_dual_cndmask_b32 v255, v255, v2, vcc_lo :: v_dual_mul_f32 v7, v2, v3 ; encoding: [0xff,0x31,0x24,0xcf,0x02,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v2, v2, vcc_lo :: v_dual_mul_f32 v7, v3, v3 +// GFX1250: v_dual_cndmask_b32 v255, v2, v2, vcc_lo :: v_dual_mul_f32 v7, v3, v3 ; encoding: [0x02,0x31,0x24,0xcf,0x03,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v3, v2, vcc_lo :: v_dual_mul_f32 v7, v4, v3 +// GFX1250: v_dual_cndmask_b32 v255, v3, v2, vcc_lo :: v_dual_mul_f32 v7, v4, v3 ; encoding: [0x03,0x31,0x24,0xcf,0x04,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s105, v2, vcc_lo :: v_dual_mul_f32 v7, s105, v3 +// GFX1250: v_dual_cndmask_b32 v255, s105, v2, vcc_lo :: v_dual_mul_f32 v7, s105, v3 ; encoding: [0x69,0x30,0x24,0xcf,0x69,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s1, v2, vcc_lo :: v_dual_mul_f32 v7, s1, 
v3 +// GFX1250: v_dual_cndmask_b32 v255, s1, v2, vcc_lo :: v_dual_mul_f32 v7, s1, v3 ; encoding: [0x01,0x30,0x24,0xcf,0x01,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, ttmp15, v2, vcc_lo :: v_dual_mul_f32 v7, ttmp15, v3 +// GFX1250: v_dual_cndmask_b32 v255, ttmp15, v2, vcc_lo :: v_dual_mul_f32 v7, ttmp15, v3 ; encoding: [0x7b,0x30,0x24,0xcf,0x7b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_hi, v2, vcc_lo :: v_dual_mul_f32 v7, exec_hi, v3 +// GFX1250: v_dual_cndmask_b32 v255, exec_hi, v2, vcc_lo :: v_dual_mul_f32 v7, exec_hi, v3 ; encoding: [0x7f,0x30,0x24,0xcf,0x7f,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_lo, v2, vcc_lo :: v_dual_mul_f32 v7, exec_lo, v3 +// GFX1250: v_dual_cndmask_b32 v255, exec_lo, v2, vcc_lo :: v_dual_mul_f32 v7, exec_lo, v3 ; encoding: [0x7e,0x30,0x24,0xcf,0x7e,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, m0, v2, vcc_lo :: v_dual_mul_f32 v7, m0, v3 +// GFX1250: v_dual_cndmask_b32 v255, m0, v2, vcc_lo :: v_dual_mul_f32 v7, m0, v3 ; encoding: [0x7d,0x30,0x24,0xcf,0x7d,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_hi, v2, vcc_lo :: v_dual_mul_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v2, vcc_lo :: v_dual_mul_f32 v7, vcc_hi, v3 ; encoding: [0x6b,0x30,0x24,0xcf,0x6b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_lo, v2, vcc_lo :: v_dual_mul_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_cndmask_b32 v255, vcc_lo, v2, vcc_lo :: v_dual_mul_f32 v7, vcc_lo, v3 ; encoding: 
[0x6a,0x30,0x24,0xcf,0x6a,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, src_scc, v2, vcc_lo :: v_dual_mul_f32 v7, -1, v3 +// GFX1250: v_dual_cndmask_b32 v255, src_scc, v2, vcc_lo :: v_dual_mul_f32 v7, -1, v3 ; encoding: [0xfd,0x30,0x24,0xcf,0xc1,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_mul_f32 v7, 0.5, v2 +// GFX1250: v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_mul_f32 v7, 0.5, v2 ; encoding: [0xf0,0x30,0x24,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_mul_f32 v7, src_scc, v5 +// GFX1250: v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_mul_f32 v7, src_scc, v5 ; encoding: [0xc1,0x30,0x24,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_sub_f32 v7, v1, v3 +// GFX1250: v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_sub_f32 v7, v1, v3 ; encoding: [0x04,0x51,0x24,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v1, v2, vcc_lo :: v_dual_sub_f32 v7, v255, v3 +// GFX1250: v_dual_cndmask_b32 v255, v1, v2, vcc_lo :: v_dual_sub_f32 v7, v255, v3 ; encoding: [0x01,0x51,0x24,0xcf,0xff,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v255, v2, vcc_lo :: v_dual_sub_f32 v7, v2, v3 +// GFX1250: v_dual_cndmask_b32 v255, v255, v2, vcc_lo :: v_dual_sub_f32 v7, v2, v3 ; encoding: [0xff,0x51,0x24,0xcf,0x02,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v2, v2, vcc_lo :: 
v_dual_sub_f32 v7, v3, v3 +// GFX1250: v_dual_cndmask_b32 v255, v2, v2, vcc_lo :: v_dual_sub_f32 v7, v3, v3 ; encoding: [0x02,0x51,0x24,0xcf,0x03,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v3, v2, vcc_lo :: v_dual_sub_f32 v7, v4, v3 +// GFX1250: v_dual_cndmask_b32 v255, v3, v2, vcc_lo :: v_dual_sub_f32 v7, v4, v3 ; encoding: [0x03,0x51,0x24,0xcf,0x04,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s105, v2, vcc_lo :: v_dual_sub_f32 v7, s105, v3 +// GFX1250: v_dual_cndmask_b32 v255, s105, v2, vcc_lo :: v_dual_sub_f32 v7, s105, v3 ; encoding: [0x69,0x50,0x24,0xcf,0x69,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s1, v2, vcc_lo :: v_dual_sub_f32 v7, s1, v3 +// GFX1250: v_dual_cndmask_b32 v255, s1, v2, vcc_lo :: v_dual_sub_f32 v7, s1, v3 ; encoding: [0x01,0x50,0x24,0xcf,0x01,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, ttmp15, v2, vcc_lo :: v_dual_sub_f32 v7, ttmp15, v3 +// GFX1250: v_dual_cndmask_b32 v255, ttmp15, v2, vcc_lo :: v_dual_sub_f32 v7, ttmp15, v3 ; encoding: [0x7b,0x50,0x24,0xcf,0x7b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_hi, v2, vcc_lo :: v_dual_sub_f32 v7, exec_hi, v3 +// GFX1250: v_dual_cndmask_b32 v255, exec_hi, v2, vcc_lo :: v_dual_sub_f32 v7, exec_hi, v3 ; encoding: [0x7f,0x50,0x24,0xcf,0x7f,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_lo, v2, vcc_lo :: v_dual_sub_f32 v7, exec_lo, v3 +// GFX1250: v_dual_cndmask_b32 v255, exec_lo, v2, vcc_lo :: v_dual_sub_f32 v7, exec_lo, v3 ; encoding: 
[0x7e,0x50,0x24,0xcf,0x7e,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, m0, v2, vcc_lo :: v_dual_sub_f32 v7, m0, v3 +// GFX1250: v_dual_cndmask_b32 v255, m0, v2, vcc_lo :: v_dual_sub_f32 v7, m0, v3 ; encoding: [0x7d,0x50,0x24,0xcf,0x7d,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_hi, v2, vcc_lo :: v_dual_sub_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v2, vcc_lo :: v_dual_sub_f32 v7, vcc_hi, v3 ; encoding: [0x6b,0x50,0x24,0xcf,0x6b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_lo, v2, vcc_lo :: v_dual_sub_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_cndmask_b32 v255, vcc_lo, v2, vcc_lo :: v_dual_sub_f32 v7, vcc_lo, v3 ; encoding: [0x6a,0x50,0x24,0xcf,0x6a,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, src_scc, v2, vcc_lo :: v_dual_sub_f32 v7, -1, v3 +// GFX1250: v_dual_cndmask_b32 v255, src_scc, v2, vcc_lo :: v_dual_sub_f32 v7, -1, v3 ; encoding: [0xfd,0x50,0x24,0xcf,0xc1,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_sub_f32 v7, 0.5, v2 +// GFX1250: v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_sub_f32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x24,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_sub_f32 v7, src_scc, v5 +// GFX1250: v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_sub_f32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x24,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 
v4, v2, vcc_lo :: v_dual_subrev_f32 v7, v1, v3 +// GFX1250: v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_subrev_f32 v7, v1, v3 ; encoding: [0x04,0x61,0x24,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v1, v2, vcc_lo :: v_dual_subrev_f32 v7, v255, v3 +// GFX1250: v_dual_cndmask_b32 v255, v1, v2, vcc_lo :: v_dual_subrev_f32 v7, v255, v3 ; encoding: [0x01,0x61,0x24,0xcf,0xff,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v255, v2, vcc_lo :: v_dual_subrev_f32 v7, v2, v3 +// GFX1250: v_dual_cndmask_b32 v255, v255, v2, vcc_lo :: v_dual_subrev_f32 v7, v2, v3 ; encoding: [0xff,0x61,0x24,0xcf,0x02,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v2, v2, vcc_lo :: v_dual_subrev_f32 v7, v3, v3 +// GFX1250: v_dual_cndmask_b32 v255, v2, v2, vcc_lo :: v_dual_subrev_f32 v7, v3, v3 ; encoding: [0x02,0x61,0x24,0xcf,0x03,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v3, v2, vcc_lo :: v_dual_subrev_f32 v7, v4, v3 +// GFX1250: v_dual_cndmask_b32 v255, v3, v2, vcc_lo :: v_dual_subrev_f32 v7, v4, v3 ; encoding: [0x03,0x61,0x24,0xcf,0x04,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s105, v2, vcc_lo :: v_dual_subrev_f32 v7, s105, v3 +// GFX1250: v_dual_cndmask_b32 v255, s105, v2, vcc_lo :: v_dual_subrev_f32 v7, s105, v3 ; encoding: [0x69,0x60,0x24,0xcf,0x69,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s1, v2, vcc_lo :: v_dual_subrev_f32 v7, s1, v3 +// GFX1250: v_dual_cndmask_b32 v255, s1, v2, vcc_lo :: v_dual_subrev_f32 v7, s1, v3 ; encoding: 
[0x01,0x60,0x24,0xcf,0x01,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, ttmp15, v2, vcc_lo :: v_dual_subrev_f32 v7, ttmp15, v3 +// GFX1250: v_dual_cndmask_b32 v255, ttmp15, v2, vcc_lo :: v_dual_subrev_f32 v7, ttmp15, v3 ; encoding: [0x7b,0x60,0x24,0xcf,0x7b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_hi, v2, vcc_lo :: v_dual_subrev_f32 v7, exec_hi, v3 +// GFX1250: v_dual_cndmask_b32 v255, exec_hi, v2, vcc_lo :: v_dual_subrev_f32 v7, exec_hi, v3 ; encoding: [0x7f,0x60,0x24,0xcf,0x7f,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_lo, v2, vcc_lo :: v_dual_subrev_f32 v7, exec_lo, v3 +// GFX1250: v_dual_cndmask_b32 v255, exec_lo, v2, vcc_lo :: v_dual_subrev_f32 v7, exec_lo, v3 ; encoding: [0x7e,0x60,0x24,0xcf,0x7e,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, m0, v2, vcc_lo :: v_dual_subrev_f32 v7, m0, v3 +// GFX1250: v_dual_cndmask_b32 v255, m0, v2, vcc_lo :: v_dual_subrev_f32 v7, m0, v3 ; encoding: [0x7d,0x60,0x24,0xcf,0x7d,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_hi, v2, vcc_lo :: v_dual_subrev_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v2, vcc_lo :: v_dual_subrev_f32 v7, vcc_hi, v3 ; encoding: [0x6b,0x60,0x24,0xcf,0x6b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_lo, v2, vcc_lo :: v_dual_subrev_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_cndmask_b32 v255, vcc_lo, v2, vcc_lo :: v_dual_subrev_f32 v7, vcc_lo, v3 ; encoding: [0x6a,0x60,0x24,0xcf,0x6a,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: 
error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, src_scc, v2, vcc_lo :: v_dual_subrev_f32 v7, -1, v3 +// GFX1250: v_dual_cndmask_b32 v255, src_scc, v2, vcc_lo :: v_dual_subrev_f32 v7, -1, v3 ; encoding: [0xfd,0x60,0x24,0xcf,0xc1,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_subrev_f32 v7, 0.5, v2 +// GFX1250: v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_subrev_f32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x24,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_subrev_f32 v7, src_scc, v5 +// GFX1250: v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_subrev_f32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x24,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_fma_f32 v7, v1, v3, v4 +// GFX1250: v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_fma_f32 v7, v1, v3, v4 ; encoding: [0x04,0x31,0x25,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x04,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_bitop2_b32 v7, v1, v3 bitop3:1 +// GFX1250: v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_bitop2_b32 v7, v1, v3 bitop3:1 ; encoding: [0x04,0x21,0x25,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x01,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v4, v2 :: v_dual_add_f32 v7, v1, v3 +// GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_add_f32 v7, v1, v3 ; encoding: [0x04,0x41,0x00,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v1, v2 :: v_dual_add_f32 v7, v255, v3 +// GFX1250: v_dual_fmac_f32 v255, v1, v2 :: v_dual_add_f32 
v7, v255, v3 ; encoding: [0x01,0x41,0x00,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v255, v2 :: v_dual_add_f32 v7, v2, v3 +// GFX1250: v_dual_fmac_f32 v255, v255, v2 :: v_dual_add_f32 v7, v2, v3 ; encoding: [0xff,0x41,0x00,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v2, v2 :: v_dual_add_f32 v7, v3, v3 +// GFX1250: v_dual_fmac_f32 v255, v2, v2 :: v_dual_add_f32 v7, v3, v3 ; encoding: [0x02,0x41,0x00,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v3, v2 :: v_dual_add_f32 v7, v4, v3 +// GFX1250: v_dual_fmac_f32 v255, v3, v2 :: v_dual_add_f32 v7, v4, v3 ; encoding: [0x03,0x41,0x00,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s105, v2 :: v_dual_add_f32 v7, s1, v3 +// GFX1250: v_dual_fmac_f32 v255, s105, v2 :: v_dual_add_f32 v7, s1, v3 ; encoding: [0x69,0x40,0x00,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s1, v2 :: v_dual_add_f32 v7, s105, v3 +// GFX1250: v_dual_fmac_f32 v255, s1, v2 :: v_dual_add_f32 v7, s105, v3 ; encoding: [0x01,0x40,0x00,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_add_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_add_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x40,0x00,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_add_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_add_f32 v7, vcc_hi, v3 ; encoding: 
[0x7f,0x40,0x00,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_add_f32 v7, ttmp15, v3 +// GFX1250: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_add_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x40,0x00,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, m0, v2 :: v_dual_add_f32 v7, m0, v3 +// GFX1250: v_dual_fmac_f32 v255, m0, v2 :: v_dual_add_f32 v7, m0, v3 ; encoding: [0x7d,0x40,0x00,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_add_f32 v7, exec_lo, v3 +// GFX1250: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_add_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x40,0x00,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_add_f32 v7, exec_hi, v3 +// GFX1250: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_add_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x00,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_add_f32 v7, -1, v3 +// GFX1250: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_add_f32 v7, -1, v3 ; encoding: [0xfd,0x40,0x00,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_add_f32 v7, 0.5, v2 +// GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_add_f32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x00,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, -1, v4 :: v_dual_add_f32 v7, src_scc, v5 +// GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_add_f32 v7, src_scc, v5 ; 
encoding: [0xc1,0x40,0x00,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v4, v2 :: v_dual_add_nc_u32 v7, v1, v3 +// GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_add_nc_u32 v7, v1, v3 ; encoding: [0x04,0x01,0x01,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v1, v2 :: v_dual_add_nc_u32 v7, v255, v3 +// GFX1250: v_dual_fmac_f32 v255, v1, v2 :: v_dual_add_nc_u32 v7, v255, v3 ; encoding: [0x01,0x01,0x01,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v255, v2 :: v_dual_add_nc_u32 v7, v2, v3 +// GFX1250: v_dual_fmac_f32 v255, v255, v2 :: v_dual_add_nc_u32 v7, v2, v3 ; encoding: [0xff,0x01,0x01,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v2, v2 :: v_dual_add_nc_u32 v7, v3, v3 +// GFX1250: v_dual_fmac_f32 v255, v2, v2 :: v_dual_add_nc_u32 v7, v3, v3 ; encoding: [0x02,0x01,0x01,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v3, v2 :: v_dual_add_nc_u32 v7, v4, v3 +// GFX1250: v_dual_fmac_f32 v255, v3, v2 :: v_dual_add_nc_u32 v7, v4, v3 ; encoding: [0x03,0x01,0x01,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s105, v2 :: v_dual_add_nc_u32 v7, s1, v3 +// GFX1250: v_dual_fmac_f32 v255, s105, v2 :: v_dual_add_nc_u32 v7, s1, v3 ; encoding: [0x69,0x00,0x01,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s1, v2 :: v_dual_add_nc_u32 v7, s105, v3 +// GFX1250: v_dual_fmac_f32 v255, s1, v2 :: v_dual_add_nc_u32 v7, s105, v3 ; encoding: 
[0x01,0x00,0x01,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v7, vcc_lo, v3 +// GFX1250: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x01,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v7, vcc_hi, v3 +// GFX1250: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x01,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v7, ttmp15, v3 +// GFX1250: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x01,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, m0, v2 :: v_dual_add_nc_u32 v7, m0, v3 +// GFX1250: v_dual_fmac_f32 v255, m0, v2 :: v_dual_add_nc_u32 v7, m0, v3 ; encoding: [0x7d,0x00,0x01,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v7, exec_lo, v3 +// GFX1250: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x01,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v7, exec_hi, v3 +// GFX1250: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x01,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_add_nc_u32 v7, -1, v3 +// GFX1250: 
v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_add_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x00,0x01,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_add_nc_u32 v7, 0.5, v2 +// GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_add_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x01,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, -1, v4 :: v_dual_add_nc_u32 v7, src_scc, v5 +// GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_add_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x01,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo +// GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo ; encoding: [0x04,0x91,0x00,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v1, v2 :: v_dual_cndmask_b32 v7, v255, v3, vcc_lo +// GFX1250: v_dual_fmac_f32 v255, v1, v2 :: v_dual_cndmask_b32 v7, v255, v3, vcc_lo ; encoding: [0x01,0x91,0x00,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v255, v2 :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo +// GFX1250: v_dual_fmac_f32 v255, v255, v2 :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo ; encoding: [0xff,0x91,0x00,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v2, v2 :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo +// GFX1250: v_dual_fmac_f32 v255, v2, v2 :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo ; encoding: [0x02,0x91,0x00,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_fmac_f32 v255, v3, v2 :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo +// GFX1250: v_dual_fmac_f32 v255, v3, v2 :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo ; encoding: [0x03,0x91,0x00,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s105, v2 :: v_dual_cndmask_b32 v7, s105, v3, vcc_lo +// GFX1250: v_dual_fmac_f32 v255, s105, v2 :: v_dual_cndmask_b32 v7, s105, v3, vcc_lo ; encoding: [0x69,0x90,0x00,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s1, v2 :: v_dual_cndmask_b32 v7, s1, v3, vcc_lo +// GFX1250: v_dual_fmac_f32 v255, s1, v2 :: v_dual_cndmask_b32 v7, s1, v3, vcc_lo ; encoding: [0x01,0x90,0x00,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v7, ttmp15, v3, vcc_lo +// GFX1250: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v7, ttmp15, v3, vcc_lo ; encoding: [0x7b,0x90,0x00,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v7, exec_hi, v3, vcc_lo +// GFX1250: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v7, exec_hi, v3, vcc_lo ; encoding: [0x7f,0x90,0x00,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v7, exec_lo, v3, vcc_lo +// GFX1250: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v7, exec_lo, v3, vcc_lo ; encoding: [0x7e,0x90,0x00,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, m0, v2 :: v_dual_cndmask_b32 v7, m0, v3, vcc_lo +// GFX1250: v_dual_fmac_f32 v255, m0, v2 :: v_dual_cndmask_b32 v7, 
m0, v3, vcc_lo ; encoding: [0x7d,0x90,0x00,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v7, vcc_hi, v3, vcc_lo +// GFX1250: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v7, vcc_hi, v3, vcc_lo ; encoding: [0x6b,0x90,0x00,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo +// GFX1250: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo ; encoding: [0x6a,0x90,0x00,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo +// GFX1250: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo ; encoding: [0xfd,0x90,0x00,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo +// GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo ; encoding: [0xf0,0x90,0x00,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, -1, v4 :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo +// GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo ; encoding: [0xc1,0x90,0x00,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v7, v4, v2 :: v_dual_fmac_f32 v9, v1, v3 +// GFX1250: v_dual_fmac_f32 v7, v4, v2 :: v_dual_fmac_f32 v9, v1, v3 ; encoding: [0x04,0x01,0x00,0xcf,0x01,0x01,0x02,0x00,0x07,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_fmac_f32 v7, v1, v2 :: v_dual_fmac_f32 v9, v255, v3 +// GFX1250: v_dual_fmac_f32 v7, v1, v2 :: v_dual_fmac_f32 v9, v255, v3 ; encoding: [0x01,0x01,0x00,0xcf,0xff,0x01,0x02,0x00,0x07,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v7, v255, v2 :: v_dual_fmac_f32 v9, v2, v3 +// GFX1250: v_dual_fmac_f32 v7, v255, v2 :: v_dual_fmac_f32 v9, v2, v3 ; encoding: [0xff,0x01,0x00,0xcf,0x02,0x01,0x02,0x00,0x07,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v7, v2, v2 :: v_dual_fmac_f32 v9, v3, v3 +// GFX1250: v_dual_fmac_f32 v7, v2, v2 :: v_dual_fmac_f32 v9, v3, v3 ; encoding: [0x02,0x01,0x00,0xcf,0x03,0x01,0x02,0x00,0x07,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v7, v3, v2 :: v_dual_fmac_f32 v9, v4, v3 +// GFX1250: v_dual_fmac_f32 v7, v3, v2 :: v_dual_fmac_f32 v9, v4, v3 ; encoding: [0x03,0x01,0x00,0xcf,0x04,0x01,0x02,0x00,0x07,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v7, s105, v2 :: v_dual_fmac_f32 v9, s1, v3 +// GFX1250: v_dual_fmac_f32 v7, s105, v2 :: v_dual_fmac_f32 v9, s1, v3 ; encoding: [0x69,0x00,0x00,0xcf,0x01,0x00,0x02,0x00,0x07,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v7, s1, v2 :: v_dual_fmac_f32 v9, s105, v3 +// GFX1250: v_dual_fmac_f32 v7, s1, v2 :: v_dual_fmac_f32 v9, s105, v3 ; encoding: [0x01,0x00,0x00,0xcf,0x69,0x00,0x02,0x00,0x07,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v7, ttmp15, v2 :: v_dual_fmac_f32 v9, vcc_lo, v3 +// GFX1250: v_dual_fmac_f32 v7, ttmp15, v2 :: v_dual_fmac_f32 v9, vcc_lo, v3 ; encoding: [0x7b,0x00,0x00,0xcf,0x6a,0x00,0x02,0x00,0x07,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v7, exec_hi, v2 :: v_dual_fmac_f32 v9, 
vcc_hi, v3 +// GFX1250: v_dual_fmac_f32 v7, exec_hi, v2 :: v_dual_fmac_f32 v9, vcc_hi, v3 ; encoding: [0x7f,0x00,0x00,0xcf,0x6b,0x00,0x02,0x00,0x07,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v7, exec_lo, v2 :: v_dual_fmac_f32 v9, ttmp15, v3 +// GFX1250: v_dual_fmac_f32 v7, exec_lo, v2 :: v_dual_fmac_f32 v9, ttmp15, v3 ; encoding: [0x7e,0x00,0x00,0xcf,0x7b,0x00,0x02,0x00,0x07,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v7, m0, v2 :: v_dual_fmac_f32 v9, m0, v3 +// GFX1250: v_dual_fmac_f32 v7, m0, v2 :: v_dual_fmac_f32 v9, m0, v3 ; encoding: [0x7d,0x00,0x00,0xcf,0x7d,0x00,0x02,0x00,0x07,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v7, vcc_hi, v2 :: v_dual_fmac_f32 v9, exec_lo, v3 +// GFX1250: v_dual_fmac_f32 v7, vcc_hi, v2 :: v_dual_fmac_f32 v9, exec_lo, v3 ; encoding: [0x6b,0x00,0x00,0xcf,0x7e,0x00,0x02,0x00,0x07,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v7, vcc_lo, v2 :: v_dual_fmac_f32 v9, exec_hi, v3 +// GFX1250: v_dual_fmac_f32 v7, vcc_lo, v2 :: v_dual_fmac_f32 v9, exec_hi, v3 ; encoding: [0x6a,0x00,0x00,0xcf,0x7f,0x00,0x02,0x00,0x07,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v7, src_scc, v2 :: v_dual_fmac_f32 v9, -1, v3 +// GFX1250: v_dual_fmac_f32 v7, src_scc, v2 :: v_dual_fmac_f32 v9, -1, v3 ; encoding: [0xfd,0x00,0x00,0xcf,0xc1,0x00,0x02,0x00,0x07,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v7, 0.5, v3 :: v_dual_fmac_f32 v9, 0.5, v2 +// GFX1250: v_dual_fmac_f32 v7, 0.5, v3 :: v_dual_fmac_f32 v9, 0.5, v2 ; encoding: [0xf0,0x00,0x00,0xcf,0xf0,0x00,0x03,0x00,0x07,0x02,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v7, -1, v4 :: v_dual_fmac_f32 v9, 
src_scc, v5 +// GFX1250: v_dual_fmac_f32 v7, -1, v4 :: v_dual_fmac_f32 v9, src_scc, v5 ; encoding: [0xc1,0x00,0x00,0xcf,0xfd,0x00,0x04,0x00,0x07,0x05,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v4, v2 :: v_dual_lshlrev_b32 v7, v1, v3 +// GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_lshlrev_b32 v7, v1, v3 ; encoding: [0x04,0x11,0x01,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v1, v2 :: v_dual_lshlrev_b32 v7, v255, v3 +// GFX1250: v_dual_fmac_f32 v255, v1, v2 :: v_dual_lshlrev_b32 v7, v255, v3 ; encoding: [0x01,0x11,0x01,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v255, v2 :: v_dual_lshlrev_b32 v7, v2, v3 +// GFX1250: v_dual_fmac_f32 v255, v255, v2 :: v_dual_lshlrev_b32 v7, v2, v3 ; encoding: [0xff,0x11,0x01,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v2, v2 :: v_dual_lshlrev_b32 v7, v3, v3 +// GFX1250: v_dual_fmac_f32 v255, v2, v2 :: v_dual_lshlrev_b32 v7, v3, v3 ; encoding: [0x02,0x11,0x01,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v3, v2 :: v_dual_lshlrev_b32 v7, v4, v3 +// GFX1250: v_dual_fmac_f32 v255, v3, v2 :: v_dual_lshlrev_b32 v7, v4, v3 ; encoding: [0x03,0x11,0x01,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s105, v2 :: v_dual_lshlrev_b32 v7, s1, v3 +// GFX1250: v_dual_fmac_f32 v255, s105, v2 :: v_dual_lshlrev_b32 v7, s1, v3 ; encoding: [0x69,0x10,0x01,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s1, v2 :: v_dual_lshlrev_b32 v7, 
s105, v3 +// GFX1250: v_dual_fmac_f32 v255, s1, v2 :: v_dual_lshlrev_b32 v7, s105, v3 ; encoding: [0x01,0x10,0x01,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v7, vcc_lo, v3 +// GFX1250: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v7, vcc_lo, v3 ; encoding: [0x7b,0x10,0x01,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v7, vcc_hi, v3 +// GFX1250: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v7, vcc_hi, v3 ; encoding: [0x7f,0x10,0x01,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v7, ttmp15, v3 +// GFX1250: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v7, ttmp15, v3 ; encoding: [0x7e,0x10,0x01,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, m0, v2 :: v_dual_lshlrev_b32 v7, m0, v3 +// GFX1250: v_dual_fmac_f32 v255, m0, v2 :: v_dual_lshlrev_b32 v7, m0, v3 ; encoding: [0x7d,0x10,0x01,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v7, exec_lo, v3 +// GFX1250: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v7, exec_lo, v3 ; encoding: [0x6b,0x10,0x01,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v7, exec_hi, v3 +// GFX1250: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x10,0x01,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction 
requires wavesize=32 + +v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v7, -1, v3 +// GFX1250: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v7, -1, v3 ; encoding: [0xfd,0x10,0x01,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v7, 0.5, v2 +// GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x10,0x01,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, -1, v4 :: v_dual_lshlrev_b32 v7, src_scc, v5 +// GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_lshlrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x10,0x01,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v4, v2 :: v_dual_max_num_f32 v7, v1, v3 +// GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_max_num_f32 v7, v1, v3 ; encoding: [0x04,0xa1,0x00,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v1, v2 :: v_dual_max_num_f32 v7, v255, v3 +// GFX1250: v_dual_fmac_f32 v255, v1, v2 :: v_dual_max_num_f32 v7, v255, v3 ; encoding: [0x01,0xa1,0x00,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v255, v2 :: v_dual_max_num_f32 v7, v2, v3 +// GFX1250: v_dual_fmac_f32 v255, v255, v2 :: v_dual_max_num_f32 v7, v2, v3 ; encoding: [0xff,0xa1,0x00,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v2, v2 :: v_dual_max_num_f32 v7, v3, v3 +// GFX1250: v_dual_fmac_f32 v255, v2, v2 :: v_dual_max_num_f32 v7, v3, v3 ; encoding: [0x02,0xa1,0x00,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: 
error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v3, v2 :: v_dual_max_num_f32 v7, v4, v3 +// GFX1250: v_dual_fmac_f32 v255, v3, v2 :: v_dual_max_num_f32 v7, v4, v3 ; encoding: [0x03,0xa1,0x00,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s105, v2 :: v_dual_max_num_f32 v7, s1, v3 +// GFX1250: v_dual_fmac_f32 v255, s105, v2 :: v_dual_max_num_f32 v7, s1, v3 ; encoding: [0x69,0xa0,0x00,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s1, v2 :: v_dual_max_num_f32 v7, s105, v3 +// GFX1250: v_dual_fmac_f32 v255, s1, v2 :: v_dual_max_num_f32 v7, s105, v3 ; encoding: [0x01,0xa0,0x00,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_max_num_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_max_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xa0,0x00,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_max_num_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_max_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xa0,0x00,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_max_num_f32 v7, ttmp15, v3 +// GFX1250: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_max_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xa0,0x00,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, m0, v2 :: v_dual_max_num_f32 v7, m0, v3 +// GFX1250: v_dual_fmac_f32 v255, m0, v2 :: v_dual_max_num_f32 v7, m0, v3 ; encoding: 
[0x7d,0xa0,0x00,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v7, exec_lo, v3 +// GFX1250: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xa0,0x00,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v7, exec_hi, v3 +// GFX1250: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xa0,0x00,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_max_num_f32 v7, -1, v3 +// GFX1250: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_max_num_f32 v7, -1, v3 ; encoding: [0xfd,0xa0,0x00,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_max_num_f32 v7, 0.5, v2 +// GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_max_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xa0,0x00,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, -1, v4 :: v_dual_max_num_f32 v7, src_scc, v5 +// GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_max_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xa0,0x00,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v4, v2 :: v_dual_min_num_f32 v7, v1, v3 +// GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_min_num_f32 v7, v1, v3 ; encoding: [0x04,0xb1,0x00,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v1, v2 :: v_dual_min_num_f32 v7, v255, v3 +// GFX1250: v_dual_fmac_f32 v255, v1, v2 
:: v_dual_min_num_f32 v7, v255, v3 ; encoding: [0x01,0xb1,0x00,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v255, v2 :: v_dual_min_num_f32 v7, v2, v3 +// GFX1250: v_dual_fmac_f32 v255, v255, v2 :: v_dual_min_num_f32 v7, v2, v3 ; encoding: [0xff,0xb1,0x00,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v2, v2 :: v_dual_min_num_f32 v7, v3, v3 +// GFX1250: v_dual_fmac_f32 v255, v2, v2 :: v_dual_min_num_f32 v7, v3, v3 ; encoding: [0x02,0xb1,0x00,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v3, v2 :: v_dual_min_num_f32 v7, v4, v3 +// GFX1250: v_dual_fmac_f32 v255, v3, v2 :: v_dual_min_num_f32 v7, v4, v3 ; encoding: [0x03,0xb1,0x00,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s105, v2 :: v_dual_min_num_f32 v7, s1, v3 +// GFX1250: v_dual_fmac_f32 v255, s105, v2 :: v_dual_min_num_f32 v7, s1, v3 ; encoding: [0x69,0xb0,0x00,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s1, v2 :: v_dual_min_num_f32 v7, s105, v3 +// GFX1250: v_dual_fmac_f32 v255, s1, v2 :: v_dual_min_num_f32 v7, s105, v3 ; encoding: [0x01,0xb0,0x00,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_min_num_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_min_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xb0,0x00,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_min_num_f32 v7, vcc_hi, v3 +// GFX1250: 
v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_min_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xb0,0x00,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_min_num_f32 v7, ttmp15, v3 +// GFX1250: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_min_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xb0,0x00,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, m0, v2 :: v_dual_min_num_f32 v7, m0, v3 +// GFX1250: v_dual_fmac_f32 v255, m0, v2 :: v_dual_min_num_f32 v7, m0, v3 ; encoding: [0x7d,0xb0,0x00,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v7, exec_lo, v3 +// GFX1250: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xb0,0x00,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v7, exec_hi, v3 +// GFX1250: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xb0,0x00,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_min_num_f32 v7, -1, v3 +// GFX1250: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_min_num_f32 v7, -1, v3 ; encoding: [0xfd,0xb0,0x00,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_min_num_f32 v7, 0.5, v2 +// GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_min_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xb0,0x00,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 
v255, -1, v4 :: v_dual_min_num_f32 v7, src_scc, v5 +// GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_min_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xb0,0x00,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v4, v255 :: v_dual_mov_b32 v7, v1 +// GFX1250: v_dual_fmac_f32 v255, v4, v255 :: v_dual_mov_b32 v7, v1 ; encoding: [0x04,0x81,0x00,0xcf,0x01,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v1, v255 :: v_dual_mov_b32 v7, v255 +// GFX1250: v_dual_fmac_f32 v255, v1, v255 :: v_dual_mov_b32 v7, v255 ; encoding: [0x01,0x81,0x00,0xcf,0xff,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v255, v255 :: v_dual_mov_b32 v7, v2 +// GFX1250: v_dual_fmac_f32 v255, v255, v255 :: v_dual_mov_b32 v7, v2 ; encoding: [0xff,0x81,0x00,0xcf,0x02,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v2, v255 :: v_dual_mov_b32 v7, v3 +// GFX1250: v_dual_fmac_f32 v255, v2, v255 :: v_dual_mov_b32 v7, v3 ; encoding: [0x02,0x81,0x00,0xcf,0x03,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v3, v255 :: v_dual_mov_b32 v7, v4 +// GFX1250: v_dual_fmac_f32 v255, v3, v255 :: v_dual_mov_b32 v7, v4 ; encoding: [0x03,0x81,0x00,0xcf,0x04,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s105, v255 :: v_dual_mov_b32 v7, s1 +// GFX1250: v_dual_fmac_f32 v255, s105, v255 :: v_dual_mov_b32 v7, s1 ; encoding: [0x69,0x80,0x00,0xcf,0x01,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s1, v255 :: v_dual_mov_b32 v7, s105 +// GFX1250: 
v_dual_fmac_f32 v255, s1, v255 :: v_dual_mov_b32 v7, s105 ; encoding: [0x01,0x80,0x00,0xcf,0x69,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, ttmp15, v255 :: v_dual_mov_b32 v7, vcc_lo +// GFX1250: v_dual_fmac_f32 v255, ttmp15, v255 :: v_dual_mov_b32 v7, vcc_lo ; encoding: [0x7b,0x80,0x00,0xcf,0x6a,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_hi, v255 :: v_dual_mov_b32 v7, vcc_hi +// GFX1250: v_dual_fmac_f32 v255, exec_hi, v255 :: v_dual_mov_b32 v7, vcc_hi ; encoding: [0x7f,0x80,0x00,0xcf,0x6b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_lo, v255 :: v_dual_mov_b32 v7, ttmp15 +// GFX1250: v_dual_fmac_f32 v255, exec_lo, v255 :: v_dual_mov_b32 v7, ttmp15 ; encoding: [0x7e,0x80,0x00,0xcf,0x7b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, m0, v255 :: v_dual_mov_b32 v7, m0 +// GFX1250: v_dual_fmac_f32 v255, m0, v255 :: v_dual_mov_b32 v7, m0 ; encoding: [0x7d,0x80,0x00,0xcf,0x7d,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_hi, v255 :: v_dual_mov_b32 v7, exec_lo +// GFX1250: v_dual_fmac_f32 v255, vcc_hi, v255 :: v_dual_mov_b32 v7, exec_lo ; encoding: [0x6b,0x80,0x00,0xcf,0x7e,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_lo, v255 :: v_dual_mov_b32 v7, exec_hi +// GFX1250: v_dual_fmac_f32 v255, vcc_lo, v255 :: v_dual_mov_b32 v7, exec_hi ; encoding: [0x6a,0x80,0x00,0xcf,0x7f,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, src_scc, v255 :: v_dual_mov_b32 v7, -1 +// GFX1250: 
v_dual_fmac_f32 v255, src_scc, v255 :: v_dual_mov_b32 v7, -1 ; encoding: [0xfd,0x80,0x00,0xcf,0xc1,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_mov_b32 v7, 0.5 +// GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_mov_b32 v7, 0.5 ; encoding: [0xf0,0x80,0x00,0xcf,0xf0,0x00,0x03,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, -1, v4 :: v_dual_mov_b32 v7, src_scc +// GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_mov_b32 v7, src_scc ; encoding: [0xc1,0x80,0x00,0xcf,0xfd,0x00,0x04,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v7, v1, v3 +// GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v7, v1, v3 ; encoding: [0x04,0x71,0x00,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v7, v255, v3 +// GFX1250: v_dual_fmac_f32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v7, v255, v3 ; encoding: [0x01,0x71,0x00,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v7, v2, v3 +// GFX1250: v_dual_fmac_f32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v7, v2, v3 ; encoding: [0xff,0x71,0x00,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v7, v3, v3 +// GFX1250: v_dual_fmac_f32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v7, v3, v3 ; encoding: [0x02,0x71,0x00,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v7, v4, 
v3 +// GFX1250: v_dual_fmac_f32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v7, v4, v3 ; encoding: [0x03,0x71,0x00,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v7, s1, v3 +// GFX1250: v_dual_fmac_f32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v7, s1, v3 ; encoding: [0x69,0x70,0x00,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v7, s105, v3 +// GFX1250: v_dual_fmac_f32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v7, s105, v3 ; encoding: [0x01,0x70,0x00,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x70,0x00,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x70,0x00,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, ttmp15, v3 +// GFX1250: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x70,0x00,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v7, m0, v3 +// GFX1250: v_dual_fmac_f32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v7, m0, v3 ; encoding: [0x7d,0x70,0x00,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_lo, v3 +// GFX1250: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x70,0x00,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 +// GFX1250: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x00,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v7, -1, v3 +// GFX1250: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v7, -1, v3 ; encoding: [0xfd,0x70,0x00,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 +// GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x00,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 +// GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x00,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v4, v2 :: v_dual_mul_f32 v7, v1, v3 +// GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_mul_f32 v7, v1, v3 ; encoding: [0x04,0x31,0x00,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v1, v2 :: v_dual_mul_f32 v7, v255, v3 +// GFX1250: v_dual_fmac_f32 v255, v1, v2 :: v_dual_mul_f32 v7, v255, v3 ; 
encoding: [0x01,0x31,0x00,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v255, v2 :: v_dual_mul_f32 v7, v2, v3 +// GFX1250: v_dual_fmac_f32 v255, v255, v2 :: v_dual_mul_f32 v7, v2, v3 ; encoding: [0xff,0x31,0x00,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v2, v2 :: v_dual_mul_f32 v7, v3, v3 +// GFX1250: v_dual_fmac_f32 v255, v2, v2 :: v_dual_mul_f32 v7, v3, v3 ; encoding: [0x02,0x31,0x00,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v3, v2 :: v_dual_mul_f32 v7, v4, v3 +// GFX1250: v_dual_fmac_f32 v255, v3, v2 :: v_dual_mul_f32 v7, v4, v3 ; encoding: [0x03,0x31,0x00,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s105, v2 :: v_dual_mul_f32 v7, s1, v3 +// GFX1250: v_dual_fmac_f32 v255, s105, v2 :: v_dual_mul_f32 v7, s1, v3 ; encoding: [0x69,0x30,0x00,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s1, v2 :: v_dual_mul_f32 v7, s105, v3 +// GFX1250: v_dual_fmac_f32 v255, s1, v2 :: v_dual_mul_f32 v7, s105, v3 ; encoding: [0x01,0x30,0x00,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_mul_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_mul_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x30,0x00,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_mul_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_mul_f32 v7, vcc_hi, v3 ; encoding: 
[0x7f,0x30,0x00,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_mul_f32 v7, ttmp15, v3 +// GFX1250: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_mul_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x30,0x00,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, m0, v2 :: v_dual_mul_f32 v7, m0, v3 +// GFX1250: v_dual_fmac_f32 v255, m0, v2 :: v_dual_mul_f32 v7, m0, v3 ; encoding: [0x7d,0x30,0x00,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_mul_f32 v7, exec_lo, v3 +// GFX1250: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_mul_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x30,0x00,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_mul_f32 v7, exec_hi, v3 +// GFX1250: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_mul_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x30,0x00,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_mul_f32 v7, -1, v3 +// GFX1250: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_mul_f32 v7, -1, v3 ; encoding: [0xfd,0x30,0x00,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_mul_f32 v7, 0.5, v2 +// GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_mul_f32 v7, 0.5, v2 ; encoding: [0xf0,0x30,0x00,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, -1, v4 :: v_dual_mul_f32 v7, src_scc, v5 +// GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_mul_f32 v7, src_scc, v5 ; 
encoding: [0xc1,0x30,0x00,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v4, v2 :: v_dual_sub_f32 v7, v1, v3 +// GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_sub_f32 v7, v1, v3 ; encoding: [0x04,0x51,0x00,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v1, v2 :: v_dual_sub_f32 v7, v255, v3 +// GFX1250: v_dual_fmac_f32 v255, v1, v2 :: v_dual_sub_f32 v7, v255, v3 ; encoding: [0x01,0x51,0x00,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v255, v2 :: v_dual_sub_f32 v7, v2, v3 +// GFX1250: v_dual_fmac_f32 v255, v255, v2 :: v_dual_sub_f32 v7, v2, v3 ; encoding: [0xff,0x51,0x00,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v2, v2 :: v_dual_sub_f32 v7, v3, v3 +// GFX1250: v_dual_fmac_f32 v255, v2, v2 :: v_dual_sub_f32 v7, v3, v3 ; encoding: [0x02,0x51,0x00,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v3, v2 :: v_dual_sub_f32 v7, v4, v3 +// GFX1250: v_dual_fmac_f32 v255, v3, v2 :: v_dual_sub_f32 v7, v4, v3 ; encoding: [0x03,0x51,0x00,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s105, v2 :: v_dual_sub_f32 v7, s1, v3 +// GFX1250: v_dual_fmac_f32 v255, s105, v2 :: v_dual_sub_f32 v7, s1, v3 ; encoding: [0x69,0x50,0x00,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s1, v2 :: v_dual_sub_f32 v7, s105, v3 +// GFX1250: v_dual_fmac_f32 v255, s1, v2 :: v_dual_sub_f32 v7, s105, v3 ; encoding: 
[0x01,0x50,0x00,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_sub_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_sub_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x50,0x00,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_sub_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_sub_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x50,0x00,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_sub_f32 v7, ttmp15, v3 +// GFX1250: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_sub_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x50,0x00,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, m0, v2 :: v_dual_sub_f32 v7, m0, v3 +// GFX1250: v_dual_fmac_f32 v255, m0, v2 :: v_dual_sub_f32 v7, m0, v3 ; encoding: [0x7d,0x50,0x00,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_sub_f32 v7, exec_lo, v3 +// GFX1250: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_sub_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x50,0x00,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_sub_f32 v7, exec_hi, v3 +// GFX1250: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_sub_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x00,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_sub_f32 v7, -1, v3 +// GFX1250: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_sub_f32 
v7, -1, v3 ; encoding: [0xfd,0x50,0x00,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_sub_f32 v7, 0.5, v2 +// GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_sub_f32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x00,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, -1, v4 :: v_dual_sub_f32 v7, src_scc, v5 +// GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_sub_f32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x00,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v4, v2 :: v_dual_subrev_f32 v7, v1, v3 +// GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_subrev_f32 v7, v1, v3 ; encoding: [0x04,0x61,0x00,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v1, v2 :: v_dual_subrev_f32 v7, v255, v3 +// GFX1250: v_dual_fmac_f32 v255, v1, v2 :: v_dual_subrev_f32 v7, v255, v3 ; encoding: [0x01,0x61,0x00,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v255, v2 :: v_dual_subrev_f32 v7, v2, v3 +// GFX1250: v_dual_fmac_f32 v255, v255, v2 :: v_dual_subrev_f32 v7, v2, v3 ; encoding: [0xff,0x61,0x00,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v2, v2 :: v_dual_subrev_f32 v7, v3, v3 +// GFX1250: v_dual_fmac_f32 v255, v2, v2 :: v_dual_subrev_f32 v7, v3, v3 ; encoding: [0x02,0x61,0x00,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v3, v2 :: v_dual_subrev_f32 v7, v4, v3 +// GFX1250: v_dual_fmac_f32 v255, v3, v2 :: v_dual_subrev_f32 v7, v4, v3 ; encoding: 
[0x03,0x61,0x00,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s105, v2 :: v_dual_subrev_f32 v7, s1, v3 +// GFX1250: v_dual_fmac_f32 v255, s105, v2 :: v_dual_subrev_f32 v7, s1, v3 ; encoding: [0x69,0x60,0x00,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s1, v2 :: v_dual_subrev_f32 v7, s105, v3 +// GFX1250: v_dual_fmac_f32 v255, s1, v2 :: v_dual_subrev_f32 v7, s105, v3 ; encoding: [0x01,0x60,0x00,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_subrev_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_subrev_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x60,0x00,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_subrev_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_subrev_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x60,0x00,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_subrev_f32 v7, ttmp15, v3 +// GFX1250: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_subrev_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x60,0x00,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, m0, v2 :: v_dual_subrev_f32 v7, m0, v3 +// GFX1250: v_dual_fmac_f32 v255, m0, v2 :: v_dual_subrev_f32 v7, m0, v3 ; encoding: [0x7d,0x60,0x00,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v7, exec_lo, v3 +// GFX1250: v_dual_fmac_f32 v255, vcc_hi, v2 :: 
v_dual_subrev_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x60,0x00,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v7, exec_hi, v3 +// GFX1250: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x00,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_subrev_f32 v7, -1, v3 +// GFX1250: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_subrev_f32 v7, -1, v3 ; encoding: [0xfd,0x60,0x00,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_subrev_f32 v7, 0.5, v2 +// GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_subrev_f32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x00,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, -1, v4 :: v_dual_subrev_f32 v7, src_scc, v5 +// GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_subrev_f32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x00,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v4, v2 :: v_dual_fma_f32 v7, v1, v3, v4 +// GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_fma_f32 v7, v1, v3, v4 ; encoding: [0x04,0x31,0x01,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x04,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v4, v2 :: v_dual_bitop2_b32 v7, v1, v3 bitop3:20 +// GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0x14 ; encoding: [0x04,0x21,0x01,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x14,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v4, v2 :: v_dual_add_f32 v7, v1, v3 +// 
GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_add_f32 v7, v1, v3 ; encoding: [0x04,0x41,0x28,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v1, v2 :: v_dual_add_f32 v7, v255, v3 +// GFX1250: v_dual_max_num_f32 v255, v1, v2 :: v_dual_add_f32 v7, v255, v3 ; encoding: [0x01,0x41,0x28,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v255, v2 :: v_dual_add_f32 v7, v2, v3 +// GFX1250: v_dual_max_num_f32 v255, v255, v2 :: v_dual_add_f32 v7, v2, v3 ; encoding: [0xff,0x41,0x28,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v2, v2 :: v_dual_add_f32 v7, v3, v3 +// GFX1250: v_dual_max_num_f32 v255, v2, v2 :: v_dual_add_f32 v7, v3, v3 ; encoding: [0x02,0x41,0x28,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v3, v2 :: v_dual_add_f32 v7, v4, v3 +// GFX1250: v_dual_max_num_f32 v255, v3, v2 :: v_dual_add_f32 v7, v4, v3 ; encoding: [0x03,0x41,0x28,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s105, v2 :: v_dual_add_f32 v7, s1, v3 +// GFX1250: v_dual_max_num_f32 v255, s105, v2 :: v_dual_add_f32 v7, s1, v3 ; encoding: [0x69,0x40,0x28,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s1, v2 :: v_dual_add_f32 v7, s105, v3 +// GFX1250: v_dual_max_num_f32 v255, s1, v2 :: v_dual_add_f32 v7, s105, v3 ; encoding: [0x01,0x40,0x28,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_add_f32 v7, vcc_lo, v3 +// GFX1250: 
v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_add_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x40,0x28,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_add_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_add_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x40,0x28,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_add_f32 v7, ttmp15, v3 +// GFX1250: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_add_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x40,0x28,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, m0, v2 :: v_dual_add_f32 v7, m0, v3 +// GFX1250: v_dual_max_num_f32 v255, m0, v2 :: v_dual_add_f32 v7, m0, v3 ; encoding: [0x7d,0x40,0x28,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_add_f32 v7, exec_lo, v3 +// GFX1250: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_add_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x40,0x28,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_add_f32 v7, exec_hi, v3 +// GFX1250: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_add_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x28,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_add_f32 v7, -1, v3 +// GFX1250: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_add_f32 v7, -1, v3 ; encoding: [0xfd,0x40,0x28,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_add_f32 v7, 0.5, v2 +// GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_add_f32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x28,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, -1, v4 :: v_dual_add_f32 v7, src_scc, v5 +// GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_add_f32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x28,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v4, v2 :: v_dual_add_nc_u32 v7, v1, v3 +// GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_add_nc_u32 v7, v1, v3 ; encoding: [0x04,0x01,0x29,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v1, v2 :: v_dual_add_nc_u32 v7, v255, v3 +// GFX1250: v_dual_max_num_f32 v255, v1, v2 :: v_dual_add_nc_u32 v7, v255, v3 ; encoding: [0x01,0x01,0x29,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v255, v2 :: v_dual_add_nc_u32 v7, v2, v3 +// GFX1250: v_dual_max_num_f32 v255, v255, v2 :: v_dual_add_nc_u32 v7, v2, v3 ; encoding: [0xff,0x01,0x29,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v2, v2 :: v_dual_add_nc_u32 v7, v3, v3 +// GFX1250: v_dual_max_num_f32 v255, v2, v2 :: v_dual_add_nc_u32 v7, v3, v3 ; encoding: [0x02,0x01,0x29,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v3, v2 :: v_dual_add_nc_u32 v7, v4, v3 +// GFX1250: v_dual_max_num_f32 v255, v3, v2 :: v_dual_add_nc_u32 v7, v4, v3 ; encoding: [0x03,0x01,0x29,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction 
requires wavesize=32 + +v_dual_max_num_f32 v255, s105, v2 :: v_dual_add_nc_u32 v7, s1, v3 +// GFX1250: v_dual_max_num_f32 v255, s105, v2 :: v_dual_add_nc_u32 v7, s1, v3 ; encoding: [0x69,0x00,0x29,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s1, v2 :: v_dual_add_nc_u32 v7, s105, v3 +// GFX1250: v_dual_max_num_f32 v255, s1, v2 :: v_dual_add_nc_u32 v7, s105, v3 ; encoding: [0x01,0x00,0x29,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v7, vcc_lo, v3 +// GFX1250: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x29,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v7, vcc_hi, v3 +// GFX1250: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x29,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v7, ttmp15, v3 +// GFX1250: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x29,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, m0, v2 :: v_dual_add_nc_u32 v7, m0, v3 +// GFX1250: v_dual_max_num_f32 v255, m0, v2 :: v_dual_add_nc_u32 v7, m0, v3 ; encoding: [0x7d,0x00,0x29,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v7, exec_lo, v3 +// GFX1250: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v7, exec_lo, v3 ; encoding: 
[0x6b,0x00,0x29,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v7, exec_hi, v3 +// GFX1250: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x29,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_add_nc_u32 v7, -1, v3 +// GFX1250: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_add_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x00,0x29,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_add_nc_u32 v7, 0.5, v2 +// GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_add_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x29,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, -1, v4 :: v_dual_add_nc_u32 v7, src_scc, v5 +// GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_add_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x29,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo +// GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo ; encoding: [0x04,0x91,0x28,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v1, v2 :: v_dual_cndmask_b32 v7, v255, v3, vcc_lo +// GFX1250: v_dual_max_num_f32 v255, v1, v2 :: v_dual_cndmask_b32 v7, v255, v3, vcc_lo ; encoding: [0x01,0x91,0x28,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v255, v2 :: v_dual_cndmask_b32 v7, 
v2, v3, vcc_lo +// GFX1250: v_dual_max_num_f32 v255, v255, v2 :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo ; encoding: [0xff,0x91,0x28,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v2, v2 :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo +// GFX1250: v_dual_max_num_f32 v255, v2, v2 :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo ; encoding: [0x02,0x91,0x28,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v3, v2 :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo +// GFX1250: v_dual_max_num_f32 v255, v3, v2 :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo ; encoding: [0x03,0x91,0x28,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s105, v2 :: v_dual_cndmask_b32 v7, s105, v3, vcc_lo +// GFX1250: v_dual_max_num_f32 v255, s105, v2 :: v_dual_cndmask_b32 v7, s105, v3, vcc_lo ; encoding: [0x69,0x90,0x28,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s1, v2 :: v_dual_cndmask_b32 v7, s1, v3, vcc_lo +// GFX1250: v_dual_max_num_f32 v255, s1, v2 :: v_dual_cndmask_b32 v7, s1, v3, vcc_lo ; encoding: [0x01,0x90,0x28,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v7, ttmp15, v3, vcc_lo +// GFX1250: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v7, ttmp15, v3, vcc_lo ; encoding: [0x7b,0x90,0x28,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v7, exec_hi, v3, vcc_lo +// GFX1250: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v7, exec_hi, v3, vcc_lo ; encoding: 
[0x7f,0x90,0x28,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v7, exec_lo, v3, vcc_lo +// GFX1250: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v7, exec_lo, v3, vcc_lo ; encoding: [0x7e,0x90,0x28,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, m0, v2 :: v_dual_cndmask_b32 v7, m0, v3, vcc_lo +// GFX1250: v_dual_max_num_f32 v255, m0, v2 :: v_dual_cndmask_b32 v7, m0, v3, vcc_lo ; encoding: [0x7d,0x90,0x28,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v7, vcc_hi, v3, vcc_lo +// GFX1250: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v7, vcc_hi, v3, vcc_lo ; encoding: [0x6b,0x90,0x28,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo +// GFX1250: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo ; encoding: [0x6a,0x90,0x28,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo +// GFX1250: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo ; encoding: [0xfd,0x90,0x28,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo +// GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo ; encoding: [0xf0,0x90,0x28,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_max_num_f32 v255, -1, v4 :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo +// GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo ; encoding: [0xc1,0x90,0x28,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v4, v2 :: v_dual_fmac_f32 v7, v1, v3 +// GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_fmac_f32 v7, v1, v3 ; encoding: [0x04,0x01,0x28,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v1, v2 :: v_dual_fmac_f32 v7, v255, v3 +// GFX1250: v_dual_max_num_f32 v255, v1, v2 :: v_dual_fmac_f32 v7, v255, v3 ; encoding: [0x01,0x01,0x28,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v255, v2 :: v_dual_fmac_f32 v7, v2, v3 +// GFX1250: v_dual_max_num_f32 v255, v255, v2 :: v_dual_fmac_f32 v7, v2, v3 ; encoding: [0xff,0x01,0x28,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v2, v2 :: v_dual_fmac_f32 v7, v3, v3 +// GFX1250: v_dual_max_num_f32 v255, v2, v2 :: v_dual_fmac_f32 v7, v3, v3 ; encoding: [0x02,0x01,0x28,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v3, v2 :: v_dual_fmac_f32 v7, v4, v3 +// GFX1250: v_dual_max_num_f32 v255, v3, v2 :: v_dual_fmac_f32 v7, v4, v3 ; encoding: [0x03,0x01,0x28,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s105, v2 :: v_dual_fmac_f32 v7, s1, v3 +// GFX1250: v_dual_max_num_f32 v255, s105, v2 :: v_dual_fmac_f32 v7, s1, v3 ; encoding: [0x69,0x00,0x28,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// 
W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s1, v2 :: v_dual_fmac_f32 v7, s105, v3 +// GFX1250: v_dual_max_num_f32 v255, s1, v2 :: v_dual_fmac_f32 v7, s105, v3 ; encoding: [0x01,0x00,0x28,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_fmac_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_fmac_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x28,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_fmac_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_fmac_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x28,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_fmac_f32 v7, ttmp15, v3 +// GFX1250: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_fmac_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x28,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, m0, v2 :: v_dual_fmac_f32 v7, m0, v3 +// GFX1250: v_dual_max_num_f32 v255, m0, v2 :: v_dual_fmac_f32 v7, m0, v3 ; encoding: [0x7d,0x00,0x28,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v7, exec_lo, v3 +// GFX1250: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x28,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v7, exec_hi, v3 +// GFX1250: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v7, exec_hi, v3 ; 
encoding: [0x6a,0x00,0x28,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_fmac_f32 v7, -1, v3 +// GFX1250: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_fmac_f32 v7, -1, v3 ; encoding: [0xfd,0x00,0x28,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_fmac_f32 v7, 0.5, v2 +// GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_fmac_f32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x28,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, -1, v4 :: v_dual_fmac_f32 v7, src_scc, v5 +// GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_fmac_f32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x28,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v4, v2 :: v_dual_lshlrev_b32 v7, v1, v3 +// GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_lshlrev_b32 v7, v1, v3 ; encoding: [0x04,0x11,0x29,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v1, v2 :: v_dual_lshlrev_b32 v7, v255, v3 +// GFX1250: v_dual_max_num_f32 v255, v1, v2 :: v_dual_lshlrev_b32 v7, v255, v3 ; encoding: [0x01,0x11,0x29,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v255, v2 :: v_dual_lshlrev_b32 v7, v2, v3 +// GFX1250: v_dual_max_num_f32 v255, v255, v2 :: v_dual_lshlrev_b32 v7, v2, v3 ; encoding: [0xff,0x11,0x29,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v2, v2 :: v_dual_lshlrev_b32 v7, v3, v3 +// GFX1250: v_dual_max_num_f32 v255, v2, 
v2 :: v_dual_lshlrev_b32 v7, v3, v3 ; encoding: [0x02,0x11,0x29,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v3, v2 :: v_dual_lshlrev_b32 v7, v4, v3 +// GFX1250: v_dual_max_num_f32 v255, v3, v2 :: v_dual_lshlrev_b32 v7, v4, v3 ; encoding: [0x03,0x11,0x29,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s105, v2 :: v_dual_lshlrev_b32 v7, s1, v3 +// GFX1250: v_dual_max_num_f32 v255, s105, v2 :: v_dual_lshlrev_b32 v7, s1, v3 ; encoding: [0x69,0x10,0x29,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s1, v2 :: v_dual_lshlrev_b32 v7, s105, v3 +// GFX1250: v_dual_max_num_f32 v255, s1, v2 :: v_dual_lshlrev_b32 v7, s105, v3 ; encoding: [0x01,0x10,0x29,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v7, vcc_lo, v3 +// GFX1250: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v7, vcc_lo, v3 ; encoding: [0x7b,0x10,0x29,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v7, vcc_hi, v3 +// GFX1250: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v7, vcc_hi, v3 ; encoding: [0x7f,0x10,0x29,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v7, ttmp15, v3 +// GFX1250: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v7, ttmp15, v3 ; encoding: [0x7e,0x10,0x29,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 
v255, m0, v2 :: v_dual_lshlrev_b32 v7, m0, v3 +// GFX1250: v_dual_max_num_f32 v255, m0, v2 :: v_dual_lshlrev_b32 v7, m0, v3 ; encoding: [0x7d,0x10,0x29,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v7, exec_lo, v3 +// GFX1250: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v7, exec_lo, v3 ; encoding: [0x6b,0x10,0x29,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v7, exec_hi, v3 +// GFX1250: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x10,0x29,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v7, -1, v3 +// GFX1250: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v7, -1, v3 ; encoding: [0xfd,0x10,0x29,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v7, 0.5, v2 +// GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x10,0x29,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, -1, v4 :: v_dual_lshlrev_b32 v7, src_scc, v5 +// GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_lshlrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x10,0x29,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v4, v2 :: v_dual_max_num_f32 v7, v1, v3 +// GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_max_num_f32 v7, v1, v3 ; encoding: [0x04,0xa1,0x28,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// 
W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v1, v2 :: v_dual_max_num_f32 v7, v255, v3 +// GFX1250: v_dual_max_num_f32 v255, v1, v2 :: v_dual_max_num_f32 v7, v255, v3 ; encoding: [0x01,0xa1,0x28,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v255, v2 :: v_dual_max_num_f32 v7, v2, v3 +// GFX1250: v_dual_max_num_f32 v255, v255, v2 :: v_dual_max_num_f32 v7, v2, v3 ; encoding: [0xff,0xa1,0x28,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v2, v2 :: v_dual_max_num_f32 v7, v3, v3 +// GFX1250: v_dual_max_num_f32 v255, v2, v2 :: v_dual_max_num_f32 v7, v3, v3 ; encoding: [0x02,0xa1,0x28,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v3, v2 :: v_dual_max_num_f32 v7, v4, v3 +// GFX1250: v_dual_max_num_f32 v255, v3, v2 :: v_dual_max_num_f32 v7, v4, v3 ; encoding: [0x03,0xa1,0x28,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s105, v2 :: v_dual_max_num_f32 v7, s1, v3 +// GFX1250: v_dual_max_num_f32 v255, s105, v2 :: v_dual_max_num_f32 v7, s1, v3 ; encoding: [0x69,0xa0,0x28,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s1, v2 :: v_dual_max_num_f32 v7, s105, v3 +// GFX1250: v_dual_max_num_f32 v255, s1, v2 :: v_dual_max_num_f32 v7, s105, v3 ; encoding: [0x01,0xa0,0x28,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_max_num_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_max_num_f32 v7, vcc_lo, v3 ; encoding: 
[0x7b,0xa0,0x28,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_max_num_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_max_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xa0,0x28,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_max_num_f32 v7, ttmp15, v3 +// GFX1250: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_max_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xa0,0x28,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, m0, v2 :: v_dual_max_num_f32 v7, m0, v3 +// GFX1250: v_dual_max_num_f32 v255, m0, v2 :: v_dual_max_num_f32 v7, m0, v3 ; encoding: [0x7d,0xa0,0x28,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v7, exec_lo, v3 +// GFX1250: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xa0,0x28,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v7, exec_hi, v3 +// GFX1250: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xa0,0x28,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_max_num_f32 v7, -1, v3 +// GFX1250: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_max_num_f32 v7, -1, v3 ; encoding: [0xfd,0xa0,0x28,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0.5, v3 :: 
v_dual_max_num_f32 v7, 0.5, v2 +// GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_max_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xa0,0x28,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, -1, v4 :: v_dual_max_num_f32 v7, src_scc, v5 +// GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_max_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xa0,0x28,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v4, v2 :: v_dual_min_num_f32 v7, v1, v3 +// GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_min_num_f32 v7, v1, v3 ; encoding: [0x04,0xb1,0x28,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v1, v2 :: v_dual_min_num_f32 v7, v255, v3 +// GFX1250: v_dual_max_num_f32 v255, v1, v2 :: v_dual_min_num_f32 v7, v255, v3 ; encoding: [0x01,0xb1,0x28,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v255, v2 :: v_dual_min_num_f32 v7, v2, v3 +// GFX1250: v_dual_max_num_f32 v255, v255, v2 :: v_dual_min_num_f32 v7, v2, v3 ; encoding: [0xff,0xb1,0x28,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v2, v2 :: v_dual_min_num_f32 v7, v3, v3 +// GFX1250: v_dual_max_num_f32 v255, v2, v2 :: v_dual_min_num_f32 v7, v3, v3 ; encoding: [0x02,0xb1,0x28,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v3, v2 :: v_dual_min_num_f32 v7, v4, v3 +// GFX1250: v_dual_max_num_f32 v255, v3, v2 :: v_dual_min_num_f32 v7, v4, v3 ; encoding: [0x03,0xb1,0x28,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_max_num_f32 v255, s105, v2 :: v_dual_min_num_f32 v7, s1, v3 +// GFX1250: v_dual_max_num_f32 v255, s105, v2 :: v_dual_min_num_f32 v7, s1, v3 ; encoding: [0x69,0xb0,0x28,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s1, v2 :: v_dual_min_num_f32 v7, s105, v3 +// GFX1250: v_dual_max_num_f32 v255, s1, v2 :: v_dual_min_num_f32 v7, s105, v3 ; encoding: [0x01,0xb0,0x28,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_min_num_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_min_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xb0,0x28,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_min_num_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_min_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xb0,0x28,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_min_num_f32 v7, ttmp15, v3 +// GFX1250: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_min_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xb0,0x28,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, m0, v2 :: v_dual_min_num_f32 v7, m0, v3 +// GFX1250: v_dual_max_num_f32 v255, m0, v2 :: v_dual_min_num_f32 v7, m0, v3 ; encoding: [0x7d,0xb0,0x28,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v7, exec_lo, v3 +// GFX1250: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v7, exec_lo, v3 ; encoding: 
[0x6b,0xb0,0x28,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v7, exec_hi, v3 +// GFX1250: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xb0,0x28,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_min_num_f32 v7, -1, v3 +// GFX1250: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_min_num_f32 v7, -1, v3 ; encoding: [0xfd,0xb0,0x28,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_min_num_f32 v7, 0.5, v2 +// GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_min_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xb0,0x28,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, -1, v4 :: v_dual_min_num_f32 v7, src_scc, v5 +// GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_min_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xb0,0x28,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v4, v255 :: v_dual_mov_b32 v7, v1 +// GFX1250: v_dual_max_num_f32 v255, v4, v255 :: v_dual_mov_b32 v7, v1 ; encoding: [0x04,0x81,0x28,0xcf,0x01,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v1, v255 :: v_dual_mov_b32 v7, v255 +// GFX1250: v_dual_max_num_f32 v255, v1, v255 :: v_dual_mov_b32 v7, v255 ; encoding: [0x01,0x81,0x28,0xcf,0xff,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v255, v255 :: v_dual_mov_b32 v7, v2 +// GFX1250: v_dual_max_num_f32 v255, v255, v255 
:: v_dual_mov_b32 v7, v2 ; encoding: [0xff,0x81,0x28,0xcf,0x02,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v2, v255 :: v_dual_mov_b32 v7, v3 +// GFX1250: v_dual_max_num_f32 v255, v2, v255 :: v_dual_mov_b32 v7, v3 ; encoding: [0x02,0x81,0x28,0xcf,0x03,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v3, v255 :: v_dual_mov_b32 v7, v4 +// GFX1250: v_dual_max_num_f32 v255, v3, v255 :: v_dual_mov_b32 v7, v4 ; encoding: [0x03,0x81,0x28,0xcf,0x04,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s105, v255 :: v_dual_mov_b32 v7, s1 +// GFX1250: v_dual_max_num_f32 v255, s105, v255 :: v_dual_mov_b32 v7, s1 ; encoding: [0x69,0x80,0x28,0xcf,0x01,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s1, v255 :: v_dual_mov_b32 v7, s105 +// GFX1250: v_dual_max_num_f32 v255, s1, v255 :: v_dual_mov_b32 v7, s105 ; encoding: [0x01,0x80,0x28,0xcf,0x69,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, ttmp15, v255 :: v_dual_mov_b32 v7, vcc_lo +// GFX1250: v_dual_max_num_f32 v255, ttmp15, v255 :: v_dual_mov_b32 v7, vcc_lo ; encoding: [0x7b,0x80,0x28,0xcf,0x6a,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_hi, v255 :: v_dual_mov_b32 v7, vcc_hi +// GFX1250: v_dual_max_num_f32 v255, exec_hi, v255 :: v_dual_mov_b32 v7, vcc_hi ; encoding: [0x7f,0x80,0x28,0xcf,0x6b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_lo, v255 :: v_dual_mov_b32 v7, ttmp15 +// GFX1250: v_dual_max_num_f32 v255, exec_lo, v255 :: 
v_dual_mov_b32 v7, ttmp15 ; encoding: [0x7e,0x80,0x28,0xcf,0x7b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, m0, v255 :: v_dual_mov_b32 v7, m0 +// GFX1250: v_dual_max_num_f32 v255, m0, v255 :: v_dual_mov_b32 v7, m0 ; encoding: [0x7d,0x80,0x28,0xcf,0x7d,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_hi, v255 :: v_dual_mov_b32 v7, exec_lo +// GFX1250: v_dual_max_num_f32 v255, vcc_hi, v255 :: v_dual_mov_b32 v7, exec_lo ; encoding: [0x6b,0x80,0x28,0xcf,0x7e,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_lo, v255 :: v_dual_mov_b32 v7, exec_hi +// GFX1250: v_dual_max_num_f32 v255, vcc_lo, v255 :: v_dual_mov_b32 v7, exec_hi ; encoding: [0x6a,0x80,0x28,0xcf,0x7f,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, src_scc, v255 :: v_dual_mov_b32 v7, -1 +// GFX1250: v_dual_max_num_f32 v255, src_scc, v255 :: v_dual_mov_b32 v7, -1 ; encoding: [0xfd,0x80,0x28,0xcf,0xc1,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_mov_b32 v7, 0.5 +// GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_mov_b32 v7, 0.5 ; encoding: [0xf0,0x80,0x28,0xcf,0xf0,0x00,0x03,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, -1, v4 :: v_dual_mov_b32 v7, src_scc +// GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_mov_b32 v7, src_scc ; encoding: [0xc1,0x80,0x28,0xcf,0xfd,0x00,0x04,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v7, v1, v3 +// GFX1250: v_dual_max_num_f32 v255, v4, 
v2 :: v_dual_mul_dx9_zero_f32 v7, v1, v3 ; encoding: [0x04,0x71,0x28,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v7, v255, v3 +// GFX1250: v_dual_max_num_f32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v7, v255, v3 ; encoding: [0x01,0x71,0x28,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v7, v2, v3 +// GFX1250: v_dual_max_num_f32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v7, v2, v3 ; encoding: [0xff,0x71,0x28,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v7, v3, v3 +// GFX1250: v_dual_max_num_f32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v7, v3, v3 ; encoding: [0x02,0x71,0x28,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v7, v4, v3 +// GFX1250: v_dual_max_num_f32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v7, v4, v3 ; encoding: [0x03,0x71,0x28,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v7, s1, v3 +// GFX1250: v_dual_max_num_f32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v7, s1, v3 ; encoding: [0x69,0x70,0x28,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v7, s105, v3 +// GFX1250: v_dual_max_num_f32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v7, s105, v3 ; encoding: [0x01,0x70,0x28,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x70,0x28,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x70,0x28,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, ttmp15, v3 +// GFX1250: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x70,0x28,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v7, m0, v3 +// GFX1250: v_dual_max_num_f32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v7, m0, v3 ; encoding: [0x7d,0x70,0x28,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_lo, v3 +// GFX1250: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x70,0x28,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 +// GFX1250: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x28,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v7, -1, v3 +// GFX1250: v_dual_max_num_f32 v255, 
src_scc, v2 :: v_dual_mul_dx9_zero_f32 v7, -1, v3 ; encoding: [0xfd,0x70,0x28,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 +// GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x28,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 +// GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x28,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v4, v2 :: v_dual_mul_f32 v7, v1, v3 +// GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_mul_f32 v7, v1, v3 ; encoding: [0x04,0x31,0x28,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v1, v2 :: v_dual_mul_f32 v7, v255, v3 +// GFX1250: v_dual_max_num_f32 v255, v1, v2 :: v_dual_mul_f32 v7, v255, v3 ; encoding: [0x01,0x31,0x28,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v255, v2 :: v_dual_mul_f32 v7, v2, v3 +// GFX1250: v_dual_max_num_f32 v255, v255, v2 :: v_dual_mul_f32 v7, v2, v3 ; encoding: [0xff,0x31,0x28,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v2, v2 :: v_dual_mul_f32 v7, v3, v3 +// GFX1250: v_dual_max_num_f32 v255, v2, v2 :: v_dual_mul_f32 v7, v3, v3 ; encoding: [0x02,0x31,0x28,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v3, v2 :: v_dual_mul_f32 v7, v4, 
v3 +// GFX1250: v_dual_max_num_f32 v255, v3, v2 :: v_dual_mul_f32 v7, v4, v3 ; encoding: [0x03,0x31,0x28,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s105, v2 :: v_dual_mul_f32 v7, s1, v3 +// GFX1250: v_dual_max_num_f32 v255, s105, v2 :: v_dual_mul_f32 v7, s1, v3 ; encoding: [0x69,0x30,0x28,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s1, v2 :: v_dual_mul_f32 v7, s105, v3 +// GFX1250: v_dual_max_num_f32 v255, s1, v2 :: v_dual_mul_f32 v7, s105, v3 ; encoding: [0x01,0x30,0x28,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_mul_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_mul_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x30,0x28,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_mul_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_mul_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x30,0x28,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_mul_f32 v7, ttmp15, v3 +// GFX1250: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_mul_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x30,0x28,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, m0, v2 :: v_dual_mul_f32 v7, m0, v3 +// GFX1250: v_dual_max_num_f32 v255, m0, v2 :: v_dual_mul_f32 v7, m0, v3 ; encoding: [0x7d,0x30,0x28,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 
vcc_hi, v2 :: v_dual_mul_f32 v7, exec_lo, v3 +// GFX1250: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_mul_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x30,0x28,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_mul_f32 v7, exec_hi, v3 +// GFX1250: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_mul_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x30,0x28,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_mul_f32 v7, -1, v3 +// GFX1250: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_mul_f32 v7, -1, v3 ; encoding: [0xfd,0x30,0x28,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_mul_f32 v7, 0.5, v2 +// GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_mul_f32 v7, 0.5, v2 ; encoding: [0xf0,0x30,0x28,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, -1, v4 :: v_dual_mul_f32 v7, src_scc, v5 +// GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_mul_f32 v7, src_scc, v5 ; encoding: [0xc1,0x30,0x28,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v4, v2 :: v_dual_sub_f32 v7, v1, v3 +// GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_sub_f32 v7, v1, v3 ; encoding: [0x04,0x51,0x28,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v1, v2 :: v_dual_sub_f32 v7, v255, v3 +// GFX1250: v_dual_max_num_f32 v255, v1, v2 :: v_dual_sub_f32 v7, v255, v3 ; encoding: [0x01,0x51,0x28,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_max_num_f32 v255, v255, v2 :: v_dual_sub_f32 v7, v2, v3 +// GFX1250: v_dual_max_num_f32 v255, v255, v2 :: v_dual_sub_f32 v7, v2, v3 ; encoding: [0xff,0x51,0x28,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v2, v2 :: v_dual_sub_f32 v7, v3, v3 +// GFX1250: v_dual_max_num_f32 v255, v2, v2 :: v_dual_sub_f32 v7, v3, v3 ; encoding: [0x02,0x51,0x28,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v3, v2 :: v_dual_sub_f32 v7, v4, v3 +// GFX1250: v_dual_max_num_f32 v255, v3, v2 :: v_dual_sub_f32 v7, v4, v3 ; encoding: [0x03,0x51,0x28,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s105, v2 :: v_dual_sub_f32 v7, s1, v3 +// GFX1250: v_dual_max_num_f32 v255, s105, v2 :: v_dual_sub_f32 v7, s1, v3 ; encoding: [0x69,0x50,0x28,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s1, v2 :: v_dual_sub_f32 v7, s105, v3 +// GFX1250: v_dual_max_num_f32 v255, s1, v2 :: v_dual_sub_f32 v7, s105, v3 ; encoding: [0x01,0x50,0x28,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_sub_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_sub_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x50,0x28,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_sub_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_sub_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x50,0x28,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_sub_f32 v7, ttmp15, v3 +// GFX1250: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_sub_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x50,0x28,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, m0, v2 :: v_dual_sub_f32 v7, m0, v3 +// GFX1250: v_dual_max_num_f32 v255, m0, v2 :: v_dual_sub_f32 v7, m0, v3 ; encoding: [0x7d,0x50,0x28,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_sub_f32 v7, exec_lo, v3 +// GFX1250: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_sub_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x50,0x28,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_sub_f32 v7, exec_hi, v3 +// GFX1250: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_sub_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x28,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_sub_f32 v7, -1, v3 +// GFX1250: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_sub_f32 v7, -1, v3 ; encoding: [0xfd,0x50,0x28,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_sub_f32 v7, 0.5, v2 +// GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_sub_f32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x28,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, -1, v4 :: v_dual_sub_f32 v7, src_scc, v5 +// GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_sub_f32 v7, src_scc, v5 ; encoding: 
[0xc1,0x50,0x28,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v4, v2 :: v_dual_subrev_f32 v7, v1, v3 +// GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_subrev_f32 v7, v1, v3 ; encoding: [0x04,0x61,0x28,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v1, v2 :: v_dual_subrev_f32 v7, v255, v3 +// GFX1250: v_dual_max_num_f32 v255, v1, v2 :: v_dual_subrev_f32 v7, v255, v3 ; encoding: [0x01,0x61,0x28,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v255, v2 :: v_dual_subrev_f32 v7, v2, v3 +// GFX1250: v_dual_max_num_f32 v255, v255, v2 :: v_dual_subrev_f32 v7, v2, v3 ; encoding: [0xff,0x61,0x28,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v2, v2 :: v_dual_subrev_f32 v7, v3, v3 +// GFX1250: v_dual_max_num_f32 v255, v2, v2 :: v_dual_subrev_f32 v7, v3, v3 ; encoding: [0x02,0x61,0x28,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v3, v2 :: v_dual_subrev_f32 v7, v4, v3 +// GFX1250: v_dual_max_num_f32 v255, v3, v2 :: v_dual_subrev_f32 v7, v4, v3 ; encoding: [0x03,0x61,0x28,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s105, v2 :: v_dual_subrev_f32 v7, s1, v3 +// GFX1250: v_dual_max_num_f32 v255, s105, v2 :: v_dual_subrev_f32 v7, s1, v3 ; encoding: [0x69,0x60,0x28,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s1, v2 :: v_dual_subrev_f32 v7, s105, v3 +// GFX1250: v_dual_max_num_f32 v255, s1, v2 :: v_dual_subrev_f32 
v7, s105, v3 ; encoding: [0x01,0x60,0x28,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_subrev_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_subrev_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x60,0x28,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_subrev_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_subrev_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x60,0x28,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_subrev_f32 v7, ttmp15, v3 +// GFX1250: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_subrev_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x60,0x28,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, m0, v2 :: v_dual_subrev_f32 v7, m0, v3 +// GFX1250: v_dual_max_num_f32 v255, m0, v2 :: v_dual_subrev_f32 v7, m0, v3 ; encoding: [0x7d,0x60,0x28,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v7, exec_lo, v3 +// GFX1250: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x60,0x28,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v7, exec_hi, v3 +// GFX1250: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x28,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 
src_scc, v2 :: v_dual_subrev_f32 v7, -1, v3 +// GFX1250: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_subrev_f32 v7, -1, v3 ; encoding: [0xfd,0x60,0x28,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_subrev_f32 v7, 0.5, v2 +// GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_subrev_f32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x28,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, -1, v4 :: v_dual_subrev_f32 v7, src_scc, v5 +// GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_subrev_f32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x28,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v4, v2 :: v_dual_fma_f32 v7, v1, v3, v4 +// GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_fma_f32 v7, v1, v3, v4 ; encoding: [0x04,0x31,0x29,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x04,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v4, v2 :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0x6e +// GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0x6e ; encoding: [0x04,0x21,0x29,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x6e,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v4, v2 :: v_dual_add_f32 v7, v1, v3 +// GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_add_f32 v7, v1, v3 ; encoding: [0x04,0x41,0x2c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v1, v2 :: v_dual_add_f32 v7, v255, v3 +// GFX1250: v_dual_min_num_f32 v255, v1, v2 :: v_dual_add_f32 v7, v255, v3 ; encoding: [0x01,0x41,0x2c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v255, v2 :: v_dual_add_f32 v7, v2, v3 +// GFX1250: v_dual_min_num_f32 v255, v255, v2 :: v_dual_add_f32 v7, v2, v3 ; encoding: [0xff,0x41,0x2c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v2, v2 :: v_dual_add_f32 v7, v3, v3 +// GFX1250: v_dual_min_num_f32 v255, v2, v2 :: v_dual_add_f32 v7, v3, v3 ; encoding: [0x02,0x41,0x2c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v3, v2 :: v_dual_add_f32 v7, v4, v3 +// GFX1250: v_dual_min_num_f32 v255, v3, v2 :: v_dual_add_f32 v7, v4, v3 ; encoding: [0x03,0x41,0x2c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s105, v2 :: v_dual_add_f32 v7, s1, v3 +// GFX1250: v_dual_min_num_f32 v255, s105, v2 :: v_dual_add_f32 v7, s1, v3 ; encoding: [0x69,0x40,0x2c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s1, v2 :: v_dual_add_f32 v7, s105, v3 +// GFX1250: v_dual_min_num_f32 v255, s1, v2 :: v_dual_add_f32 v7, s105, v3 ; encoding: [0x01,0x40,0x2c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_add_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_add_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x40,0x2c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_add_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_add_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x40,0x2c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_add_f32 v7, ttmp15, v3 +// GFX1250: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_add_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x40,0x2c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, m0, v2 :: v_dual_add_f32 v7, m0, v3 +// GFX1250: v_dual_min_num_f32 v255, m0, v2 :: v_dual_add_f32 v7, m0, v3 ; encoding: [0x7d,0x40,0x2c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_add_f32 v7, exec_lo, v3 +// GFX1250: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_add_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x40,0x2c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_add_f32 v7, exec_hi, v3 +// GFX1250: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_add_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x2c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_add_f32 v7, -1, v3 +// GFX1250: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_add_f32 v7, -1, v3 ; encoding: [0xfd,0x40,0x2c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_add_f32 v7, 0.5, v2 +// GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_add_f32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x2c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, -1, v4 :: v_dual_add_f32 v7, src_scc, v5 +// GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_add_f32 v7, src_scc, v5 ; encoding: 
[0xc1,0x40,0x2c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v4, v2 :: v_dual_add_nc_u32 v7, v1, v3 +// GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_add_nc_u32 v7, v1, v3 ; encoding: [0x04,0x01,0x2d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v1, v2 :: v_dual_add_nc_u32 v7, v255, v3 +// GFX1250: v_dual_min_num_f32 v255, v1, v2 :: v_dual_add_nc_u32 v7, v255, v3 ; encoding: [0x01,0x01,0x2d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v255, v2 :: v_dual_add_nc_u32 v7, v2, v3 +// GFX1250: v_dual_min_num_f32 v255, v255, v2 :: v_dual_add_nc_u32 v7, v2, v3 ; encoding: [0xff,0x01,0x2d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v2, v2 :: v_dual_add_nc_u32 v7, v3, v3 +// GFX1250: v_dual_min_num_f32 v255, v2, v2 :: v_dual_add_nc_u32 v7, v3, v3 ; encoding: [0x02,0x01,0x2d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v3, v2 :: v_dual_add_nc_u32 v7, v4, v3 +// GFX1250: v_dual_min_num_f32 v255, v3, v2 :: v_dual_add_nc_u32 v7, v4, v3 ; encoding: [0x03,0x01,0x2d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s105, v2 :: v_dual_add_nc_u32 v7, s1, v3 +// GFX1250: v_dual_min_num_f32 v255, s105, v2 :: v_dual_add_nc_u32 v7, s1, v3 ; encoding: [0x69,0x00,0x2d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s1, v2 :: v_dual_add_nc_u32 v7, s105, v3 +// GFX1250: v_dual_min_num_f32 v255, s1, v2 :: v_dual_add_nc_u32 
v7, s105, v3 ; encoding: [0x01,0x00,0x2d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v7, vcc_lo, v3 +// GFX1250: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x2d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v7, vcc_hi, v3 +// GFX1250: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x2d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v7, ttmp15, v3 +// GFX1250: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x2d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, m0, v2 :: v_dual_add_nc_u32 v7, m0, v3 +// GFX1250: v_dual_min_num_f32 v255, m0, v2 :: v_dual_add_nc_u32 v7, m0, v3 ; encoding: [0x7d,0x00,0x2d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v7, exec_lo, v3 +// GFX1250: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x2d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v7, exec_hi, v3 +// GFX1250: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x2d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 
src_scc, v2 :: v_dual_add_nc_u32 v7, -1, v3 +// GFX1250: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_add_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x00,0x2d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_add_nc_u32 v7, 0.5, v2 +// GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_add_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x2d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, -1, v4 :: v_dual_add_nc_u32 v7, src_scc, v5 +// GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_add_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x2d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo +// GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo ; encoding: [0x04,0x91,0x2c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v1, v2 :: v_dual_cndmask_b32 v7, v255, v3, vcc_lo +// GFX1250: v_dual_min_num_f32 v255, v1, v2 :: v_dual_cndmask_b32 v7, v255, v3, vcc_lo ; encoding: [0x01,0x91,0x2c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v255, v2 :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo +// GFX1250: v_dual_min_num_f32 v255, v255, v2 :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo ; encoding: [0xff,0x91,0x2c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v2, v2 :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo +// GFX1250: v_dual_min_num_f32 v255, v2, v2 :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo ; encoding: 
[0x02,0x91,0x2c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v3, v2 :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo +// GFX1250: v_dual_min_num_f32 v255, v3, v2 :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo ; encoding: [0x03,0x91,0x2c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s105, v2 :: v_dual_cndmask_b32 v7, s105, v3, vcc_lo +// GFX1250: v_dual_min_num_f32 v255, s105, v2 :: v_dual_cndmask_b32 v7, s105, v3, vcc_lo ; encoding: [0x69,0x90,0x2c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s1, v2 :: v_dual_cndmask_b32 v7, s1, v3, vcc_lo +// GFX1250: v_dual_min_num_f32 v255, s1, v2 :: v_dual_cndmask_b32 v7, s1, v3, vcc_lo ; encoding: [0x01,0x90,0x2c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v7, ttmp15, v3, vcc_lo +// GFX1250: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v7, ttmp15, v3, vcc_lo ; encoding: [0x7b,0x90,0x2c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v7, exec_hi, v3, vcc_lo +// GFX1250: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v7, exec_hi, v3, vcc_lo ; encoding: [0x7f,0x90,0x2c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v7, exec_lo, v3, vcc_lo +// GFX1250: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v7, exec_lo, v3, vcc_lo ; encoding: [0x7e,0x90,0x2c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_min_num_f32 v255, m0, v2 :: v_dual_cndmask_b32 v7, m0, v3, vcc_lo +// GFX1250: v_dual_min_num_f32 v255, m0, v2 :: v_dual_cndmask_b32 v7, m0, v3, vcc_lo ; encoding: [0x7d,0x90,0x2c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v7, vcc_hi, v3, vcc_lo +// GFX1250: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v7, vcc_hi, v3, vcc_lo ; encoding: [0x6b,0x90,0x2c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo +// GFX1250: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo ; encoding: [0x6a,0x90,0x2c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo +// GFX1250: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo ; encoding: [0xfd,0x90,0x2c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo +// GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo ; encoding: [0xf0,0x90,0x2c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, -1, v4 :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo +// GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo ; encoding: [0xc1,0x90,0x2c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v4, v2 :: v_dual_fmac_f32 v7, v1, v3 +// GFX1250: 
v_dual_min_num_f32 v255, v4, v2 :: v_dual_fmac_f32 v7, v1, v3 ; encoding: [0x04,0x01,0x2c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v1, v2 :: v_dual_fmac_f32 v7, v255, v3 +// GFX1250: v_dual_min_num_f32 v255, v1, v2 :: v_dual_fmac_f32 v7, v255, v3 ; encoding: [0x01,0x01,0x2c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v255, v2 :: v_dual_fmac_f32 v7, v2, v3 +// GFX1250: v_dual_min_num_f32 v255, v255, v2 :: v_dual_fmac_f32 v7, v2, v3 ; encoding: [0xff,0x01,0x2c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v2, v2 :: v_dual_fmac_f32 v7, v3, v3 +// GFX1250: v_dual_min_num_f32 v255, v2, v2 :: v_dual_fmac_f32 v7, v3, v3 ; encoding: [0x02,0x01,0x2c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v3, v2 :: v_dual_fmac_f32 v7, v4, v3 +// GFX1250: v_dual_min_num_f32 v255, v3, v2 :: v_dual_fmac_f32 v7, v4, v3 ; encoding: [0x03,0x01,0x2c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s105, v2 :: v_dual_fmac_f32 v7, s1, v3 +// GFX1250: v_dual_min_num_f32 v255, s105, v2 :: v_dual_fmac_f32 v7, s1, v3 ; encoding: [0x69,0x00,0x2c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s1, v2 :: v_dual_fmac_f32 v7, s105, v3 +// GFX1250: v_dual_min_num_f32 v255, s1, v2 :: v_dual_fmac_f32 v7, s105, v3 ; encoding: [0x01,0x00,0x2c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_fmac_f32 v7, vcc_lo, v3 +// 
GFX1250: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_fmac_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x2c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_fmac_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_fmac_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x2c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_fmac_f32 v7, ttmp15, v3 +// GFX1250: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_fmac_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x2c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, m0, v2 :: v_dual_fmac_f32 v7, m0, v3 +// GFX1250: v_dual_min_num_f32 v255, m0, v2 :: v_dual_fmac_f32 v7, m0, v3 ; encoding: [0x7d,0x00,0x2c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v7, exec_lo, v3 +// GFX1250: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x2c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v7, exec_hi, v3 +// GFX1250: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x2c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_fmac_f32 v7, -1, v3 +// GFX1250: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_fmac_f32 v7, -1, v3 ; encoding: [0xfd,0x00,0x2c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_fmac_f32 v7, 0.5, v2 +// GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_fmac_f32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x2c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, -1, v4 :: v_dual_fmac_f32 v7, src_scc, v5 +// GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_fmac_f32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x2c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v4, v2 :: v_dual_lshlrev_b32 v7, v1, v3 +// GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_lshlrev_b32 v7, v1, v3 ; encoding: [0x04,0x11,0x2d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v1, v2 :: v_dual_lshlrev_b32 v7, v255, v3 +// GFX1250: v_dual_min_num_f32 v255, v1, v2 :: v_dual_lshlrev_b32 v7, v255, v3 ; encoding: [0x01,0x11,0x2d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v255, v2 :: v_dual_lshlrev_b32 v7, v2, v3 +// GFX1250: v_dual_min_num_f32 v255, v255, v2 :: v_dual_lshlrev_b32 v7, v2, v3 ; encoding: [0xff,0x11,0x2d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v2, v2 :: v_dual_lshlrev_b32 v7, v3, v3 +// GFX1250: v_dual_min_num_f32 v255, v2, v2 :: v_dual_lshlrev_b32 v7, v3, v3 ; encoding: [0x02,0x11,0x2d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v3, v2 :: v_dual_lshlrev_b32 v7, v4, v3 +// GFX1250: v_dual_min_num_f32 v255, v3, v2 :: v_dual_lshlrev_b32 v7, v4, v3 ; encoding: [0x03,0x11,0x2d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s105, v2 :: v_dual_lshlrev_b32 v7, s1, v3 +// GFX1250: v_dual_min_num_f32 v255, s105, v2 :: v_dual_lshlrev_b32 v7, s1, v3 ; encoding: [0x69,0x10,0x2d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s1, v2 :: v_dual_lshlrev_b32 v7, s105, v3 +// GFX1250: v_dual_min_num_f32 v255, s1, v2 :: v_dual_lshlrev_b32 v7, s105, v3 ; encoding: [0x01,0x10,0x2d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v7, vcc_lo, v3 +// GFX1250: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v7, vcc_lo, v3 ; encoding: [0x7b,0x10,0x2d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v7, vcc_hi, v3 +// GFX1250: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v7, vcc_hi, v3 ; encoding: [0x7f,0x10,0x2d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v7, ttmp15, v3 +// GFX1250: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v7, ttmp15, v3 ; encoding: [0x7e,0x10,0x2d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, m0, v2 :: v_dual_lshlrev_b32 v7, m0, v3 +// GFX1250: v_dual_min_num_f32 v255, m0, v2 :: v_dual_lshlrev_b32 v7, m0, v3 ; encoding: [0x7d,0x10,0x2d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v7, exec_lo, v3 +// GFX1250: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 
v7, exec_lo, v3 ; encoding: [0x6b,0x10,0x2d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v7, exec_hi, v3 +// GFX1250: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x10,0x2d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v7, -1, v3 +// GFX1250: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v7, -1, v3 ; encoding: [0xfd,0x10,0x2d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v7, 0.5, v2 +// GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x10,0x2d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, -1, v4 :: v_dual_lshlrev_b32 v7, src_scc, v5 +// GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_lshlrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x10,0x2d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v4, v2 :: v_dual_max_num_f32 v7, v1, v3 +// GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_max_num_f32 v7, v1, v3 ; encoding: [0x04,0xa1,0x2c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v1, v2 :: v_dual_max_num_f32 v7, v255, v3 +// GFX1250: v_dual_min_num_f32 v255, v1, v2 :: v_dual_max_num_f32 v7, v255, v3 ; encoding: [0x01,0xa1,0x2c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v255, v2 :: v_dual_max_num_f32 
v7, v2, v3 +// GFX1250: v_dual_min_num_f32 v255, v255, v2 :: v_dual_max_num_f32 v7, v2, v3 ; encoding: [0xff,0xa1,0x2c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v2, v2 :: v_dual_max_num_f32 v7, v3, v3 +// GFX1250: v_dual_min_num_f32 v255, v2, v2 :: v_dual_max_num_f32 v7, v3, v3 ; encoding: [0x02,0xa1,0x2c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v3, v2 :: v_dual_max_num_f32 v7, v4, v3 +// GFX1250: v_dual_min_num_f32 v255, v3, v2 :: v_dual_max_num_f32 v7, v4, v3 ; encoding: [0x03,0xa1,0x2c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s105, v2 :: v_dual_max_num_f32 v7, s1, v3 +// GFX1250: v_dual_min_num_f32 v255, s105, v2 :: v_dual_max_num_f32 v7, s1, v3 ; encoding: [0x69,0xa0,0x2c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s1, v2 :: v_dual_max_num_f32 v7, s105, v3 +// GFX1250: v_dual_min_num_f32 v255, s1, v2 :: v_dual_max_num_f32 v7, s105, v3 ; encoding: [0x01,0xa0,0x2c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_max_num_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_max_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xa0,0x2c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_max_num_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_max_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xa0,0x2c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_max_num_f32 v7, ttmp15, v3 +// GFX1250: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_max_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xa0,0x2c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, m0, v2 :: v_dual_max_num_f32 v7, m0, v3 +// GFX1250: v_dual_min_num_f32 v255, m0, v2 :: v_dual_max_num_f32 v7, m0, v3 ; encoding: [0x7d,0xa0,0x2c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v7, exec_lo, v3 +// GFX1250: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xa0,0x2c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v7, exec_hi, v3 +// GFX1250: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xa0,0x2c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_max_num_f32 v7, -1, v3 +// GFX1250: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_max_num_f32 v7, -1, v3 ; encoding: [0xfd,0xa0,0x2c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_max_num_f32 v7, 0.5, v2 +// GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_max_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xa0,0x2c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, -1, v4 :: v_dual_max_num_f32 v7, src_scc, v5 +// GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_max_num_f32 v7, src_scc, v5 ; encoding: 
[0xc1,0xa0,0x2c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v4, v2 :: v_dual_min_num_f32 v7, v1, v3 +// GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_min_num_f32 v7, v1, v3 ; encoding: [0x04,0xb1,0x2c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v1, v2 :: v_dual_min_num_f32 v7, v255, v3 +// GFX1250: v_dual_min_num_f32 v255, v1, v2 :: v_dual_min_num_f32 v7, v255, v3 ; encoding: [0x01,0xb1,0x2c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v255, v2 :: v_dual_min_num_f32 v7, v2, v3 +// GFX1250: v_dual_min_num_f32 v255, v255, v2 :: v_dual_min_num_f32 v7, v2, v3 ; encoding: [0xff,0xb1,0x2c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v2, v2 :: v_dual_min_num_f32 v7, v3, v3 +// GFX1250: v_dual_min_num_f32 v255, v2, v2 :: v_dual_min_num_f32 v7, v3, v3 ; encoding: [0x02,0xb1,0x2c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v3, v2 :: v_dual_min_num_f32 v7, v4, v3 +// GFX1250: v_dual_min_num_f32 v255, v3, v2 :: v_dual_min_num_f32 v7, v4, v3 ; encoding: [0x03,0xb1,0x2c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s105, v2 :: v_dual_min_num_f32 v7, s1, v3 +// GFX1250: v_dual_min_num_f32 v255, s105, v2 :: v_dual_min_num_f32 v7, s1, v3 ; encoding: [0x69,0xb0,0x2c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s1, v2 :: v_dual_min_num_f32 v7, s105, v3 +// GFX1250: v_dual_min_num_f32 v255, s1, v2 :: 
v_dual_min_num_f32 v7, s105, v3 ; encoding: [0x01,0xb0,0x2c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_min_num_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_min_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xb0,0x2c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_min_num_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_min_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xb0,0x2c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_min_num_f32 v7, ttmp15, v3 +// GFX1250: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_min_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xb0,0x2c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, m0, v2 :: v_dual_min_num_f32 v7, m0, v3 +// GFX1250: v_dual_min_num_f32 v255, m0, v2 :: v_dual_min_num_f32 v7, m0, v3 ; encoding: [0x7d,0xb0,0x2c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v7, exec_lo, v3 +// GFX1250: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xb0,0x2c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v7, exec_hi, v3 +// GFX1250: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xb0,0x2c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_min_num_f32 v7, -1, v3 +// GFX1250: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_min_num_f32 v7, -1, v3 ; encoding: [0xfd,0xb0,0x2c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_min_num_f32 v7, 0.5, v2 +// GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_min_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xb0,0x2c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, -1, v4 :: v_dual_min_num_f32 v7, src_scc, v5 +// GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_min_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xb0,0x2c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v4, v255 :: v_dual_mov_b32 v7, v1 +// GFX1250: v_dual_min_num_f32 v255, v4, v255 :: v_dual_mov_b32 v7, v1 ; encoding: [0x04,0x81,0x2c,0xcf,0x01,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v1, v255 :: v_dual_mov_b32 v7, v255 +// GFX1250: v_dual_min_num_f32 v255, v1, v255 :: v_dual_mov_b32 v7, v255 ; encoding: [0x01,0x81,0x2c,0xcf,0xff,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v255, v255 :: v_dual_mov_b32 v7, v2 +// GFX1250: v_dual_min_num_f32 v255, v255, v255 :: v_dual_mov_b32 v7, v2 ; encoding: [0xff,0x81,0x2c,0xcf,0x02,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v2, v255 :: v_dual_mov_b32 v7, v3 +// GFX1250: v_dual_min_num_f32 v255, v2, v255 :: v_dual_mov_b32 v7, v3 ; encoding: [0x02,0x81,0x2c,0xcf,0x03,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_min_num_f32 v255, v3, v255 :: v_dual_mov_b32 v7, v4 +// GFX1250: v_dual_min_num_f32 v255, v3, v255 :: v_dual_mov_b32 v7, v4 ; encoding: [0x03,0x81,0x2c,0xcf,0x04,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s105, v255 :: v_dual_mov_b32 v7, s1 +// GFX1250: v_dual_min_num_f32 v255, s105, v255 :: v_dual_mov_b32 v7, s1 ; encoding: [0x69,0x80,0x2c,0xcf,0x01,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s1, v255 :: v_dual_mov_b32 v7, s105 +// GFX1250: v_dual_min_num_f32 v255, s1, v255 :: v_dual_mov_b32 v7, s105 ; encoding: [0x01,0x80,0x2c,0xcf,0x69,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, ttmp15, v255 :: v_dual_mov_b32 v7, vcc_lo +// GFX1250: v_dual_min_num_f32 v255, ttmp15, v255 :: v_dual_mov_b32 v7, vcc_lo ; encoding: [0x7b,0x80,0x2c,0xcf,0x6a,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_hi, v255 :: v_dual_mov_b32 v7, vcc_hi +// GFX1250: v_dual_min_num_f32 v255, exec_hi, v255 :: v_dual_mov_b32 v7, vcc_hi ; encoding: [0x7f,0x80,0x2c,0xcf,0x6b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_lo, v255 :: v_dual_mov_b32 v7, ttmp15 +// GFX1250: v_dual_min_num_f32 v255, exec_lo, v255 :: v_dual_mov_b32 v7, ttmp15 ; encoding: [0x7e,0x80,0x2c,0xcf,0x7b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, m0, v255 :: v_dual_mov_b32 v7, m0 +// GFX1250: v_dual_min_num_f32 v255, m0, v255 :: v_dual_mov_b32 v7, m0 ; encoding: [0x7d,0x80,0x2c,0xcf,0x7d,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_min_num_f32 v255, vcc_hi, v255 :: v_dual_mov_b32 v7, exec_lo +// GFX1250: v_dual_min_num_f32 v255, vcc_hi, v255 :: v_dual_mov_b32 v7, exec_lo ; encoding: [0x6b,0x80,0x2c,0xcf,0x7e,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_lo, v255 :: v_dual_mov_b32 v7, exec_hi +// GFX1250: v_dual_min_num_f32 v255, vcc_lo, v255 :: v_dual_mov_b32 v7, exec_hi ; encoding: [0x6a,0x80,0x2c,0xcf,0x7f,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, src_scc, v255 :: v_dual_mov_b32 v7, -1 +// GFX1250: v_dual_min_num_f32 v255, src_scc, v255 :: v_dual_mov_b32 v7, -1 ; encoding: [0xfd,0x80,0x2c,0xcf,0xc1,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_mov_b32 v7, 0.5 +// GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_mov_b32 v7, 0.5 ; encoding: [0xf0,0x80,0x2c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, -1, v4 :: v_dual_mov_b32 v7, src_scc +// GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_mov_b32 v7, src_scc ; encoding: [0xc1,0x80,0x2c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v7, v1, v3 +// GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v7, v1, v3 ; encoding: [0x04,0x71,0x2c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v7, v255, v3 +// GFX1250: v_dual_min_num_f32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v7, v255, v3 ; encoding: [0x01,0x71,0x2c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v7, v2, v3 +// GFX1250: v_dual_min_num_f32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v7, v2, v3 ; encoding: [0xff,0x71,0x2c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v7, v3, v3 +// GFX1250: v_dual_min_num_f32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v7, v3, v3 ; encoding: [0x02,0x71,0x2c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v7, v4, v3 +// GFX1250: v_dual_min_num_f32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v7, v4, v3 ; encoding: [0x03,0x71,0x2c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v7, s1, v3 +// GFX1250: v_dual_min_num_f32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v7, s1, v3 ; encoding: [0x69,0x70,0x2c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v7, s105, v3 +// GFX1250: v_dual_min_num_f32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v7, s105, v3 ; encoding: [0x01,0x70,0x2c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x70,0x2c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_min_num_f32 v255, 
exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x70,0x2c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, ttmp15, v3 +// GFX1250: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x70,0x2c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v7, m0, v3 +// GFX1250: v_dual_min_num_f32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v7, m0, v3 ; encoding: [0x7d,0x70,0x2c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_lo, v3 +// GFX1250: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x70,0x2c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 +// GFX1250: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x2c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v7, -1, v3 +// GFX1250: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v7, -1, v3 ; encoding: [0xfd,0x70,0x2c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 +// GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x2c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// 
W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 +// GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x2c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v4, v2 :: v_dual_mul_f32 v7, v1, v3 +// GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_mul_f32 v7, v1, v3 ; encoding: [0x04,0x31,0x2c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v1, v2 :: v_dual_mul_f32 v7, v255, v3 +// GFX1250: v_dual_min_num_f32 v255, v1, v2 :: v_dual_mul_f32 v7, v255, v3 ; encoding: [0x01,0x31,0x2c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v255, v2 :: v_dual_mul_f32 v7, v2, v3 +// GFX1250: v_dual_min_num_f32 v255, v255, v2 :: v_dual_mul_f32 v7, v2, v3 ; encoding: [0xff,0x31,0x2c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v2, v2 :: v_dual_mul_f32 v7, v3, v3 +// GFX1250: v_dual_min_num_f32 v255, v2, v2 :: v_dual_mul_f32 v7, v3, v3 ; encoding: [0x02,0x31,0x2c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v3, v2 :: v_dual_mul_f32 v7, v4, v3 +// GFX1250: v_dual_min_num_f32 v255, v3, v2 :: v_dual_mul_f32 v7, v4, v3 ; encoding: [0x03,0x31,0x2c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s105, v2 :: v_dual_mul_f32 v7, s1, v3 +// GFX1250: v_dual_min_num_f32 v255, s105, v2 :: v_dual_mul_f32 v7, s1, v3 ; encoding: 
[0x69,0x30,0x2c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s1, v2 :: v_dual_mul_f32 v7, s105, v3 +// GFX1250: v_dual_min_num_f32 v255, s1, v2 :: v_dual_mul_f32 v7, s105, v3 ; encoding: [0x01,0x30,0x2c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_mul_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_mul_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x30,0x2c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_mul_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_mul_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x30,0x2c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_mul_f32 v7, ttmp15, v3 +// GFX1250: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_mul_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x30,0x2c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, m0, v2 :: v_dual_mul_f32 v7, m0, v3 +// GFX1250: v_dual_min_num_f32 v255, m0, v2 :: v_dual_mul_f32 v7, m0, v3 ; encoding: [0x7d,0x30,0x2c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_mul_f32 v7, exec_lo, v3 +// GFX1250: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_mul_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x30,0x2c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_mul_f32 v7, exec_hi, v3 +// GFX1250: v_dual_min_num_f32 
v255, vcc_lo, v2 :: v_dual_mul_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x30,0x2c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_mul_f32 v7, -1, v3 +// GFX1250: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_mul_f32 v7, -1, v3 ; encoding: [0xfd,0x30,0x2c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_mul_f32 v7, 0.5, v2 +// GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_mul_f32 v7, 0.5, v2 ; encoding: [0xf0,0x30,0x2c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, -1, v4 :: v_dual_mul_f32 v7, src_scc, v5 +// GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_mul_f32 v7, src_scc, v5 ; encoding: [0xc1,0x30,0x2c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v4, v2 :: v_dual_sub_f32 v7, v1, v3 +// GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_sub_f32 v7, v1, v3 ; encoding: [0x04,0x51,0x2c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v1, v2 :: v_dual_sub_f32 v7, v255, v3 +// GFX1250: v_dual_min_num_f32 v255, v1, v2 :: v_dual_sub_f32 v7, v255, v3 ; encoding: [0x01,0x51,0x2c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v255, v2 :: v_dual_sub_f32 v7, v2, v3 +// GFX1250: v_dual_min_num_f32 v255, v255, v2 :: v_dual_sub_f32 v7, v2, v3 ; encoding: [0xff,0x51,0x2c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v2, v2 :: v_dual_sub_f32 v7, v3, v3 +// GFX1250: 
v_dual_min_num_f32 v255, v2, v2 :: v_dual_sub_f32 v7, v3, v3 ; encoding: [0x02,0x51,0x2c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v3, v2 :: v_dual_sub_f32 v7, v4, v3 +// GFX1250: v_dual_min_num_f32 v255, v3, v2 :: v_dual_sub_f32 v7, v4, v3 ; encoding: [0x03,0x51,0x2c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s105, v2 :: v_dual_sub_f32 v7, s1, v3 +// GFX1250: v_dual_min_num_f32 v255, s105, v2 :: v_dual_sub_f32 v7, s1, v3 ; encoding: [0x69,0x50,0x2c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s1, v2 :: v_dual_sub_f32 v7, s105, v3 +// GFX1250: v_dual_min_num_f32 v255, s1, v2 :: v_dual_sub_f32 v7, s105, v3 ; encoding: [0x01,0x50,0x2c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_sub_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_sub_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x50,0x2c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_sub_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_sub_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x50,0x2c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_sub_f32 v7, ttmp15, v3 +// GFX1250: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_sub_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x50,0x2c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, m0, v2 :: 
v_dual_sub_f32 v7, m0, v3 +// GFX1250: v_dual_min_num_f32 v255, m0, v2 :: v_dual_sub_f32 v7, m0, v3 ; encoding: [0x7d,0x50,0x2c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_sub_f32 v7, exec_lo, v3 +// GFX1250: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_sub_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x50,0x2c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_sub_f32 v7, exec_hi, v3 +// GFX1250: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_sub_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x2c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_sub_f32 v7, -1, v3 +// GFX1250: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_sub_f32 v7, -1, v3 ; encoding: [0xfd,0x50,0x2c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_sub_f32 v7, 0.5, v2 +// GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_sub_f32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x2c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, -1, v4 :: v_dual_sub_f32 v7, src_scc, v5 +// GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_sub_f32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x2c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v4, v2 :: v_dual_subrev_f32 v7, v1, v3 +// GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_subrev_f32 v7, v1, v3 ; encoding: [0x04,0x61,0x2c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_min_num_f32 v255, v1, v2 :: v_dual_subrev_f32 v7, v255, v3 +// GFX1250: v_dual_min_num_f32 v255, v1, v2 :: v_dual_subrev_f32 v7, v255, v3 ; encoding: [0x01,0x61,0x2c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v255, v2 :: v_dual_subrev_f32 v7, v2, v3 +// GFX1250: v_dual_min_num_f32 v255, v255, v2 :: v_dual_subrev_f32 v7, v2, v3 ; encoding: [0xff,0x61,0x2c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v2, v2 :: v_dual_subrev_f32 v7, v3, v3 +// GFX1250: v_dual_min_num_f32 v255, v2, v2 :: v_dual_subrev_f32 v7, v3, v3 ; encoding: [0x02,0x61,0x2c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v3, v2 :: v_dual_subrev_f32 v7, v4, v3 +// GFX1250: v_dual_min_num_f32 v255, v3, v2 :: v_dual_subrev_f32 v7, v4, v3 ; encoding: [0x03,0x61,0x2c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s105, v2 :: v_dual_subrev_f32 v7, s1, v3 +// GFX1250: v_dual_min_num_f32 v255, s105, v2 :: v_dual_subrev_f32 v7, s1, v3 ; encoding: [0x69,0x60,0x2c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s1, v2 :: v_dual_subrev_f32 v7, s105, v3 +// GFX1250: v_dual_min_num_f32 v255, s1, v2 :: v_dual_subrev_f32 v7, s105, v3 ; encoding: [0x01,0x60,0x2c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_subrev_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_subrev_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x60,0x2c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: 
error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_subrev_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_subrev_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x60,0x2c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_subrev_f32 v7, ttmp15, v3 +// GFX1250: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_subrev_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x60,0x2c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, m0, v2 :: v_dual_subrev_f32 v7, m0, v3 +// GFX1250: v_dual_min_num_f32 v255, m0, v2 :: v_dual_subrev_f32 v7, m0, v3 ; encoding: [0x7d,0x60,0x2c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v7, exec_lo, v3 +// GFX1250: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x60,0x2c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v7, exec_hi, v3 +// GFX1250: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x2c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_subrev_f32 v7, -1, v3 +// GFX1250: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_subrev_f32 v7, -1, v3 ; encoding: [0xfd,0x60,0x2c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_subrev_f32 v7, 0.5, v2 +// GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_subrev_f32 v7, 0.5, v2 ; 
encoding: [0xf0,0x60,0x2c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, -1, v4 :: v_dual_subrev_f32 v7, src_scc, v5 +// GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_subrev_f32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x2c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v4, v2 :: v_dual_fma_f32 v7, v1, v3, v4 +// GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_fma_f32 v7, v1, v3, v4 ; encoding: [0x04,0x31,0x2d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x04,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v4, v2 :: v_dual_bitop2_b32 v7, v1, v3 bitop3:255 +// GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0xff ; encoding: [0x04,0x21,0x2d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0xff,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v4 :: v_dual_add_f32 v7, v1, v255 +// GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_add_f32 v7, v1, v255 ; encoding: [0x04,0x41,0x20,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v1 :: v_dual_add_f32 v7, v255, v255 +// GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_add_f32 v7, v255, v255 ; encoding: [0x01,0x41,0x20,0xcf,0xff,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v255 :: v_dual_add_f32 v7, v2, v255 +// GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_add_f32 v7, v2, v255 ; encoding: [0xff,0x41,0x20,0xcf,0x02,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v2 :: v_dual_add_f32 v7, v3, v255 +// GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_add_f32 v7, v3, v255 ; encoding: 
[0x02,0x41,0x20,0xcf,0x03,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v3 :: v_dual_add_f32 v7, v4, v255 +// GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_add_f32 v7, v4, v255 ; encoding: [0x03,0x41,0x20,0xcf,0x04,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s105 :: v_dual_add_f32 v7, s1, v255 +// GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_add_f32 v7, s1, v255 ; encoding: [0x69,0x40,0x20,0xcf,0x01,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s1 :: v_dual_add_f32 v7, s105, v255 +// GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_add_f32 v7, s105, v255 ; encoding: [0x01,0x40,0x20,0xcf,0x69,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, ttmp15 :: v_dual_add_f32 v7, vcc_lo, v255 +// GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_add_f32 v7, vcc_lo, v255 ; encoding: [0x7b,0x40,0x20,0xcf,0x6a,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_hi :: v_dual_add_f32 v7, vcc_hi, v255 +// GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_add_f32 v7, vcc_hi, v255 ; encoding: [0x7f,0x40,0x20,0xcf,0x6b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_lo :: v_dual_add_f32 v7, ttmp15, v255 +// GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_add_f32 v7, ttmp15, v255 ; encoding: [0x7e,0x40,0x20,0xcf,0x7b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, m0 :: v_dual_add_f32 v7, m0, v255 +// GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_add_f32 v7, m0, v255 ; encoding: 
[0x7d,0x40,0x20,0xcf,0x7d,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_hi :: v_dual_add_f32 v7, exec_lo, v255 +// GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_add_f32 v7, exec_lo, v255 ; encoding: [0x6b,0x40,0x20,0xcf,0x7e,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_lo :: v_dual_add_f32 v7, exec_hi, v255 +// GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_add_f32 v7, exec_hi, v255 ; encoding: [0x6a,0x40,0x20,0xcf,0x7f,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, src_scc :: v_dual_add_f32 v7, -1, v255 +// GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_add_f32 v7, -1, v255 ; encoding: [0xfd,0x40,0x20,0xcf,0xc1,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0.5 :: v_dual_add_f32 v7, 0.5, v3 +// GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_add_f32 v7, 0.5, v3 ; encoding: [0xf0,0x40,0x20,0xcf,0xf0,0x00,0x00,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, -1 :: v_dual_add_f32 v7, src_scc, v4 +// GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_add_f32 v7, src_scc, v4 ; encoding: [0xc1,0x40,0x20,0xcf,0xfd,0x00,0x00,0x00,0xff,0x04,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v4 :: v_dual_add_nc_u32 v7, v1, v255 +// GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_add_nc_u32 v7, v1, v255 ; encoding: [0x04,0x01,0x21,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v1 :: v_dual_add_nc_u32 v7, v255, v255 +// GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_add_nc_u32 v7, v255, v255 ; encoding: 
[0x01,0x01,0x21,0xcf,0xff,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v255 :: v_dual_add_nc_u32 v7, v2, v255 +// GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_add_nc_u32 v7, v2, v255 ; encoding: [0xff,0x01,0x21,0xcf,0x02,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v2 :: v_dual_add_nc_u32 v7, v3, v255 +// GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_add_nc_u32 v7, v3, v255 ; encoding: [0x02,0x01,0x21,0xcf,0x03,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v3 :: v_dual_add_nc_u32 v7, v4, v255 +// GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_add_nc_u32 v7, v4, v255 ; encoding: [0x03,0x01,0x21,0xcf,0x04,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s105 :: v_dual_add_nc_u32 v7, s1, v255 +// GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_add_nc_u32 v7, s1, v255 ; encoding: [0x69,0x00,0x21,0xcf,0x01,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s1 :: v_dual_add_nc_u32 v7, s105, v255 +// GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_add_nc_u32 v7, s105, v255 ; encoding: [0x01,0x00,0x21,0xcf,0x69,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, ttmp15 :: v_dual_add_nc_u32 v7, vcc_lo, v255 +// GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_add_nc_u32 v7, vcc_lo, v255 ; encoding: [0x7b,0x00,0x21,0xcf,0x6a,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_hi :: v_dual_add_nc_u32 v7, vcc_hi, v255 +// GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_add_nc_u32 v7, vcc_hi, v255 ; encoding: 
[0x7f,0x00,0x21,0xcf,0x6b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_lo :: v_dual_add_nc_u32 v7, ttmp15, v255 +// GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_add_nc_u32 v7, ttmp15, v255 ; encoding: [0x7e,0x00,0x21,0xcf,0x7b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, m0 :: v_dual_add_nc_u32 v7, m0, v255 +// GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_add_nc_u32 v7, m0, v255 ; encoding: [0x7d,0x00,0x21,0xcf,0x7d,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_hi :: v_dual_add_nc_u32 v7, exec_lo, v255 +// GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_add_nc_u32 v7, exec_lo, v255 ; encoding: [0x6b,0x00,0x21,0xcf,0x7e,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_lo :: v_dual_add_nc_u32 v7, exec_hi, v255 +// GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_add_nc_u32 v7, exec_hi, v255 ; encoding: [0x6a,0x00,0x21,0xcf,0x7f,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, src_scc :: v_dual_add_nc_u32 v7, -1, v255 +// GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_add_nc_u32 v7, -1, v255 ; encoding: [0xfd,0x00,0x21,0xcf,0xc1,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0.5 :: v_dual_add_nc_u32 v7, 0.5, v3 +// GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_add_nc_u32 v7, 0.5, v3 ; encoding: [0xf0,0x00,0x21,0xcf,0xf0,0x00,0x00,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, -1 :: v_dual_add_nc_u32 v7, src_scc, v4 +// GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_add_nc_u32 v7, src_scc, v4 ; encoding: 
[0xc1,0x00,0x21,0xcf,0xfd,0x00,0x00,0x00,0xff,0x04,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v4 :: v_dual_cndmask_b32 v7, v1, v255, vcc_lo +// GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_cndmask_b32 v7, v1, v255, vcc_lo ; encoding: [0x04,0x91,0x20,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v1 :: v_dual_cndmask_b32 v7, v255, v255, vcc_lo +// GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_cndmask_b32 v7, v255, v255, vcc_lo ; encoding: [0x01,0x91,0x20,0xcf,0xff,0x01,0x00,0x00,0xff,0xff,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v255 :: v_dual_cndmask_b32 v7, v2, v255, vcc_lo +// GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_cndmask_b32 v7, v2, v255, vcc_lo ; encoding: [0xff,0x91,0x20,0xcf,0x02,0x01,0x00,0x00,0xff,0xff,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v2 :: v_dual_cndmask_b32 v7, v3, v255, vcc_lo +// GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_cndmask_b32 v7, v3, v255, vcc_lo ; encoding: [0x02,0x91,0x20,0xcf,0x03,0x01,0x00,0x00,0xff,0xff,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v3 :: v_dual_cndmask_b32 v7, v4, v255, vcc_lo +// GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_cndmask_b32 v7, v4, v255, vcc_lo ; encoding: [0x03,0x91,0x20,0xcf,0x04,0x01,0x00,0x00,0xff,0xff,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s105 :: v_dual_cndmask_b32 v7, s105, v255, vcc_lo +// GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_cndmask_b32 v7, s105, v255, vcc_lo ; encoding: [0x69,0x90,0x20,0xcf,0x69,0x00,0x00,0x00,0xff,0xff,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s1 :: v_dual_cndmask_b32 v7, s1, v255, vcc_lo +// GFX1250: 
v_dual_mov_b32 v255, s1 :: v_dual_cndmask_b32 v7, s1, v255, vcc_lo ; encoding: [0x01,0x90,0x20,0xcf,0x01,0x00,0x00,0x00,0xff,0xff,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, ttmp15 :: v_dual_cndmask_b32 v7, ttmp15, v255, vcc_lo +// GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_cndmask_b32 v7, ttmp15, v255, vcc_lo ; encoding: [0x7b,0x90,0x20,0xcf,0x7b,0x00,0x00,0x00,0xff,0xff,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_hi :: v_dual_cndmask_b32 v7, exec_hi, v255, vcc_lo +// GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_cndmask_b32 v7, exec_hi, v255, vcc_lo ; encoding: [0x7f,0x90,0x20,0xcf,0x7f,0x00,0x00,0x00,0xff,0xff,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_lo :: v_dual_cndmask_b32 v7, exec_lo, v255, vcc_lo +// GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_cndmask_b32 v7, exec_lo, v255, vcc_lo ; encoding: [0x7e,0x90,0x20,0xcf,0x7e,0x00,0x00,0x00,0xff,0xff,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, m0 :: v_dual_cndmask_b32 v7, m0, v255, vcc_lo +// GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_cndmask_b32 v7, m0, v255, vcc_lo ; encoding: [0x7d,0x90,0x20,0xcf,0x7d,0x00,0x00,0x00,0xff,0xff,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_hi :: v_dual_cndmask_b32 v7, vcc_hi, v255, vcc_lo +// GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_cndmask_b32 v7, vcc_hi, v255, vcc_lo ; encoding: [0x6b,0x90,0x20,0xcf,0x6b,0x00,0x00,0x00,0xff,0xff,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_lo :: v_dual_cndmask_b32 v7, vcc_lo, v255, vcc_lo +// GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_cndmask_b32 v7, vcc_lo, v255, vcc_lo ; encoding: [0x6a,0x90,0x20,0xcf,0x6a,0x00,0x00,0x00,0xff,0xff,0x6a,0x07] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, src_scc :: v_dual_cndmask_b32 v7, -1, v255, vcc_lo +// GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_cndmask_b32 v7, -1, v255, vcc_lo ; encoding: [0xfd,0x90,0x20,0xcf,0xc1,0x00,0x00,0x00,0xff,0xff,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0.5 :: v_dual_cndmask_b32 v7, 0.5, v3, vcc_lo +// GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_cndmask_b32 v7, 0.5, v3, vcc_lo ; encoding: [0xf0,0x90,0x20,0xcf,0xf0,0x00,0x00,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, -1 :: v_dual_cndmask_b32 v7, src_scc, v4, vcc_lo +// GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_cndmask_b32 v7, src_scc, v4, vcc_lo ; encoding: [0xc1,0x90,0x20,0xcf,0xfd,0x00,0x00,0x00,0xff,0x04,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v4 :: v_dual_fmac_f32 v7, v1, v255 +// GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_fmac_f32 v7, v1, v255 ; encoding: [0x04,0x01,0x20,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v1 :: v_dual_fmac_f32 v7, v255, v255 +// GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_fmac_f32 v7, v255, v255 ; encoding: [0x01,0x01,0x20,0xcf,0xff,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v255 :: v_dual_fmac_f32 v7, v2, v255 +// GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_fmac_f32 v7, v2, v255 ; encoding: [0xff,0x01,0x20,0xcf,0x02,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v2 :: v_dual_fmac_f32 v7, v3, v255 +// GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_fmac_f32 v7, v3, v255 ; encoding: [0x02,0x01,0x20,0xcf,0x03,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v3 :: v_dual_fmac_f32 v7, v4, v255 +// GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_fmac_f32 v7, v4, v255 ; encoding: [0x03,0x01,0x20,0xcf,0x04,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s105 :: v_dual_fmac_f32 v7, s1, v255 +// GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_fmac_f32 v7, s1, v255 ; encoding: [0x69,0x00,0x20,0xcf,0x01,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s1 :: v_dual_fmac_f32 v7, s105, v255 +// GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_fmac_f32 v7, s105, v255 ; encoding: [0x01,0x00,0x20,0xcf,0x69,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, ttmp15 :: v_dual_fmac_f32 v7, vcc_lo, v255 +// GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_fmac_f32 v7, vcc_lo, v255 ; encoding: [0x7b,0x00,0x20,0xcf,0x6a,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_hi :: v_dual_fmac_f32 v7, vcc_hi, v255 +// GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_fmac_f32 v7, vcc_hi, v255 ; encoding: [0x7f,0x00,0x20,0xcf,0x6b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_lo :: v_dual_fmac_f32 v7, ttmp15, v255 +// GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_fmac_f32 v7, ttmp15, v255 ; encoding: [0x7e,0x00,0x20,0xcf,0x7b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, m0 :: v_dual_fmac_f32 v7, m0, v255 +// GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_fmac_f32 v7, m0, v255 ; encoding: [0x7d,0x00,0x20,0xcf,0x7d,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction 
requires wavesize=32 + +v_dual_mov_b32 v255, vcc_hi :: v_dual_fmac_f32 v7, exec_lo, v255 +// GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_fmac_f32 v7, exec_lo, v255 ; encoding: [0x6b,0x00,0x20,0xcf,0x7e,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_lo :: v_dual_fmac_f32 v7, exec_hi, v255 +// GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_fmac_f32 v7, exec_hi, v255 ; encoding: [0x6a,0x00,0x20,0xcf,0x7f,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, src_scc :: v_dual_fmac_f32 v7, -1, v255 +// GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_fmac_f32 v7, -1, v255 ; encoding: [0xfd,0x00,0x20,0xcf,0xc1,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0.5 :: v_dual_fmac_f32 v7, 0.5, v3 +// GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_fmac_f32 v7, 0.5, v3 ; encoding: [0xf0,0x00,0x20,0xcf,0xf0,0x00,0x00,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, -1 :: v_dual_fmac_f32 v7, src_scc, v4 +// GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_fmac_f32 v7, src_scc, v4 ; encoding: [0xc1,0x00,0x20,0xcf,0xfd,0x00,0x00,0x00,0xff,0x04,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v4 :: v_dual_lshlrev_b32 v7, v1, v255 +// GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_lshlrev_b32 v7, v1, v255 ; encoding: [0x04,0x11,0x21,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v1 :: v_dual_lshlrev_b32 v7, v255, v255 +// GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_lshlrev_b32 v7, v255, v255 ; encoding: [0x01,0x11,0x21,0xcf,0xff,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_mov_b32 v255, v255 :: v_dual_lshlrev_b32 v7, v2, v255 +// GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_lshlrev_b32 v7, v2, v255 ; encoding: [0xff,0x11,0x21,0xcf,0x02,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v2 :: v_dual_lshlrev_b32 v7, v3, v255 +// GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_lshlrev_b32 v7, v3, v255 ; encoding: [0x02,0x11,0x21,0xcf,0x03,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v3 :: v_dual_lshlrev_b32 v7, v4, v255 +// GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_lshlrev_b32 v7, v4, v255 ; encoding: [0x03,0x11,0x21,0xcf,0x04,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s105 :: v_dual_lshlrev_b32 v7, s1, v255 +// GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_lshlrev_b32 v7, s1, v255 ; encoding: [0x69,0x10,0x21,0xcf,0x01,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s1 :: v_dual_lshlrev_b32 v7, s105, v255 +// GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_lshlrev_b32 v7, s105, v255 ; encoding: [0x01,0x10,0x21,0xcf,0x69,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, ttmp15 :: v_dual_lshlrev_b32 v7, vcc_lo, v255 +// GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_lshlrev_b32 v7, vcc_lo, v255 ; encoding: [0x7b,0x10,0x21,0xcf,0x6a,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_hi :: v_dual_lshlrev_b32 v7, vcc_hi, v255 +// GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_lshlrev_b32 v7, vcc_hi, v255 ; encoding: [0x7f,0x10,0x21,0xcf,0x6b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_mov_b32 v255, exec_lo :: v_dual_lshlrev_b32 v7, ttmp15, v255 +// GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_lshlrev_b32 v7, ttmp15, v255 ; encoding: [0x7e,0x10,0x21,0xcf,0x7b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, m0 :: v_dual_lshlrev_b32 v7, m0, v255 +// GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_lshlrev_b32 v7, m0, v255 ; encoding: [0x7d,0x10,0x21,0xcf,0x7d,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_hi :: v_dual_lshlrev_b32 v7, exec_lo, v255 +// GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_lshlrev_b32 v7, exec_lo, v255 ; encoding: [0x6b,0x10,0x21,0xcf,0x7e,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_lo :: v_dual_lshlrev_b32 v7, exec_hi, v255 +// GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_lshlrev_b32 v7, exec_hi, v255 ; encoding: [0x6a,0x10,0x21,0xcf,0x7f,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, src_scc :: v_dual_lshlrev_b32 v7, -1, v255 +// GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_lshlrev_b32 v7, -1, v255 ; encoding: [0xfd,0x10,0x21,0xcf,0xc1,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0.5 :: v_dual_lshlrev_b32 v7, 0.5, v3 +// GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_lshlrev_b32 v7, 0.5, v3 ; encoding: [0xf0,0x10,0x21,0xcf,0xf0,0x00,0x00,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, -1 :: v_dual_lshlrev_b32 v7, src_scc, v4 +// GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_lshlrev_b32 v7, src_scc, v4 ; encoding: [0xc1,0x10,0x21,0xcf,0xfd,0x00,0x00,0x00,0xff,0x04,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_mov_b32 v255, v4 :: v_dual_max_num_f32 v7, v1, v255 +// GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_max_num_f32 v7, v1, v255 ; encoding: [0x04,0xa1,0x20,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v1 :: v_dual_max_num_f32 v7, v255, v255 +// GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_max_num_f32 v7, v255, v255 ; encoding: [0x01,0xa1,0x20,0xcf,0xff,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v255 :: v_dual_max_num_f32 v7, v2, v255 +// GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_max_num_f32 v7, v2, v255 ; encoding: [0xff,0xa1,0x20,0xcf,0x02,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v2 :: v_dual_max_num_f32 v7, v3, v255 +// GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_max_num_f32 v7, v3, v255 ; encoding: [0x02,0xa1,0x20,0xcf,0x03,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v3 :: v_dual_max_num_f32 v7, v4, v255 +// GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_max_num_f32 v7, v4, v255 ; encoding: [0x03,0xa1,0x20,0xcf,0x04,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s105 :: v_dual_max_num_f32 v7, s1, v255 +// GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_max_num_f32 v7, s1, v255 ; encoding: [0x69,0xa0,0x20,0xcf,0x01,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s1 :: v_dual_max_num_f32 v7, s105, v255 +// GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_max_num_f32 v7, s105, v255 ; encoding: [0x01,0xa0,0x20,0xcf,0x69,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, ttmp15 
:: v_dual_max_num_f32 v7, vcc_lo, v255 +// GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_max_num_f32 v7, vcc_lo, v255 ; encoding: [0x7b,0xa0,0x20,0xcf,0x6a,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_hi :: v_dual_max_num_f32 v7, vcc_hi, v255 +// GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_max_num_f32 v7, vcc_hi, v255 ; encoding: [0x7f,0xa0,0x20,0xcf,0x6b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_lo :: v_dual_max_num_f32 v7, ttmp15, v255 +// GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_max_num_f32 v7, ttmp15, v255 ; encoding: [0x7e,0xa0,0x20,0xcf,0x7b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, m0 :: v_dual_max_num_f32 v7, m0, v255 +// GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_max_num_f32 v7, m0, v255 ; encoding: [0x7d,0xa0,0x20,0xcf,0x7d,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_hi :: v_dual_max_num_f32 v7, exec_lo, v255 +// GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_max_num_f32 v7, exec_lo, v255 ; encoding: [0x6b,0xa0,0x20,0xcf,0x7e,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_lo :: v_dual_max_num_f32 v7, exec_hi, v255 +// GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_max_num_f32 v7, exec_hi, v255 ; encoding: [0x6a,0xa0,0x20,0xcf,0x7f,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, src_scc :: v_dual_max_num_f32 v7, -1, v255 +// GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_max_num_f32 v7, -1, v255 ; encoding: [0xfd,0xa0,0x20,0xcf,0xc1,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_mov_b32 v255, 0.5 :: v_dual_max_num_f32 v7, 0.5, v3 +// GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_max_num_f32 v7, 0.5, v3 ; encoding: [0xf0,0xa0,0x20,0xcf,0xf0,0x00,0x00,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, -1 :: v_dual_max_num_f32 v7, src_scc, v4 +// GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_max_num_f32 v7, src_scc, v4 ; encoding: [0xc1,0xa0,0x20,0xcf,0xfd,0x00,0x00,0x00,0xff,0x04,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v4 :: v_dual_min_num_f32 v7, v1, v255 +// GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_min_num_f32 v7, v1, v255 ; encoding: [0x04,0xb1,0x20,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v1 :: v_dual_min_num_f32 v7, v255, v255 +// GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_min_num_f32 v7, v255, v255 ; encoding: [0x01,0xb1,0x20,0xcf,0xff,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v255 :: v_dual_min_num_f32 v7, v2, v255 +// GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_min_num_f32 v7, v2, v255 ; encoding: [0xff,0xb1,0x20,0xcf,0x02,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v2 :: v_dual_min_num_f32 v7, v3, v255 +// GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_min_num_f32 v7, v3, v255 ; encoding: [0x02,0xb1,0x20,0xcf,0x03,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v3 :: v_dual_min_num_f32 v7, v4, v255 +// GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_min_num_f32 v7, v4, v255 ; encoding: [0x03,0xb1,0x20,0xcf,0x04,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s105 :: 
v_dual_min_num_f32 v7, s1, v255 +// GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_min_num_f32 v7, s1, v255 ; encoding: [0x69,0xb0,0x20,0xcf,0x01,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s1 :: v_dual_min_num_f32 v7, s105, v255 +// GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_min_num_f32 v7, s105, v255 ; encoding: [0x01,0xb0,0x20,0xcf,0x69,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, ttmp15 :: v_dual_min_num_f32 v7, vcc_lo, v255 +// GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_min_num_f32 v7, vcc_lo, v255 ; encoding: [0x7b,0xb0,0x20,0xcf,0x6a,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_hi :: v_dual_min_num_f32 v7, vcc_hi, v255 +// GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_min_num_f32 v7, vcc_hi, v255 ; encoding: [0x7f,0xb0,0x20,0xcf,0x6b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_lo :: v_dual_min_num_f32 v7, ttmp15, v255 +// GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_min_num_f32 v7, ttmp15, v255 ; encoding: [0x7e,0xb0,0x20,0xcf,0x7b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, m0 :: v_dual_min_num_f32 v7, m0, v255 +// GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_min_num_f32 v7, m0, v255 ; encoding: [0x7d,0xb0,0x20,0xcf,0x7d,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_hi :: v_dual_min_num_f32 v7, exec_lo, v255 +// GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_min_num_f32 v7, exec_lo, v255 ; encoding: [0x6b,0xb0,0x20,0xcf,0x7e,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_mov_b32 v255, vcc_lo :: v_dual_min_num_f32 v7, exec_hi, v255 +// GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_min_num_f32 v7, exec_hi, v255 ; encoding: [0x6a,0xb0,0x20,0xcf,0x7f,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, src_scc :: v_dual_min_num_f32 v7, -1, v255 +// GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_min_num_f32 v7, -1, v255 ; encoding: [0xfd,0xb0,0x20,0xcf,0xc1,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0.5 :: v_dual_min_num_f32 v7, 0.5, v3 +// GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_min_num_f32 v7, 0.5, v3 ; encoding: [0xf0,0xb0,0x20,0xcf,0xf0,0x00,0x00,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, -1 :: v_dual_min_num_f32 v7, src_scc, v4 +// GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_min_num_f32 v7, src_scc, v4 ; encoding: [0xc1,0xb0,0x20,0xcf,0xfd,0x00,0x00,0x00,0xff,0x04,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v4 :: v_dual_mov_b32 v7, v1 +// GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_mov_b32 v7, v1 ; encoding: [0x04,0x81,0x20,0xcf,0x01,0x01,0x00,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v1 :: v_dual_mov_b32 v7, v255 +// GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_mov_b32 v7, v255 ; encoding: [0x01,0x81,0x20,0xcf,0xff,0x01,0x00,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v255 :: v_dual_mov_b32 v7, v2 +// GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_mov_b32 v7, v2 ; encoding: [0xff,0x81,0x20,0xcf,0x02,0x01,0x00,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v2 :: v_dual_mov_b32 v7, v3 +// GFX1250: v_dual_mov_b32 
v255, v2 :: v_dual_mov_b32 v7, v3 ; encoding: [0x02,0x81,0x20,0xcf,0x03,0x01,0x00,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v3 :: v_dual_mov_b32 v7, v4 +// GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_mov_b32 v7, v4 ; encoding: [0x03,0x81,0x20,0xcf,0x04,0x01,0x00,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s105 :: v_dual_mov_b32 v7, s1 +// GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_mov_b32 v7, s1 ; encoding: [0x69,0x80,0x20,0xcf,0x01,0x00,0x00,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s1 :: v_dual_mov_b32 v7, s105 +// GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_mov_b32 v7, s105 ; encoding: [0x01,0x80,0x20,0xcf,0x69,0x00,0x00,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, ttmp15 :: v_dual_mov_b32 v7, vcc_lo +// GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_mov_b32 v7, vcc_lo ; encoding: [0x7b,0x80,0x20,0xcf,0x6a,0x00,0x00,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_hi :: v_dual_mov_b32 v7, vcc_hi +// GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_mov_b32 v7, vcc_hi ; encoding: [0x7f,0x80,0x20,0xcf,0x6b,0x00,0x00,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_lo :: v_dual_mov_b32 v7, ttmp15 +// GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_mov_b32 v7, ttmp15 ; encoding: [0x7e,0x80,0x20,0xcf,0x7b,0x00,0x00,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, m0 :: v_dual_mov_b32 v7, m0 +// GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_mov_b32 v7, m0 ; encoding: [0x7d,0x80,0x20,0xcf,0x7d,0x00,0x00,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: 
error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_hi :: v_dual_mov_b32 v7, exec_lo +// GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_mov_b32 v7, exec_lo ; encoding: [0x6b,0x80,0x20,0xcf,0x7e,0x00,0x00,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_lo :: v_dual_mov_b32 v7, exec_hi +// GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_mov_b32 v7, exec_hi ; encoding: [0x6a,0x80,0x20,0xcf,0x7f,0x00,0x00,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, src_scc :: v_dual_mov_b32 v7, -1 +// GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_mov_b32 v7, -1 ; encoding: [0xfd,0x80,0x20,0xcf,0xc1,0x00,0x00,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0.5 :: v_dual_mov_b32 v7, 0.5 +// GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_mov_b32 v7, 0.5 ; encoding: [0xf0,0x80,0x20,0xcf,0xf0,0x00,0x00,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, -1 :: v_dual_mov_b32 v7, src_scc +// GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_mov_b32 v7, src_scc ; encoding: [0xc1,0x80,0x20,0xcf,0xfd,0x00,0x00,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v25, v8 :: v_dual_mov_b32 v13, v16 +// GFX1250: v_dual_mov_b32 v25, v8 :: v_dual_mov_b32 v13, v16 ; encoding: [0x08,0x81,0x20,0xcf,0x10,0x01,0x00,0x00,0x19,0x00,0x00,0x0d] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v4 :: v_dual_mul_dx9_zero_f32 v7, v1, v255 +// GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_mul_dx9_zero_f32 v7, v1, v255 ; encoding: [0x04,0x71,0x20,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v1 :: v_dual_mul_dx9_zero_f32 v7, v255, v255 
+// GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_mul_dx9_zero_f32 v7, v255, v255 ; encoding: [0x01,0x71,0x20,0xcf,0xff,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v255 :: v_dual_mul_dx9_zero_f32 v7, v2, v255 +// GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_mul_dx9_zero_f32 v7, v2, v255 ; encoding: [0xff,0x71,0x20,0xcf,0x02,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v2 :: v_dual_mul_dx9_zero_f32 v7, v3, v255 +// GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_mul_dx9_zero_f32 v7, v3, v255 ; encoding: [0x02,0x71,0x20,0xcf,0x03,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v3 :: v_dual_mul_dx9_zero_f32 v7, v4, v255 +// GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_mul_dx9_zero_f32 v7, v4, v255 ; encoding: [0x03,0x71,0x20,0xcf,0x04,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s105 :: v_dual_mul_dx9_zero_f32 v7, s1, v255 +// GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_mul_dx9_zero_f32 v7, s1, v255 ; encoding: [0x69,0x70,0x20,0xcf,0x01,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s1 :: v_dual_mul_dx9_zero_f32 v7, s105, v255 +// GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_mul_dx9_zero_f32 v7, s105, v255 ; encoding: [0x01,0x70,0x20,0xcf,0x69,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, ttmp15 :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v255 +// GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v255 ; encoding: [0x7b,0x70,0x20,0xcf,0x6a,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 
exec_hi :: v_dual_mul_dx9_zero_f32 v7, vcc_hi, v255 +// GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_mul_dx9_zero_f32 v7, vcc_hi, v255 ; encoding: [0x7f,0x70,0x20,0xcf,0x6b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_lo :: v_dual_mul_dx9_zero_f32 v7, ttmp15, v255 +// GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_mul_dx9_zero_f32 v7, ttmp15, v255 ; encoding: [0x7e,0x70,0x20,0xcf,0x7b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, m0 :: v_dual_mul_dx9_zero_f32 v7, m0, v255 +// GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_mul_dx9_zero_f32 v7, m0, v255 ; encoding: [0x7d,0x70,0x20,0xcf,0x7d,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_hi :: v_dual_mul_dx9_zero_f32 v7, exec_lo, v255 +// GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_mul_dx9_zero_f32 v7, exec_lo, v255 ; encoding: [0x6b,0x70,0x20,0xcf,0x7e,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v255 +// GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v255 ; encoding: [0x6a,0x70,0x20,0xcf,0x7f,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, src_scc :: v_dual_mul_dx9_zero_f32 v7, -1, v255 +// GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_mul_dx9_zero_f32 v7, -1, v255 ; encoding: [0xfd,0x70,0x20,0xcf,0xc1,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0.5 :: v_dual_mul_dx9_zero_f32 v7, 0.5, v3 +// GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_mul_dx9_zero_f32 v7, 0.5, v3 ; encoding: [0xf0,0x70,0x20,0xcf,0xf0,0x00,0x00,0x00,0xff,0x03,0x00,0x07] 
+// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, -1 :: v_dual_mul_dx9_zero_f32 v7, src_scc, v4 +// GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_mul_dx9_zero_f32 v7, src_scc, v4 ; encoding: [0xc1,0x70,0x20,0xcf,0xfd,0x00,0x00,0x00,0xff,0x04,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v4 :: v_dual_mul_f32 v7, v1, v255 +// GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_mul_f32 v7, v1, v255 ; encoding: [0x04,0x31,0x20,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v1 :: v_dual_mul_f32 v7, v255, v255 +// GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_mul_f32 v7, v255, v255 ; encoding: [0x01,0x31,0x20,0xcf,0xff,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v255 :: v_dual_mul_f32 v7, v2, v255 +// GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_mul_f32 v7, v2, v255 ; encoding: [0xff,0x31,0x20,0xcf,0x02,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v2 :: v_dual_mul_f32 v7, v3, v255 +// GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_mul_f32 v7, v3, v255 ; encoding: [0x02,0x31,0x20,0xcf,0x03,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v3 :: v_dual_mul_f32 v7, v4, v255 +// GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_mul_f32 v7, v4, v255 ; encoding: [0x03,0x31,0x20,0xcf,0x04,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s105 :: v_dual_mul_f32 v7, s1, v255 +// GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_mul_f32 v7, s1, v255 ; encoding: [0x69,0x30,0x20,0xcf,0x01,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_mov_b32 v255, s1 :: v_dual_mul_f32 v7, s105, v255 +// GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_mul_f32 v7, s105, v255 ; encoding: [0x01,0x30,0x20,0xcf,0x69,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, ttmp15 :: v_dual_mul_f32 v7, vcc_lo, v255 +// GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_mul_f32 v7, vcc_lo, v255 ; encoding: [0x7b,0x30,0x20,0xcf,0x6a,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_hi :: v_dual_mul_f32 v7, vcc_hi, v255 +// GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_mul_f32 v7, vcc_hi, v255 ; encoding: [0x7f,0x30,0x20,0xcf,0x6b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_lo :: v_dual_mul_f32 v7, ttmp15, v255 +// GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_mul_f32 v7, ttmp15, v255 ; encoding: [0x7e,0x30,0x20,0xcf,0x7b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, m0 :: v_dual_mul_f32 v7, m0, v255 +// GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_mul_f32 v7, m0, v255 ; encoding: [0x7d,0x30,0x20,0xcf,0x7d,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_hi :: v_dual_mul_f32 v7, exec_lo, v255 +// GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_mul_f32 v7, exec_lo, v255 ; encoding: [0x6b,0x30,0x20,0xcf,0x7e,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_lo :: v_dual_mul_f32 v7, exec_hi, v255 +// GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_mul_f32 v7, exec_hi, v255 ; encoding: [0x6a,0x30,0x20,0xcf,0x7f,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 
src_scc :: v_dual_mul_f32 v7, -1, v255 +// GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_mul_f32 v7, -1, v255 ; encoding: [0xfd,0x30,0x20,0xcf,0xc1,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0.5 :: v_dual_mul_f32 v7, 0.5, v3 +// GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_mul_f32 v7, 0.5, v3 ; encoding: [0xf0,0x30,0x20,0xcf,0xf0,0x00,0x00,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, -1 :: v_dual_mul_f32 v7, src_scc, v4 +// GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_mul_f32 v7, src_scc, v4 ; encoding: [0xc1,0x30,0x20,0xcf,0xfd,0x00,0x00,0x00,0xff,0x04,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v4 :: v_dual_sub_f32 v7, v1, v255 +// GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_sub_f32 v7, v1, v255 ; encoding: [0x04,0x51,0x20,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v1 :: v_dual_sub_f32 v7, v255, v255 +// GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_sub_f32 v7, v255, v255 ; encoding: [0x01,0x51,0x20,0xcf,0xff,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v255 :: v_dual_sub_f32 v7, v2, v255 +// GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_sub_f32 v7, v2, v255 ; encoding: [0xff,0x51,0x20,0xcf,0x02,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v2 :: v_dual_sub_f32 v7, v3, v255 +// GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_sub_f32 v7, v3, v255 ; encoding: [0x02,0x51,0x20,0xcf,0x03,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v3 :: v_dual_sub_f32 v7, v4, v255 +// GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_sub_f32 v7, 
v4, v255 ; encoding: [0x03,0x51,0x20,0xcf,0x04,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s105 :: v_dual_sub_f32 v7, s1, v255 +// GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_sub_f32 v7, s1, v255 ; encoding: [0x69,0x50,0x20,0xcf,0x01,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s1 :: v_dual_sub_f32 v7, s105, v255 +// GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_sub_f32 v7, s105, v255 ; encoding: [0x01,0x50,0x20,0xcf,0x69,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, ttmp15 :: v_dual_sub_f32 v7, vcc_lo, v255 +// GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_sub_f32 v7, vcc_lo, v255 ; encoding: [0x7b,0x50,0x20,0xcf,0x6a,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_hi :: v_dual_sub_f32 v7, vcc_hi, v255 +// GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_sub_f32 v7, vcc_hi, v255 ; encoding: [0x7f,0x50,0x20,0xcf,0x6b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_lo :: v_dual_sub_f32 v7, ttmp15, v255 +// GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_sub_f32 v7, ttmp15, v255 ; encoding: [0x7e,0x50,0x20,0xcf,0x7b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, m0 :: v_dual_sub_f32 v7, m0, v255 +// GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_sub_f32 v7, m0, v255 ; encoding: [0x7d,0x50,0x20,0xcf,0x7d,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_hi :: v_dual_sub_f32 v7, exec_lo, v255 +// GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_sub_f32 v7, exec_lo, v255 ; encoding: 
[0x6b,0x50,0x20,0xcf,0x7e,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_lo :: v_dual_sub_f32 v7, exec_hi, v255 +// GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_sub_f32 v7, exec_hi, v255 ; encoding: [0x6a,0x50,0x20,0xcf,0x7f,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, src_scc :: v_dual_sub_f32 v7, -1, v255 +// GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_sub_f32 v7, -1, v255 ; encoding: [0xfd,0x50,0x20,0xcf,0xc1,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0.5 :: v_dual_sub_f32 v7, 0.5, v3 +// GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_sub_f32 v7, 0.5, v3 ; encoding: [0xf0,0x50,0x20,0xcf,0xf0,0x00,0x00,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, -1 :: v_dual_sub_f32 v7, src_scc, v4 +// GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_sub_f32 v7, src_scc, v4 ; encoding: [0xc1,0x50,0x20,0xcf,0xfd,0x00,0x00,0x00,0xff,0x04,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v4 :: v_dual_subrev_f32 v7, v1, v255 +// GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_subrev_f32 v7, v1, v255 ; encoding: [0x04,0x61,0x20,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v1 :: v_dual_subrev_f32 v7, v255, v255 +// GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_subrev_f32 v7, v255, v255 ; encoding: [0x01,0x61,0x20,0xcf,0xff,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v255 :: v_dual_subrev_f32 v7, v2, v255 +// GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_subrev_f32 v7, v2, v255 ; encoding: 
[0xff,0x61,0x20,0xcf,0x02,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v2 :: v_dual_subrev_f32 v7, v3, v255 +// GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_subrev_f32 v7, v3, v255 ; encoding: [0x02,0x61,0x20,0xcf,0x03,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v3 :: v_dual_subrev_f32 v7, v4, v255 +// GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_subrev_f32 v7, v4, v255 ; encoding: [0x03,0x61,0x20,0xcf,0x04,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s105 :: v_dual_subrev_f32 v7, s1, v255 +// GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_subrev_f32 v7, s1, v255 ; encoding: [0x69,0x60,0x20,0xcf,0x01,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s1 :: v_dual_subrev_f32 v7, s105, v255 +// GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_subrev_f32 v7, s105, v255 ; encoding: [0x01,0x60,0x20,0xcf,0x69,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, ttmp15 :: v_dual_subrev_f32 v7, vcc_lo, v255 +// GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_subrev_f32 v7, vcc_lo, v255 ; encoding: [0x7b,0x60,0x20,0xcf,0x6a,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_hi :: v_dual_subrev_f32 v7, vcc_hi, v255 +// GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_subrev_f32 v7, vcc_hi, v255 ; encoding: [0x7f,0x60,0x20,0xcf,0x6b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_lo :: v_dual_subrev_f32 v7, ttmp15, v255 +// GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_subrev_f32 v7, ttmp15, v255 ; encoding: 
[0x7e,0x60,0x20,0xcf,0x7b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, m0 :: v_dual_subrev_f32 v7, m0, v255 +// GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_subrev_f32 v7, m0, v255 ; encoding: [0x7d,0x60,0x20,0xcf,0x7d,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_hi :: v_dual_subrev_f32 v7, exec_lo, v255 +// GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_subrev_f32 v7, exec_lo, v255 ; encoding: [0x6b,0x60,0x20,0xcf,0x7e,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_lo :: v_dual_subrev_f32 v7, exec_hi, v255 +// GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_subrev_f32 v7, exec_hi, v255 ; encoding: [0x6a,0x60,0x20,0xcf,0x7f,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, src_scc :: v_dual_subrev_f32 v7, -1, v255 +// GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_subrev_f32 v7, -1, v255 ; encoding: [0xfd,0x60,0x20,0xcf,0xc1,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0.5 :: v_dual_subrev_f32 v7, 0.5, v3 +// GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_subrev_f32 v7, 0.5, v3 ; encoding: [0xf0,0x60,0x20,0xcf,0xf0,0x00,0x00,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, -1 :: v_dual_subrev_f32 v7, src_scc, v4 +// GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_subrev_f32 v7, src_scc, v4 ; encoding: [0xc1,0x60,0x20,0xcf,0xfd,0x00,0x00,0x00,0xff,0x04,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v4 :: v_dual_fma_f32 v7, v1, v3, v4 +// GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_fma_f32 v7, v1, v3, v4 ; encoding: 
[0x04,0x31,0x21,0xcf,0x01,0x01,0x00,0x00,0xff,0x03,0x04,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v4 :: v_dual_bitop2_b32 v7, v1, v3 bitop3:254 +// GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0xfe ; encoding: [0x04,0x21,0x21,0xcf,0x01,0x01,0x00,0x00,0xff,0x03,0xfe,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_add_f32 v7, v1, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_add_f32 v7, v1, v3 ; encoding: [0x04,0x41,0x1c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_add_f32 v7, v255, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_add_f32 v7, v255, v3 ; encoding: [0x01,0x41,0x1c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_add_f32 v7, v2, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_add_f32 v7, v2, v3 ; encoding: [0xff,0x41,0x1c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_add_f32 v7, v3, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_add_f32 v7, v3, v3 ; encoding: [0x02,0x41,0x1c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_add_f32 v7, v4, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_add_f32 v7, v4, v3 ; encoding: [0x03,0x41,0x1c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_add_f32 v7, s1, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 
v255, s105, v2 :: v_dual_add_f32 v7, s1, v3 ; encoding: [0x69,0x40,0x1c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_add_f32 v7, s105, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_add_f32 v7, s105, v3 ; encoding: [0x01,0x40,0x1c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_add_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_add_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x40,0x1c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_add_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_add_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x40,0x1c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_add_f32 v7, ttmp15, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_add_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x40,0x1c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_add_f32 v7, m0, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_add_f32 v7, m0, v3 ; encoding: [0x7d,0x40,0x1c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_add_f32 v7, exec_lo, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_add_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x40,0x1c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_add_f32 v7, exec_hi, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_add_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x1c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_add_f32 v7, -1, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_add_f32 v7, -1, v3 ; encoding: [0xfd,0x40,0x1c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_add_f32 v7, 0.5, v2 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_add_f32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x1c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_add_f32 v7, src_scc, v5 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_add_f32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x1c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_add_nc_u32 v7, v1, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_add_nc_u32 v7, v1, v3 ; encoding: [0x04,0x01,0x1d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_add_nc_u32 v7, v255, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_add_nc_u32 v7, v255, v3 ; encoding: [0x01,0x01,0x1d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_add_nc_u32 v7, v2, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_add_nc_u32 v7, v2, v3 ; encoding: 
[0xff,0x01,0x1d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_add_nc_u32 v7, v3, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_add_nc_u32 v7, v3, v3 ; encoding: [0x02,0x01,0x1d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_add_nc_u32 v7, v4, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_add_nc_u32 v7, v4, v3 ; encoding: [0x03,0x01,0x1d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_add_nc_u32 v7, s1, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_add_nc_u32 v7, s1, v3 ; encoding: [0x69,0x00,0x1d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_add_nc_u32 v7, s105, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_add_nc_u32 v7, s105, v3 ; encoding: [0x01,0x00,0x1d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v7, vcc_lo, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x1d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v7, vcc_hi, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x1d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 
:: v_dual_add_nc_u32 v7, ttmp15, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x1d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_add_nc_u32 v7, m0, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_add_nc_u32 v7, m0, v3 ; encoding: [0x7d,0x00,0x1d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v7, exec_lo, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x1d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v7, exec_hi, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x1d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_add_nc_u32 v7, -1, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_add_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x00,0x1d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_add_nc_u32 v7, 0.5, v2 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_add_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x1d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_add_nc_u32 v7, src_scc, v5 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_add_nc_u32 v7, src_scc, v5 ; encoding: 
[0xc1,0x00,0x1d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo ; encoding: [0x04,0x91,0x1c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_cndmask_b32 v7, v255, v3, vcc_lo +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_cndmask_b32 v7, v255, v3, vcc_lo ; encoding: [0x01,0x91,0x1c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo ; encoding: [0xff,0x91,0x1c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo ; encoding: [0x02,0x91,0x1c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo ; encoding: [0x03,0x91,0x1c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_cndmask_b32 v7, s105, v3, vcc_lo +// GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_cndmask_b32 v7, s105, v3, vcc_lo ; encoding: [0x69,0x90,0x1c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: 
error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_cndmask_b32 v7, s1, v3, vcc_lo +// GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_cndmask_b32 v7, s1, v3, vcc_lo ; encoding: [0x01,0x90,0x1c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v7, ttmp15, v3, vcc_lo +// GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v7, ttmp15, v3, vcc_lo ; encoding: [0x7b,0x90,0x1c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v7, exec_hi, v3, vcc_lo +// GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v7, exec_hi, v3, vcc_lo ; encoding: [0x7f,0x90,0x1c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v7, exec_lo, v3, vcc_lo +// GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v7, exec_lo, v3, vcc_lo ; encoding: [0x7e,0x90,0x1c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_cndmask_b32 v7, m0, v3, vcc_lo +// GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_cndmask_b32 v7, m0, v3, vcc_lo ; encoding: [0x7d,0x90,0x1c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v7, vcc_hi, v3, vcc_lo +// GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v7, vcc_hi, v3, vcc_lo ; encoding: [0x6b,0x90,0x1c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo +// GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo ; encoding: [0x6a,0x90,0x1c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo +// GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo ; encoding: [0xfd,0x90,0x1c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo +// GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo ; encoding: [0xf0,0x90,0x1c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo +// GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo ; encoding: [0xc1,0x90,0x1c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_fmac_f32 v7, v1, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_fmac_f32 v7, v1, v3 ; encoding: [0x04,0x01,0x1c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_fmac_f32 v7, v255, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_fmac_f32 v7, v255, v3 ; encoding: [0x01,0x01,0x1c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_fmac_f32 v7, v2, v3 +// GFX1250: 
v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_fmac_f32 v7, v2, v3 ; encoding: [0xff,0x01,0x1c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_fmac_f32 v7, v3, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_fmac_f32 v7, v3, v3 ; encoding: [0x02,0x01,0x1c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_fmac_f32 v7, v4, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_fmac_f32 v7, v4, v3 ; encoding: [0x03,0x01,0x1c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_fmac_f32 v7, s1, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_fmac_f32 v7, s1, v3 ; encoding: [0x69,0x00,0x1c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_fmac_f32 v7, s105, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_fmac_f32 v7, s105, v3 ; encoding: [0x01,0x00,0x1c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_fmac_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_fmac_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x1c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_fmac_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_fmac_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x1c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_fmac_f32 v7, ttmp15, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_fmac_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x1c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_fmac_f32 v7, m0, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_fmac_f32 v7, m0, v3 ; encoding: [0x7d,0x00,0x1c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v7, exec_lo, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x1c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v7, exec_hi, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x1c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_fmac_f32 v7, -1, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_fmac_f32 v7, -1, v3 ; encoding: [0xfd,0x00,0x1c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_fmac_f32 v7, 0.5, v2 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_fmac_f32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x1c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_fmac_f32 v7, src_scc, v5 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_fmac_f32 v7, src_scc, v5 ; 
encoding: [0xc1,0x00,0x1c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_lshlrev_b32 v7, v1, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_lshlrev_b32 v7, v1, v3 ; encoding: [0x04,0x11,0x1d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_lshlrev_b32 v7, v255, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_lshlrev_b32 v7, v255, v3 ; encoding: [0x01,0x11,0x1d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_lshlrev_b32 v7, v2, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_lshlrev_b32 v7, v2, v3 ; encoding: [0xff,0x11,0x1d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_lshlrev_b32 v7, v3, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_lshlrev_b32 v7, v3, v3 ; encoding: [0x02,0x11,0x1d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_lshlrev_b32 v7, v4, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_lshlrev_b32 v7, v4, v3 ; encoding: [0x03,0x11,0x1d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_lshlrev_b32 v7, s1, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_lshlrev_b32 v7, s1, v3 ; encoding: [0x69,0x10,0x1d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s1, v2 :: 
v_dual_lshlrev_b32 v7, s105, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_lshlrev_b32 v7, s105, v3 ; encoding: [0x01,0x10,0x1d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v7, vcc_lo, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v7, vcc_lo, v3 ; encoding: [0x7b,0x10,0x1d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v7, vcc_hi, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v7, vcc_hi, v3 ; encoding: [0x7f,0x10,0x1d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v7, ttmp15, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v7, ttmp15, v3 ; encoding: [0x7e,0x10,0x1d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_lshlrev_b32 v7, m0, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_lshlrev_b32 v7, m0, v3 ; encoding: [0x7d,0x10,0x1d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v7, exec_lo, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v7, exec_lo, v3 ; encoding: [0x6b,0x10,0x1d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v7, exec_hi, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v7, exec_hi, v3 
; encoding: [0x6a,0x10,0x1d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v7, -1, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v7, -1, v3 ; encoding: [0xfd,0x10,0x1d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v7, 0.5, v2 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x10,0x1d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_lshlrev_b32 v7, src_scc, v5 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_lshlrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x10,0x1d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_max_num_f32 v7, v1, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_max_num_f32 v7, v1, v3 ; encoding: [0x04,0xa1,0x1c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_max_num_f32 v7, v255, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_max_num_f32 v7, v255, v3 ; encoding: [0x01,0xa1,0x1c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_max_num_f32 v7, v2, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_max_num_f32 v7, v2, v3 ; encoding: [0xff,0xa1,0x1c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 
v255, v2, v2 :: v_dual_max_num_f32 v7, v3, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_max_num_f32 v7, v3, v3 ; encoding: [0x02,0xa1,0x1c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_max_num_f32 v7, v4, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_max_num_f32 v7, v4, v3 ; encoding: [0x03,0xa1,0x1c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_max_num_f32 v7, s1, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_max_num_f32 v7, s1, v3 ; encoding: [0x69,0xa0,0x1c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_max_num_f32 v7, s105, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_max_num_f32 v7, s105, v3 ; encoding: [0x01,0xa0,0x1c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_max_num_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_max_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xa0,0x1c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_max_num_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_max_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xa0,0x1c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_max_num_f32 v7, ttmp15, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_max_num_f32 v7, ttmp15, v3 ; encoding: 
[0x7e,0xa0,0x1c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_max_num_f32 v7, m0, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_max_num_f32 v7, m0, v3 ; encoding: [0x7d,0xa0,0x1c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v7, exec_lo, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xa0,0x1c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v7, exec_hi, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xa0,0x1c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_max_num_f32 v7, -1, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_max_num_f32 v7, -1, v3 ; encoding: [0xfd,0xa0,0x1c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_max_num_f32 v7, 0.5, v2 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_max_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xa0,0x1c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_max_num_f32 v7, src_scc, v5 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_max_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xa0,0x1c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_min_num_f32 v7, v1, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_min_num_f32 v7, v1, v3 ; encoding: [0x04,0xb1,0x1c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_min_num_f32 v7, v255, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_min_num_f32 v7, v255, v3 ; encoding: [0x01,0xb1,0x1c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_min_num_f32 v7, v2, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_min_num_f32 v7, v2, v3 ; encoding: [0xff,0xb1,0x1c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_min_num_f32 v7, v3, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_min_num_f32 v7, v3, v3 ; encoding: [0x02,0xb1,0x1c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_min_num_f32 v7, v4, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_min_num_f32 v7, v4, v3 ; encoding: [0x03,0xb1,0x1c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_min_num_f32 v7, s1, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_min_num_f32 v7, s1, v3 ; encoding: [0x69,0xb0,0x1c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_min_num_f32 v7, s105, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_min_num_f32 v7, s105, v3 ; encoding: 
[0x01,0xb0,0x1c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_min_num_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_min_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xb0,0x1c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_min_num_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_min_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xb0,0x1c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_min_num_f32 v7, ttmp15, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_min_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xb0,0x1c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_min_num_f32 v7, m0, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_min_num_f32 v7, m0, v3 ; encoding: [0x7d,0xb0,0x1c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v7, exec_lo, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xb0,0x1c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v7, exec_hi, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xb0,0x1c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction 
requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_min_num_f32 v7, -1, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_min_num_f32 v7, -1, v3 ; encoding: [0xfd,0xb0,0x1c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_min_num_f32 v7, 0.5, v2 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_min_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xb0,0x1c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_min_num_f32 v7, src_scc, v5 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_min_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xb0,0x1c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v4, v255 :: v_dual_mov_b32 v7, v1 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v255 :: v_dual_mov_b32 v7, v1 ; encoding: [0x04,0x81,0x1c,0xcf,0x01,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v1, v255 :: v_dual_mov_b32 v7, v255 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v255 :: v_dual_mov_b32 v7, v255 ; encoding: [0x01,0x81,0x1c,0xcf,0xff,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v255, v255 :: v_dual_mov_b32 v7, v2 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v255 :: v_dual_mov_b32 v7, v2 ; encoding: [0xff,0x81,0x1c,0xcf,0x02,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v2, v255 :: v_dual_mov_b32 v7, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v255 :: v_dual_mov_b32 v7, v3 ; encoding: 
[0x02,0x81,0x1c,0xcf,0x03,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v3, v255 :: v_dual_mov_b32 v7, v4 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v255 :: v_dual_mov_b32 v7, v4 ; encoding: [0x03,0x81,0x1c,0xcf,0x04,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s105, v255 :: v_dual_mov_b32 v7, s1 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v255 :: v_dual_mov_b32 v7, s1 ; encoding: [0x69,0x80,0x1c,0xcf,0x01,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s1, v255 :: v_dual_mov_b32 v7, s105 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v255 :: v_dual_mov_b32 v7, s105 ; encoding: [0x01,0x80,0x1c,0xcf,0x69,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, ttmp15, v255 :: v_dual_mov_b32 v7, vcc_lo +// GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v255 :: v_dual_mov_b32 v7, vcc_lo ; encoding: [0x7b,0x80,0x1c,0xcf,0x6a,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_hi, v255 :: v_dual_mov_b32 v7, vcc_hi +// GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v255 :: v_dual_mov_b32 v7, vcc_hi ; encoding: [0x7f,0x80,0x1c,0xcf,0x6b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_lo, v255 :: v_dual_mov_b32 v7, ttmp15 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v255 :: v_dual_mov_b32 v7, ttmp15 ; encoding: [0x7e,0x80,0x1c,0xcf,0x7b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, m0, v255 :: v_dual_mov_b32 v7, m0 +// GFX1250: 
v_dual_mul_dx9_zero_f32 v255, m0, v255 :: v_dual_mov_b32 v7, m0 ; encoding: [0x7d,0x80,0x1c,0xcf,0x7d,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_hi, v255 :: v_dual_mov_b32 v7, exec_lo +// GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v255 :: v_dual_mov_b32 v7, exec_lo ; encoding: [0x6b,0x80,0x1c,0xcf,0x7e,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_lo, v255 :: v_dual_mov_b32 v7, exec_hi +// GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v255 :: v_dual_mov_b32 v7, exec_hi ; encoding: [0x6a,0x80,0x1c,0xcf,0x7f,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, src_scc, v255 :: v_dual_mov_b32 v7, -1 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v255 :: v_dual_mov_b32 v7, -1 ; encoding: [0xfd,0x80,0x1c,0xcf,0xc1,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_mov_b32 v7, 0.5 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_mov_b32 v7, 0.5 ; encoding: [0xf0,0x80,0x1c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_mov_b32 v7, src_scc +// GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_mov_b32 v7, src_scc ; encoding: [0xc1,0x80,0x1c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v7, v1, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v7, v1, v3 ; encoding: [0x04,0x71,0x1c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v7, v255, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v7, v255, v3 ; encoding: [0x01,0x71,0x1c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v7, v2, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v7, v2, v3 ; encoding: [0xff,0x71,0x1c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v7, v3, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v7, v3, v3 ; encoding: [0x02,0x71,0x1c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v7, v4, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v7, v4, v3 ; encoding: [0x03,0x71,0x1c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v7, s1, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v7, s1, v3 ; encoding: [0x69,0x70,0x1c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v7, s105, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v7, s105, v3 ; encoding: [0x01,0x70,0x1c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 +// GFX1250: 
v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x70,0x1c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x70,0x1c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, ttmp15, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x70,0x1c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v7, m0, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v7, m0, v3 ; encoding: [0x7d,0x70,0x1c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_lo, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x70,0x1c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x1c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v7, -1, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: 
v_dual_mul_dx9_zero_f32 v7, -1, v3 ; encoding: [0xfd,0x70,0x1c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x1c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x1c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_mul_f32 v7, v1, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_mul_f32 v7, v1, v3 ; encoding: [0x04,0x31,0x1c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_mul_f32 v7, v255, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_mul_f32 v7, v255, v3 ; encoding: [0x01,0x31,0x1c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_mul_f32 v7, v2, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_mul_f32 v7, v2, v3 ; encoding: [0xff,0x31,0x1c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_mul_f32 v7, v3, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_mul_f32 v7, v3, v3 ; encoding: [0x02,0x31,0x1c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_mul_f32 v7, v4, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_mul_f32 v7, v4, v3 ; encoding: [0x03,0x31,0x1c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_mul_f32 v7, s1, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_mul_f32 v7, s1, v3 ; encoding: [0x69,0x30,0x1c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_mul_f32 v7, s105, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_mul_f32 v7, s105, v3 ; encoding: [0x01,0x30,0x1c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_mul_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_mul_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x30,0x1c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_mul_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_mul_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x30,0x1c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_mul_f32 v7, ttmp15, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_mul_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x30,0x1c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_mul_f32 v7, m0, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_mul_f32 v7, m0, v3 ; encoding: 
[0x7d,0x30,0x1c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_mul_f32 v7, exec_lo, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_mul_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x30,0x1c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_mul_f32 v7, exec_hi, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_mul_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x30,0x1c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_mul_f32 v7, -1, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_mul_f32 v7, -1, v3 ; encoding: [0xfd,0x30,0x1c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_mul_f32 v7, 0.5, v2 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_mul_f32 v7, 0.5, v2 ; encoding: [0xf0,0x30,0x1c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_mul_f32 v7, src_scc, v5 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_mul_f32 v7, src_scc, v5 ; encoding: [0xc1,0x30,0x1c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_sub_f32 v7, v1, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_sub_f32 v7, v1, v3 ; encoding: [0x04,0x51,0x1c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_sub_f32 v7, 
v255, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_sub_f32 v7, v255, v3 ; encoding: [0x01,0x51,0x1c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_sub_f32 v7, v2, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_sub_f32 v7, v2, v3 ; encoding: [0xff,0x51,0x1c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_sub_f32 v7, v3, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_sub_f32 v7, v3, v3 ; encoding: [0x02,0x51,0x1c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_sub_f32 v7, v4, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_sub_f32 v7, v4, v3 ; encoding: [0x03,0x51,0x1c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_sub_f32 v7, s1, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_sub_f32 v7, s1, v3 ; encoding: [0x69,0x50,0x1c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_sub_f32 v7, s105, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_sub_f32 v7, s105, v3 ; encoding: [0x01,0x50,0x1c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_sub_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_sub_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x50,0x1c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_sub_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_sub_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x50,0x1c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_sub_f32 v7, ttmp15, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_sub_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x50,0x1c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_sub_f32 v7, m0, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_sub_f32 v7, m0, v3 ; encoding: [0x7d,0x50,0x1c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_sub_f32 v7, exec_lo, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_sub_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x50,0x1c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_sub_f32 v7, exec_hi, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_sub_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x1c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_sub_f32 v7, -1, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_sub_f32 v7, -1, v3 ; encoding: [0xfd,0x50,0x1c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_sub_f32 v7, 0.5, v2 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_sub_f32 v7, 0.5, v2 ; 
encoding: [0xf0,0x50,0x1c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_sub_f32 v7, src_scc, v5 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_sub_f32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x1c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_subrev_f32 v7, v1, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_subrev_f32 v7, v1, v3 ; encoding: [0x04,0x61,0x1c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_subrev_f32 v7, v255, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_subrev_f32 v7, v255, v3 ; encoding: [0x01,0x61,0x1c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_subrev_f32 v7, v2, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_subrev_f32 v7, v2, v3 ; encoding: [0xff,0x61,0x1c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_subrev_f32 v7, v3, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_subrev_f32 v7, v3, v3 ; encoding: [0x02,0x61,0x1c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_subrev_f32 v7, v4, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_subrev_f32 v7, v4, v3 ; encoding: [0x03,0x61,0x1c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_subrev_f32 
v7, s1, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_subrev_f32 v7, s1, v3 ; encoding: [0x69,0x60,0x1c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_subrev_f32 v7, s105, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_subrev_f32 v7, s105, v3 ; encoding: [0x01,0x60,0x1c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_subrev_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_subrev_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x60,0x1c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_subrev_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_subrev_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x60,0x1c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_subrev_f32 v7, ttmp15, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_subrev_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x60,0x1c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_subrev_f32 v7, m0, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_subrev_f32 v7, m0, v3 ; encoding: [0x7d,0x60,0x1c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v7, exec_lo, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v7, exec_lo, v3 ; encoding: 
[0x6b,0x60,0x1c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v7, exec_hi, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x1c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_subrev_f32 v7, -1, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_subrev_f32 v7, -1, v3 ; encoding: [0xfd,0x60,0x1c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_subrev_f32 v7, 0.5, v2 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_subrev_f32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x1c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_subrev_f32 v7, src_scc, v5 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_subrev_f32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x1c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_fma_f32 v7, v1, v3, v4 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_fma_f32 v7, v1, v3, v4 ; encoding: [0x04,0x31,0x1d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x04,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0x11 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0x11 ; encoding: [0x04,0x21,0x1d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x11,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 
v255, v4, v2 :: v_dual_add_f32 v7, v1, v3 +// GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_add_f32 v7, v1, v3 ; encoding: [0x04,0x41,0x0c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v1, v2 :: v_dual_add_f32 v7, v255, v3 +// GFX1250: v_dual_mul_f32 v255, v1, v2 :: v_dual_add_f32 v7, v255, v3 ; encoding: [0x01,0x41,0x0c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v255, v2 :: v_dual_add_f32 v7, v2, v3 +// GFX1250: v_dual_mul_f32 v255, v255, v2 :: v_dual_add_f32 v7, v2, v3 ; encoding: [0xff,0x41,0x0c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v2, v2 :: v_dual_add_f32 v7, v3, v3 +// GFX1250: v_dual_mul_f32 v255, v2, v2 :: v_dual_add_f32 v7, v3, v3 ; encoding: [0x02,0x41,0x0c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v3, v2 :: v_dual_add_f32 v7, v4, v3 +// GFX1250: v_dual_mul_f32 v255, v3, v2 :: v_dual_add_f32 v7, v4, v3 ; encoding: [0x03,0x41,0x0c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s105, v2 :: v_dual_add_f32 v7, s1, v3 +// GFX1250: v_dual_mul_f32 v255, s105, v2 :: v_dual_add_f32 v7, s1, v3 ; encoding: [0x69,0x40,0x0c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s1, v2 :: v_dual_add_f32 v7, s105, v3 +// GFX1250: v_dual_mul_f32 v255, s1, v2 :: v_dual_add_f32 v7, s105, v3 ; encoding: [0x01,0x40,0x0c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_add_f32 v7, vcc_lo, v3 +// GFX1250: 
v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_add_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x40,0x0c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_add_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_add_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x40,0x0c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_add_f32 v7, ttmp15, v3 +// GFX1250: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_add_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x40,0x0c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, m0, v2 :: v_dual_add_f32 v7, m0, v3 +// GFX1250: v_dual_mul_f32 v255, m0, v2 :: v_dual_add_f32 v7, m0, v3 ; encoding: [0x7d,0x40,0x0c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_add_f32 v7, exec_lo, v3 +// GFX1250: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_add_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x40,0x0c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_add_f32 v7, exec_hi, v3 +// GFX1250: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_add_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x0c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, src_scc, v2 :: v_dual_add_f32 v7, -1, v3 +// GFX1250: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_add_f32 v7, -1, v3 ; encoding: [0xfd,0x40,0x0c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0.5, v3 :: v_dual_add_f32 v7, 0.5, v2 +// 
GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_add_f32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x0c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, -1, v4 :: v_dual_add_f32 v7, src_scc, v5 +// GFX1250: v_dual_mul_f32 v255, -1, v4 :: v_dual_add_f32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x0c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v4, v2 :: v_dual_add_nc_u32 v7, v1, v3 +// GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_add_nc_u32 v7, v1, v3 ; encoding: [0x04,0x01,0x0d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v1, v2 :: v_dual_add_nc_u32 v7, v255, v3 +// GFX1250: v_dual_mul_f32 v255, v1, v2 :: v_dual_add_nc_u32 v7, v255, v3 ; encoding: [0x01,0x01,0x0d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v255, v2 :: v_dual_add_nc_u32 v7, v2, v3 +// GFX1250: v_dual_mul_f32 v255, v255, v2 :: v_dual_add_nc_u32 v7, v2, v3 ; encoding: [0xff,0x01,0x0d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v2, v2 :: v_dual_add_nc_u32 v7, v3, v3 +// GFX1250: v_dual_mul_f32 v255, v2, v2 :: v_dual_add_nc_u32 v7, v3, v3 ; encoding: [0x02,0x01,0x0d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v3, v2 :: v_dual_add_nc_u32 v7, v4, v3 +// GFX1250: v_dual_mul_f32 v255, v3, v2 :: v_dual_add_nc_u32 v7, v4, v3 ; encoding: [0x03,0x01,0x0d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s105, v2 :: v_dual_add_nc_u32 v7, s1, v3 +// GFX1250: v_dual_mul_f32 v255, 
s105, v2 :: v_dual_add_nc_u32 v7, s1, v3 ; encoding: [0x69,0x00,0x0d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s1, v2 :: v_dual_add_nc_u32 v7, s105, v3 +// GFX1250: v_dual_mul_f32 v255, s1, v2 :: v_dual_add_nc_u32 v7, s105, v3 ; encoding: [0x01,0x00,0x0d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v7, vcc_lo, v3 +// GFX1250: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x0d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v7, vcc_hi, v3 +// GFX1250: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x0d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v7, ttmp15, v3 +// GFX1250: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x0d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, m0, v2 :: v_dual_add_nc_u32 v7, m0, v3 +// GFX1250: v_dual_mul_f32 v255, m0, v2 :: v_dual_add_nc_u32 v7, m0, v3 ; encoding: [0x7d,0x00,0x0d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v7, exec_lo, v3 +// GFX1250: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x0d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v7, 
exec_hi, v3 +// GFX1250: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x0d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, src_scc, v2 :: v_dual_add_nc_u32 v7, -1, v3 +// GFX1250: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_add_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x00,0x0d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0.5, v3 :: v_dual_add_nc_u32 v7, 0.5, v2 +// GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_add_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x0d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, -1, v4 :: v_dual_add_nc_u32 v7, src_scc, v5 +// GFX1250: v_dual_mul_f32 v255, -1, v4 :: v_dual_add_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x0d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo +// GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo ; encoding: [0x04,0x91,0x0c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v1, v2 :: v_dual_cndmask_b32 v7, v255, v3, vcc_lo +// GFX1250: v_dual_mul_f32 v255, v1, v2 :: v_dual_cndmask_b32 v7, v255, v3, vcc_lo ; encoding: [0x01,0x91,0x0c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v255, v2 :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo +// GFX1250: v_dual_mul_f32 v255, v255, v2 :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo ; encoding: [0xff,0x91,0x0c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_mul_f32 v255, v2, v2 :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo +// GFX1250: v_dual_mul_f32 v255, v2, v2 :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo ; encoding: [0x02,0x91,0x0c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v3, v2 :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo +// GFX1250: v_dual_mul_f32 v255, v3, v2 :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo ; encoding: [0x03,0x91,0x0c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s105, v2 :: v_dual_cndmask_b32 v7, s105, v3, vcc_lo +// GFX1250: v_dual_mul_f32 v255, s105, v2 :: v_dual_cndmask_b32 v7, s105, v3, vcc_lo ; encoding: [0x69,0x90,0x0c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s1, v2 :: v_dual_cndmask_b32 v7, s1, v3, vcc_lo +// GFX1250: v_dual_mul_f32 v255, s1, v2 :: v_dual_cndmask_b32 v7, s1, v3, vcc_lo ; encoding: [0x01,0x90,0x0c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v7, ttmp15, v3, vcc_lo +// GFX1250: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v7, ttmp15, v3, vcc_lo ; encoding: [0x7b,0x90,0x0c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v7, exec_hi, v3, vcc_lo +// GFX1250: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v7, exec_hi, v3, vcc_lo ; encoding: [0x7f,0x90,0x0c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v7, exec_lo, v3, vcc_lo +// GFX1250: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v7, exec_lo, v3, vcc_lo ; 
encoding: [0x7e,0x90,0x0c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, m0, v2 :: v_dual_cndmask_b32 v7, m0, v3, vcc_lo +// GFX1250: v_dual_mul_f32 v255, m0, v2 :: v_dual_cndmask_b32 v7, m0, v3, vcc_lo ; encoding: [0x7d,0x90,0x0c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v7, vcc_hi, v3, vcc_lo +// GFX1250: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v7, vcc_hi, v3, vcc_lo ; encoding: [0x6b,0x90,0x0c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo +// GFX1250: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo ; encoding: [0x6a,0x90,0x0c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, src_scc, v2 :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo +// GFX1250: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo ; encoding: [0xfd,0x90,0x0c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0.5, v3 :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo +// GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo ; encoding: [0xf0,0x90,0x0c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, -1, v4 :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo +// GFX1250: v_dual_mul_f32 v255, -1, v4 :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo ; encoding: [0xc1,0x90,0x0c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 
v255, v4, v2 :: v_dual_fmac_f32 v7, v1, v3 +// GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_fmac_f32 v7, v1, v3 ; encoding: [0x04,0x01,0x0c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v1, v2 :: v_dual_fmac_f32 v7, v255, v3 +// GFX1250: v_dual_mul_f32 v255, v1, v2 :: v_dual_fmac_f32 v7, v255, v3 ; encoding: [0x01,0x01,0x0c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v255, v2 :: v_dual_fmac_f32 v7, v2, v3 +// GFX1250: v_dual_mul_f32 v255, v255, v2 :: v_dual_fmac_f32 v7, v2, v3 ; encoding: [0xff,0x01,0x0c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v2, v2 :: v_dual_fmac_f32 v7, v3, v3 +// GFX1250: v_dual_mul_f32 v255, v2, v2 :: v_dual_fmac_f32 v7, v3, v3 ; encoding: [0x02,0x01,0x0c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v3, v2 :: v_dual_fmac_f32 v7, v4, v3 +// GFX1250: v_dual_mul_f32 v255, v3, v2 :: v_dual_fmac_f32 v7, v4, v3 ; encoding: [0x03,0x01,0x0c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s105, v2 :: v_dual_fmac_f32 v7, s1, v3 +// GFX1250: v_dual_mul_f32 v255, s105, v2 :: v_dual_fmac_f32 v7, s1, v3 ; encoding: [0x69,0x00,0x0c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s1, v2 :: v_dual_fmac_f32 v7, s105, v3 +// GFX1250: v_dual_mul_f32 v255, s1, v2 :: v_dual_fmac_f32 v7, s105, v3 ; encoding: [0x01,0x00,0x0c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_fmac_f32 v7, vcc_lo, v3 +// 
GFX1250: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_fmac_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x0c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_fmac_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_fmac_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x0c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_fmac_f32 v7, ttmp15, v3 +// GFX1250: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_fmac_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x0c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, m0, v2 :: v_dual_fmac_f32 v7, m0, v3 +// GFX1250: v_dual_mul_f32 v255, m0, v2 :: v_dual_fmac_f32 v7, m0, v3 ; encoding: [0x7d,0x00,0x0c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v7, exec_lo, v3 +// GFX1250: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x0c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v7, exec_hi, v3 +// GFX1250: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x0c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, src_scc, v2 :: v_dual_fmac_f32 v7, -1, v3 +// GFX1250: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_fmac_f32 v7, -1, v3 ; encoding: [0xfd,0x00,0x0c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0.5, v3 :: 
v_dual_fmac_f32 v7, 0.5, v2 +// GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_fmac_f32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x0c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, -1, v4 :: v_dual_fmac_f32 v7, src_scc, v5 +// GFX1250: v_dual_mul_f32 v255, -1, v4 :: v_dual_fmac_f32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x0c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v4, v2 :: v_dual_lshlrev_b32 v7, v1, v3 +// GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_lshlrev_b32 v7, v1, v3 ; encoding: [0x04,0x11,0x0d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v1, v2 :: v_dual_lshlrev_b32 v7, v255, v3 +// GFX1250: v_dual_mul_f32 v255, v1, v2 :: v_dual_lshlrev_b32 v7, v255, v3 ; encoding: [0x01,0x11,0x0d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v255, v2 :: v_dual_lshlrev_b32 v7, v2, v3 +// GFX1250: v_dual_mul_f32 v255, v255, v2 :: v_dual_lshlrev_b32 v7, v2, v3 ; encoding: [0xff,0x11,0x0d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v2, v2 :: v_dual_lshlrev_b32 v7, v3, v3 +// GFX1250: v_dual_mul_f32 v255, v2, v2 :: v_dual_lshlrev_b32 v7, v3, v3 ; encoding: [0x02,0x11,0x0d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v3, v2 :: v_dual_lshlrev_b32 v7, v4, v3 +// GFX1250: v_dual_mul_f32 v255, v3, v2 :: v_dual_lshlrev_b32 v7, v4, v3 ; encoding: [0x03,0x11,0x0d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s105, v2 :: v_dual_lshlrev_b32 
v7, s1, v3 +// GFX1250: v_dual_mul_f32 v255, s105, v2 :: v_dual_lshlrev_b32 v7, s1, v3 ; encoding: [0x69,0x10,0x0d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s1, v2 :: v_dual_lshlrev_b32 v7, s105, v3 +// GFX1250: v_dual_mul_f32 v255, s1, v2 :: v_dual_lshlrev_b32 v7, s105, v3 ; encoding: [0x01,0x10,0x0d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v7, vcc_lo, v3 +// GFX1250: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v7, vcc_lo, v3 ; encoding: [0x7b,0x10,0x0d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v7, vcc_hi, v3 +// GFX1250: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v7, vcc_hi, v3 ; encoding: [0x7f,0x10,0x0d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v7, ttmp15, v3 +// GFX1250: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v7, ttmp15, v3 ; encoding: [0x7e,0x10,0x0d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, m0, v2 :: v_dual_lshlrev_b32 v7, m0, v3 +// GFX1250: v_dual_mul_f32 v255, m0, v2 :: v_dual_lshlrev_b32 v7, m0, v3 ; encoding: [0x7d,0x10,0x0d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v7, exec_lo, v3 +// GFX1250: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v7, exec_lo, v3 ; encoding: [0x6b,0x10,0x0d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v7, exec_hi, v3 +// GFX1250: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x10,0x0d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v7, -1, v3 +// GFX1250: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v7, -1, v3 ; encoding: [0xfd,0x10,0x0d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v7, 0.5, v2 +// GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x10,0x0d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, -1, v4 :: v_dual_lshlrev_b32 v7, src_scc, v5 +// GFX1250: v_dual_mul_f32 v255, -1, v4 :: v_dual_lshlrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x10,0x0d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v4, v2 :: v_dual_max_num_f32 v7, v1, v3 +// GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_max_num_f32 v7, v1, v3 ; encoding: [0x04,0xa1,0x0c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v1, v2 :: v_dual_max_num_f32 v7, v255, v3 +// GFX1250: v_dual_mul_f32 v255, v1, v2 :: v_dual_max_num_f32 v7, v255, v3 ; encoding: [0x01,0xa1,0x0c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v255, v2 :: v_dual_max_num_f32 v7, v2, v3 +// GFX1250: v_dual_mul_f32 v255, v255, v2 :: v_dual_max_num_f32 v7, v2, v3 ; encoding: [0xff,0xa1,0x0c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction 
requires wavesize=32 + +v_dual_mul_f32 v255, v2, v2 :: v_dual_max_num_f32 v7, v3, v3 +// GFX1250: v_dual_mul_f32 v255, v2, v2 :: v_dual_max_num_f32 v7, v3, v3 ; encoding: [0x02,0xa1,0x0c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v3, v2 :: v_dual_max_num_f32 v7, v4, v3 +// GFX1250: v_dual_mul_f32 v255, v3, v2 :: v_dual_max_num_f32 v7, v4, v3 ; encoding: [0x03,0xa1,0x0c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s105, v2 :: v_dual_max_num_f32 v7, s1, v3 +// GFX1250: v_dual_mul_f32 v255, s105, v2 :: v_dual_max_num_f32 v7, s1, v3 ; encoding: [0x69,0xa0,0x0c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s1, v2 :: v_dual_max_num_f32 v7, s105, v3 +// GFX1250: v_dual_mul_f32 v255, s1, v2 :: v_dual_max_num_f32 v7, s105, v3 ; encoding: [0x01,0xa0,0x0c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_max_num_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_max_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xa0,0x0c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_max_num_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_max_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xa0,0x0c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_max_num_f32 v7, ttmp15, v3 +// GFX1250: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_max_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xa0,0x0c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, m0, v2 :: v_dual_max_num_f32 v7, m0, v3 +// GFX1250: v_dual_mul_f32 v255, m0, v2 :: v_dual_max_num_f32 v7, m0, v3 ; encoding: [0x7d,0xa0,0x0c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v7, exec_lo, v3 +// GFX1250: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xa0,0x0c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v7, exec_hi, v3 +// GFX1250: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xa0,0x0c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, src_scc, v2 :: v_dual_max_num_f32 v7, -1, v3 +// GFX1250: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_max_num_f32 v7, -1, v3 ; encoding: [0xfd,0xa0,0x0c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0.5, v3 :: v_dual_max_num_f32 v7, 0.5, v2 +// GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_max_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xa0,0x0c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, -1, v4 :: v_dual_max_num_f32 v7, src_scc, v5 +// GFX1250: v_dual_mul_f32 v255, -1, v4 :: v_dual_max_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xa0,0x0c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v4, v2 :: v_dual_min_num_f32 v7, v1, v3 +// GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_min_num_f32 v7, v1, v3 ; encoding: 
[0x04,0xb1,0x0c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v1, v2 :: v_dual_min_num_f32 v7, v255, v3 +// GFX1250: v_dual_mul_f32 v255, v1, v2 :: v_dual_min_num_f32 v7, v255, v3 ; encoding: [0x01,0xb1,0x0c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v255, v2 :: v_dual_min_num_f32 v7, v2, v3 +// GFX1250: v_dual_mul_f32 v255, v255, v2 :: v_dual_min_num_f32 v7, v2, v3 ; encoding: [0xff,0xb1,0x0c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v2, v2 :: v_dual_min_num_f32 v7, v3, v3 +// GFX1250: v_dual_mul_f32 v255, v2, v2 :: v_dual_min_num_f32 v7, v3, v3 ; encoding: [0x02,0xb1,0x0c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v3, v2 :: v_dual_min_num_f32 v7, v4, v3 +// GFX1250: v_dual_mul_f32 v255, v3, v2 :: v_dual_min_num_f32 v7, v4, v3 ; encoding: [0x03,0xb1,0x0c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s105, v2 :: v_dual_min_num_f32 v7, s1, v3 +// GFX1250: v_dual_mul_f32 v255, s105, v2 :: v_dual_min_num_f32 v7, s1, v3 ; encoding: [0x69,0xb0,0x0c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s1, v2 :: v_dual_min_num_f32 v7, s105, v3 +// GFX1250: v_dual_mul_f32 v255, s1, v2 :: v_dual_min_num_f32 v7, s105, v3 ; encoding: [0x01,0xb0,0x0c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_min_num_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_min_num_f32 v7, vcc_lo, v3 ; encoding: 
[0x7b,0xb0,0x0c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_min_num_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_min_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xb0,0x0c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_min_num_f32 v7, ttmp15, v3 +// GFX1250: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_min_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xb0,0x0c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, m0, v2 :: v_dual_min_num_f32 v7, m0, v3 +// GFX1250: v_dual_mul_f32 v255, m0, v2 :: v_dual_min_num_f32 v7, m0, v3 ; encoding: [0x7d,0xb0,0x0c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v7, exec_lo, v3 +// GFX1250: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xb0,0x0c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v7, exec_hi, v3 +// GFX1250: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xb0,0x0c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, src_scc, v2 :: v_dual_min_num_f32 v7, -1, v3 +// GFX1250: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_min_num_f32 v7, -1, v3 ; encoding: [0xfd,0xb0,0x0c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0.5, v3 :: v_dual_min_num_f32 v7, 0.5, v2 +// GFX1250: v_dual_mul_f32 v255, 
0.5, v3 :: v_dual_min_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xb0,0x0c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, -1, v4 :: v_dual_min_num_f32 v7, src_scc, v5 +// GFX1250: v_dual_mul_f32 v255, -1, v4 :: v_dual_min_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xb0,0x0c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v4, v255 :: v_dual_mov_b32 v7, v1 +// GFX1250: v_dual_mul_f32 v255, v4, v255 :: v_dual_mov_b32 v7, v1 ; encoding: [0x04,0x81,0x0c,0xcf,0x01,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v1, v255 :: v_dual_mov_b32 v7, v255 +// GFX1250: v_dual_mul_f32 v255, v1, v255 :: v_dual_mov_b32 v7, v255 ; encoding: [0x01,0x81,0x0c,0xcf,0xff,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v255, v255 :: v_dual_mov_b32 v7, v2 +// GFX1250: v_dual_mul_f32 v255, v255, v255 :: v_dual_mov_b32 v7, v2 ; encoding: [0xff,0x81,0x0c,0xcf,0x02,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v2, v255 :: v_dual_mov_b32 v7, v3 +// GFX1250: v_dual_mul_f32 v255, v2, v255 :: v_dual_mov_b32 v7, v3 ; encoding: [0x02,0x81,0x0c,0xcf,0x03,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v3, v255 :: v_dual_mov_b32 v7, v4 +// GFX1250: v_dual_mul_f32 v255, v3, v255 :: v_dual_mov_b32 v7, v4 ; encoding: [0x03,0x81,0x0c,0xcf,0x04,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s105, v255 :: v_dual_mov_b32 v7, s1 +// GFX1250: v_dual_mul_f32 v255, s105, v255 :: v_dual_mov_b32 v7, s1 ; encoding: 
[0x69,0x80,0x0c,0xcf,0x01,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s1, v255 :: v_dual_mov_b32 v7, s105 +// GFX1250: v_dual_mul_f32 v255, s1, v255 :: v_dual_mov_b32 v7, s105 ; encoding: [0x01,0x80,0x0c,0xcf,0x69,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, ttmp15, v255 :: v_dual_mov_b32 v7, vcc_lo +// GFX1250: v_dual_mul_f32 v255, ttmp15, v255 :: v_dual_mov_b32 v7, vcc_lo ; encoding: [0x7b,0x80,0x0c,0xcf,0x6a,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_hi, v255 :: v_dual_mov_b32 v7, vcc_hi +// GFX1250: v_dual_mul_f32 v255, exec_hi, v255 :: v_dual_mov_b32 v7, vcc_hi ; encoding: [0x7f,0x80,0x0c,0xcf,0x6b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_lo, v255 :: v_dual_mov_b32 v7, ttmp15 +// GFX1250: v_dual_mul_f32 v255, exec_lo, v255 :: v_dual_mov_b32 v7, ttmp15 ; encoding: [0x7e,0x80,0x0c,0xcf,0x7b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, m0, v255 :: v_dual_mov_b32 v7, m0 +// GFX1250: v_dual_mul_f32 v255, m0, v255 :: v_dual_mov_b32 v7, m0 ; encoding: [0x7d,0x80,0x0c,0xcf,0x7d,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_hi, v255 :: v_dual_mov_b32 v7, exec_lo +// GFX1250: v_dual_mul_f32 v255, vcc_hi, v255 :: v_dual_mov_b32 v7, exec_lo ; encoding: [0x6b,0x80,0x0c,0xcf,0x7e,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_lo, v255 :: v_dual_mov_b32 v7, exec_hi +// GFX1250: v_dual_mul_f32 v255, vcc_lo, v255 :: v_dual_mov_b32 v7, exec_hi ; encoding: 
[0x6a,0x80,0x0c,0xcf,0x7f,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, src_scc, v255 :: v_dual_mov_b32 v7, -1 +// GFX1250: v_dual_mul_f32 v255, src_scc, v255 :: v_dual_mov_b32 v7, -1 ; encoding: [0xfd,0x80,0x0c,0xcf,0xc1,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0.5, v3 :: v_dual_mov_b32 v7, 0.5 +// GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_mov_b32 v7, 0.5 ; encoding: [0xf0,0x80,0x0c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, -1, v4 :: v_dual_mov_b32 v7, src_scc +// GFX1250: v_dual_mul_f32 v255, -1, v4 :: v_dual_mov_b32 v7, src_scc ; encoding: [0xc1,0x80,0x0c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v7, v1, v3 +// GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v7, v1, v3 ; encoding: [0x04,0x71,0x0c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v7, v255, v3 +// GFX1250: v_dual_mul_f32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v7, v255, v3 ; encoding: [0x01,0x71,0x0c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v7, v2, v3 +// GFX1250: v_dual_mul_f32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v7, v2, v3 ; encoding: [0xff,0x71,0x0c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v7, v3, v3 +// GFX1250: v_dual_mul_f32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v7, v3, v3 ; encoding: 
[0x02,0x71,0x0c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v7, v4, v3 +// GFX1250: v_dual_mul_f32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v7, v4, v3 ; encoding: [0x03,0x71,0x0c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v7, s1, v3 +// GFX1250: v_dual_mul_f32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v7, s1, v3 ; encoding: [0x69,0x70,0x0c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v7, s105, v3 +// GFX1250: v_dual_mul_f32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v7, s105, v3 ; encoding: [0x01,0x70,0x0c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x70,0x0c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x70,0x0c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, ttmp15, v3 +// GFX1250: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x70,0x0c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v7, 
m0, v3 +// GFX1250: v_dual_mul_f32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v7, m0, v3 ; encoding: [0x7d,0x70,0x0c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_lo, v3 +// GFX1250: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x70,0x0c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 +// GFX1250: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x0c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v7, -1, v3 +// GFX1250: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v7, -1, v3 ; encoding: [0xfd,0x70,0x0c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 +// GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x0c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 +// GFX1250: v_dual_mul_f32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x0c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v4, v2 :: v_dual_mul_f32 v7, v1, v3 +// GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_mul_f32 v7, v1, v3 ; encoding: [0x04,0x31,0x0c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction 
requires wavesize=32 + +v_dual_mul_f32 v255, v1, v2 :: v_dual_mul_f32 v7, v255, v3 +// GFX1250: v_dual_mul_f32 v255, v1, v2 :: v_dual_mul_f32 v7, v255, v3 ; encoding: [0x01,0x31,0x0c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v255, v2 :: v_dual_mul_f32 v7, v2, v3 +// GFX1250: v_dual_mul_f32 v255, v255, v2 :: v_dual_mul_f32 v7, v2, v3 ; encoding: [0xff,0x31,0x0c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v2, v2 :: v_dual_mul_f32 v7, v3, v3 +// GFX1250: v_dual_mul_f32 v255, v2, v2 :: v_dual_mul_f32 v7, v3, v3 ; encoding: [0x02,0x31,0x0c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v3, v2 :: v_dual_mul_f32 v7, v4, v3 +// GFX1250: v_dual_mul_f32 v255, v3, v2 :: v_dual_mul_f32 v7, v4, v3 ; encoding: [0x03,0x31,0x0c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s105, v2 :: v_dual_mul_f32 v7, s1, v3 +// GFX1250: v_dual_mul_f32 v255, s105, v2 :: v_dual_mul_f32 v7, s1, v3 ; encoding: [0x69,0x30,0x0c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s1, v2 :: v_dual_mul_f32 v7, s105, v3 +// GFX1250: v_dual_mul_f32 v255, s1, v2 :: v_dual_mul_f32 v7, s105, v3 ; encoding: [0x01,0x30,0x0c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_mul_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_mul_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x30,0x0c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_hi, v2 
:: v_dual_mul_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_mul_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x30,0x0c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_mul_f32 v7, ttmp15, v3 +// GFX1250: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_mul_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x30,0x0c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, m0, v2 :: v_dual_mul_f32 v7, m0, v3 +// GFX1250: v_dual_mul_f32 v255, m0, v2 :: v_dual_mul_f32 v7, m0, v3 ; encoding: [0x7d,0x30,0x0c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_mul_f32 v7, exec_lo, v3 +// GFX1250: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_mul_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x30,0x0c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_mul_f32 v7, exec_hi, v3 +// GFX1250: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_mul_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x30,0x0c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, src_scc, v2 :: v_dual_mul_f32 v7, -1, v3 +// GFX1250: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_mul_f32 v7, -1, v3 ; encoding: [0xfd,0x30,0x0c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0.5, v3 :: v_dual_mul_f32 v7, 0.5, v2 +// GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_mul_f32 v7, 0.5, v2 ; encoding: [0xf0,0x30,0x0c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, -1, v4 :: 
v_dual_mul_f32 v7, src_scc, v5 +// GFX1250: v_dual_mul_f32 v255, -1, v4 :: v_dual_mul_f32 v7, src_scc, v5 ; encoding: [0xc1,0x30,0x0c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v4, v2 :: v_dual_sub_f32 v7, v1, v3 +// GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_sub_f32 v7, v1, v3 ; encoding: [0x04,0x51,0x0c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v1, v2 :: v_dual_sub_f32 v7, v255, v3 +// GFX1250: v_dual_mul_f32 v255, v1, v2 :: v_dual_sub_f32 v7, v255, v3 ; encoding: [0x01,0x51,0x0c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v255, v2 :: v_dual_sub_f32 v7, v2, v3 +// GFX1250: v_dual_mul_f32 v255, v255, v2 :: v_dual_sub_f32 v7, v2, v3 ; encoding: [0xff,0x51,0x0c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v2, v2 :: v_dual_sub_f32 v7, v3, v3 +// GFX1250: v_dual_mul_f32 v255, v2, v2 :: v_dual_sub_f32 v7, v3, v3 ; encoding: [0x02,0x51,0x0c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v3, v2 :: v_dual_sub_f32 v7, v4, v3 +// GFX1250: v_dual_mul_f32 v255, v3, v2 :: v_dual_sub_f32 v7, v4, v3 ; encoding: [0x03,0x51,0x0c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s105, v2 :: v_dual_sub_f32 v7, s1, v3 +// GFX1250: v_dual_mul_f32 v255, s105, v2 :: v_dual_sub_f32 v7, s1, v3 ; encoding: [0x69,0x50,0x0c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s1, v2 :: v_dual_sub_f32 v7, s105, v3 +// GFX1250: v_dual_mul_f32 v255, s1, 
v2 :: v_dual_sub_f32 v7, s105, v3 ; encoding: [0x01,0x50,0x0c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_sub_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_sub_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x50,0x0c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_sub_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_sub_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x50,0x0c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_sub_f32 v7, ttmp15, v3 +// GFX1250: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_sub_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x50,0x0c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, m0, v2 :: v_dual_sub_f32 v7, m0, v3 +// GFX1250: v_dual_mul_f32 v255, m0, v2 :: v_dual_sub_f32 v7, m0, v3 ; encoding: [0x7d,0x50,0x0c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_sub_f32 v7, exec_lo, v3 +// GFX1250: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_sub_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x50,0x0c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_sub_f32 v7, exec_hi, v3 +// GFX1250: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_sub_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x0c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, src_scc, v2 :: v_dual_sub_f32 v7, -1, v3 +// GFX1250: v_dual_mul_f32 
v255, src_scc, v2 :: v_dual_sub_f32 v7, -1, v3 ; encoding: [0xfd,0x50,0x0c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0.5, v3 :: v_dual_sub_f32 v7, 0.5, v2 +// GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_sub_f32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x0c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, -1, v4 :: v_dual_sub_f32 v7, src_scc, v5 +// GFX1250: v_dual_mul_f32 v255, -1, v4 :: v_dual_sub_f32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x0c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v4, v2 :: v_dual_subrev_f32 v7, v1, v3 +// GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_subrev_f32 v7, v1, v3 ; encoding: [0x04,0x61,0x0c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v1, v2 :: v_dual_subrev_f32 v7, v255, v3 +// GFX1250: v_dual_mul_f32 v255, v1, v2 :: v_dual_subrev_f32 v7, v255, v3 ; encoding: [0x01,0x61,0x0c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v255, v2 :: v_dual_subrev_f32 v7, v2, v3 +// GFX1250: v_dual_mul_f32 v255, v255, v2 :: v_dual_subrev_f32 v7, v2, v3 ; encoding: [0xff,0x61,0x0c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v2, v2 :: v_dual_subrev_f32 v7, v3, v3 +// GFX1250: v_dual_mul_f32 v255, v2, v2 :: v_dual_subrev_f32 v7, v3, v3 ; encoding: [0x02,0x61,0x0c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v3, v2 :: v_dual_subrev_f32 v7, v4, v3 +// GFX1250: v_dual_mul_f32 v255, v3, v2 :: v_dual_subrev_f32 
v7, v4, v3 ; encoding: [0x03,0x61,0x0c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s105, v2 :: v_dual_subrev_f32 v7, s1, v3 +// GFX1250: v_dual_mul_f32 v255, s105, v2 :: v_dual_subrev_f32 v7, s1, v3 ; encoding: [0x69,0x60,0x0c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s1, v2 :: v_dual_subrev_f32 v7, s105, v3 +// GFX1250: v_dual_mul_f32 v255, s1, v2 :: v_dual_subrev_f32 v7, s105, v3 ; encoding: [0x01,0x60,0x0c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_subrev_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_subrev_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x60,0x0c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_subrev_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_subrev_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x60,0x0c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_subrev_f32 v7, ttmp15, v3 +// GFX1250: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_subrev_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x60,0x0c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, m0, v2 :: v_dual_subrev_f32 v7, m0, v3 +// GFX1250: v_dual_mul_f32 v255, m0, v2 :: v_dual_subrev_f32 v7, m0, v3 ; encoding: [0x7d,0x60,0x0c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v7, exec_lo, v3 +// GFX1250: v_dual_mul_f32 v255, 
vcc_hi, v2 :: v_dual_subrev_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x60,0x0c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v7, exec_hi, v3 +// GFX1250: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x0c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, src_scc, v2 :: v_dual_subrev_f32 v7, -1, v3 +// GFX1250: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_subrev_f32 v7, -1, v3 ; encoding: [0xfd,0x60,0x0c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0.5, v3 :: v_dual_subrev_f32 v7, 0.5, v2 +// GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_subrev_f32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x0c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, -1, v4 :: v_dual_subrev_f32 v7, src_scc, v5 +// GFX1250: v_dual_mul_f32 v255, -1, v4 :: v_dual_subrev_f32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x0c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v4, v2 :: v_dual_fma_f32 v7, v1, v3, v4 +// GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_fma_f32 v7, v1, v3, v4 ; encoding: [0x04,0x31,0x0d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x04,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v4, v2 :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0x71 +// GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0x71 ; encoding: [0x04,0x21,0x0d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x71,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v4, v2 :: v_dual_add_f32 v7, v1, v3 +// 
GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_add_f32 v7, v1, v3 ; encoding: [0x04,0x41,0x14,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v1, v2 :: v_dual_add_f32 v7, v255, v3 +// GFX1250: v_dual_sub_f32 v255, v1, v2 :: v_dual_add_f32 v7, v255, v3 ; encoding: [0x01,0x41,0x14,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v255, v2 :: v_dual_add_f32 v7, v2, v3 +// GFX1250: v_dual_sub_f32 v255, v255, v2 :: v_dual_add_f32 v7, v2, v3 ; encoding: [0xff,0x41,0x14,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v2, v2 :: v_dual_add_f32 v7, v3, v3 +// GFX1250: v_dual_sub_f32 v255, v2, v2 :: v_dual_add_f32 v7, v3, v3 ; encoding: [0x02,0x41,0x14,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v3, v2 :: v_dual_add_f32 v7, v4, v3 +// GFX1250: v_dual_sub_f32 v255, v3, v2 :: v_dual_add_f32 v7, v4, v3 ; encoding: [0x03,0x41,0x14,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s105, v2 :: v_dual_add_f32 v7, s1, v3 +// GFX1250: v_dual_sub_f32 v255, s105, v2 :: v_dual_add_f32 v7, s1, v3 ; encoding: [0x69,0x40,0x14,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s1, v2 :: v_dual_add_f32 v7, s105, v3 +// GFX1250: v_dual_sub_f32 v255, s1, v2 :: v_dual_add_f32 v7, s105, v3 ; encoding: [0x01,0x40,0x14,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_add_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_add_f32 v7, 
vcc_lo, v3 ; encoding: [0x7b,0x40,0x14,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_add_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_add_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x40,0x14,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_add_f32 v7, ttmp15, v3 +// GFX1250: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_add_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x40,0x14,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, m0, v2 :: v_dual_add_f32 v7, m0, v3 +// GFX1250: v_dual_sub_f32 v255, m0, v2 :: v_dual_add_f32 v7, m0, v3 ; encoding: [0x7d,0x40,0x14,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_add_f32 v7, exec_lo, v3 +// GFX1250: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_add_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x40,0x14,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_add_f32 v7, exec_hi, v3 +// GFX1250: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_add_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x14,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, src_scc, v2 :: v_dual_add_f32 v7, -1, v3 +// GFX1250: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_add_f32 v7, -1, v3 ; encoding: [0xfd,0x40,0x14,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0.5, v3 :: v_dual_add_f32 v7, 0.5, v2 +// GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_add_f32 v7, 
0.5, v2 ; encoding: [0xf0,0x40,0x14,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, -1, v4 :: v_dual_add_f32 v7, src_scc, v5 +// GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_add_f32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x14,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v4, v2 :: v_dual_add_nc_u32 v7, v1, v3 +// GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_add_nc_u32 v7, v1, v3 ; encoding: [0x04,0x01,0x15,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v1, v2 :: v_dual_add_nc_u32 v7, v255, v3 +// GFX1250: v_dual_sub_f32 v255, v1, v2 :: v_dual_add_nc_u32 v7, v255, v3 ; encoding: [0x01,0x01,0x15,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v255, v2 :: v_dual_add_nc_u32 v7, v2, v3 +// GFX1250: v_dual_sub_f32 v255, v255, v2 :: v_dual_add_nc_u32 v7, v2, v3 ; encoding: [0xff,0x01,0x15,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v2, v2 :: v_dual_add_nc_u32 v7, v3, v3 +// GFX1250: v_dual_sub_f32 v255, v2, v2 :: v_dual_add_nc_u32 v7, v3, v3 ; encoding: [0x02,0x01,0x15,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v3, v2 :: v_dual_add_nc_u32 v7, v4, v3 +// GFX1250: v_dual_sub_f32 v255, v3, v2 :: v_dual_add_nc_u32 v7, v4, v3 ; encoding: [0x03,0x01,0x15,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s105, v2 :: v_dual_add_nc_u32 v7, s1, v3 +// GFX1250: v_dual_sub_f32 v255, s105, v2 :: v_dual_add_nc_u32 v7, s1, v3 ; encoding: 
[0x69,0x00,0x15,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s1, v2 :: v_dual_add_nc_u32 v7, s105, v3 +// GFX1250: v_dual_sub_f32 v255, s1, v2 :: v_dual_add_nc_u32 v7, s105, v3 ; encoding: [0x01,0x00,0x15,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v7, vcc_lo, v3 +// GFX1250: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x15,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v7, vcc_hi, v3 +// GFX1250: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x15,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v7, ttmp15, v3 +// GFX1250: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x15,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, m0, v2 :: v_dual_add_nc_u32 v7, m0, v3 +// GFX1250: v_dual_sub_f32 v255, m0, v2 :: v_dual_add_nc_u32 v7, m0, v3 ; encoding: [0x7d,0x00,0x15,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v7, exec_lo, v3 +// GFX1250: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x15,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v7, exec_hi, v3 +// GFX1250: v_dual_sub_f32 v255, vcc_lo, v2 :: 
v_dual_add_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x15,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, src_scc, v2 :: v_dual_add_nc_u32 v7, -1, v3 +// GFX1250: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_add_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x00,0x15,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0.5, v3 :: v_dual_add_nc_u32 v7, 0.5, v2 +// GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_add_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x15,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, -1, v4 :: v_dual_add_nc_u32 v7, src_scc, v5 +// GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_add_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x15,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo +// GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo ; encoding: [0x04,0x91,0x14,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v1, v2 :: v_dual_cndmask_b32 v7, v255, v3, vcc_lo +// GFX1250: v_dual_sub_f32 v255, v1, v2 :: v_dual_cndmask_b32 v7, v255, v3, vcc_lo ; encoding: [0x01,0x91,0x14,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v255, v2 :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo +// GFX1250: v_dual_sub_f32 v255, v255, v2 :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo ; encoding: [0xff,0x91,0x14,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v2, v2 :: v_dual_cndmask_b32 v7, v3, 
v3, vcc_lo +// GFX1250: v_dual_sub_f32 v255, v2, v2 :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo ; encoding: [0x02,0x91,0x14,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v3, v2 :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo +// GFX1250: v_dual_sub_f32 v255, v3, v2 :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo ; encoding: [0x03,0x91,0x14,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s105, v2 :: v_dual_cndmask_b32 v7, s105, v3, vcc_lo +// GFX1250: v_dual_sub_f32 v255, s105, v2 :: v_dual_cndmask_b32 v7, s105, v3, vcc_lo ; encoding: [0x69,0x90,0x14,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s1, v2 :: v_dual_cndmask_b32 v7, s1, v3, vcc_lo +// GFX1250: v_dual_sub_f32 v255, s1, v2 :: v_dual_cndmask_b32 v7, s1, v3, vcc_lo ; encoding: [0x01,0x90,0x14,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v7, ttmp15, v3, vcc_lo +// GFX1250: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v7, ttmp15, v3, vcc_lo ; encoding: [0x7b,0x90,0x14,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v7, exec_hi, v3, vcc_lo +// GFX1250: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v7, exec_hi, v3, vcc_lo ; encoding: [0x7f,0x90,0x14,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v7, exec_lo, v3, vcc_lo +// GFX1250: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v7, exec_lo, v3, vcc_lo ; encoding: 
[0x7e,0x90,0x14,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, m0, v2 :: v_dual_cndmask_b32 v7, m0, v3, vcc_lo +// GFX1250: v_dual_sub_f32 v255, m0, v2 :: v_dual_cndmask_b32 v7, m0, v3, vcc_lo ; encoding: [0x7d,0x90,0x14,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v7, vcc_hi, v3, vcc_lo +// GFX1250: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v7, vcc_hi, v3, vcc_lo ; encoding: [0x6b,0x90,0x14,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo +// GFX1250: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo ; encoding: [0x6a,0x90,0x14,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, src_scc, v2 :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo +// GFX1250: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo ; encoding: [0xfd,0x90,0x14,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0.5, v3 :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo +// GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo ; encoding: [0xf0,0x90,0x14,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, -1, v4 :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo +// GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo ; encoding: [0xc1,0x90,0x14,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v4, v2 
:: v_dual_fmac_f32 v7, v1, v3 +// GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_fmac_f32 v7, v1, v3 ; encoding: [0x04,0x01,0x14,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v1, v2 :: v_dual_fmac_f32 v7, v255, v3 +// GFX1250: v_dual_sub_f32 v255, v1, v2 :: v_dual_fmac_f32 v7, v255, v3 ; encoding: [0x01,0x01,0x14,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v255, v2 :: v_dual_fmac_f32 v7, v2, v3 +// GFX1250: v_dual_sub_f32 v255, v255, v2 :: v_dual_fmac_f32 v7, v2, v3 ; encoding: [0xff,0x01,0x14,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v2, v2 :: v_dual_fmac_f32 v7, v3, v3 +// GFX1250: v_dual_sub_f32 v255, v2, v2 :: v_dual_fmac_f32 v7, v3, v3 ; encoding: [0x02,0x01,0x14,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v3, v2 :: v_dual_fmac_f32 v7, v4, v3 +// GFX1250: v_dual_sub_f32 v255, v3, v2 :: v_dual_fmac_f32 v7, v4, v3 ; encoding: [0x03,0x01,0x14,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s105, v2 :: v_dual_fmac_f32 v7, s1, v3 +// GFX1250: v_dual_sub_f32 v255, s105, v2 :: v_dual_fmac_f32 v7, s1, v3 ; encoding: [0x69,0x00,0x14,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s1, v2 :: v_dual_fmac_f32 v7, s105, v3 +// GFX1250: v_dual_sub_f32 v255, s1, v2 :: v_dual_fmac_f32 v7, s105, v3 ; encoding: [0x01,0x00,0x14,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_fmac_f32 v7, vcc_lo, v3 +// GFX1250: 
v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_fmac_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x14,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_fmac_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_fmac_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x14,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_fmac_f32 v7, ttmp15, v3 +// GFX1250: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_fmac_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x14,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, m0, v2 :: v_dual_fmac_f32 v7, m0, v3 +// GFX1250: v_dual_sub_f32 v255, m0, v2 :: v_dual_fmac_f32 v7, m0, v3 ; encoding: [0x7d,0x00,0x14,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v7, exec_lo, v3 +// GFX1250: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x14,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v7, exec_hi, v3 +// GFX1250: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x14,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, src_scc, v2 :: v_dual_fmac_f32 v7, -1, v3 +// GFX1250: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_fmac_f32 v7, -1, v3 ; encoding: [0xfd,0x00,0x14,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0.5, v3 :: v_dual_fmac_f32 v7, 
0.5, v2 +// GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_fmac_f32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x14,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, -1, v4 :: v_dual_fmac_f32 v7, src_scc, v5 +// GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_fmac_f32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x14,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v4, v2 :: v_dual_lshlrev_b32 v7, v1, v3 +// GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_lshlrev_b32 v7, v1, v3 ; encoding: [0x04,0x11,0x15,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v1, v2 :: v_dual_lshlrev_b32 v7, v255, v3 +// GFX1250: v_dual_sub_f32 v255, v1, v2 :: v_dual_lshlrev_b32 v7, v255, v3 ; encoding: [0x01,0x11,0x15,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v255, v2 :: v_dual_lshlrev_b32 v7, v2, v3 +// GFX1250: v_dual_sub_f32 v255, v255, v2 :: v_dual_lshlrev_b32 v7, v2, v3 ; encoding: [0xff,0x11,0x15,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v2, v2 :: v_dual_lshlrev_b32 v7, v3, v3 +// GFX1250: v_dual_sub_f32 v255, v2, v2 :: v_dual_lshlrev_b32 v7, v3, v3 ; encoding: [0x02,0x11,0x15,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v3, v2 :: v_dual_lshlrev_b32 v7, v4, v3 +// GFX1250: v_dual_sub_f32 v255, v3, v2 :: v_dual_lshlrev_b32 v7, v4, v3 ; encoding: [0x03,0x11,0x15,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s105, v2 :: v_dual_lshlrev_b32 v7, s1, v3 +// 
GFX1250: v_dual_sub_f32 v255, s105, v2 :: v_dual_lshlrev_b32 v7, s1, v3 ; encoding: [0x69,0x10,0x15,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s1, v2 :: v_dual_lshlrev_b32 v7, s105, v3 +// GFX1250: v_dual_sub_f32 v255, s1, v2 :: v_dual_lshlrev_b32 v7, s105, v3 ; encoding: [0x01,0x10,0x15,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v7, vcc_lo, v3 +// GFX1250: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v7, vcc_lo, v3 ; encoding: [0x7b,0x10,0x15,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v7, vcc_hi, v3 +// GFX1250: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v7, vcc_hi, v3 ; encoding: [0x7f,0x10,0x15,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v7, ttmp15, v3 +// GFX1250: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v7, ttmp15, v3 ; encoding: [0x7e,0x10,0x15,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, m0, v2 :: v_dual_lshlrev_b32 v7, m0, v3 +// GFX1250: v_dual_sub_f32 v255, m0, v2 :: v_dual_lshlrev_b32 v7, m0, v3 ; encoding: [0x7d,0x10,0x15,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v7, exec_lo, v3 +// GFX1250: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v7, exec_lo, v3 ; encoding: [0x6b,0x10,0x15,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 
vcc_lo, v2 :: v_dual_lshlrev_b32 v7, exec_hi, v3 +// GFX1250: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x10,0x15,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v7, -1, v3 +// GFX1250: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v7, -1, v3 ; encoding: [0xfd,0x10,0x15,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v7, 0.5, v2 +// GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x10,0x15,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, -1, v4 :: v_dual_lshlrev_b32 v7, src_scc, v5 +// GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_lshlrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x10,0x15,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v4, v2 :: v_dual_max_num_f32 v7, v1, v3 +// GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_max_num_f32 v7, v1, v3 ; encoding: [0x04,0xa1,0x14,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v1, v2 :: v_dual_max_num_f32 v7, v255, v3 +// GFX1250: v_dual_sub_f32 v255, v1, v2 :: v_dual_max_num_f32 v7, v255, v3 ; encoding: [0x01,0xa1,0x14,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v255, v2 :: v_dual_max_num_f32 v7, v2, v3 +// GFX1250: v_dual_sub_f32 v255, v255, v2 :: v_dual_max_num_f32 v7, v2, v3 ; encoding: [0xff,0xa1,0x14,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_sub_f32 v255, v2, v2 :: v_dual_max_num_f32 v7, v3, v3 +// GFX1250: v_dual_sub_f32 v255, v2, v2 :: v_dual_max_num_f32 v7, v3, v3 ; encoding: [0x02,0xa1,0x14,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v3, v2 :: v_dual_max_num_f32 v7, v4, v3 +// GFX1250: v_dual_sub_f32 v255, v3, v2 :: v_dual_max_num_f32 v7, v4, v3 ; encoding: [0x03,0xa1,0x14,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s105, v2 :: v_dual_max_num_f32 v7, s1, v3 +// GFX1250: v_dual_sub_f32 v255, s105, v2 :: v_dual_max_num_f32 v7, s1, v3 ; encoding: [0x69,0xa0,0x14,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s1, v2 :: v_dual_max_num_f32 v7, s105, v3 +// GFX1250: v_dual_sub_f32 v255, s1, v2 :: v_dual_max_num_f32 v7, s105, v3 ; encoding: [0x01,0xa0,0x14,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_max_num_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_max_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xa0,0x14,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_max_num_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_max_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xa0,0x14,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_max_num_f32 v7, ttmp15, v3 +// GFX1250: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_max_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xa0,0x14,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_sub_f32 v255, m0, v2 :: v_dual_max_num_f32 v7, m0, v3 +// GFX1250: v_dual_sub_f32 v255, m0, v2 :: v_dual_max_num_f32 v7, m0, v3 ; encoding: [0x7d,0xa0,0x14,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v7, exec_lo, v3 +// GFX1250: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xa0,0x14,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v7, exec_hi, v3 +// GFX1250: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xa0,0x14,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, src_scc, v2 :: v_dual_max_num_f32 v7, -1, v3 +// GFX1250: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_max_num_f32 v7, -1, v3 ; encoding: [0xfd,0xa0,0x14,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0.5, v3 :: v_dual_max_num_f32 v7, 0.5, v2 +// GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_max_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xa0,0x14,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, -1, v4 :: v_dual_max_num_f32 v7, src_scc, v5 +// GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_max_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xa0,0x14,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v4, v2 :: v_dual_min_num_f32 v7, v1, v3 +// GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_min_num_f32 v7, v1, v3 ; encoding: [0x04,0xb1,0x14,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// 
W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v1, v2 :: v_dual_min_num_f32 v7, v255, v3 +// GFX1250: v_dual_sub_f32 v255, v1, v2 :: v_dual_min_num_f32 v7, v255, v3 ; encoding: [0x01,0xb1,0x14,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v255, v2 :: v_dual_min_num_f32 v7, v2, v3 +// GFX1250: v_dual_sub_f32 v255, v255, v2 :: v_dual_min_num_f32 v7, v2, v3 ; encoding: [0xff,0xb1,0x14,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v2, v2 :: v_dual_min_num_f32 v7, v3, v3 +// GFX1250: v_dual_sub_f32 v255, v2, v2 :: v_dual_min_num_f32 v7, v3, v3 ; encoding: [0x02,0xb1,0x14,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v3, v2 :: v_dual_min_num_f32 v7, v4, v3 +// GFX1250: v_dual_sub_f32 v255, v3, v2 :: v_dual_min_num_f32 v7, v4, v3 ; encoding: [0x03,0xb1,0x14,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s105, v2 :: v_dual_min_num_f32 v7, s1, v3 +// GFX1250: v_dual_sub_f32 v255, s105, v2 :: v_dual_min_num_f32 v7, s1, v3 ; encoding: [0x69,0xb0,0x14,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s1, v2 :: v_dual_min_num_f32 v7, s105, v3 +// GFX1250: v_dual_sub_f32 v255, s1, v2 :: v_dual_min_num_f32 v7, s105, v3 ; encoding: [0x01,0xb0,0x14,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_min_num_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_min_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xb0,0x14,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// 
W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_min_num_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_min_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xb0,0x14,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_min_num_f32 v7, ttmp15, v3 +// GFX1250: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_min_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xb0,0x14,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, m0, v2 :: v_dual_min_num_f32 v7, m0, v3 +// GFX1250: v_dual_sub_f32 v255, m0, v2 :: v_dual_min_num_f32 v7, m0, v3 ; encoding: [0x7d,0xb0,0x14,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v7, exec_lo, v3 +// GFX1250: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xb0,0x14,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v7, exec_hi, v3 +// GFX1250: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xb0,0x14,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, src_scc, v2 :: v_dual_min_num_f32 v7, -1, v3 +// GFX1250: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_min_num_f32 v7, -1, v3 ; encoding: [0xfd,0xb0,0x14,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0.5, v3 :: v_dual_min_num_f32 v7, 0.5, v2 +// GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_min_num_f32 v7, 0.5, v2 ; encoding: 
[0xf0,0xb0,0x14,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, -1, v4 :: v_dual_min_num_f32 v7, src_scc, v5 +// GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_min_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xb0,0x14,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v4, v255 :: v_dual_mov_b32 v7, v1 +// GFX1250: v_dual_sub_f32 v255, v4, v255 :: v_dual_mov_b32 v7, v1 ; encoding: [0x04,0x81,0x14,0xcf,0x01,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v1, v255 :: v_dual_mov_b32 v7, v255 +// GFX1250: v_dual_sub_f32 v255, v1, v255 :: v_dual_mov_b32 v7, v255 ; encoding: [0x01,0x81,0x14,0xcf,0xff,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v255, v255 :: v_dual_mov_b32 v7, v2 +// GFX1250: v_dual_sub_f32 v255, v255, v255 :: v_dual_mov_b32 v7, v2 ; encoding: [0xff,0x81,0x14,0xcf,0x02,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v2, v255 :: v_dual_mov_b32 v7, v3 +// GFX1250: v_dual_sub_f32 v255, v2, v255 :: v_dual_mov_b32 v7, v3 ; encoding: [0x02,0x81,0x14,0xcf,0x03,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v3, v255 :: v_dual_mov_b32 v7, v4 +// GFX1250: v_dual_sub_f32 v255, v3, v255 :: v_dual_mov_b32 v7, v4 ; encoding: [0x03,0x81,0x14,0xcf,0x04,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s105, v255 :: v_dual_mov_b32 v7, s1 +// GFX1250: v_dual_sub_f32 v255, s105, v255 :: v_dual_mov_b32 v7, s1 ; encoding: [0x69,0x80,0x14,0xcf,0x01,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s1, v255 :: v_dual_mov_b32 v7, s105 +// GFX1250: v_dual_sub_f32 v255, s1, v255 :: v_dual_mov_b32 v7, s105 ; encoding: [0x01,0x80,0x14,0xcf,0x69,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, ttmp15, v255 :: v_dual_mov_b32 v7, vcc_lo +// GFX1250: v_dual_sub_f32 v255, ttmp15, v255 :: v_dual_mov_b32 v7, vcc_lo ; encoding: [0x7b,0x80,0x14,0xcf,0x6a,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_hi, v255 :: v_dual_mov_b32 v7, vcc_hi +// GFX1250: v_dual_sub_f32 v255, exec_hi, v255 :: v_dual_mov_b32 v7, vcc_hi ; encoding: [0x7f,0x80,0x14,0xcf,0x6b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_lo, v255 :: v_dual_mov_b32 v7, ttmp15 +// GFX1250: v_dual_sub_f32 v255, exec_lo, v255 :: v_dual_mov_b32 v7, ttmp15 ; encoding: [0x7e,0x80,0x14,0xcf,0x7b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, m0, v255 :: v_dual_mov_b32 v7, m0 +// GFX1250: v_dual_sub_f32 v255, m0, v255 :: v_dual_mov_b32 v7, m0 ; encoding: [0x7d,0x80,0x14,0xcf,0x7d,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_hi, v255 :: v_dual_mov_b32 v7, exec_lo +// GFX1250: v_dual_sub_f32 v255, vcc_hi, v255 :: v_dual_mov_b32 v7, exec_lo ; encoding: [0x6b,0x80,0x14,0xcf,0x7e,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_lo, v255 :: v_dual_mov_b32 v7, exec_hi +// GFX1250: v_dual_sub_f32 v255, vcc_lo, v255 :: v_dual_mov_b32 v7, exec_hi ; encoding: [0x6a,0x80,0x14,0xcf,0x7f,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_sub_f32 v255, src_scc, v255 :: v_dual_mov_b32 v7, -1 +// GFX1250: v_dual_sub_f32 v255, src_scc, v255 :: v_dual_mov_b32 v7, -1 ; encoding: [0xfd,0x80,0x14,0xcf,0xc1,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0.5, v3 :: v_dual_mov_b32 v7, 0.5 +// GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_mov_b32 v7, 0.5 ; encoding: [0xf0,0x80,0x14,0xcf,0xf0,0x00,0x03,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, -1, v4 :: v_dual_mov_b32 v7, src_scc +// GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_mov_b32 v7, src_scc ; encoding: [0xc1,0x80,0x14,0xcf,0xfd,0x00,0x04,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v7, v1, v3 +// GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v7, v1, v3 ; encoding: [0x04,0x71,0x14,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v7, v255, v3 +// GFX1250: v_dual_sub_f32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v7, v255, v3 ; encoding: [0x01,0x71,0x14,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v7, v2, v3 +// GFX1250: v_dual_sub_f32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v7, v2, v3 ; encoding: [0xff,0x71,0x14,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v7, v3, v3 +// GFX1250: v_dual_sub_f32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v7, v3, v3 ; encoding: [0x02,0x71,0x14,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_sub_f32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v7, v4, v3 +// GFX1250: v_dual_sub_f32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v7, v4, v3 ; encoding: [0x03,0x71,0x14,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v7, s1, v3 +// GFX1250: v_dual_sub_f32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v7, s1, v3 ; encoding: [0x69,0x70,0x14,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v7, s105, v3 +// GFX1250: v_dual_sub_f32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v7, s105, v3 ; encoding: [0x01,0x70,0x14,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x70,0x14,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x70,0x14,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, ttmp15, v3 +// GFX1250: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x70,0x14,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v7, m0, v3 +// GFX1250: v_dual_sub_f32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v7, m0, v3 ; encoding: 
[0x7d,0x70,0x14,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_lo, v3 +// GFX1250: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x70,0x14,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 +// GFX1250: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x14,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v7, -1, v3 +// GFX1250: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v7, -1, v3 ; encoding: [0xfd,0x70,0x14,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 +// GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x14,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 +// GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x14,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v4, v2 :: v_dual_mul_f32 v7, v1, v3 +// GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_mul_f32 v7, v1, v3 ; encoding: [0x04,0x31,0x14,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v1, v2 :: v_dual_mul_f32 v7, v255, v3 +// GFX1250: 
v_dual_sub_f32 v255, v1, v2 :: v_dual_mul_f32 v7, v255, v3 ; encoding: [0x01,0x31,0x14,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v255, v2 :: v_dual_mul_f32 v7, v2, v3 +// GFX1250: v_dual_sub_f32 v255, v255, v2 :: v_dual_mul_f32 v7, v2, v3 ; encoding: [0xff,0x31,0x14,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v2, v2 :: v_dual_mul_f32 v7, v3, v3 +// GFX1250: v_dual_sub_f32 v255, v2, v2 :: v_dual_mul_f32 v7, v3, v3 ; encoding: [0x02,0x31,0x14,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v3, v2 :: v_dual_mul_f32 v7, v4, v3 +// GFX1250: v_dual_sub_f32 v255, v3, v2 :: v_dual_mul_f32 v7, v4, v3 ; encoding: [0x03,0x31,0x14,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s105, v2 :: v_dual_mul_f32 v7, s1, v3 +// GFX1250: v_dual_sub_f32 v255, s105, v2 :: v_dual_mul_f32 v7, s1, v3 ; encoding: [0x69,0x30,0x14,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s1, v2 :: v_dual_mul_f32 v7, s105, v3 +// GFX1250: v_dual_sub_f32 v255, s1, v2 :: v_dual_mul_f32 v7, s105, v3 ; encoding: [0x01,0x30,0x14,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_mul_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_mul_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x30,0x14,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_mul_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_sub_f32 v255, exec_hi, v2 :: 
v_dual_mul_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x30,0x14,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_mul_f32 v7, ttmp15, v3 +// GFX1250: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_mul_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x30,0x14,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, m0, v2 :: v_dual_mul_f32 v7, m0, v3 +// GFX1250: v_dual_sub_f32 v255, m0, v2 :: v_dual_mul_f32 v7, m0, v3 ; encoding: [0x7d,0x30,0x14,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_mul_f32 v7, exec_lo, v3 +// GFX1250: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_mul_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x30,0x14,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_mul_f32 v7, exec_hi, v3 +// GFX1250: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_mul_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x30,0x14,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, src_scc, v2 :: v_dual_mul_f32 v7, -1, v3 +// GFX1250: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_mul_f32 v7, -1, v3 ; encoding: [0xfd,0x30,0x14,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0.5, v3 :: v_dual_mul_f32 v7, 0.5, v2 +// GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_mul_f32 v7, 0.5, v2 ; encoding: [0xf0,0x30,0x14,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, -1, v4 :: v_dual_mul_f32 v7, src_scc, v5 +// GFX1250: v_dual_sub_f32 v255, -1, v4 :: 
v_dual_mul_f32 v7, src_scc, v5 ; encoding: [0xc1,0x30,0x14,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v4, v2 :: v_dual_sub_f32 v7, v1, v3 +// GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_sub_f32 v7, v1, v3 ; encoding: [0x04,0x51,0x14,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v1, v2 :: v_dual_sub_f32 v7, v255, v3 +// GFX1250: v_dual_sub_f32 v255, v1, v2 :: v_dual_sub_f32 v7, v255, v3 ; encoding: [0x01,0x51,0x14,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v255, v2 :: v_dual_sub_f32 v7, v2, v3 +// GFX1250: v_dual_sub_f32 v255, v255, v2 :: v_dual_sub_f32 v7, v2, v3 ; encoding: [0xff,0x51,0x14,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v2, v2 :: v_dual_sub_f32 v7, v3, v3 +// GFX1250: v_dual_sub_f32 v255, v2, v2 :: v_dual_sub_f32 v7, v3, v3 ; encoding: [0x02,0x51,0x14,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v3, v2 :: v_dual_sub_f32 v7, v4, v3 +// GFX1250: v_dual_sub_f32 v255, v3, v2 :: v_dual_sub_f32 v7, v4, v3 ; encoding: [0x03,0x51,0x14,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s105, v2 :: v_dual_sub_f32 v7, s1, v3 +// GFX1250: v_dual_sub_f32 v255, s105, v2 :: v_dual_sub_f32 v7, s1, v3 ; encoding: [0x69,0x50,0x14,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s1, v2 :: v_dual_sub_f32 v7, s105, v3 +// GFX1250: v_dual_sub_f32 v255, s1, v2 :: v_dual_sub_f32 v7, s105, v3 ; encoding: 
[0x01,0x50,0x14,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_sub_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_sub_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x50,0x14,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_sub_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_sub_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x50,0x14,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_sub_f32 v7, ttmp15, v3 +// GFX1250: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_sub_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x50,0x14,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, m0, v2 :: v_dual_sub_f32 v7, m0, v3 +// GFX1250: v_dual_sub_f32 v255, m0, v2 :: v_dual_sub_f32 v7, m0, v3 ; encoding: [0x7d,0x50,0x14,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_sub_f32 v7, exec_lo, v3 +// GFX1250: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_sub_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x50,0x14,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_sub_f32 v7, exec_hi, v3 +// GFX1250: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_sub_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x14,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, src_scc, v2 :: v_dual_sub_f32 v7, -1, v3 +// GFX1250: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_sub_f32 v7, -1, v3 ; 
encoding: [0xfd,0x50,0x14,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0.5, v3 :: v_dual_sub_f32 v7, 0.5, v2 +// GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_sub_f32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x14,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, -1, v4 :: v_dual_sub_f32 v7, src_scc, v5 +// GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_sub_f32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x14,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v4, v2 :: v_dual_subrev_f32 v7, v1, v3 +// GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_subrev_f32 v7, v1, v3 ; encoding: [0x04,0x61,0x14,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v1, v2 :: v_dual_subrev_f32 v7, v255, v3 +// GFX1250: v_dual_sub_f32 v255, v1, v2 :: v_dual_subrev_f32 v7, v255, v3 ; encoding: [0x01,0x61,0x14,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v255, v2 :: v_dual_subrev_f32 v7, v2, v3 +// GFX1250: v_dual_sub_f32 v255, v255, v2 :: v_dual_subrev_f32 v7, v2, v3 ; encoding: [0xff,0x61,0x14,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v2, v2 :: v_dual_subrev_f32 v7, v3, v3 +// GFX1250: v_dual_sub_f32 v255, v2, v2 :: v_dual_subrev_f32 v7, v3, v3 ; encoding: [0x02,0x61,0x14,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v3, v2 :: v_dual_subrev_f32 v7, v4, v3 +// GFX1250: v_dual_sub_f32 v255, v3, v2 :: v_dual_subrev_f32 v7, v4, v3 ; encoding: 
[0x03,0x61,0x14,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s105, v2 :: v_dual_subrev_f32 v7, s1, v3 +// GFX1250: v_dual_sub_f32 v255, s105, v2 :: v_dual_subrev_f32 v7, s1, v3 ; encoding: [0x69,0x60,0x14,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s1, v2 :: v_dual_subrev_f32 v7, s105, v3 +// GFX1250: v_dual_sub_f32 v255, s1, v2 :: v_dual_subrev_f32 v7, s105, v3 ; encoding: [0x01,0x60,0x14,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_subrev_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_subrev_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x60,0x14,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_subrev_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_subrev_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x60,0x14,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_subrev_f32 v7, ttmp15, v3 +// GFX1250: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_subrev_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x60,0x14,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, m0, v2 :: v_dual_subrev_f32 v7, m0, v3 +// GFX1250: v_dual_sub_f32 v255, m0, v2 :: v_dual_subrev_f32 v7, m0, v3 ; encoding: [0x7d,0x60,0x14,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v7, exec_lo, v3 +// GFX1250: v_dual_sub_f32 v255, vcc_hi, v2 :: 
v_dual_subrev_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x60,0x14,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v7, exec_hi, v3 +// GFX1250: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x14,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, src_scc, v2 :: v_dual_subrev_f32 v7, -1, v3 +// GFX1250: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_subrev_f32 v7, -1, v3 ; encoding: [0xfd,0x60,0x14,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0.5, v3 :: v_dual_subrev_f32 v7, 0.5, v2 +// GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_subrev_f32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x14,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, -1, v4 :: v_dual_subrev_f32 v7, src_scc, v5 +// GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_subrev_f32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x14,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v4, v2 :: v_dual_fma_f32 v7, v1, v3, v4 +// GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_fma_f32 v7, v1, v3, v4 ; encoding: [0x04,0x31,0x15,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x04,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v4, v2 :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0x82 +// GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0x82 ; encoding: [0x04,0x21,0x15,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x82,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v4, v2 :: v_dual_add_f32 v7, v1, v3 +// GFX1250: 
v_dual_subrev_f32 v255, v4, v2 :: v_dual_add_f32 v7, v1, v3 ; encoding: [0x04,0x41,0x18,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v1, v2 :: v_dual_add_f32 v7, v255, v3 +// GFX1250: v_dual_subrev_f32 v255, v1, v2 :: v_dual_add_f32 v7, v255, v3 ; encoding: [0x01,0x41,0x18,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v255, v2 :: v_dual_add_f32 v7, v2, v3 +// GFX1250: v_dual_subrev_f32 v255, v255, v2 :: v_dual_add_f32 v7, v2, v3 ; encoding: [0xff,0x41,0x18,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v2, v2 :: v_dual_add_f32 v7, v3, v3 +// GFX1250: v_dual_subrev_f32 v255, v2, v2 :: v_dual_add_f32 v7, v3, v3 ; encoding: [0x02,0x41,0x18,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v3, v2 :: v_dual_add_f32 v7, v4, v3 +// GFX1250: v_dual_subrev_f32 v255, v3, v2 :: v_dual_add_f32 v7, v4, v3 ; encoding: [0x03,0x41,0x18,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s105, v2 :: v_dual_add_f32 v7, s1, v3 +// GFX1250: v_dual_subrev_f32 v255, s105, v2 :: v_dual_add_f32 v7, s1, v3 ; encoding: [0x69,0x40,0x18,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s1, v2 :: v_dual_add_f32 v7, s105, v3 +// GFX1250: v_dual_subrev_f32 v255, s1, v2 :: v_dual_add_f32 v7, s105, v3 ; encoding: [0x01,0x40,0x18,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_add_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_subrev_f32 
v255, ttmp15, v2 :: v_dual_add_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x40,0x18,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_add_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_add_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x40,0x18,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_add_f32 v7, ttmp15, v3 +// GFX1250: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_add_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x40,0x18,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, m0, v2 :: v_dual_add_f32 v7, m0, v3 +// GFX1250: v_dual_subrev_f32 v255, m0, v2 :: v_dual_add_f32 v7, m0, v3 ; encoding: [0x7d,0x40,0x18,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_add_f32 v7, exec_lo, v3 +// GFX1250: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_add_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x40,0x18,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_add_f32 v7, exec_hi, v3 +// GFX1250: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_add_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x18,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_add_f32 v7, -1, v3 +// GFX1250: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_add_f32 v7, -1, v3 ; encoding: [0xfd,0x40,0x18,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0.5, v3 :: 
v_dual_add_f32 v7, 0.5, v2 +// GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_add_f32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x18,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, -1, v4 :: v_dual_add_f32 v7, src_scc, v5 +// GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_add_f32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x18,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v4, v2 :: v_dual_add_nc_u32 v7, v1, v3 +// GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_add_nc_u32 v7, v1, v3 ; encoding: [0x04,0x01,0x19,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v1, v2 :: v_dual_add_nc_u32 v7, v255, v3 +// GFX1250: v_dual_subrev_f32 v255, v1, v2 :: v_dual_add_nc_u32 v7, v255, v3 ; encoding: [0x01,0x01,0x19,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v255, v2 :: v_dual_add_nc_u32 v7, v2, v3 +// GFX1250: v_dual_subrev_f32 v255, v255, v2 :: v_dual_add_nc_u32 v7, v2, v3 ; encoding: [0xff,0x01,0x19,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v2, v2 :: v_dual_add_nc_u32 v7, v3, v3 +// GFX1250: v_dual_subrev_f32 v255, v2, v2 :: v_dual_add_nc_u32 v7, v3, v3 ; encoding: [0x02,0x01,0x19,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v3, v2 :: v_dual_add_nc_u32 v7, v4, v3 +// GFX1250: v_dual_subrev_f32 v255, v3, v2 :: v_dual_add_nc_u32 v7, v4, v3 ; encoding: [0x03,0x01,0x19,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s105, 
v2 :: v_dual_add_nc_u32 v7, s1, v3 +// GFX1250: v_dual_subrev_f32 v255, s105, v2 :: v_dual_add_nc_u32 v7, s1, v3 ; encoding: [0x69,0x00,0x19,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s1, v2 :: v_dual_add_nc_u32 v7, s105, v3 +// GFX1250: v_dual_subrev_f32 v255, s1, v2 :: v_dual_add_nc_u32 v7, s105, v3 ; encoding: [0x01,0x00,0x19,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v7, vcc_lo, v3 +// GFX1250: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x19,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v7, vcc_hi, v3 +// GFX1250: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x19,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v7, ttmp15, v3 +// GFX1250: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x19,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, m0, v2 :: v_dual_add_nc_u32 v7, m0, v3 +// GFX1250: v_dual_subrev_f32 v255, m0, v2 :: v_dual_add_nc_u32 v7, m0, v3 ; encoding: [0x7d,0x00,0x19,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v7, exec_lo, v3 +// GFX1250: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x19,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: 
error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v7, exec_hi, v3 +// GFX1250: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x19,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_add_nc_u32 v7, -1, v3 +// GFX1250: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_add_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x00,0x19,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_add_nc_u32 v7, 0.5, v2 +// GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_add_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x19,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, -1, v4 :: v_dual_add_nc_u32 v7, src_scc, v5 +// GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_add_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x19,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo +// GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo ; encoding: [0x04,0x91,0x18,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v1, v2 :: v_dual_cndmask_b32 v7, v255, v3, vcc_lo +// GFX1250: v_dual_subrev_f32 v255, v1, v2 :: v_dual_cndmask_b32 v7, v255, v3, vcc_lo ; encoding: [0x01,0x91,0x18,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v255, v2 :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo +// GFX1250: v_dual_subrev_f32 v255, v255, v2 :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo ; 
encoding: [0xff,0x91,0x18,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v2, v2 :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo +// GFX1250: v_dual_subrev_f32 v255, v2, v2 :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo ; encoding: [0x02,0x91,0x18,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v3, v2 :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo +// GFX1250: v_dual_subrev_f32 v255, v3, v2 :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo ; encoding: [0x03,0x91,0x18,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s105, v2 :: v_dual_cndmask_b32 v7, s105, v3, vcc_lo +// GFX1250: v_dual_subrev_f32 v255, s105, v2 :: v_dual_cndmask_b32 v7, s105, v3, vcc_lo ; encoding: [0x69,0x90,0x18,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s1, v2 :: v_dual_cndmask_b32 v7, s1, v3, vcc_lo +// GFX1250: v_dual_subrev_f32 v255, s1, v2 :: v_dual_cndmask_b32 v7, s1, v3, vcc_lo ; encoding: [0x01,0x90,0x18,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v7, ttmp15, v3, vcc_lo +// GFX1250: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v7, ttmp15, v3, vcc_lo ; encoding: [0x7b,0x90,0x18,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v7, exec_hi, v3, vcc_lo +// GFX1250: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v7, exec_hi, v3, vcc_lo ; encoding: [0x7f,0x90,0x18,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v7, exec_lo, v3, vcc_lo +// GFX1250: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v7, exec_lo, v3, vcc_lo ; encoding: [0x7e,0x90,0x18,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, m0, v2 :: v_dual_cndmask_b32 v7, m0, v3, vcc_lo +// GFX1250: v_dual_subrev_f32 v255, m0, v2 :: v_dual_cndmask_b32 v7, m0, v3, vcc_lo ; encoding: [0x7d,0x90,0x18,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v7, vcc_hi, v3, vcc_lo +// GFX1250: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v7, vcc_hi, v3, vcc_lo ; encoding: [0x6b,0x90,0x18,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo +// GFX1250: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo ; encoding: [0x6a,0x90,0x18,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo +// GFX1250: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo ; encoding: [0xfd,0x90,0x18,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo +// GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo ; encoding: [0xf0,0x90,0x18,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, -1, v4 :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo +// GFX1250: 
v_dual_subrev_f32 v255, -1, v4 :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo ; encoding: [0xc1,0x90,0x18,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v4, v2 :: v_dual_fmac_f32 v7, v1, v3 +// GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_fmac_f32 v7, v1, v3 ; encoding: [0x04,0x01,0x18,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v1, v2 :: v_dual_fmac_f32 v7, v255, v3 +// GFX1250: v_dual_subrev_f32 v255, v1, v2 :: v_dual_fmac_f32 v7, v255, v3 ; encoding: [0x01,0x01,0x18,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v255, v2 :: v_dual_fmac_f32 v7, v2, v3 +// GFX1250: v_dual_subrev_f32 v255, v255, v2 :: v_dual_fmac_f32 v7, v2, v3 ; encoding: [0xff,0x01,0x18,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v2, v2 :: v_dual_fmac_f32 v7, v3, v3 +// GFX1250: v_dual_subrev_f32 v255, v2, v2 :: v_dual_fmac_f32 v7, v3, v3 ; encoding: [0x02,0x01,0x18,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v3, v2 :: v_dual_fmac_f32 v7, v4, v3 +// GFX1250: v_dual_subrev_f32 v255, v3, v2 :: v_dual_fmac_f32 v7, v4, v3 ; encoding: [0x03,0x01,0x18,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s105, v2 :: v_dual_fmac_f32 v7, s1, v3 +// GFX1250: v_dual_subrev_f32 v255, s105, v2 :: v_dual_fmac_f32 v7, s1, v3 ; encoding: [0x69,0x00,0x18,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s1, v2 :: v_dual_fmac_f32 v7, s105, v3 +// GFX1250: 
v_dual_subrev_f32 v255, s1, v2 :: v_dual_fmac_f32 v7, s105, v3 ; encoding: [0x01,0x00,0x18,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_fmac_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_fmac_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x18,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_fmac_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_fmac_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x18,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_fmac_f32 v7, ttmp15, v3 +// GFX1250: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_fmac_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x18,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, m0, v2 :: v_dual_fmac_f32 v7, m0, v3 +// GFX1250: v_dual_subrev_f32 v255, m0, v2 :: v_dual_fmac_f32 v7, m0, v3 ; encoding: [0x7d,0x00,0x18,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v7, exec_lo, v3 +// GFX1250: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x18,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v7, exec_hi, v3 +// GFX1250: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x18,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_fmac_f32 v7, -1, v3 +// GFX1250: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_fmac_f32 v7, -1, v3 ; encoding: [0xfd,0x00,0x18,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_fmac_f32 v7, 0.5, v2 +// GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_fmac_f32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x18,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, -1, v4 :: v_dual_fmac_f32 v7, src_scc, v5 +// GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_fmac_f32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x18,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v4, v2 :: v_dual_lshlrev_b32 v7, v1, v3 +// GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_lshlrev_b32 v7, v1, v3 ; encoding: [0x04,0x11,0x19,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v1, v2 :: v_dual_lshlrev_b32 v7, v255, v3 +// GFX1250: v_dual_subrev_f32 v255, v1, v2 :: v_dual_lshlrev_b32 v7, v255, v3 ; encoding: [0x01,0x11,0x19,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v255, v2 :: v_dual_lshlrev_b32 v7, v2, v3 +// GFX1250: v_dual_subrev_f32 v255, v255, v2 :: v_dual_lshlrev_b32 v7, v2, v3 ; encoding: [0xff,0x11,0x19,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v2, v2 :: v_dual_lshlrev_b32 v7, v3, v3 +// GFX1250: v_dual_subrev_f32 v255, v2, v2 :: v_dual_lshlrev_b32 v7, v3, v3 ; encoding: [0x02,0x11,0x19,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction 
requires wavesize=32 + +v_dual_subrev_f32 v255, v3, v2 :: v_dual_lshlrev_b32 v7, v4, v3 +// GFX1250: v_dual_subrev_f32 v255, v3, v2 :: v_dual_lshlrev_b32 v7, v4, v3 ; encoding: [0x03,0x11,0x19,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s105, v2 :: v_dual_lshlrev_b32 v7, s1, v3 +// GFX1250: v_dual_subrev_f32 v255, s105, v2 :: v_dual_lshlrev_b32 v7, s1, v3 ; encoding: [0x69,0x10,0x19,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s1, v2 :: v_dual_lshlrev_b32 v7, s105, v3 +// GFX1250: v_dual_subrev_f32 v255, s1, v2 :: v_dual_lshlrev_b32 v7, s105, v3 ; encoding: [0x01,0x10,0x19,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v7, vcc_lo, v3 +// GFX1250: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v7, vcc_lo, v3 ; encoding: [0x7b,0x10,0x19,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v7, vcc_hi, v3 +// GFX1250: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v7, vcc_hi, v3 ; encoding: [0x7f,0x10,0x19,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v7, ttmp15, v3 +// GFX1250: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v7, ttmp15, v3 ; encoding: [0x7e,0x10,0x19,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, m0, v2 :: v_dual_lshlrev_b32 v7, m0, v3 +// GFX1250: v_dual_subrev_f32 v255, m0, v2 :: v_dual_lshlrev_b32 v7, m0, v3 ; encoding: 
[0x7d,0x10,0x19,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v7, exec_lo, v3 +// GFX1250: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v7, exec_lo, v3 ; encoding: [0x6b,0x10,0x19,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v7, exec_hi, v3 +// GFX1250: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x10,0x19,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v7, -1, v3 +// GFX1250: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v7, -1, v3 ; encoding: [0xfd,0x10,0x19,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v7, 0.5, v2 +// GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x10,0x19,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, -1, v4 :: v_dual_lshlrev_b32 v7, src_scc, v5 +// GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_lshlrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x10,0x19,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v4, v2 :: v_dual_max_num_f32 v7, v1, v3 +// GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_max_num_f32 v7, v1, v3 ; encoding: [0x04,0xa1,0x18,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v1, v2 :: v_dual_max_num_f32 v7, v255, v3 +// GFX1250: 
v_dual_subrev_f32 v255, v1, v2 :: v_dual_max_num_f32 v7, v255, v3 ; encoding: [0x01,0xa1,0x18,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v255, v2 :: v_dual_max_num_f32 v7, v2, v3 +// GFX1250: v_dual_subrev_f32 v255, v255, v2 :: v_dual_max_num_f32 v7, v2, v3 ; encoding: [0xff,0xa1,0x18,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v2, v2 :: v_dual_max_num_f32 v7, v3, v3 +// GFX1250: v_dual_subrev_f32 v255, v2, v2 :: v_dual_max_num_f32 v7, v3, v3 ; encoding: [0x02,0xa1,0x18,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v3, v2 :: v_dual_max_num_f32 v7, v4, v3 +// GFX1250: v_dual_subrev_f32 v255, v3, v2 :: v_dual_max_num_f32 v7, v4, v3 ; encoding: [0x03,0xa1,0x18,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s105, v2 :: v_dual_max_num_f32 v7, s1, v3 +// GFX1250: v_dual_subrev_f32 v255, s105, v2 :: v_dual_max_num_f32 v7, s1, v3 ; encoding: [0x69,0xa0,0x18,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s1, v2 :: v_dual_max_num_f32 v7, s105, v3 +// GFX1250: v_dual_subrev_f32 v255, s1, v2 :: v_dual_max_num_f32 v7, s105, v3 ; encoding: [0x01,0xa0,0x18,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_max_num_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_max_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xa0,0x18,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_hi, v2 
:: v_dual_max_num_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_max_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xa0,0x18,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_max_num_f32 v7, ttmp15, v3 +// GFX1250: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_max_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xa0,0x18,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, m0, v2 :: v_dual_max_num_f32 v7, m0, v3 +// GFX1250: v_dual_subrev_f32 v255, m0, v2 :: v_dual_max_num_f32 v7, m0, v3 ; encoding: [0x7d,0xa0,0x18,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v7, exec_lo, v3 +// GFX1250: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xa0,0x18,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v7, exec_hi, v3 +// GFX1250: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xa0,0x18,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_max_num_f32 v7, -1, v3 +// GFX1250: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_max_num_f32 v7, -1, v3 ; encoding: [0xfd,0xa0,0x18,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_max_num_f32 v7, 0.5, v2 +// GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_max_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xa0,0x18,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, -1, v4 :: v_dual_max_num_f32 v7, src_scc, v5 +// GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_max_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xa0,0x18,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v4, v2 :: v_dual_min_num_f32 v7, v1, v3 +// GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_min_num_f32 v7, v1, v3 ; encoding: [0x04,0xb1,0x18,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v1, v2 :: v_dual_min_num_f32 v7, v255, v3 +// GFX1250: v_dual_subrev_f32 v255, v1, v2 :: v_dual_min_num_f32 v7, v255, v3 ; encoding: [0x01,0xb1,0x18,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v255, v2 :: v_dual_min_num_f32 v7, v2, v3 +// GFX1250: v_dual_subrev_f32 v255, v255, v2 :: v_dual_min_num_f32 v7, v2, v3 ; encoding: [0xff,0xb1,0x18,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v2, v2 :: v_dual_min_num_f32 v7, v3, v3 +// GFX1250: v_dual_subrev_f32 v255, v2, v2 :: v_dual_min_num_f32 v7, v3, v3 ; encoding: [0x02,0xb1,0x18,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v3, v2 :: v_dual_min_num_f32 v7, v4, v3 +// GFX1250: v_dual_subrev_f32 v255, v3, v2 :: v_dual_min_num_f32 v7, v4, v3 ; encoding: [0x03,0xb1,0x18,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s105, v2 :: v_dual_min_num_f32 v7, s1, v3 +// GFX1250: v_dual_subrev_f32 v255, s105, v2 :: v_dual_min_num_f32 v7, s1, v3 ; encoding: 
[0x69,0xb0,0x18,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s1, v2 :: v_dual_min_num_f32 v7, s105, v3 +// GFX1250: v_dual_subrev_f32 v255, s1, v2 :: v_dual_min_num_f32 v7, s105, v3 ; encoding: [0x01,0xb0,0x18,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_min_num_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_min_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xb0,0x18,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_min_num_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_min_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xb0,0x18,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_min_num_f32 v7, ttmp15, v3 +// GFX1250: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_min_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xb0,0x18,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, m0, v2 :: v_dual_min_num_f32 v7, m0, v3 +// GFX1250: v_dual_subrev_f32 v255, m0, v2 :: v_dual_min_num_f32 v7, m0, v3 ; encoding: [0x7d,0xb0,0x18,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v7, exec_lo, v3 +// GFX1250: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xb0,0x18,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v7, exec_hi, 
v3 +// GFX1250: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xb0,0x18,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_min_num_f32 v7, -1, v3 +// GFX1250: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_min_num_f32 v7, -1, v3 ; encoding: [0xfd,0xb0,0x18,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_min_num_f32 v7, 0.5, v2 +// GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_min_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xb0,0x18,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, -1, v4 :: v_dual_min_num_f32 v7, src_scc, v5 +// GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_min_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xb0,0x18,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v4, v255 :: v_dual_mov_b32 v7, v1 +// GFX1250: v_dual_subrev_f32 v255, v4, v255 :: v_dual_mov_b32 v7, v1 ; encoding: [0x04,0x81,0x18,0xcf,0x01,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v1, v255 :: v_dual_mov_b32 v7, v255 +// GFX1250: v_dual_subrev_f32 v255, v1, v255 :: v_dual_mov_b32 v7, v255 ; encoding: [0x01,0x81,0x18,0xcf,0xff,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v255, v255 :: v_dual_mov_b32 v7, v2 +// GFX1250: v_dual_subrev_f32 v255, v255, v255 :: v_dual_mov_b32 v7, v2 ; encoding: [0xff,0x81,0x18,0xcf,0x02,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v2, v255 :: 
v_dual_mov_b32 v7, v3 +// GFX1250: v_dual_subrev_f32 v255, v2, v255 :: v_dual_mov_b32 v7, v3 ; encoding: [0x02,0x81,0x18,0xcf,0x03,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v3, v255 :: v_dual_mov_b32 v7, v4 +// GFX1250: v_dual_subrev_f32 v255, v3, v255 :: v_dual_mov_b32 v7, v4 ; encoding: [0x03,0x81,0x18,0xcf,0x04,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s105, v255 :: v_dual_mov_b32 v7, s1 +// GFX1250: v_dual_subrev_f32 v255, s105, v255 :: v_dual_mov_b32 v7, s1 ; encoding: [0x69,0x80,0x18,0xcf,0x01,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s1, v255 :: v_dual_mov_b32 v7, s105 +// GFX1250: v_dual_subrev_f32 v255, s1, v255 :: v_dual_mov_b32 v7, s105 ; encoding: [0x01,0x80,0x18,0xcf,0x69,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, ttmp15, v255 :: v_dual_mov_b32 v7, vcc_lo +// GFX1250: v_dual_subrev_f32 v255, ttmp15, v255 :: v_dual_mov_b32 v7, vcc_lo ; encoding: [0x7b,0x80,0x18,0xcf,0x6a,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_hi, v255 :: v_dual_mov_b32 v7, vcc_hi +// GFX1250: v_dual_subrev_f32 v255, exec_hi, v255 :: v_dual_mov_b32 v7, vcc_hi ; encoding: [0x7f,0x80,0x18,0xcf,0x6b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_lo, v255 :: v_dual_mov_b32 v7, ttmp15 +// GFX1250: v_dual_subrev_f32 v255, exec_lo, v255 :: v_dual_mov_b32 v7, ttmp15 ; encoding: [0x7e,0x80,0x18,0xcf,0x7b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, m0, v255 :: v_dual_mov_b32 
v7, m0 +// GFX1250: v_dual_subrev_f32 v255, m0, v255 :: v_dual_mov_b32 v7, m0 ; encoding: [0x7d,0x80,0x18,0xcf,0x7d,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_hi, v255 :: v_dual_mov_b32 v7, exec_lo +// GFX1250: v_dual_subrev_f32 v255, vcc_hi, v255 :: v_dual_mov_b32 v7, exec_lo ; encoding: [0x6b,0x80,0x18,0xcf,0x7e,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_lo, v255 :: v_dual_mov_b32 v7, exec_hi +// GFX1250: v_dual_subrev_f32 v255, vcc_lo, v255 :: v_dual_mov_b32 v7, exec_hi ; encoding: [0x6a,0x80,0x18,0xcf,0x7f,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, src_scc, v255 :: v_dual_mov_b32 v7, -1 +// GFX1250: v_dual_subrev_f32 v255, src_scc, v255 :: v_dual_mov_b32 v7, -1 ; encoding: [0xfd,0x80,0x18,0xcf,0xc1,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_mov_b32 v7, 0.5 +// GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_mov_b32 v7, 0.5 ; encoding: [0xf0,0x80,0x18,0xcf,0xf0,0x00,0x03,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, -1, v4 :: v_dual_mov_b32 v7, src_scc +// GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_mov_b32 v7, src_scc ; encoding: [0xc1,0x80,0x18,0xcf,0xfd,0x00,0x04,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v7, v1, v3 +// GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v7, v1, v3 ; encoding: [0x04,0x71,0x18,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v1, v2 :: 
v_dual_mul_dx9_zero_f32 v7, v255, v3 +// GFX1250: v_dual_subrev_f32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v7, v255, v3 ; encoding: [0x01,0x71,0x18,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v7, v2, v3 +// GFX1250: v_dual_subrev_f32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v7, v2, v3 ; encoding: [0xff,0x71,0x18,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v7, v3, v3 +// GFX1250: v_dual_subrev_f32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v7, v3, v3 ; encoding: [0x02,0x71,0x18,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v7, v4, v3 +// GFX1250: v_dual_subrev_f32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v7, v4, v3 ; encoding: [0x03,0x71,0x18,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v7, s1, v3 +// GFX1250: v_dual_subrev_f32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v7, s1, v3 ; encoding: [0x69,0x70,0x18,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v7, s105, v3 +// GFX1250: v_dual_subrev_f32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v7, s105, v3 ; encoding: [0x01,0x70,0x18,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 ; encoding: 
[0x7b,0x70,0x18,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x70,0x18,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, ttmp15, v3 +// GFX1250: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x70,0x18,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v7, m0, v3 +// GFX1250: v_dual_subrev_f32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v7, m0, v3 ; encoding: [0x7d,0x70,0x18,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_lo, v3 +// GFX1250: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x70,0x18,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 +// GFX1250: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x18,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v7, -1, v3 +// GFX1250: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v7, -1, v3 ; encoding: [0xfd,0x70,0x18,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 +// GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x18,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 +// GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x18,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v4, v2 :: v_dual_mul_f32 v7, v1, v3 +// GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_mul_f32 v7, v1, v3 ; encoding: [0x04,0x31,0x18,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v1, v2 :: v_dual_mul_f32 v7, v255, v3 +// GFX1250: v_dual_subrev_f32 v255, v1, v2 :: v_dual_mul_f32 v7, v255, v3 ; encoding: [0x01,0x31,0x18,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v255, v2 :: v_dual_mul_f32 v7, v2, v3 +// GFX1250: v_dual_subrev_f32 v255, v255, v2 :: v_dual_mul_f32 v7, v2, v3 ; encoding: [0xff,0x31,0x18,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v2, v2 :: v_dual_mul_f32 v7, v3, v3 +// GFX1250: v_dual_subrev_f32 v255, v2, v2 :: v_dual_mul_f32 v7, v3, v3 ; encoding: [0x02,0x31,0x18,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v3, v2 :: v_dual_mul_f32 v7, v4, v3 +// GFX1250: v_dual_subrev_f32 v255, v3, v2 :: v_dual_mul_f32 v7, v4, v3 ; encoding: [0x03,0x31,0x18,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_subrev_f32 v255, s105, v2 :: v_dual_mul_f32 v7, s1, v3 +// GFX1250: v_dual_subrev_f32 v255, s105, v2 :: v_dual_mul_f32 v7, s1, v3 ; encoding: [0x69,0x30,0x18,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s1, v2 :: v_dual_mul_f32 v7, s105, v3 +// GFX1250: v_dual_subrev_f32 v255, s1, v2 :: v_dual_mul_f32 v7, s105, v3 ; encoding: [0x01,0x30,0x18,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_mul_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_mul_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x30,0x18,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_mul_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_mul_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x30,0x18,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_mul_f32 v7, ttmp15, v3 +// GFX1250: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_mul_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x30,0x18,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, m0, v2 :: v_dual_mul_f32 v7, m0, v3 +// GFX1250: v_dual_subrev_f32 v255, m0, v2 :: v_dual_mul_f32 v7, m0, v3 ; encoding: [0x7d,0x30,0x18,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_mul_f32 v7, exec_lo, v3 +// GFX1250: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_mul_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x30,0x18,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_mul_f32 v7, exec_hi, v3 +// GFX1250: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_mul_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x30,0x18,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_mul_f32 v7, -1, v3 +// GFX1250: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_mul_f32 v7, -1, v3 ; encoding: [0xfd,0x30,0x18,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_mul_f32 v7, 0.5, v2 +// GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_mul_f32 v7, 0.5, v2 ; encoding: [0xf0,0x30,0x18,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, -1, v4 :: v_dual_mul_f32 v7, src_scc, v5 +// GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_mul_f32 v7, src_scc, v5 ; encoding: [0xc1,0x30,0x18,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v4, v2 :: v_dual_sub_f32 v7, v1, v3 +// GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_sub_f32 v7, v1, v3 ; encoding: [0x04,0x51,0x18,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v1, v2 :: v_dual_sub_f32 v7, v255, v3 +// GFX1250: v_dual_subrev_f32 v255, v1, v2 :: v_dual_sub_f32 v7, v255, v3 ; encoding: [0x01,0x51,0x18,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v255, v2 :: v_dual_sub_f32 v7, v2, v3 +// GFX1250: v_dual_subrev_f32 v255, v255, v2 :: v_dual_sub_f32 v7, v2, v3 ; encoding: [0xff,0x51,0x18,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// 
W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v2, v2 :: v_dual_sub_f32 v7, v3, v3 +// GFX1250: v_dual_subrev_f32 v255, v2, v2 :: v_dual_sub_f32 v7, v3, v3 ; encoding: [0x02,0x51,0x18,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v3, v2 :: v_dual_sub_f32 v7, v4, v3 +// GFX1250: v_dual_subrev_f32 v255, v3, v2 :: v_dual_sub_f32 v7, v4, v3 ; encoding: [0x03,0x51,0x18,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s105, v2 :: v_dual_sub_f32 v7, s1, v3 +// GFX1250: v_dual_subrev_f32 v255, s105, v2 :: v_dual_sub_f32 v7, s1, v3 ; encoding: [0x69,0x50,0x18,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s1, v2 :: v_dual_sub_f32 v7, s105, v3 +// GFX1250: v_dual_subrev_f32 v255, s1, v2 :: v_dual_sub_f32 v7, s105, v3 ; encoding: [0x01,0x50,0x18,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_sub_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_sub_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x50,0x18,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_sub_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_sub_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x50,0x18,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_sub_f32 v7, ttmp15, v3 +// GFX1250: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_sub_f32 v7, ttmp15, v3 ; encoding: 
[0x7e,0x50,0x18,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, m0, v2 :: v_dual_sub_f32 v7, m0, v3 +// GFX1250: v_dual_subrev_f32 v255, m0, v2 :: v_dual_sub_f32 v7, m0, v3 ; encoding: [0x7d,0x50,0x18,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_sub_f32 v7, exec_lo, v3 +// GFX1250: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_sub_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x50,0x18,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_sub_f32 v7, exec_hi, v3 +// GFX1250: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_sub_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x18,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_sub_f32 v7, -1, v3 +// GFX1250: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_sub_f32 v7, -1, v3 ; encoding: [0xfd,0x50,0x18,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_sub_f32 v7, 0.5, v2 +// GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_sub_f32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x18,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, -1, v4 :: v_dual_sub_f32 v7, src_scc, v5 +// GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_sub_f32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x18,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v4, v2 :: v_dual_subrev_f32 v7, v1, v3 +// GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_subrev_f32 v7, 
v1, v3 ; encoding: [0x04,0x61,0x18,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v1, v2 :: v_dual_subrev_f32 v7, v255, v3 +// GFX1250: v_dual_subrev_f32 v255, v1, v2 :: v_dual_subrev_f32 v7, v255, v3 ; encoding: [0x01,0x61,0x18,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v255, v2 :: v_dual_subrev_f32 v7, v2, v3 +// GFX1250: v_dual_subrev_f32 v255, v255, v2 :: v_dual_subrev_f32 v7, v2, v3 ; encoding: [0xff,0x61,0x18,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v2, v2 :: v_dual_subrev_f32 v7, v3, v3 +// GFX1250: v_dual_subrev_f32 v255, v2, v2 :: v_dual_subrev_f32 v7, v3, v3 ; encoding: [0x02,0x61,0x18,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v3, v2 :: v_dual_subrev_f32 v7, v4, v3 +// GFX1250: v_dual_subrev_f32 v255, v3, v2 :: v_dual_subrev_f32 v7, v4, v3 ; encoding: [0x03,0x61,0x18,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s105, v2 :: v_dual_subrev_f32 v7, s1, v3 +// GFX1250: v_dual_subrev_f32 v255, s105, v2 :: v_dual_subrev_f32 v7, s1, v3 ; encoding: [0x69,0x60,0x18,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s1, v2 :: v_dual_subrev_f32 v7, s105, v3 +// GFX1250: v_dual_subrev_f32 v255, s1, v2 :: v_dual_subrev_f32 v7, s105, v3 ; encoding: [0x01,0x60,0x18,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_subrev_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_subrev_f32 v255, ttmp15, v2 
:: v_dual_subrev_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x60,0x18,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_subrev_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_subrev_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x60,0x18,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_subrev_f32 v7, ttmp15, v3 +// GFX1250: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_subrev_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x60,0x18,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, m0, v2 :: v_dual_subrev_f32 v7, m0, v3 +// GFX1250: v_dual_subrev_f32 v255, m0, v2 :: v_dual_subrev_f32 v7, m0, v3 ; encoding: [0x7d,0x60,0x18,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v7, exec_lo, v3 +// GFX1250: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x60,0x18,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v7, exec_hi, v3 +// GFX1250: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x18,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_subrev_f32 v7, -1, v3 +// GFX1250: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_subrev_f32 v7, -1, v3 ; encoding: [0xfd,0x60,0x18,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 
0.5, v3 :: v_dual_subrev_f32 v7, 0.5, v2 +// GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_subrev_f32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x18,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, -1, v4 :: v_dual_subrev_f32 v7, src_scc, v5 +// GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_subrev_f32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x18,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v4, v2 :: v_dual_fma_f32 v7, v1, v3, v4 +// GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_fma_f32 v7, v1, v3, v4 ; encoding: [0x04,0x31,0x19,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x04,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v4, v2 :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0x83 +// GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0x83 ; encoding: [0x04,0x21,0x19,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x83,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v4, v2 :: v_dual_max_i32 v7, v1, v3 +// GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_max_i32 v7, v1, v3 ; encoding: [0x04,0x71,0x11,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v1, v2 :: v_dual_max_i32 v7, v255, v3 +// GFX1250: v_dual_add_f32 v255, v1, v2 :: v_dual_max_i32 v7, v255, v3 ; encoding: [0x01,0x71,0x11,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v255, v2 :: v_dual_max_i32 v7, v2, v3 +// GFX1250: v_dual_add_f32 v255, v255, v2 :: v_dual_max_i32 v7, v2, v3 ; encoding: [0xff,0x71,0x11,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 
v255, v2, v2 :: v_dual_max_i32 v7, v3, v3 +// GFX1250: v_dual_add_f32 v255, v2, v2 :: v_dual_max_i32 v7, v3, v3 ; encoding: [0x02,0x71,0x11,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v3, v2 :: v_dual_max_i32 v7, v4, v3 +// GFX1250: v_dual_add_f32 v255, v3, v2 :: v_dual_max_i32 v7, v4, v3 ; encoding: [0x03,0x71,0x11,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s105, v2 :: v_dual_max_i32 v7, s1, v3 +// GFX1250: v_dual_add_f32 v255, s105, v2 :: v_dual_max_i32 v7, s1, v3 ; encoding: [0x69,0x70,0x11,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s1, v2 :: v_dual_max_i32 v7, s105, v3 +// GFX1250: v_dual_add_f32 v255, s1, v2 :: v_dual_max_i32 v7, s105, v3 ; encoding: [0x01,0x70,0x11,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, ttmp15, v2 :: v_dual_max_i32 v7, vcc_lo, v3 +// GFX1250: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_max_i32 v7, vcc_lo, v3 ; encoding: [0x7b,0x70,0x11,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_hi, v2 :: v_dual_max_i32 v7, vcc_hi, v3 +// GFX1250: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_max_i32 v7, vcc_hi, v3 ; encoding: [0x7f,0x70,0x11,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_lo, v2 :: v_dual_max_i32 v7, ttmp15, v3 +// GFX1250: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_max_i32 v7, ttmp15, v3 ; encoding: [0x7e,0x70,0x11,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, m0, v2 :: 
v_dual_max_i32 v7, m0, v3 +// GFX1250: v_dual_add_f32 v255, m0, v2 :: v_dual_max_i32 v7, m0, v3 ; encoding: [0x7d,0x70,0x11,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_max_i32 v7, exec_lo, v3 +// GFX1250: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_max_i32 v7, exec_lo, v3 ; encoding: [0x6b,0x70,0x11,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_max_i32 v7, exec_hi, v3 +// GFX1250: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_max_i32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x11,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, src_scc, v2 :: v_dual_max_i32 v7, -1, v3 +// GFX1250: v_dual_add_f32 v255, src_scc, v2 :: v_dual_max_i32 v7, -1, v3 ; encoding: [0xfd,0x70,0x11,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0.5, v3 :: v_dual_max_i32 v7, 0.5, v2 +// GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_max_i32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x11,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, -1, v4 :: v_dual_max_i32 v7, src_scc, v5 +// GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_max_i32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x11,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_max_i32 v7, v1, v3 +// GFX1250: v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_max_i32 v7, v1, v3 ; encoding: [0x04,0x71,0x25,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v1, v2, 
vcc_lo :: v_dual_max_i32 v7, v255, v3 +// GFX1250: v_dual_cndmask_b32 v255, v1, v2, vcc_lo :: v_dual_max_i32 v7, v255, v3 ; encoding: [0x01,0x71,0x25,0xcf,0xff,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v255, v2, vcc_lo :: v_dual_max_i32 v7, v2, v3 +// GFX1250: v_dual_cndmask_b32 v255, v255, v2, vcc_lo :: v_dual_max_i32 v7, v2, v3 ; encoding: [0xff,0x71,0x25,0xcf,0x02,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v2, v2, vcc_lo :: v_dual_max_i32 v7, v3, v3 +// GFX1250: v_dual_cndmask_b32 v255, v2, v2, vcc_lo :: v_dual_max_i32 v7, v3, v3 ; encoding: [0x02,0x71,0x25,0xcf,0x03,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v3, v2, vcc_lo :: v_dual_max_i32 v7, v4, v3 +// GFX1250: v_dual_cndmask_b32 v255, v3, v2, vcc_lo :: v_dual_max_i32 v7, v4, v3 ; encoding: [0x03,0x71,0x25,0xcf,0x04,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s105, v2, vcc_lo :: v_dual_max_i32 v7, s105, v3 +// GFX1250: v_dual_cndmask_b32 v255, s105, v2, vcc_lo :: v_dual_max_i32 v7, s105, v3 ; encoding: [0x69,0x70,0x25,0xcf,0x69,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s1, v2, vcc_lo :: v_dual_max_i32 v7, s1, v3 +// GFX1250: v_dual_cndmask_b32 v255, s1, v2, vcc_lo :: v_dual_max_i32 v7, s1, v3 ; encoding: [0x01,0x70,0x25,0xcf,0x01,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, ttmp15, v2, vcc_lo :: v_dual_max_i32 v7, ttmp15, v3 +// GFX1250: v_dual_cndmask_b32 v255, ttmp15, v2, vcc_lo :: v_dual_max_i32 v7, ttmp15, v3 ; encoding: 
[0x7b,0x70,0x25,0xcf,0x7b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_hi, v2, vcc_lo :: v_dual_max_i32 v7, exec_hi, v3 +// GFX1250: v_dual_cndmask_b32 v255, exec_hi, v2, vcc_lo :: v_dual_max_i32 v7, exec_hi, v3 ; encoding: [0x7f,0x70,0x25,0xcf,0x7f,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_lo, v2, vcc_lo :: v_dual_max_i32 v7, exec_lo, v3 +// GFX1250: v_dual_cndmask_b32 v255, exec_lo, v2, vcc_lo :: v_dual_max_i32 v7, exec_lo, v3 ; encoding: [0x7e,0x70,0x25,0xcf,0x7e,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, m0, v2, vcc_lo :: v_dual_max_i32 v7, m0, v3 +// GFX1250: v_dual_cndmask_b32 v255, m0, v2, vcc_lo :: v_dual_max_i32 v7, m0, v3 ; encoding: [0x7d,0x70,0x25,0xcf,0x7d,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_hi, v2, vcc_lo :: v_dual_max_i32 v7, vcc_hi, v3 +// GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v2, vcc_lo :: v_dual_max_i32 v7, vcc_hi, v3 ; encoding: [0x6b,0x70,0x25,0xcf,0x6b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_lo, v2, vcc_lo :: v_dual_max_i32 v7, vcc_lo, v3 +// GFX1250: v_dual_cndmask_b32 v255, vcc_lo, v2, vcc_lo :: v_dual_max_i32 v7, vcc_lo, v3 ; encoding: [0x6a,0x70,0x25,0xcf,0x6a,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, src_scc, v2, vcc_lo :: v_dual_max_i32 v7, -1, v3 +// GFX1250: v_dual_cndmask_b32 v255, src_scc, v2, vcc_lo :: v_dual_max_i32 v7, -1, v3 ; encoding: [0xfd,0x70,0x25,0xcf,0xc1,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_max_i32 v7, 0.5, v2 +// GFX1250: v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_max_i32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x25,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_max_i32 v7, src_scc, v5 +// GFX1250: v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_max_i32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x25,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v4, v2 :: v_dual_max_i32 v7, v1, v3 +// GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_max_i32 v7, v1, v3 ; encoding: [0x04,0x71,0x01,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v1, v2 :: v_dual_max_i32 v7, v255, v3 +// GFX1250: v_dual_fmac_f32 v255, v1, v2 :: v_dual_max_i32 v7, v255, v3 ; encoding: [0x01,0x71,0x01,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v255, v2 :: v_dual_max_i32 v7, v2, v3 +// GFX1250: v_dual_fmac_f32 v255, v255, v2 :: v_dual_max_i32 v7, v2, v3 ; encoding: [0xff,0x71,0x01,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v2, v2 :: v_dual_max_i32 v7, v3, v3 +// GFX1250: v_dual_fmac_f32 v255, v2, v2 :: v_dual_max_i32 v7, v3, v3 ; encoding: [0x02,0x71,0x01,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v3, v2 :: v_dual_max_i32 v7, v4, v3 +// GFX1250: v_dual_fmac_f32 v255, v3, v2 :: v_dual_max_i32 v7, v4, v3 ; encoding: [0x03,0x71,0x01,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_fmac_f32 v255, s105, v2 :: v_dual_max_i32 v7, s1, v3 +// GFX1250: v_dual_fmac_f32 v255, s105, v2 :: v_dual_max_i32 v7, s1, v3 ; encoding: [0x69,0x70,0x01,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s1, v2 :: v_dual_max_i32 v7, s105, v3 +// GFX1250: v_dual_fmac_f32 v255, s1, v2 :: v_dual_max_i32 v7, s105, v3 ; encoding: [0x01,0x70,0x01,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_max_i32 v7, vcc_lo, v3 +// GFX1250: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_max_i32 v7, vcc_lo, v3 ; encoding: [0x7b,0x70,0x01,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_max_i32 v7, vcc_hi, v3 +// GFX1250: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_max_i32 v7, vcc_hi, v3 ; encoding: [0x7f,0x70,0x01,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_max_i32 v7, ttmp15, v3 +// GFX1250: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_max_i32 v7, ttmp15, v3 ; encoding: [0x7e,0x70,0x01,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, m0, v2 :: v_dual_max_i32 v7, m0, v3 +// GFX1250: v_dual_fmac_f32 v255, m0, v2 :: v_dual_max_i32 v7, m0, v3 ; encoding: [0x7d,0x70,0x01,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_max_i32 v7, exec_lo, v3 +// GFX1250: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_max_i32 v7, exec_lo, v3 ; encoding: [0x6b,0x70,0x01,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_max_i32 v7, exec_hi, v3 +// GFX1250: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_max_i32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x01,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_max_i32 v7, -1, v3 +// GFX1250: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_max_i32 v7, -1, v3 ; encoding: [0xfd,0x70,0x01,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_max_i32 v7, 0.5, v2 +// GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_max_i32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x01,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, -1, v4 :: v_dual_max_i32 v7, src_scc, v5 +// GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_max_i32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x01,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v4, v2 :: v_dual_max_i32 v7, v1, v3 +// GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_max_i32 v7, v1, v3 ; encoding: [0x04,0x71,0x29,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v1, v2 :: v_dual_max_i32 v7, v255, v3 +// GFX1250: v_dual_max_num_f32 v255, v1, v2 :: v_dual_max_i32 v7, v255, v3 ; encoding: [0x01,0x71,0x29,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v255, v2 :: v_dual_max_i32 v7, v2, v3 +// GFX1250: v_dual_max_num_f32 v255, v255, v2 :: v_dual_max_i32 v7, v2, v3 ; encoding: [0xff,0x71,0x29,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_max_num_f32 v255, v2, v2 :: v_dual_max_i32 v7, v3, v3 +// GFX1250: v_dual_max_num_f32 v255, v2, v2 :: v_dual_max_i32 v7, v3, v3 ; encoding: [0x02,0x71,0x29,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v3, v2 :: v_dual_max_i32 v7, v4, v3 +// GFX1250: v_dual_max_num_f32 v255, v3, v2 :: v_dual_max_i32 v7, v4, v3 ; encoding: [0x03,0x71,0x29,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s105, v2 :: v_dual_max_i32 v7, s1, v3 +// GFX1250: v_dual_max_num_f32 v255, s105, v2 :: v_dual_max_i32 v7, s1, v3 ; encoding: [0x69,0x70,0x29,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s1, v2 :: v_dual_max_i32 v7, s105, v3 +// GFX1250: v_dual_max_num_f32 v255, s1, v2 :: v_dual_max_i32 v7, s105, v3 ; encoding: [0x01,0x70,0x29,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_max_i32 v7, vcc_lo, v3 +// GFX1250: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_max_i32 v7, vcc_lo, v3 ; encoding: [0x7b,0x70,0x29,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_max_i32 v7, vcc_hi, v3 +// GFX1250: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_max_i32 v7, vcc_hi, v3 ; encoding: [0x7f,0x70,0x29,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_max_i32 v7, ttmp15, v3 +// GFX1250: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_max_i32 v7, ttmp15, v3 ; encoding: [0x7e,0x70,0x29,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: 
error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, m0, v2 :: v_dual_max_i32 v7, m0, v3 +// GFX1250: v_dual_max_num_f32 v255, m0, v2 :: v_dual_max_i32 v7, m0, v3 ; encoding: [0x7d,0x70,0x29,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_max_i32 v7, exec_lo, v3 +// GFX1250: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_max_i32 v7, exec_lo, v3 ; encoding: [0x6b,0x70,0x29,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_max_i32 v7, exec_hi, v3 +// GFX1250: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_max_i32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x29,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_max_i32 v7, -1, v3 +// GFX1250: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_max_i32 v7, -1, v3 ; encoding: [0xfd,0x70,0x29,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_max_i32 v7, 0.5, v2 +// GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_max_i32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x29,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, -1, v4 :: v_dual_max_i32 v7, src_scc, v5 +// GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_max_i32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x29,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v4, v2 :: v_dual_max_i32 v7, v1, v3 +// GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_max_i32 v7, v1, v3 ; encoding: [0x04,0x71,0x2d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] 
+// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v1, v2 :: v_dual_max_i32 v7, v255, v3 +// GFX1250: v_dual_min_num_f32 v255, v1, v2 :: v_dual_max_i32 v7, v255, v3 ; encoding: [0x01,0x71,0x2d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v255, v2 :: v_dual_max_i32 v7, v2, v3 +// GFX1250: v_dual_min_num_f32 v255, v255, v2 :: v_dual_max_i32 v7, v2, v3 ; encoding: [0xff,0x71,0x2d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v2, v2 :: v_dual_max_i32 v7, v3, v3 +// GFX1250: v_dual_min_num_f32 v255, v2, v2 :: v_dual_max_i32 v7, v3, v3 ; encoding: [0x02,0x71,0x2d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v3, v2 :: v_dual_max_i32 v7, v4, v3 +// GFX1250: v_dual_min_num_f32 v255, v3, v2 :: v_dual_max_i32 v7, v4, v3 ; encoding: [0x03,0x71,0x2d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s105, v2 :: v_dual_max_i32 v7, s1, v3 +// GFX1250: v_dual_min_num_f32 v255, s105, v2 :: v_dual_max_i32 v7, s1, v3 ; encoding: [0x69,0x70,0x2d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s1, v2 :: v_dual_max_i32 v7, s105, v3 +// GFX1250: v_dual_min_num_f32 v255, s1, v2 :: v_dual_max_i32 v7, s105, v3 ; encoding: [0x01,0x70,0x2d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_max_i32 v7, vcc_lo, v3 +// GFX1250: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_max_i32 v7, vcc_lo, v3 ; encoding: [0x7b,0x70,0x2d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_max_i32 v7, vcc_hi, v3 +// GFX1250: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_max_i32 v7, vcc_hi, v3 ; encoding: [0x7f,0x70,0x2d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_max_i32 v7, ttmp15, v3 +// GFX1250: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_max_i32 v7, ttmp15, v3 ; encoding: [0x7e,0x70,0x2d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, m0, v2 :: v_dual_max_i32 v7, m0, v3 +// GFX1250: v_dual_min_num_f32 v255, m0, v2 :: v_dual_max_i32 v7, m0, v3 ; encoding: [0x7d,0x70,0x2d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_max_i32 v7, exec_lo, v3 +// GFX1250: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_max_i32 v7, exec_lo, v3 ; encoding: [0x6b,0x70,0x2d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_max_i32 v7, exec_hi, v3 +// GFX1250: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_max_i32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x2d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_max_i32 v7, -1, v3 +// GFX1250: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_max_i32 v7, -1, v3 ; encoding: [0xfd,0x70,0x2d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_max_i32 v7, 0.5, v2 +// GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_max_i32 v7, 0.5, v2 ; encoding: 
[0xf0,0x70,0x2d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, -1, v4 :: v_dual_max_i32 v7, src_scc, v5 +// GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_max_i32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x2d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v4 :: v_dual_max_i32 v7, v1, v255 +// GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_max_i32 v7, v1, v255 ; encoding: [0x04,0x71,0x21,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v1 :: v_dual_max_i32 v7, v255, v255 +// GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_max_i32 v7, v255, v255 ; encoding: [0x01,0x71,0x21,0xcf,0xff,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v255 :: v_dual_max_i32 v7, v2, v255 +// GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_max_i32 v7, v2, v255 ; encoding: [0xff,0x71,0x21,0xcf,0x02,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v2 :: v_dual_max_i32 v7, v3, v255 +// GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_max_i32 v7, v3, v255 ; encoding: [0x02,0x71,0x21,0xcf,0x03,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v3 :: v_dual_max_i32 v7, v4, v255 +// GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_max_i32 v7, v4, v255 ; encoding: [0x03,0x71,0x21,0xcf,0x04,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s105 :: v_dual_max_i32 v7, s1, v255 +// GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_max_i32 v7, s1, v255 ; encoding: [0x69,0x70,0x21,0xcf,0x01,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s1 :: v_dual_max_i32 v7, s105, v255 +// GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_max_i32 v7, s105, v255 ; encoding: [0x01,0x70,0x21,0xcf,0x69,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, ttmp15 :: v_dual_max_i32 v7, vcc_lo, v255 +// GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_max_i32 v7, vcc_lo, v255 ; encoding: [0x7b,0x70,0x21,0xcf,0x6a,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_hi :: v_dual_max_i32 v7, vcc_hi, v255 +// GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_max_i32 v7, vcc_hi, v255 ; encoding: [0x7f,0x70,0x21,0xcf,0x6b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_lo :: v_dual_max_i32 v7, ttmp15, v255 +// GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_max_i32 v7, ttmp15, v255 ; encoding: [0x7e,0x70,0x21,0xcf,0x7b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, m0 :: v_dual_max_i32 v7, m0, v255 +// GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_max_i32 v7, m0, v255 ; encoding: [0x7d,0x70,0x21,0xcf,0x7d,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_hi :: v_dual_max_i32 v7, exec_lo, v255 +// GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_max_i32 v7, exec_lo, v255 ; encoding: [0x6b,0x70,0x21,0xcf,0x7e,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_lo :: v_dual_max_i32 v7, exec_hi, v255 +// GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_max_i32 v7, exec_hi, v255 ; encoding: [0x6a,0x70,0x21,0xcf,0x7f,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_mov_b32 v255, src_scc :: v_dual_max_i32 v7, -1, v255 +// GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_max_i32 v7, -1, v255 ; encoding: [0xfd,0x70,0x21,0xcf,0xc1,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0.5 :: v_dual_max_i32 v7, 0.5, v3 +// GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_max_i32 v7, 0.5, v3 ; encoding: [0xf0,0x70,0x21,0xcf,0xf0,0x00,0x00,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, -1 :: v_dual_max_i32 v7, src_scc, v4 +// GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_max_i32 v7, src_scc, v4 ; encoding: [0xc1,0x70,0x21,0xcf,0xfd,0x00,0x00,0x00,0xff,0x04,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_max_i32 v7, v1, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_max_i32 v7, v1, v3 ; encoding: [0x04,0x71,0x1d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_max_i32 v7, v255, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_max_i32 v7, v255, v3 ; encoding: [0x01,0x71,0x1d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_max_i32 v7, v2, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_max_i32 v7, v2, v3 ; encoding: [0xff,0x71,0x1d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_max_i32 v7, v3, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_max_i32 v7, v3, v3 ; encoding: [0x02,0x71,0x1d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_max_i32 v7, v4, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_max_i32 v7, v4, v3 ; encoding: [0x03,0x71,0x1d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_max_i32 v7, s1, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_max_i32 v7, s1, v3 ; encoding: [0x69,0x70,0x1d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_max_i32 v7, s105, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_max_i32 v7, s105, v3 ; encoding: [0x01,0x70,0x1d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_max_i32 v7, vcc_lo, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_max_i32 v7, vcc_lo, v3 ; encoding: [0x7b,0x70,0x1d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_max_i32 v7, vcc_hi, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_max_i32 v7, vcc_hi, v3 ; encoding: [0x7f,0x70,0x1d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_max_i32 v7, ttmp15, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_max_i32 v7, ttmp15, v3 ; encoding: [0x7e,0x70,0x1d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_max_i32 v7, m0, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_max_i32 v7, m0, v3 ; encoding: 
[0x7d,0x70,0x1d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_max_i32 v7, exec_lo, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_max_i32 v7, exec_lo, v3 ; encoding: [0x6b,0x70,0x1d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_max_i32 v7, exec_hi, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_max_i32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x1d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_max_i32 v7, -1, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_max_i32 v7, -1, v3 ; encoding: [0xfd,0x70,0x1d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_max_i32 v7, 0.5, v2 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_max_i32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x1d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_max_i32 v7, src_scc, v5 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_max_i32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x1d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v4, v2 :: v_dual_max_i32 v7, v1, v3 +// GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_max_i32 v7, v1, v3 ; encoding: [0x04,0x71,0x0d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v1, v2 :: v_dual_max_i32 v7, v255, v3 +// GFX1250: 
v_dual_mul_f32 v255, v1, v2 :: v_dual_max_i32 v7, v255, v3 ; encoding: [0x01,0x71,0x0d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v255, v2 :: v_dual_max_i32 v7, v2, v3 +// GFX1250: v_dual_mul_f32 v255, v255, v2 :: v_dual_max_i32 v7, v2, v3 ; encoding: [0xff,0x71,0x0d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v2, v2 :: v_dual_max_i32 v7, v3, v3 +// GFX1250: v_dual_mul_f32 v255, v2, v2 :: v_dual_max_i32 v7, v3, v3 ; encoding: [0x02,0x71,0x0d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v3, v2 :: v_dual_max_i32 v7, v4, v3 +// GFX1250: v_dual_mul_f32 v255, v3, v2 :: v_dual_max_i32 v7, v4, v3 ; encoding: [0x03,0x71,0x0d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s105, v2 :: v_dual_max_i32 v7, s1, v3 +// GFX1250: v_dual_mul_f32 v255, s105, v2 :: v_dual_max_i32 v7, s1, v3 ; encoding: [0x69,0x70,0x0d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s1, v2 :: v_dual_max_i32 v7, s105, v3 +// GFX1250: v_dual_mul_f32 v255, s1, v2 :: v_dual_max_i32 v7, s105, v3 ; encoding: [0x01,0x70,0x0d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_max_i32 v7, vcc_lo, v3 +// GFX1250: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_max_i32 v7, vcc_lo, v3 ; encoding: [0x7b,0x70,0x0d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_max_i32 v7, vcc_hi, v3 +// GFX1250: v_dual_mul_f32 v255, exec_hi, v2 :: 
v_dual_max_i32 v7, vcc_hi, v3 ; encoding: [0x7f,0x70,0x0d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_max_i32 v7, ttmp15, v3 +// GFX1250: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_max_i32 v7, ttmp15, v3 ; encoding: [0x7e,0x70,0x0d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, m0, v2 :: v_dual_max_i32 v7, m0, v3 +// GFX1250: v_dual_mul_f32 v255, m0, v2 :: v_dual_max_i32 v7, m0, v3 ; encoding: [0x7d,0x70,0x0d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_max_i32 v7, exec_lo, v3 +// GFX1250: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_max_i32 v7, exec_lo, v3 ; encoding: [0x6b,0x70,0x0d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_max_i32 v7, exec_hi, v3 +// GFX1250: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_max_i32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x0d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, src_scc, v2 :: v_dual_max_i32 v7, -1, v3 +// GFX1250: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_max_i32 v7, -1, v3 ; encoding: [0xfd,0x70,0x0d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0.5, v3 :: v_dual_max_i32 v7, 0.5, v2 +// GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_max_i32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x0d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, -1, v4 :: v_dual_max_i32 v7, src_scc, v5 +// GFX1250: v_dual_mul_f32 v255, -1, v4 :: 
v_dual_max_i32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x0d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v4, v2 :: v_dual_max_i32 v7, v1, v3 +// GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_max_i32 v7, v1, v3 ; encoding: [0x04,0x71,0x15,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v1, v2 :: v_dual_max_i32 v7, v255, v3 +// GFX1250: v_dual_sub_f32 v255, v1, v2 :: v_dual_max_i32 v7, v255, v3 ; encoding: [0x01,0x71,0x15,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v255, v2 :: v_dual_max_i32 v7, v2, v3 +// GFX1250: v_dual_sub_f32 v255, v255, v2 :: v_dual_max_i32 v7, v2, v3 ; encoding: [0xff,0x71,0x15,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v2, v2 :: v_dual_max_i32 v7, v3, v3 +// GFX1250: v_dual_sub_f32 v255, v2, v2 :: v_dual_max_i32 v7, v3, v3 ; encoding: [0x02,0x71,0x15,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v3, v2 :: v_dual_max_i32 v7, v4, v3 +// GFX1250: v_dual_sub_f32 v255, v3, v2 :: v_dual_max_i32 v7, v4, v3 ; encoding: [0x03,0x71,0x15,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s105, v2 :: v_dual_max_i32 v7, s1, v3 +// GFX1250: v_dual_sub_f32 v255, s105, v2 :: v_dual_max_i32 v7, s1, v3 ; encoding: [0x69,0x70,0x15,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s1, v2 :: v_dual_max_i32 v7, s105, v3 +// GFX1250: v_dual_sub_f32 v255, s1, v2 :: v_dual_max_i32 v7, s105, v3 ; encoding: 
[0x01,0x70,0x15,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_max_i32 v7, vcc_lo, v3 +// GFX1250: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_max_i32 v7, vcc_lo, v3 ; encoding: [0x7b,0x70,0x15,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_max_i32 v7, vcc_hi, v3 +// GFX1250: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_max_i32 v7, vcc_hi, v3 ; encoding: [0x7f,0x70,0x15,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_max_i32 v7, ttmp15, v3 +// GFX1250: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_max_i32 v7, ttmp15, v3 ; encoding: [0x7e,0x70,0x15,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, m0, v2 :: v_dual_max_i32 v7, m0, v3 +// GFX1250: v_dual_sub_f32 v255, m0, v2 :: v_dual_max_i32 v7, m0, v3 ; encoding: [0x7d,0x70,0x15,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_max_i32 v7, exec_lo, v3 +// GFX1250: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_max_i32 v7, exec_lo, v3 ; encoding: [0x6b,0x70,0x15,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_max_i32 v7, exec_hi, v3 +// GFX1250: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_max_i32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x15,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, src_scc, v2 :: v_dual_max_i32 v7, -1, v3 +// GFX1250: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_max_i32 v7, -1, v3 ; 
encoding: [0xfd,0x70,0x15,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0.5, v3 :: v_dual_max_i32 v7, 0.5, v2 +// GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_max_i32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x15,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, -1, v4 :: v_dual_max_i32 v7, src_scc, v5 +// GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_max_i32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x15,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v4, v2 :: v_dual_max_i32 v7, v1, v3 +// GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_max_i32 v7, v1, v3 ; encoding: [0x04,0x71,0x19,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v1, v2 :: v_dual_max_i32 v7, v255, v3 +// GFX1250: v_dual_subrev_f32 v255, v1, v2 :: v_dual_max_i32 v7, v255, v3 ; encoding: [0x01,0x71,0x19,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v255, v2 :: v_dual_max_i32 v7, v2, v3 +// GFX1250: v_dual_subrev_f32 v255, v255, v2 :: v_dual_max_i32 v7, v2, v3 ; encoding: [0xff,0x71,0x19,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v2, v2 :: v_dual_max_i32 v7, v3, v3 +// GFX1250: v_dual_subrev_f32 v255, v2, v2 :: v_dual_max_i32 v7, v3, v3 ; encoding: [0x02,0x71,0x19,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v3, v2 :: v_dual_max_i32 v7, v4, v3 +// GFX1250: v_dual_subrev_f32 v255, v3, v2 :: v_dual_max_i32 v7, v4, v3 ; encoding: 
[0x03,0x71,0x19,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s105, v2 :: v_dual_max_i32 v7, s1, v3 +// GFX1250: v_dual_subrev_f32 v255, s105, v2 :: v_dual_max_i32 v7, s1, v3 ; encoding: [0x69,0x70,0x19,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s1, v2 :: v_dual_max_i32 v7, s105, v3 +// GFX1250: v_dual_subrev_f32 v255, s1, v2 :: v_dual_max_i32 v7, s105, v3 ; encoding: [0x01,0x70,0x19,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_max_i32 v7, vcc_lo, v3 +// GFX1250: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_max_i32 v7, vcc_lo, v3 ; encoding: [0x7b,0x70,0x19,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_max_i32 v7, vcc_hi, v3 +// GFX1250: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_max_i32 v7, vcc_hi, v3 ; encoding: [0x7f,0x70,0x19,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_max_i32 v7, ttmp15, v3 +// GFX1250: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_max_i32 v7, ttmp15, v3 ; encoding: [0x7e,0x70,0x19,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, m0, v2 :: v_dual_max_i32 v7, m0, v3 +// GFX1250: v_dual_subrev_f32 v255, m0, v2 :: v_dual_max_i32 v7, m0, v3 ; encoding: [0x7d,0x70,0x19,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_max_i32 v7, exec_lo, v3 +// GFX1250: v_dual_subrev_f32 v255, vcc_hi, v2 :: 
v_dual_max_i32 v7, exec_lo, v3 ; encoding: [0x6b,0x70,0x19,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_max_i32 v7, exec_hi, v3 +// GFX1250: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_max_i32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x19,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_max_i32 v7, -1, v3 +// GFX1250: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_max_i32 v7, -1, v3 ; encoding: [0xfd,0x70,0x19,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_max_i32 v7, 0.5, v2 +// GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_max_i32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x19,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, -1, v4 :: v_dual_max_i32 v7, src_scc, v5 +// GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_max_i32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x19,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v4, v2 :: v_dual_min_i32 v7, v1, v3 +// GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_min_i32 v7, v1, v3 ; encoding: [0x04,0x81,0x11,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v1, v2 :: v_dual_min_i32 v7, v255, v3 +// GFX1250: v_dual_add_f32 v255, v1, v2 :: v_dual_min_i32 v7, v255, v3 ; encoding: [0x01,0x81,0x11,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v255, v2 :: v_dual_min_i32 v7, v2, v3 +// GFX1250: v_dual_add_f32 v255, v255, v2 :: 
v_dual_min_i32 v7, v2, v3 ; encoding: [0xff,0x81,0x11,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v2, v2 :: v_dual_min_i32 v7, v3, v3 +// GFX1250: v_dual_add_f32 v255, v2, v2 :: v_dual_min_i32 v7, v3, v3 ; encoding: [0x02,0x81,0x11,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v3, v2 :: v_dual_min_i32 v7, v4, v3 +// GFX1250: v_dual_add_f32 v255, v3, v2 :: v_dual_min_i32 v7, v4, v3 ; encoding: [0x03,0x81,0x11,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s105, v2 :: v_dual_min_i32 v7, s1, v3 +// GFX1250: v_dual_add_f32 v255, s105, v2 :: v_dual_min_i32 v7, s1, v3 ; encoding: [0x69,0x80,0x11,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s1, v2 :: v_dual_min_i32 v7, s105, v3 +// GFX1250: v_dual_add_f32 v255, s1, v2 :: v_dual_min_i32 v7, s105, v3 ; encoding: [0x01,0x80,0x11,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, ttmp15, v2 :: v_dual_min_i32 v7, vcc_lo, v3 +// GFX1250: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_min_i32 v7, vcc_lo, v3 ; encoding: [0x7b,0x80,0x11,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_hi, v2 :: v_dual_min_i32 v7, vcc_hi, v3 +// GFX1250: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_min_i32 v7, vcc_hi, v3 ; encoding: [0x7f,0x80,0x11,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_lo, v2 :: v_dual_min_i32 v7, ttmp15, v3 +// GFX1250: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_min_i32 v7, ttmp15, v3 ; 
encoding: [0x7e,0x80,0x11,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, m0, v2 :: v_dual_min_i32 v7, m0, v3 +// GFX1250: v_dual_add_f32 v255, m0, v2 :: v_dual_min_i32 v7, m0, v3 ; encoding: [0x7d,0x80,0x11,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_min_i32 v7, exec_lo, v3 +// GFX1250: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_min_i32 v7, exec_lo, v3 ; encoding: [0x6b,0x80,0x11,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_min_i32 v7, exec_hi, v3 +// GFX1250: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_min_i32 v7, exec_hi, v3 ; encoding: [0x6a,0x80,0x11,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, src_scc, v2 :: v_dual_min_i32 v7, -1, v3 +// GFX1250: v_dual_add_f32 v255, src_scc, v2 :: v_dual_min_i32 v7, -1, v3 ; encoding: [0xfd,0x80,0x11,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0.5, v3 :: v_dual_min_i32 v7, 0.5, v2 +// GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_min_i32 v7, 0.5, v2 ; encoding: [0xf0,0x80,0x11,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, -1, v4 :: v_dual_min_i32 v7, src_scc, v5 +// GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_min_i32 v7, src_scc, v5 ; encoding: [0xc1,0x80,0x11,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_min_i32 v7, v1, v3 +// GFX1250: v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_min_i32 v7, v1, v3 ; 
encoding: [0x04,0x81,0x25,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v1, v2, vcc_lo :: v_dual_min_i32 v7, v255, v3 +// GFX1250: v_dual_cndmask_b32 v255, v1, v2, vcc_lo :: v_dual_min_i32 v7, v255, v3 ; encoding: [0x01,0x81,0x25,0xcf,0xff,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v255, v2, vcc_lo :: v_dual_min_i32 v7, v2, v3 +// GFX1250: v_dual_cndmask_b32 v255, v255, v2, vcc_lo :: v_dual_min_i32 v7, v2, v3 ; encoding: [0xff,0x81,0x25,0xcf,0x02,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v2, v2, vcc_lo :: v_dual_min_i32 v7, v3, v3 +// GFX1250: v_dual_cndmask_b32 v255, v2, v2, vcc_lo :: v_dual_min_i32 v7, v3, v3 ; encoding: [0x02,0x81,0x25,0xcf,0x03,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v3, v2, vcc_lo :: v_dual_min_i32 v7, v4, v3 +// GFX1250: v_dual_cndmask_b32 v255, v3, v2, vcc_lo :: v_dual_min_i32 v7, v4, v3 ; encoding: [0x03,0x81,0x25,0xcf,0x04,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s105, v2, vcc_lo :: v_dual_min_i32 v7, s105, v3 +// GFX1250: v_dual_cndmask_b32 v255, s105, v2, vcc_lo :: v_dual_min_i32 v7, s105, v3 ; encoding: [0x69,0x80,0x25,0xcf,0x69,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s1, v2, vcc_lo :: v_dual_min_i32 v7, s1, v3 +// GFX1250: v_dual_cndmask_b32 v255, s1, v2, vcc_lo :: v_dual_min_i32 v7, s1, v3 ; encoding: [0x01,0x80,0x25,0xcf,0x01,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, ttmp15, v2, vcc_lo :: 
v_dual_min_i32 v7, ttmp15, v3 +// GFX1250: v_dual_cndmask_b32 v255, ttmp15, v2, vcc_lo :: v_dual_min_i32 v7, ttmp15, v3 ; encoding: [0x7b,0x80,0x25,0xcf,0x7b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_hi, v2, vcc_lo :: v_dual_min_i32 v7, exec_hi, v3 +// GFX1250: v_dual_cndmask_b32 v255, exec_hi, v2, vcc_lo :: v_dual_min_i32 v7, exec_hi, v3 ; encoding: [0x7f,0x80,0x25,0xcf,0x7f,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_lo, v2, vcc_lo :: v_dual_min_i32 v7, exec_lo, v3 +// GFX1250: v_dual_cndmask_b32 v255, exec_lo, v2, vcc_lo :: v_dual_min_i32 v7, exec_lo, v3 ; encoding: [0x7e,0x80,0x25,0xcf,0x7e,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, m0, v2, vcc_lo :: v_dual_min_i32 v7, m0, v3 +// GFX1250: v_dual_cndmask_b32 v255, m0, v2, vcc_lo :: v_dual_min_i32 v7, m0, v3 ; encoding: [0x7d,0x80,0x25,0xcf,0x7d,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_hi, v2, vcc_lo :: v_dual_min_i32 v7, vcc_hi, v3 +// GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v2, vcc_lo :: v_dual_min_i32 v7, vcc_hi, v3 ; encoding: [0x6b,0x80,0x25,0xcf,0x6b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_lo, v2, vcc_lo :: v_dual_min_i32 v7, vcc_lo, v3 +// GFX1250: v_dual_cndmask_b32 v255, vcc_lo, v2, vcc_lo :: v_dual_min_i32 v7, vcc_lo, v3 ; encoding: [0x6a,0x80,0x25,0xcf,0x6a,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, src_scc, v2, vcc_lo :: v_dual_min_i32 v7, -1, v3 +// GFX1250: v_dual_cndmask_b32 v255, src_scc, v2, vcc_lo :: v_dual_min_i32 v7, -1, v3 ; encoding: 
[0xfd,0x80,0x25,0xcf,0xc1,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_min_i32 v7, 0.5, v2 +// GFX1250: v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_min_i32 v7, 0.5, v2 ; encoding: [0xf0,0x80,0x25,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_min_i32 v7, src_scc, v5 +// GFX1250: v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_min_i32 v7, src_scc, v5 ; encoding: [0xc1,0x80,0x25,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v4, v2 :: v_dual_min_i32 v7, v1, v3 +// GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_min_i32 v7, v1, v3 ; encoding: [0x04,0x81,0x01,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v1, v2 :: v_dual_min_i32 v7, v255, v3 +// GFX1250: v_dual_fmac_f32 v255, v1, v2 :: v_dual_min_i32 v7, v255, v3 ; encoding: [0x01,0x81,0x01,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v255, v2 :: v_dual_min_i32 v7, v2, v3 +// GFX1250: v_dual_fmac_f32 v255, v255, v2 :: v_dual_min_i32 v7, v2, v3 ; encoding: [0xff,0x81,0x01,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v2, v2 :: v_dual_min_i32 v7, v3, v3 +// GFX1250: v_dual_fmac_f32 v255, v2, v2 :: v_dual_min_i32 v7, v3, v3 ; encoding: [0x02,0x81,0x01,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v3, v2 :: v_dual_min_i32 v7, v4, v3 +// GFX1250: v_dual_fmac_f32 v255, v3, v2 :: v_dual_min_i32 v7, v4, v3 ; encoding: 
[0x03,0x81,0x01,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s105, v2 :: v_dual_min_i32 v7, s1, v3 +// GFX1250: v_dual_fmac_f32 v255, s105, v2 :: v_dual_min_i32 v7, s1, v3 ; encoding: [0x69,0x80,0x01,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s1, v2 :: v_dual_min_i32 v7, s105, v3 +// GFX1250: v_dual_fmac_f32 v255, s1, v2 :: v_dual_min_i32 v7, s105, v3 ; encoding: [0x01,0x80,0x01,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_min_i32 v7, vcc_lo, v3 +// GFX1250: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_min_i32 v7, vcc_lo, v3 ; encoding: [0x7b,0x80,0x01,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_min_i32 v7, vcc_hi, v3 +// GFX1250: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_min_i32 v7, vcc_hi, v3 ; encoding: [0x7f,0x80,0x01,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_min_i32 v7, ttmp15, v3 +// GFX1250: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_min_i32 v7, ttmp15, v3 ; encoding: [0x7e,0x80,0x01,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, m0, v2 :: v_dual_min_i32 v7, m0, v3 +// GFX1250: v_dual_fmac_f32 v255, m0, v2 :: v_dual_min_i32 v7, m0, v3 ; encoding: [0x7d,0x80,0x01,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_min_i32 v7, exec_lo, v3 +// GFX1250: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_min_i32 v7, exec_lo, v3 ; 
encoding: [0x6b,0x80,0x01,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_min_i32 v7, exec_hi, v3 +// GFX1250: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_min_i32 v7, exec_hi, v3 ; encoding: [0x6a,0x80,0x01,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_min_i32 v7, -1, v3 +// GFX1250: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_min_i32 v7, -1, v3 ; encoding: [0xfd,0x80,0x01,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_min_i32 v7, 0.5, v2 +// GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_min_i32 v7, 0.5, v2 ; encoding: [0xf0,0x80,0x01,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, -1, v4 :: v_dual_min_i32 v7, src_scc, v5 +// GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_min_i32 v7, src_scc, v5 ; encoding: [0xc1,0x80,0x01,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v4, v2 :: v_dual_min_i32 v7, v1, v3 +// GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_min_i32 v7, v1, v3 ; encoding: [0x04,0x81,0x29,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v1, v2 :: v_dual_min_i32 v7, v255, v3 +// GFX1250: v_dual_max_num_f32 v255, v1, v2 :: v_dual_min_i32 v7, v255, v3 ; encoding: [0x01,0x81,0x29,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v255, v2 :: v_dual_min_i32 v7, v2, v3 +// GFX1250: v_dual_max_num_f32 v255, v255, v2 :: v_dual_min_i32 v7, v2, v3 ; 
encoding: [0xff,0x81,0x29,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v2, v2 :: v_dual_min_i32 v7, v3, v3 +// GFX1250: v_dual_max_num_f32 v255, v2, v2 :: v_dual_min_i32 v7, v3, v3 ; encoding: [0x02,0x81,0x29,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v3, v2 :: v_dual_min_i32 v7, v4, v3 +// GFX1250: v_dual_max_num_f32 v255, v3, v2 :: v_dual_min_i32 v7, v4, v3 ; encoding: [0x03,0x81,0x29,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s105, v2 :: v_dual_min_i32 v7, s1, v3 +// GFX1250: v_dual_max_num_f32 v255, s105, v2 :: v_dual_min_i32 v7, s1, v3 ; encoding: [0x69,0x80,0x29,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s1, v2 :: v_dual_min_i32 v7, s105, v3 +// GFX1250: v_dual_max_num_f32 v255, s1, v2 :: v_dual_min_i32 v7, s105, v3 ; encoding: [0x01,0x80,0x29,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_min_i32 v7, vcc_lo, v3 +// GFX1250: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_min_i32 v7, vcc_lo, v3 ; encoding: [0x7b,0x80,0x29,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_min_i32 v7, vcc_hi, v3 +// GFX1250: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_min_i32 v7, vcc_hi, v3 ; encoding: [0x7f,0x80,0x29,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_min_i32 v7, ttmp15, v3 +// GFX1250: v_dual_max_num_f32 v255, exec_lo, v2 :: 
v_dual_min_i32 v7, ttmp15, v3 ; encoding: [0x7e,0x80,0x29,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, m0, v2 :: v_dual_min_i32 v7, m0, v3 +// GFX1250: v_dual_max_num_f32 v255, m0, v2 :: v_dual_min_i32 v7, m0, v3 ; encoding: [0x7d,0x80,0x29,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_min_i32 v7, exec_lo, v3 +// GFX1250: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_min_i32 v7, exec_lo, v3 ; encoding: [0x6b,0x80,0x29,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_min_i32 v7, exec_hi, v3 +// GFX1250: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_min_i32 v7, exec_hi, v3 ; encoding: [0x6a,0x80,0x29,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_min_i32 v7, -1, v3 +// GFX1250: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_min_i32 v7, -1, v3 ; encoding: [0xfd,0x80,0x29,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_min_i32 v7, 0.5, v2 +// GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_min_i32 v7, 0.5, v2 ; encoding: [0xf0,0x80,0x29,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, -1, v4 :: v_dual_min_i32 v7, src_scc, v5 +// GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_min_i32 v7, src_scc, v5 ; encoding: [0xc1,0x80,0x29,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v4, v2 :: v_dual_min_i32 v7, v1, v3 +// GFX1250: 
v_dual_min_num_f32 v255, v4, v2 :: v_dual_min_i32 v7, v1, v3 ; encoding: [0x04,0x81,0x2d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v1, v2 :: v_dual_min_i32 v7, v255, v3 +// GFX1250: v_dual_min_num_f32 v255, v1, v2 :: v_dual_min_i32 v7, v255, v3 ; encoding: [0x01,0x81,0x2d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v255, v2 :: v_dual_min_i32 v7, v2, v3 +// GFX1250: v_dual_min_num_f32 v255, v255, v2 :: v_dual_min_i32 v7, v2, v3 ; encoding: [0xff,0x81,0x2d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v2, v2 :: v_dual_min_i32 v7, v3, v3 +// GFX1250: v_dual_min_num_f32 v255, v2, v2 :: v_dual_min_i32 v7, v3, v3 ; encoding: [0x02,0x81,0x2d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v3, v2 :: v_dual_min_i32 v7, v4, v3 +// GFX1250: v_dual_min_num_f32 v255, v3, v2 :: v_dual_min_i32 v7, v4, v3 ; encoding: [0x03,0x81,0x2d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s105, v2 :: v_dual_min_i32 v7, s1, v3 +// GFX1250: v_dual_min_num_f32 v255, s105, v2 :: v_dual_min_i32 v7, s1, v3 ; encoding: [0x69,0x80,0x2d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s1, v2 :: v_dual_min_i32 v7, s105, v3 +// GFX1250: v_dual_min_num_f32 v255, s1, v2 :: v_dual_min_i32 v7, s105, v3 ; encoding: [0x01,0x80,0x2d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_min_i32 v7, vcc_lo, v3 +// GFX1250: 
v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_min_i32 v7, vcc_lo, v3 ; encoding: [0x7b,0x80,0x2d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_min_i32 v7, vcc_hi, v3 +// GFX1250: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_min_i32 v7, vcc_hi, v3 ; encoding: [0x7f,0x80,0x2d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_min_i32 v7, ttmp15, v3 +// GFX1250: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_min_i32 v7, ttmp15, v3 ; encoding: [0x7e,0x80,0x2d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, m0, v2 :: v_dual_min_i32 v7, m0, v3 +// GFX1250: v_dual_min_num_f32 v255, m0, v2 :: v_dual_min_i32 v7, m0, v3 ; encoding: [0x7d,0x80,0x2d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_min_i32 v7, exec_lo, v3 +// GFX1250: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_min_i32 v7, exec_lo, v3 ; encoding: [0x6b,0x80,0x2d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_min_i32 v7, exec_hi, v3 +// GFX1250: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_min_i32 v7, exec_hi, v3 ; encoding: [0x6a,0x80,0x2d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_min_i32 v7, -1, v3 +// GFX1250: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_min_i32 v7, -1, v3 ; encoding: [0xfd,0x80,0x2d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_min_i32 v7, 0.5, v2 +// GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_min_i32 v7, 0.5, v2 ; encoding: [0xf0,0x80,0x2d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, -1, v4 :: v_dual_min_i32 v7, src_scc, v5 +// GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_min_i32 v7, src_scc, v5 ; encoding: [0xc1,0x80,0x2d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v4 :: v_dual_min_i32 v7, v1, v255 +// GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_min_i32 v7, v1, v255 ; encoding: [0x04,0x81,0x21,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v1 :: v_dual_min_i32 v7, v255, v255 +// GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_min_i32 v7, v255, v255 ; encoding: [0x01,0x81,0x21,0xcf,0xff,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v255 :: v_dual_min_i32 v7, v2, v255 +// GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_min_i32 v7, v2, v255 ; encoding: [0xff,0x81,0x21,0xcf,0x02,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v2 :: v_dual_min_i32 v7, v3, v255 +// GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_min_i32 v7, v3, v255 ; encoding: [0x02,0x81,0x21,0xcf,0x03,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v3 :: v_dual_min_i32 v7, v4, v255 +// GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_min_i32 v7, v4, v255 ; encoding: [0x03,0x81,0x21,0xcf,0x04,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s105 :: v_dual_min_i32 v7, s1, v255 +// GFX1250: 
v_dual_mov_b32 v255, s105 :: v_dual_min_i32 v7, s1, v255 ; encoding: [0x69,0x80,0x21,0xcf,0x01,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s1 :: v_dual_min_i32 v7, s105, v255 +// GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_min_i32 v7, s105, v255 ; encoding: [0x01,0x80,0x21,0xcf,0x69,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, ttmp15 :: v_dual_min_i32 v7, vcc_lo, v255 +// GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_min_i32 v7, vcc_lo, v255 ; encoding: [0x7b,0x80,0x21,0xcf,0x6a,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_hi :: v_dual_min_i32 v7, vcc_hi, v255 +// GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_min_i32 v7, vcc_hi, v255 ; encoding: [0x7f,0x80,0x21,0xcf,0x6b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_lo :: v_dual_min_i32 v7, ttmp15, v255 +// GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_min_i32 v7, ttmp15, v255 ; encoding: [0x7e,0x80,0x21,0xcf,0x7b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, m0 :: v_dual_min_i32 v7, m0, v255 +// GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_min_i32 v7, m0, v255 ; encoding: [0x7d,0x80,0x21,0xcf,0x7d,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_hi :: v_dual_min_i32 v7, exec_lo, v255 +// GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_min_i32 v7, exec_lo, v255 ; encoding: [0x6b,0x80,0x21,0xcf,0x7e,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_lo :: v_dual_min_i32 v7, exec_hi, v255 +// GFX1250: v_dual_mov_b32 v255, vcc_lo :: 
v_dual_min_i32 v7, exec_hi, v255 ; encoding: [0x6a,0x80,0x21,0xcf,0x7f,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, src_scc :: v_dual_min_i32 v7, -1, v255 +// GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_min_i32 v7, -1, v255 ; encoding: [0xfd,0x80,0x21,0xcf,0xc1,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0.5 :: v_dual_min_i32 v7, 0.5, v3 +// GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_min_i32 v7, 0.5, v3 ; encoding: [0xf0,0x80,0x21,0xcf,0xf0,0x00,0x00,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, -1 :: v_dual_min_i32 v7, src_scc, v4 +// GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_min_i32 v7, src_scc, v4 ; encoding: [0xc1,0x80,0x21,0xcf,0xfd,0x00,0x00,0x00,0xff,0x04,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_min_i32 v7, v1, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_min_i32 v7, v1, v3 ; encoding: [0x04,0x81,0x1d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_min_i32 v7, v255, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_min_i32 v7, v255, v3 ; encoding: [0x01,0x81,0x1d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_min_i32 v7, v2, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_min_i32 v7, v2, v3 ; encoding: [0xff,0x81,0x1d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_min_i32 v7, v3, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v2 
:: v_dual_min_i32 v7, v3, v3 ; encoding: [0x02,0x81,0x1d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_min_i32 v7, v4, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_min_i32 v7, v4, v3 ; encoding: [0x03,0x81,0x1d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_min_i32 v7, s1, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_min_i32 v7, s1, v3 ; encoding: [0x69,0x80,0x1d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_min_i32 v7, s105, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_min_i32 v7, s105, v3 ; encoding: [0x01,0x80,0x1d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_min_i32 v7, vcc_lo, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_min_i32 v7, vcc_lo, v3 ; encoding: [0x7b,0x80,0x1d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_min_i32 v7, vcc_hi, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_min_i32 v7, vcc_hi, v3 ; encoding: [0x7f,0x80,0x1d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_min_i32 v7, ttmp15, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_min_i32 v7, ttmp15, v3 ; encoding: [0x7e,0x80,0x1d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_min_i32 v7, m0, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_min_i32 v7, m0, v3 ; encoding: [0x7d,0x80,0x1d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_min_i32 v7, exec_lo, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_min_i32 v7, exec_lo, v3 ; encoding: [0x6b,0x80,0x1d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_min_i32 v7, exec_hi, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_min_i32 v7, exec_hi, v3 ; encoding: [0x6a,0x80,0x1d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_min_i32 v7, -1, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_min_i32 v7, -1, v3 ; encoding: [0xfd,0x80,0x1d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_min_i32 v7, 0.5, v2 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_min_i32 v7, 0.5, v2 ; encoding: [0xf0,0x80,0x1d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_min_i32 v7, src_scc, v5 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_min_i32 v7, src_scc, v5 ; encoding: [0xc1,0x80,0x1d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v4, v2 :: v_dual_min_i32 v7, v1, v3 +// GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_min_i32 v7, v1, v3 ; encoding: 
[0x04,0x81,0x0d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v1, v2 :: v_dual_min_i32 v7, v255, v3 +// GFX1250: v_dual_mul_f32 v255, v1, v2 :: v_dual_min_i32 v7, v255, v3 ; encoding: [0x01,0x81,0x0d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v255, v2 :: v_dual_min_i32 v7, v2, v3 +// GFX1250: v_dual_mul_f32 v255, v255, v2 :: v_dual_min_i32 v7, v2, v3 ; encoding: [0xff,0x81,0x0d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v2, v2 :: v_dual_min_i32 v7, v3, v3 +// GFX1250: v_dual_mul_f32 v255, v2, v2 :: v_dual_min_i32 v7, v3, v3 ; encoding: [0x02,0x81,0x0d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v3, v2 :: v_dual_min_i32 v7, v4, v3 +// GFX1250: v_dual_mul_f32 v255, v3, v2 :: v_dual_min_i32 v7, v4, v3 ; encoding: [0x03,0x81,0x0d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s105, v2 :: v_dual_min_i32 v7, s1, v3 +// GFX1250: v_dual_mul_f32 v255, s105, v2 :: v_dual_min_i32 v7, s1, v3 ; encoding: [0x69,0x80,0x0d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s1, v2 :: v_dual_min_i32 v7, s105, v3 +// GFX1250: v_dual_mul_f32 v255, s1, v2 :: v_dual_min_i32 v7, s105, v3 ; encoding: [0x01,0x80,0x0d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_min_i32 v7, vcc_lo, v3 +// GFX1250: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_min_i32 v7, vcc_lo, v3 ; encoding: 
[0x7b,0x80,0x0d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_min_i32 v7, vcc_hi, v3 +// GFX1250: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_min_i32 v7, vcc_hi, v3 ; encoding: [0x7f,0x80,0x0d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_min_i32 v7, ttmp15, v3 +// GFX1250: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_min_i32 v7, ttmp15, v3 ; encoding: [0x7e,0x80,0x0d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, m0, v2 :: v_dual_min_i32 v7, m0, v3 +// GFX1250: v_dual_mul_f32 v255, m0, v2 :: v_dual_min_i32 v7, m0, v3 ; encoding: [0x7d,0x80,0x0d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_min_i32 v7, exec_lo, v3 +// GFX1250: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_min_i32 v7, exec_lo, v3 ; encoding: [0x6b,0x80,0x0d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_min_i32 v7, exec_hi, v3 +// GFX1250: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_min_i32 v7, exec_hi, v3 ; encoding: [0x6a,0x80,0x0d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, src_scc, v2 :: v_dual_min_i32 v7, -1, v3 +// GFX1250: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_min_i32 v7, -1, v3 ; encoding: [0xfd,0x80,0x0d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0.5, v3 :: v_dual_min_i32 v7, 0.5, v2 +// GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_min_i32 v7, 0.5, v2 ; encoding: 
[0xf0,0x80,0x0d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, -1, v4 :: v_dual_min_i32 v7, src_scc, v5 +// GFX1250: v_dual_mul_f32 v255, -1, v4 :: v_dual_min_i32 v7, src_scc, v5 ; encoding: [0xc1,0x80,0x0d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v4, v2 :: v_dual_min_i32 v7, v1, v3 +// GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_min_i32 v7, v1, v3 ; encoding: [0x04,0x81,0x15,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v1, v2 :: v_dual_min_i32 v7, v255, v3 +// GFX1250: v_dual_sub_f32 v255, v1, v2 :: v_dual_min_i32 v7, v255, v3 ; encoding: [0x01,0x81,0x15,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v255, v2 :: v_dual_min_i32 v7, v2, v3 +// GFX1250: v_dual_sub_f32 v255, v255, v2 :: v_dual_min_i32 v7, v2, v3 ; encoding: [0xff,0x81,0x15,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v2, v2 :: v_dual_min_i32 v7, v3, v3 +// GFX1250: v_dual_sub_f32 v255, v2, v2 :: v_dual_min_i32 v7, v3, v3 ; encoding: [0x02,0x81,0x15,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v3, v2 :: v_dual_min_i32 v7, v4, v3 +// GFX1250: v_dual_sub_f32 v255, v3, v2 :: v_dual_min_i32 v7, v4, v3 ; encoding: [0x03,0x81,0x15,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s105, v2 :: v_dual_min_i32 v7, s1, v3 +// GFX1250: v_dual_sub_f32 v255, s105, v2 :: v_dual_min_i32 v7, s1, v3 ; encoding: [0x69,0x80,0x15,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// 
W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s1, v2 :: v_dual_min_i32 v7, s105, v3 +// GFX1250: v_dual_sub_f32 v255, s1, v2 :: v_dual_min_i32 v7, s105, v3 ; encoding: [0x01,0x80,0x15,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_min_i32 v7, vcc_lo, v3 +// GFX1250: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_min_i32 v7, vcc_lo, v3 ; encoding: [0x7b,0x80,0x15,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_min_i32 v7, vcc_hi, v3 +// GFX1250: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_min_i32 v7, vcc_hi, v3 ; encoding: [0x7f,0x80,0x15,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_min_i32 v7, ttmp15, v3 +// GFX1250: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_min_i32 v7, ttmp15, v3 ; encoding: [0x7e,0x80,0x15,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, m0, v2 :: v_dual_min_i32 v7, m0, v3 +// GFX1250: v_dual_sub_f32 v255, m0, v2 :: v_dual_min_i32 v7, m0, v3 ; encoding: [0x7d,0x80,0x15,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_min_i32 v7, exec_lo, v3 +// GFX1250: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_min_i32 v7, exec_lo, v3 ; encoding: [0x6b,0x80,0x15,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_min_i32 v7, exec_hi, v3 +// GFX1250: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_min_i32 v7, exec_hi, v3 ; encoding: [0x6a,0x80,0x15,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, src_scc, v2 :: v_dual_min_i32 v7, -1, v3 +// GFX1250: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_min_i32 v7, -1, v3 ; encoding: [0xfd,0x80,0x15,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0.5, v3 :: v_dual_min_i32 v7, 0.5, v2 +// GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_min_i32 v7, 0.5, v2 ; encoding: [0xf0,0x80,0x15,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, -1, v4 :: v_dual_min_i32 v7, src_scc, v5 +// GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_min_i32 v7, src_scc, v5 ; encoding: [0xc1,0x80,0x15,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v4, v2 :: v_dual_min_i32 v7, v1, v3 +// GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_min_i32 v7, v1, v3 ; encoding: [0x04,0x81,0x19,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v1, v2 :: v_dual_min_i32 v7, v255, v3 +// GFX1250: v_dual_subrev_f32 v255, v1, v2 :: v_dual_min_i32 v7, v255, v3 ; encoding: [0x01,0x81,0x19,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v255, v2 :: v_dual_min_i32 v7, v2, v3 +// GFX1250: v_dual_subrev_f32 v255, v255, v2 :: v_dual_min_i32 v7, v2, v3 ; encoding: [0xff,0x81,0x19,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v2, v2 :: v_dual_min_i32 v7, v3, v3 +// GFX1250: v_dual_subrev_f32 v255, v2, v2 :: v_dual_min_i32 v7, v3, v3 ; encoding: [0x02,0x81,0x19,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: 
error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v3, v2 :: v_dual_min_i32 v7, v4, v3 +// GFX1250: v_dual_subrev_f32 v255, v3, v2 :: v_dual_min_i32 v7, v4, v3 ; encoding: [0x03,0x81,0x19,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s105, v2 :: v_dual_min_i32 v7, s1, v3 +// GFX1250: v_dual_subrev_f32 v255, s105, v2 :: v_dual_min_i32 v7, s1, v3 ; encoding: [0x69,0x80,0x19,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s1, v2 :: v_dual_min_i32 v7, s105, v3 +// GFX1250: v_dual_subrev_f32 v255, s1, v2 :: v_dual_min_i32 v7, s105, v3 ; encoding: [0x01,0x80,0x19,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_min_i32 v7, vcc_lo, v3 +// GFX1250: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_min_i32 v7, vcc_lo, v3 ; encoding: [0x7b,0x80,0x19,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_min_i32 v7, vcc_hi, v3 +// GFX1250: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_min_i32 v7, vcc_hi, v3 ; encoding: [0x7f,0x80,0x19,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_min_i32 v7, ttmp15, v3 +// GFX1250: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_min_i32 v7, ttmp15, v3 ; encoding: [0x7e,0x80,0x19,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, m0, v2 :: v_dual_min_i32 v7, m0, v3 +// GFX1250: v_dual_subrev_f32 v255, m0, v2 :: v_dual_min_i32 v7, m0, v3 ; encoding: [0x7d,0x80,0x19,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_min_i32 v7, exec_lo, v3 +// GFX1250: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_min_i32 v7, exec_lo, v3 ; encoding: [0x6b,0x80,0x19,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_min_i32 v7, exec_hi, v3 +// GFX1250: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_min_i32 v7, exec_hi, v3 ; encoding: [0x6a,0x80,0x19,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_min_i32 v7, -1, v3 +// GFX1250: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_min_i32 v7, -1, v3 ; encoding: [0xfd,0x80,0x19,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_min_i32 v7, 0.5, v2 +// GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_min_i32 v7, 0.5, v2 ; encoding: [0xf0,0x80,0x19,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, -1, v4 :: v_dual_min_i32 v7, src_scc, v5 +// GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_min_i32 v7, src_scc, v5 ; encoding: [0xc1,0x80,0x19,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v4, v2 :: v_dual_sub_nc_u32 v7, v1, v3 +// GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_sub_nc_u32 v7, v1, v3 ; encoding: [0x04,0x41,0x11,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v1, v2 :: v_dual_sub_nc_u32 v7, v255, v3 +// GFX1250: v_dual_add_f32 v255, v1, v2 :: v_dual_sub_nc_u32 v7, v255, v3 ; encoding: 
[0x01,0x41,0x11,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v255, v2 :: v_dual_sub_nc_u32 v7, v2, v3 +// GFX1250: v_dual_add_f32 v255, v255, v2 :: v_dual_sub_nc_u32 v7, v2, v3 ; encoding: [0xff,0x41,0x11,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v2, v2 :: v_dual_sub_nc_u32 v7, v3, v3 +// GFX1250: v_dual_add_f32 v255, v2, v2 :: v_dual_sub_nc_u32 v7, v3, v3 ; encoding: [0x02,0x41,0x11,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v3, v2 :: v_dual_sub_nc_u32 v7, v4, v3 +// GFX1250: v_dual_add_f32 v255, v3, v2 :: v_dual_sub_nc_u32 v7, v4, v3 ; encoding: [0x03,0x41,0x11,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s105, v2 :: v_dual_sub_nc_u32 v7, s1, v3 +// GFX1250: v_dual_add_f32 v255, s105, v2 :: v_dual_sub_nc_u32 v7, s1, v3 ; encoding: [0x69,0x40,0x11,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s1, v2 :: v_dual_sub_nc_u32 v7, s105, v3 +// GFX1250: v_dual_add_f32 v255, s1, v2 :: v_dual_sub_nc_u32 v7, s105, v3 ; encoding: [0x01,0x40,0x11,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, ttmp15, v2 :: v_dual_sub_nc_u32 v7, vcc_lo, v3 +// GFX1250: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_sub_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7b,0x40,0x11,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v7, vcc_hi, v3 +// GFX1250: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v7, vcc_hi, v3 ; encoding: 
[0x7f,0x40,0x11,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_lo, v2 :: v_dual_sub_nc_u32 v7, ttmp15, v3 +// GFX1250: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_sub_nc_u32 v7, ttmp15, v3 ; encoding: [0x7e,0x40,0x11,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, m0, v2 :: v_dual_sub_nc_u32 v7, m0, v3 +// GFX1250: v_dual_add_f32 v255, m0, v2 :: v_dual_sub_nc_u32 v7, m0, v3 ; encoding: [0x7d,0x40,0x11,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v7, exec_lo, v3 +// GFX1250: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v7, exec_lo, v3 ; encoding: [0x6b,0x40,0x11,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v7, exec_hi, v3 +// GFX1250: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x11,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v7, -1, v3 +// GFX1250: v_dual_add_f32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x40,0x11,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v7, 0.5, v2 +// GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x11,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, -1, v4 :: v_dual_sub_nc_u32 v7, src_scc, v5 +// GFX1250: v_dual_add_f32 v255, -1, v4 :: 
v_dual_sub_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x11,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_sub_nc_u32 v7, v1, v3 +// GFX1250: v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_sub_nc_u32 v7, v1, v3 ; encoding: [0x04,0x41,0x25,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v1, v2, vcc_lo :: v_dual_sub_nc_u32 v7, v255, v3 +// GFX1250: v_dual_cndmask_b32 v255, v1, v2, vcc_lo :: v_dual_sub_nc_u32 v7, v255, v3 ; encoding: [0x01,0x41,0x25,0xcf,0xff,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v255, v2, vcc_lo :: v_dual_sub_nc_u32 v7, v2, v3 +// GFX1250: v_dual_cndmask_b32 v255, v255, v2, vcc_lo :: v_dual_sub_nc_u32 v7, v2, v3 ; encoding: [0xff,0x41,0x25,0xcf,0x02,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v2, v2, vcc_lo :: v_dual_sub_nc_u32 v7, v3, v3 +// GFX1250: v_dual_cndmask_b32 v255, v2, v2, vcc_lo :: v_dual_sub_nc_u32 v7, v3, v3 ; encoding: [0x02,0x41,0x25,0xcf,0x03,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v3, v2, vcc_lo :: v_dual_sub_nc_u32 v7, v4, v3 +// GFX1250: v_dual_cndmask_b32 v255, v3, v2, vcc_lo :: v_dual_sub_nc_u32 v7, v4, v3 ; encoding: [0x03,0x41,0x25,0xcf,0x04,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s105, v2, vcc_lo :: v_dual_sub_nc_u32 v7, s105, v3 +// GFX1250: v_dual_cndmask_b32 v255, s105, v2, vcc_lo :: v_dual_sub_nc_u32 v7, s105, v3 ; encoding: [0x69,0x40,0x25,0xcf,0x69,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_cndmask_b32 v255, s1, v2, vcc_lo :: v_dual_sub_nc_u32 v7, s1, v3 +// GFX1250: v_dual_cndmask_b32 v255, s1, v2, vcc_lo :: v_dual_sub_nc_u32 v7, s1, v3 ; encoding: [0x01,0x40,0x25,0xcf,0x01,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, ttmp15, v2, vcc_lo :: v_dual_sub_nc_u32 v7, ttmp15, v3 +// GFX1250: v_dual_cndmask_b32 v255, ttmp15, v2, vcc_lo :: v_dual_sub_nc_u32 v7, ttmp15, v3 ; encoding: [0x7b,0x40,0x25,0xcf,0x7b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_hi, v2, vcc_lo :: v_dual_sub_nc_u32 v7, exec_hi, v3 +// GFX1250: v_dual_cndmask_b32 v255, exec_hi, v2, vcc_lo :: v_dual_sub_nc_u32 v7, exec_hi, v3 ; encoding: [0x7f,0x40,0x25,0xcf,0x7f,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_lo, v2, vcc_lo :: v_dual_sub_nc_u32 v7, exec_lo, v3 +// GFX1250: v_dual_cndmask_b32 v255, exec_lo, v2, vcc_lo :: v_dual_sub_nc_u32 v7, exec_lo, v3 ; encoding: [0x7e,0x40,0x25,0xcf,0x7e,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, m0, v2, vcc_lo :: v_dual_sub_nc_u32 v7, m0, v3 +// GFX1250: v_dual_cndmask_b32 v255, m0, v2, vcc_lo :: v_dual_sub_nc_u32 v7, m0, v3 ; encoding: [0x7d,0x40,0x25,0xcf,0x7d,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_hi, v2, vcc_lo :: v_dual_sub_nc_u32 v7, vcc_hi, v3 +// GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v2, vcc_lo :: v_dual_sub_nc_u32 v7, vcc_hi, v3 ; encoding: [0x6b,0x40,0x25,0xcf,0x6b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_lo, v2, vcc_lo :: v_dual_sub_nc_u32 v7, vcc_lo, v3 +// GFX1250: 
v_dual_cndmask_b32 v255, vcc_lo, v2, vcc_lo :: v_dual_sub_nc_u32 v7, vcc_lo, v3 ; encoding: [0x6a,0x40,0x25,0xcf,0x6a,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, src_scc, v2, vcc_lo :: v_dual_sub_nc_u32 v7, -1, v3 +// GFX1250: v_dual_cndmask_b32 v255, src_scc, v2, vcc_lo :: v_dual_sub_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x40,0x25,0xcf,0xc1,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_sub_nc_u32 v7, 0.5, v2 +// GFX1250: v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_sub_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x25,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_sub_nc_u32 v7, src_scc, v5 +// GFX1250: v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_sub_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x25,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v4, v2 :: v_dual_sub_nc_u32 v7, v1, v3 +// GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_sub_nc_u32 v7, v1, v3 ; encoding: [0x04,0x41,0x01,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v1, v2 :: v_dual_sub_nc_u32 v7, v255, v3 +// GFX1250: v_dual_fmac_f32 v255, v1, v2 :: v_dual_sub_nc_u32 v7, v255, v3 ; encoding: [0x01,0x41,0x01,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v255, v2 :: v_dual_sub_nc_u32 v7, v2, v3 +// GFX1250: v_dual_fmac_f32 v255, v255, v2 :: v_dual_sub_nc_u32 v7, v2, v3 ; encoding: [0xff,0x41,0x01,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_fmac_f32 v255, v2, v2 :: v_dual_sub_nc_u32 v7, v3, v3 +// GFX1250: v_dual_fmac_f32 v255, v2, v2 :: v_dual_sub_nc_u32 v7, v3, v3 ; encoding: [0x02,0x41,0x01,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v3, v2 :: v_dual_sub_nc_u32 v7, v4, v3 +// GFX1250: v_dual_fmac_f32 v255, v3, v2 :: v_dual_sub_nc_u32 v7, v4, v3 ; encoding: [0x03,0x41,0x01,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s105, v2 :: v_dual_sub_nc_u32 v7, s1, v3 +// GFX1250: v_dual_fmac_f32 v255, s105, v2 :: v_dual_sub_nc_u32 v7, s1, v3 ; encoding: [0x69,0x40,0x01,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s1, v2 :: v_dual_sub_nc_u32 v7, s105, v3 +// GFX1250: v_dual_fmac_f32 v255, s1, v2 :: v_dual_sub_nc_u32 v7, s105, v3 ; encoding: [0x01,0x40,0x01,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_sub_nc_u32 v7, vcc_lo, v3 +// GFX1250: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_sub_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7b,0x40,0x01,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v7, vcc_hi, v3 +// GFX1250: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v7, vcc_hi, v3 ; encoding: [0x7f,0x40,0x01,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_sub_nc_u32 v7, ttmp15, v3 +// GFX1250: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_sub_nc_u32 v7, ttmp15, v3 ; encoding: [0x7e,0x40,0x01,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: 
error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, m0, v2 :: v_dual_sub_nc_u32 v7, m0, v3 +// GFX1250: v_dual_fmac_f32 v255, m0, v2 :: v_dual_sub_nc_u32 v7, m0, v3 ; encoding: [0x7d,0x40,0x01,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v7, exec_lo, v3 +// GFX1250: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v7, exec_lo, v3 ; encoding: [0x6b,0x40,0x01,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v7, exec_hi, v3 +// GFX1250: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x01,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v7, -1, v3 +// GFX1250: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x40,0x01,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v7, 0.5, v2 +// GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x01,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, -1, v4 :: v_dual_sub_nc_u32 v7, src_scc, v5 +// GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_sub_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x01,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v4, v2 :: v_dual_sub_nc_u32 v7, v1, v3 +// GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_sub_nc_u32 v7, v1, v3 ; encoding: 
[0x04,0x41,0x29,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v1, v2 :: v_dual_sub_nc_u32 v7, v255, v3 +// GFX1250: v_dual_max_num_f32 v255, v1, v2 :: v_dual_sub_nc_u32 v7, v255, v3 ; encoding: [0x01,0x41,0x29,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v255, v2 :: v_dual_sub_nc_u32 v7, v2, v3 +// GFX1250: v_dual_max_num_f32 v255, v255, v2 :: v_dual_sub_nc_u32 v7, v2, v3 ; encoding: [0xff,0x41,0x29,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v2, v2 :: v_dual_sub_nc_u32 v7, v3, v3 +// GFX1250: v_dual_max_num_f32 v255, v2, v2 :: v_dual_sub_nc_u32 v7, v3, v3 ; encoding: [0x02,0x41,0x29,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v3, v2 :: v_dual_sub_nc_u32 v7, v4, v3 +// GFX1250: v_dual_max_num_f32 v255, v3, v2 :: v_dual_sub_nc_u32 v7, v4, v3 ; encoding: [0x03,0x41,0x29,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s105, v2 :: v_dual_sub_nc_u32 v7, s1, v3 +// GFX1250: v_dual_max_num_f32 v255, s105, v2 :: v_dual_sub_nc_u32 v7, s1, v3 ; encoding: [0x69,0x40,0x29,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s1, v2 :: v_dual_sub_nc_u32 v7, s105, v3 +// GFX1250: v_dual_max_num_f32 v255, s1, v2 :: v_dual_sub_nc_u32 v7, s105, v3 ; encoding: [0x01,0x40,0x29,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_sub_nc_u32 v7, vcc_lo, v3 +// GFX1250: v_dual_max_num_f32 v255, ttmp15, v2 :: 
v_dual_sub_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7b,0x40,0x29,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v7, vcc_hi, v3 +// GFX1250: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v7, vcc_hi, v3 ; encoding: [0x7f,0x40,0x29,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_sub_nc_u32 v7, ttmp15, v3 +// GFX1250: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_sub_nc_u32 v7, ttmp15, v3 ; encoding: [0x7e,0x40,0x29,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, m0, v2 :: v_dual_sub_nc_u32 v7, m0, v3 +// GFX1250: v_dual_max_num_f32 v255, m0, v2 :: v_dual_sub_nc_u32 v7, m0, v3 ; encoding: [0x7d,0x40,0x29,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v7, exec_lo, v3 +// GFX1250: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v7, exec_lo, v3 ; encoding: [0x6b,0x40,0x29,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v7, exec_hi, v3 +// GFX1250: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x29,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v7, -1, v3 +// GFX1250: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x40,0x29,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v7, 0.5, v2 +// GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x29,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, -1, v4 :: v_dual_sub_nc_u32 v7, src_scc, v5 +// GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_sub_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x29,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v4, v2 :: v_dual_sub_nc_u32 v7, v1, v3 +// GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_sub_nc_u32 v7, v1, v3 ; encoding: [0x04,0x41,0x2d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v1, v2 :: v_dual_sub_nc_u32 v7, v255, v3 +// GFX1250: v_dual_min_num_f32 v255, v1, v2 :: v_dual_sub_nc_u32 v7, v255, v3 ; encoding: [0x01,0x41,0x2d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v255, v2 :: v_dual_sub_nc_u32 v7, v2, v3 +// GFX1250: v_dual_min_num_f32 v255, v255, v2 :: v_dual_sub_nc_u32 v7, v2, v3 ; encoding: [0xff,0x41,0x2d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v2, v2 :: v_dual_sub_nc_u32 v7, v3, v3 +// GFX1250: v_dual_min_num_f32 v255, v2, v2 :: v_dual_sub_nc_u32 v7, v3, v3 ; encoding: [0x02,0x41,0x2d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v3, v2 :: v_dual_sub_nc_u32 v7, v4, v3 +// GFX1250: v_dual_min_num_f32 v255, v3, v2 :: v_dual_sub_nc_u32 v7, v4, v3 ; encoding: [0x03,0x41,0x2d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s105, v2 :: v_dual_sub_nc_u32 v7, s1, v3 +// GFX1250: v_dual_min_num_f32 v255, s105, v2 :: v_dual_sub_nc_u32 v7, s1, v3 ; encoding: [0x69,0x40,0x2d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s1, v2 :: v_dual_sub_nc_u32 v7, s105, v3 +// GFX1250: v_dual_min_num_f32 v255, s1, v2 :: v_dual_sub_nc_u32 v7, s105, v3 ; encoding: [0x01,0x40,0x2d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_sub_nc_u32 v7, vcc_lo, v3 +// GFX1250: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_sub_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7b,0x40,0x2d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v7, vcc_hi, v3 +// GFX1250: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v7, vcc_hi, v3 ; encoding: [0x7f,0x40,0x2d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_sub_nc_u32 v7, ttmp15, v3 +// GFX1250: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_sub_nc_u32 v7, ttmp15, v3 ; encoding: [0x7e,0x40,0x2d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, m0, v2 :: v_dual_sub_nc_u32 v7, m0, v3 +// GFX1250: v_dual_min_num_f32 v255, m0, v2 :: v_dual_sub_nc_u32 v7, m0, v3 ; encoding: [0x7d,0x40,0x2d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v7, exec_lo, v3 +// GFX1250: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v7, exec_lo, v3 ; encoding: 
[0x6b,0x40,0x2d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v7, exec_hi, v3 +// GFX1250: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x2d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v7, -1, v3 +// GFX1250: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x40,0x2d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v7, 0.5, v2 +// GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x2d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, -1, v4 :: v_dual_sub_nc_u32 v7, src_scc, v5 +// GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_sub_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x2d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v4 :: v_dual_sub_nc_u32 v7, v1, v255 +// GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_sub_nc_u32 v7, v1, v255 ; encoding: [0x04,0x41,0x21,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v1 :: v_dual_sub_nc_u32 v7, v255, v255 +// GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_sub_nc_u32 v7, v255, v255 ; encoding: [0x01,0x41,0x21,0xcf,0xff,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v255 :: v_dual_sub_nc_u32 v7, v2, v255 +// GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_sub_nc_u32 
v7, v2, v255 ; encoding: [0xff,0x41,0x21,0xcf,0x02,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v2 :: v_dual_sub_nc_u32 v7, v3, v255 +// GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_sub_nc_u32 v7, v3, v255 ; encoding: [0x02,0x41,0x21,0xcf,0x03,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v3 :: v_dual_sub_nc_u32 v7, v4, v255 +// GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_sub_nc_u32 v7, v4, v255 ; encoding: [0x03,0x41,0x21,0xcf,0x04,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s105 :: v_dual_sub_nc_u32 v7, s1, v255 +// GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_sub_nc_u32 v7, s1, v255 ; encoding: [0x69,0x40,0x21,0xcf,0x01,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s1 :: v_dual_sub_nc_u32 v7, s105, v255 +// GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_sub_nc_u32 v7, s105, v255 ; encoding: [0x01,0x40,0x21,0xcf,0x69,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, ttmp15 :: v_dual_sub_nc_u32 v7, vcc_lo, v255 +// GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_sub_nc_u32 v7, vcc_lo, v255 ; encoding: [0x7b,0x40,0x21,0xcf,0x6a,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_hi :: v_dual_sub_nc_u32 v7, vcc_hi, v255 +// GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_sub_nc_u32 v7, vcc_hi, v255 ; encoding: [0x7f,0x40,0x21,0xcf,0x6b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_lo :: v_dual_sub_nc_u32 v7, ttmp15, v255 +// GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_sub_nc_u32 v7, ttmp15, v255 ; 
encoding: [0x7e,0x40,0x21,0xcf,0x7b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, m0 :: v_dual_sub_nc_u32 v7, m0, v255 +// GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_sub_nc_u32 v7, m0, v255 ; encoding: [0x7d,0x40,0x21,0xcf,0x7d,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_hi :: v_dual_sub_nc_u32 v7, exec_lo, v255 +// GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_sub_nc_u32 v7, exec_lo, v255 ; encoding: [0x6b,0x40,0x21,0xcf,0x7e,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_lo :: v_dual_sub_nc_u32 v7, exec_hi, v255 +// GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_sub_nc_u32 v7, exec_hi, v255 ; encoding: [0x6a,0x40,0x21,0xcf,0x7f,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, src_scc :: v_dual_sub_nc_u32 v7, -1, v255 +// GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_sub_nc_u32 v7, -1, v255 ; encoding: [0xfd,0x40,0x21,0xcf,0xc1,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0.5 :: v_dual_sub_nc_u32 v7, 0.5, v3 +// GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_sub_nc_u32 v7, 0.5, v3 ; encoding: [0xf0,0x40,0x21,0xcf,0xf0,0x00,0x00,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, -1 :: v_dual_sub_nc_u32 v7, src_scc, v4 +// GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_sub_nc_u32 v7, src_scc, v4 ; encoding: [0xc1,0x40,0x21,0xcf,0xfd,0x00,0x00,0x00,0xff,0x04,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_sub_nc_u32 v7, v1, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_sub_nc_u32 v7, v1, v3 ; 
encoding: [0x04,0x41,0x1d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_sub_nc_u32 v7, v255, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_sub_nc_u32 v7, v255, v3 ; encoding: [0x01,0x41,0x1d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_sub_nc_u32 v7, v2, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_sub_nc_u32 v7, v2, v3 ; encoding: [0xff,0x41,0x1d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_sub_nc_u32 v7, v3, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_sub_nc_u32 v7, v3, v3 ; encoding: [0x02,0x41,0x1d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_sub_nc_u32 v7, v4, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_sub_nc_u32 v7, v4, v3 ; encoding: [0x03,0x41,0x1d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_sub_nc_u32 v7, s1, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_sub_nc_u32 v7, s1, v3 ; encoding: [0x69,0x40,0x1d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_sub_nc_u32 v7, s105, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_sub_nc_u32 v7, s105, v3 ; encoding: [0x01,0x40,0x1d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: 
v_dual_sub_nc_u32 v7, vcc_lo, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_sub_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7b,0x40,0x1d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v7, vcc_hi, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v7, vcc_hi, v3 ; encoding: [0x7f,0x40,0x1d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_sub_nc_u32 v7, ttmp15, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_sub_nc_u32 v7, ttmp15, v3 ; encoding: [0x7e,0x40,0x1d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_sub_nc_u32 v7, m0, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_sub_nc_u32 v7, m0, v3 ; encoding: [0x7d,0x40,0x1d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v7, exec_lo, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v7, exec_lo, v3 ; encoding: [0x6b,0x40,0x1d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v7, exec_hi, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x1d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v7, -1, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v7, -1, v3 ; encoding: 
[0xfd,0x40,0x1d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v7, 0.5, v2 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x1d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_sub_nc_u32 v7, src_scc, v5 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_sub_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x1d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v4, v2 :: v_dual_sub_nc_u32 v7, v1, v3 +// GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_sub_nc_u32 v7, v1, v3 ; encoding: [0x04,0x41,0x0d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v1, v2 :: v_dual_sub_nc_u32 v7, v255, v3 +// GFX1250: v_dual_mul_f32 v255, v1, v2 :: v_dual_sub_nc_u32 v7, v255, v3 ; encoding: [0x01,0x41,0x0d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v255, v2 :: v_dual_sub_nc_u32 v7, v2, v3 +// GFX1250: v_dual_mul_f32 v255, v255, v2 :: v_dual_sub_nc_u32 v7, v2, v3 ; encoding: [0xff,0x41,0x0d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v2, v2 :: v_dual_sub_nc_u32 v7, v3, v3 +// GFX1250: v_dual_mul_f32 v255, v2, v2 :: v_dual_sub_nc_u32 v7, v3, v3 ; encoding: [0x02,0x41,0x0d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v3, v2 :: v_dual_sub_nc_u32 v7, v4, v3 +// GFX1250: v_dual_mul_f32 v255, v3, v2 :: v_dual_sub_nc_u32 v7, v4, v3 
; encoding: [0x03,0x41,0x0d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s105, v2 :: v_dual_sub_nc_u32 v7, s1, v3 +// GFX1250: v_dual_mul_f32 v255, s105, v2 :: v_dual_sub_nc_u32 v7, s1, v3 ; encoding: [0x69,0x40,0x0d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s1, v2 :: v_dual_sub_nc_u32 v7, s105, v3 +// GFX1250: v_dual_mul_f32 v255, s1, v2 :: v_dual_sub_nc_u32 v7, s105, v3 ; encoding: [0x01,0x40,0x0d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_sub_nc_u32 v7, vcc_lo, v3 +// GFX1250: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_sub_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7b,0x40,0x0d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v7, vcc_hi, v3 +// GFX1250: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v7, vcc_hi, v3 ; encoding: [0x7f,0x40,0x0d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_sub_nc_u32 v7, ttmp15, v3 +// GFX1250: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_sub_nc_u32 v7, ttmp15, v3 ; encoding: [0x7e,0x40,0x0d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, m0, v2 :: v_dual_sub_nc_u32 v7, m0, v3 +// GFX1250: v_dual_mul_f32 v255, m0, v2 :: v_dual_sub_nc_u32 v7, m0, v3 ; encoding: [0x7d,0x40,0x0d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v7, exec_lo, v3 +// GFX1250: v_dual_mul_f32 v255, vcc_hi, v2 :: 
v_dual_sub_nc_u32 v7, exec_lo, v3 ; encoding: [0x6b,0x40,0x0d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v7, exec_hi, v3 +// GFX1250: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x0d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v7, -1, v3 +// GFX1250: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x40,0x0d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v7, 0.5, v2 +// GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x0d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, -1, v4 :: v_dual_sub_nc_u32 v7, src_scc, v5 +// GFX1250: v_dual_mul_f32 v255, -1, v4 :: v_dual_sub_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x0d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v4, v2 :: v_dual_sub_nc_u32 v7, v1, v3 +// GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_sub_nc_u32 v7, v1, v3 ; encoding: [0x04,0x41,0x15,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v1, v2 :: v_dual_sub_nc_u32 v7, v255, v3 +// GFX1250: v_dual_sub_f32 v255, v1, v2 :: v_dual_sub_nc_u32 v7, v255, v3 ; encoding: [0x01,0x41,0x15,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v255, v2 :: v_dual_sub_nc_u32 v7, v2, v3 +// GFX1250: v_dual_sub_f32 v255, 
v255, v2 :: v_dual_sub_nc_u32 v7, v2, v3 ; encoding: [0xff,0x41,0x15,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v2, v2 :: v_dual_sub_nc_u32 v7, v3, v3 +// GFX1250: v_dual_sub_f32 v255, v2, v2 :: v_dual_sub_nc_u32 v7, v3, v3 ; encoding: [0x02,0x41,0x15,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v3, v2 :: v_dual_sub_nc_u32 v7, v4, v3 +// GFX1250: v_dual_sub_f32 v255, v3, v2 :: v_dual_sub_nc_u32 v7, v4, v3 ; encoding: [0x03,0x41,0x15,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s105, v2 :: v_dual_sub_nc_u32 v7, s1, v3 +// GFX1250: v_dual_sub_f32 v255, s105, v2 :: v_dual_sub_nc_u32 v7, s1, v3 ; encoding: [0x69,0x40,0x15,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s1, v2 :: v_dual_sub_nc_u32 v7, s105, v3 +// GFX1250: v_dual_sub_f32 v255, s1, v2 :: v_dual_sub_nc_u32 v7, s105, v3 ; encoding: [0x01,0x40,0x15,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_sub_nc_u32 v7, vcc_lo, v3 +// GFX1250: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_sub_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7b,0x40,0x15,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v7, vcc_hi, v3 +// GFX1250: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v7, vcc_hi, v3 ; encoding: [0x7f,0x40,0x15,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_sub_nc_u32 v7, ttmp15, v3 +// GFX1250: v_dual_sub_f32 
v255, exec_lo, v2 :: v_dual_sub_nc_u32 v7, ttmp15, v3 ; encoding: [0x7e,0x40,0x15,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, m0, v2 :: v_dual_sub_nc_u32 v7, m0, v3 +// GFX1250: v_dual_sub_f32 v255, m0, v2 :: v_dual_sub_nc_u32 v7, m0, v3 ; encoding: [0x7d,0x40,0x15,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v7, exec_lo, v3 +// GFX1250: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v7, exec_lo, v3 ; encoding: [0x6b,0x40,0x15,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v7, exec_hi, v3 +// GFX1250: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x15,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v7, -1, v3 +// GFX1250: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x40,0x15,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v7, 0.5, v2 +// GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x15,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, -1, v4 :: v_dual_sub_nc_u32 v7, src_scc, v5 +// GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_sub_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x15,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v4, v2 :: v_dual_sub_nc_u32 v7, v1, v3 
+// GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_sub_nc_u32 v7, v1, v3 ; encoding: [0x04,0x41,0x19,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v1, v2 :: v_dual_sub_nc_u32 v7, v255, v3 +// GFX1250: v_dual_subrev_f32 v255, v1, v2 :: v_dual_sub_nc_u32 v7, v255, v3 ; encoding: [0x01,0x41,0x19,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v255, v2 :: v_dual_sub_nc_u32 v7, v2, v3 +// GFX1250: v_dual_subrev_f32 v255, v255, v2 :: v_dual_sub_nc_u32 v7, v2, v3 ; encoding: [0xff,0x41,0x19,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v2, v2 :: v_dual_sub_nc_u32 v7, v3, v3 +// GFX1250: v_dual_subrev_f32 v255, v2, v2 :: v_dual_sub_nc_u32 v7, v3, v3 ; encoding: [0x02,0x41,0x19,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v3, v2 :: v_dual_sub_nc_u32 v7, v4, v3 +// GFX1250: v_dual_subrev_f32 v255, v3, v2 :: v_dual_sub_nc_u32 v7, v4, v3 ; encoding: [0x03,0x41,0x19,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s105, v2 :: v_dual_sub_nc_u32 v7, s1, v3 +// GFX1250: v_dual_subrev_f32 v255, s105, v2 :: v_dual_sub_nc_u32 v7, s1, v3 ; encoding: [0x69,0x40,0x19,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s1, v2 :: v_dual_sub_nc_u32 v7, s105, v3 +// GFX1250: v_dual_subrev_f32 v255, s1, v2 :: v_dual_sub_nc_u32 v7, s105, v3 ; encoding: [0x01,0x40,0x19,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, ttmp15, v2 :: 
v_dual_sub_nc_u32 v7, vcc_lo, v3 +// GFX1250: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_sub_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7b,0x40,0x19,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v7, vcc_hi, v3 +// GFX1250: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v7, vcc_hi, v3 ; encoding: [0x7f,0x40,0x19,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_sub_nc_u32 v7, ttmp15, v3 +// GFX1250: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_sub_nc_u32 v7, ttmp15, v3 ; encoding: [0x7e,0x40,0x19,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, m0, v2 :: v_dual_sub_nc_u32 v7, m0, v3 +// GFX1250: v_dual_subrev_f32 v255, m0, v2 :: v_dual_sub_nc_u32 v7, m0, v3 ; encoding: [0x7d,0x40,0x19,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v7, exec_lo, v3 +// GFX1250: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v7, exec_lo, v3 ; encoding: [0x6b,0x40,0x19,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v7, exec_hi, v3 +// GFX1250: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x19,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v7, -1, v3 +// GFX1250: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x40,0x19,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v7, 0.5, v2 +// GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x19,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, -1, v4 :: v_dual_sub_nc_u32 v7, src_scc, v5 +// GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_sub_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x19,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v4, v2 :: v_dual_lshrrev_b32 v7, v1, v3 +// GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_lshrrev_b32 v7, v1, v3 ; encoding: [0x04,0x51,0x11,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v1, v2 :: v_dual_lshrrev_b32 v7, v255, v3 +// GFX1250: v_dual_add_f32 v255, v1, v2 :: v_dual_lshrrev_b32 v7, v255, v3 ; encoding: [0x01,0x51,0x11,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v255, v2 :: v_dual_lshrrev_b32 v7, v2, v3 +// GFX1250: v_dual_add_f32 v255, v255, v2 :: v_dual_lshrrev_b32 v7, v2, v3 ; encoding: [0xff,0x51,0x11,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v2, v2 :: v_dual_lshrrev_b32 v7, v3, v3 +// GFX1250: v_dual_add_f32 v255, v2, v2 :: v_dual_lshrrev_b32 v7, v3, v3 ; encoding: [0x02,0x51,0x11,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v3, v2 :: v_dual_lshrrev_b32 v7, v4, v3 +// GFX1250: v_dual_add_f32 v255, v3, v2 :: v_dual_lshrrev_b32 v7, v4, v3 ; encoding: [0x03,0x51,0x11,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s105, v2 :: v_dual_lshrrev_b32 v7, s1, v3 +// GFX1250: v_dual_add_f32 v255, s105, v2 :: v_dual_lshrrev_b32 v7, s1, v3 ; encoding: [0x69,0x50,0x11,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s1, v2 :: v_dual_lshrrev_b32 v7, s105, v3 +// GFX1250: v_dual_add_f32 v255, s1, v2 :: v_dual_lshrrev_b32 v7, s105, v3 ; encoding: [0x01,0x50,0x11,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v7, vcc_lo, v3 +// GFX1250: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v7, vcc_lo, v3 ; encoding: [0x7b,0x50,0x11,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v7, vcc_hi, v3 +// GFX1250: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v7, vcc_hi, v3 ; encoding: [0x7f,0x50,0x11,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_lo, v2 :: v_dual_lshrrev_b32 v7, ttmp15, v3 +// GFX1250: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_lshrrev_b32 v7, ttmp15, v3 ; encoding: [0x7e,0x50,0x11,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, m0, v2 :: v_dual_lshrrev_b32 v7, m0, v3 +// GFX1250: v_dual_add_f32 v255, m0, v2 :: v_dual_lshrrev_b32 v7, m0, v3 ; encoding: [0x7d,0x50,0x11,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v7, exec_lo, v3 +// GFX1250: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v7, exec_lo, v3 ; encoding: 
[0x6b,0x50,0x11,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v7, exec_hi, v3 +// GFX1250: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x11,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v7, -1, v3 +// GFX1250: v_dual_add_f32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v7, -1, v3 ; encoding: [0xfd,0x50,0x11,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v7, 0.5, v2 +// GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x11,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, -1, v4 :: v_dual_lshrrev_b32 v7, src_scc, v5 +// GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_lshrrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x11,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_lshrrev_b32 v7, v1, v3 +// GFX1250: v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_lshrrev_b32 v7, v1, v3 ; encoding: [0x04,0x51,0x25,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v1, v2, vcc_lo :: v_dual_lshrrev_b32 v7, v255, v3 +// GFX1250: v_dual_cndmask_b32 v255, v1, v2, vcc_lo :: v_dual_lshrrev_b32 v7, v255, v3 ; encoding: [0x01,0x51,0x25,0xcf,0xff,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v255, v2, vcc_lo :: v_dual_lshrrev_b32 v7, v2, v3 +// 
GFX1250: v_dual_cndmask_b32 v255, v255, v2, vcc_lo :: v_dual_lshrrev_b32 v7, v2, v3 ; encoding: [0xff,0x51,0x25,0xcf,0x02,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v2, v2, vcc_lo :: v_dual_lshrrev_b32 v7, v3, v3 +// GFX1250: v_dual_cndmask_b32 v255, v2, v2, vcc_lo :: v_dual_lshrrev_b32 v7, v3, v3 ; encoding: [0x02,0x51,0x25,0xcf,0x03,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v3, v2, vcc_lo :: v_dual_lshrrev_b32 v7, v4, v3 +// GFX1250: v_dual_cndmask_b32 v255, v3, v2, vcc_lo :: v_dual_lshrrev_b32 v7, v4, v3 ; encoding: [0x03,0x51,0x25,0xcf,0x04,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s105, v2, vcc_lo :: v_dual_lshrrev_b32 v7, s105, v3 +// GFX1250: v_dual_cndmask_b32 v255, s105, v2, vcc_lo :: v_dual_lshrrev_b32 v7, s105, v3 ; encoding: [0x69,0x50,0x25,0xcf,0x69,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s1, v2, vcc_lo :: v_dual_lshrrev_b32 v7, s1, v3 +// GFX1250: v_dual_cndmask_b32 v255, s1, v2, vcc_lo :: v_dual_lshrrev_b32 v7, s1, v3 ; encoding: [0x01,0x50,0x25,0xcf,0x01,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, ttmp15, v2, vcc_lo :: v_dual_lshrrev_b32 v7, ttmp15, v3 +// GFX1250: v_dual_cndmask_b32 v255, ttmp15, v2, vcc_lo :: v_dual_lshrrev_b32 v7, ttmp15, v3 ; encoding: [0x7b,0x50,0x25,0xcf,0x7b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_hi, v2, vcc_lo :: v_dual_lshrrev_b32 v7, exec_hi, v3 +// GFX1250: v_dual_cndmask_b32 v255, exec_hi, v2, vcc_lo :: v_dual_lshrrev_b32 v7, exec_hi, v3 ; encoding: 
[0x7f,0x50,0x25,0xcf,0x7f,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_lo, v2, vcc_lo :: v_dual_lshrrev_b32 v7, exec_lo, v3 +// GFX1250: v_dual_cndmask_b32 v255, exec_lo, v2, vcc_lo :: v_dual_lshrrev_b32 v7, exec_lo, v3 ; encoding: [0x7e,0x50,0x25,0xcf,0x7e,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, m0, v2, vcc_lo :: v_dual_lshrrev_b32 v7, m0, v3 +// GFX1250: v_dual_cndmask_b32 v255, m0, v2, vcc_lo :: v_dual_lshrrev_b32 v7, m0, v3 ; encoding: [0x7d,0x50,0x25,0xcf,0x7d,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_hi, v2, vcc_lo :: v_dual_lshrrev_b32 v7, vcc_hi, v3 +// GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v2, vcc_lo :: v_dual_lshrrev_b32 v7, vcc_hi, v3 ; encoding: [0x6b,0x50,0x25,0xcf,0x6b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_lo, v2, vcc_lo :: v_dual_lshrrev_b32 v7, vcc_lo, v3 +// GFX1250: v_dual_cndmask_b32 v255, vcc_lo, v2, vcc_lo :: v_dual_lshrrev_b32 v7, vcc_lo, v3 ; encoding: [0x6a,0x50,0x25,0xcf,0x6a,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, src_scc, v2, vcc_lo :: v_dual_lshrrev_b32 v7, -1, v3 +// GFX1250: v_dual_cndmask_b32 v255, src_scc, v2, vcc_lo :: v_dual_lshrrev_b32 v7, -1, v3 ; encoding: [0xfd,0x50,0x25,0xcf,0xc1,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_lshrrev_b32 v7, 0.5, v2 +// GFX1250: v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_lshrrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x25,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_lshrrev_b32 v7, src_scc, v5 +// GFX1250: v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_lshrrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x25,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v4, v2 :: v_dual_lshrrev_b32 v7, v1, v3 +// GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_lshrrev_b32 v7, v1, v3 ; encoding: [0x04,0x51,0x01,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v1, v2 :: v_dual_lshrrev_b32 v7, v255, v3 +// GFX1250: v_dual_fmac_f32 v255, v1, v2 :: v_dual_lshrrev_b32 v7, v255, v3 ; encoding: [0x01,0x51,0x01,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v255, v2 :: v_dual_lshrrev_b32 v7, v2, v3 +// GFX1250: v_dual_fmac_f32 v255, v255, v2 :: v_dual_lshrrev_b32 v7, v2, v3 ; encoding: [0xff,0x51,0x01,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v2, v2 :: v_dual_lshrrev_b32 v7, v3, v3 +// GFX1250: v_dual_fmac_f32 v255, v2, v2 :: v_dual_lshrrev_b32 v7, v3, v3 ; encoding: [0x02,0x51,0x01,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v3, v2 :: v_dual_lshrrev_b32 v7, v4, v3 +// GFX1250: v_dual_fmac_f32 v255, v3, v2 :: v_dual_lshrrev_b32 v7, v4, v3 ; encoding: [0x03,0x51,0x01,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s105, v2 :: v_dual_lshrrev_b32 v7, s1, v3 +// GFX1250: v_dual_fmac_f32 v255, s105, v2 :: v_dual_lshrrev_b32 v7, s1, v3 ; encoding: [0x69,0x50,0x01,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// 
W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s1, v2 :: v_dual_lshrrev_b32 v7, s105, v3 +// GFX1250: v_dual_fmac_f32 v255, s1, v2 :: v_dual_lshrrev_b32 v7, s105, v3 ; encoding: [0x01,0x50,0x01,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v7, vcc_lo, v3 +// GFX1250: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v7, vcc_lo, v3 ; encoding: [0x7b,0x50,0x01,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v7, vcc_hi, v3 +// GFX1250: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v7, vcc_hi, v3 ; encoding: [0x7f,0x50,0x01,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_lshrrev_b32 v7, ttmp15, v3 +// GFX1250: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_lshrrev_b32 v7, ttmp15, v3 ; encoding: [0x7e,0x50,0x01,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, m0, v2 :: v_dual_lshrrev_b32 v7, m0, v3 +// GFX1250: v_dual_fmac_f32 v255, m0, v2 :: v_dual_lshrrev_b32 v7, m0, v3 ; encoding: [0x7d,0x50,0x01,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v7, exec_lo, v3 +// GFX1250: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v7, exec_lo, v3 ; encoding: [0x6b,0x50,0x01,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v7, exec_hi, v3 +// GFX1250: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v7, exec_hi, v3 ; 
encoding: [0x6a,0x50,0x01,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v7, -1, v3 +// GFX1250: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v7, -1, v3 ; encoding: [0xfd,0x50,0x01,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v7, 0.5, v2 +// GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x01,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, -1, v4 :: v_dual_lshrrev_b32 v7, src_scc, v5 +// GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_lshrrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x01,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v4, v2 :: v_dual_lshrrev_b32 v7, v1, v3 +// GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_lshrrev_b32 v7, v1, v3 ; encoding: [0x04,0x51,0x29,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v1, v2 :: v_dual_lshrrev_b32 v7, v255, v3 +// GFX1250: v_dual_max_num_f32 v255, v1, v2 :: v_dual_lshrrev_b32 v7, v255, v3 ; encoding: [0x01,0x51,0x29,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v255, v2 :: v_dual_lshrrev_b32 v7, v2, v3 +// GFX1250: v_dual_max_num_f32 v255, v255, v2 :: v_dual_lshrrev_b32 v7, v2, v3 ; encoding: [0xff,0x51,0x29,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v2, v2 :: v_dual_lshrrev_b32 v7, v3, v3 +// GFX1250: v_dual_max_num_f32 v255, v2, 
v2 :: v_dual_lshrrev_b32 v7, v3, v3 ; encoding: [0x02,0x51,0x29,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v3, v2 :: v_dual_lshrrev_b32 v7, v4, v3 +// GFX1250: v_dual_max_num_f32 v255, v3, v2 :: v_dual_lshrrev_b32 v7, v4, v3 ; encoding: [0x03,0x51,0x29,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s105, v2 :: v_dual_lshrrev_b32 v7, s1, v3 +// GFX1250: v_dual_max_num_f32 v255, s105, v2 :: v_dual_lshrrev_b32 v7, s1, v3 ; encoding: [0x69,0x50,0x29,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s1, v2 :: v_dual_lshrrev_b32 v7, s105, v3 +// GFX1250: v_dual_max_num_f32 v255, s1, v2 :: v_dual_lshrrev_b32 v7, s105, v3 ; encoding: [0x01,0x50,0x29,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v7, vcc_lo, v3 +// GFX1250: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v7, vcc_lo, v3 ; encoding: [0x7b,0x50,0x29,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v7, vcc_hi, v3 +// GFX1250: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v7, vcc_hi, v3 ; encoding: [0x7f,0x50,0x29,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_lshrrev_b32 v7, ttmp15, v3 +// GFX1250: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_lshrrev_b32 v7, ttmp15, v3 ; encoding: [0x7e,0x50,0x29,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 
v255, m0, v2 :: v_dual_lshrrev_b32 v7, m0, v3 +// GFX1250: v_dual_max_num_f32 v255, m0, v2 :: v_dual_lshrrev_b32 v7, m0, v3 ; encoding: [0x7d,0x50,0x29,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v7, exec_lo, v3 +// GFX1250: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v7, exec_lo, v3 ; encoding: [0x6b,0x50,0x29,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v7, exec_hi, v3 +// GFX1250: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x29,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v7, -1, v3 +// GFX1250: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v7, -1, v3 ; encoding: [0xfd,0x50,0x29,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v7, 0.5, v2 +// GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x29,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, -1, v4 :: v_dual_lshrrev_b32 v7, src_scc, v5 +// GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_lshrrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x29,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v4, v2 :: v_dual_lshrrev_b32 v7, v1, v3 +// GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_lshrrev_b32 v7, v1, v3 ; encoding: [0x04,0x51,0x2d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// 
W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v1, v2 :: v_dual_lshrrev_b32 v7, v255, v3 +// GFX1250: v_dual_min_num_f32 v255, v1, v2 :: v_dual_lshrrev_b32 v7, v255, v3 ; encoding: [0x01,0x51,0x2d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v255, v2 :: v_dual_lshrrev_b32 v7, v2, v3 +// GFX1250: v_dual_min_num_f32 v255, v255, v2 :: v_dual_lshrrev_b32 v7, v2, v3 ; encoding: [0xff,0x51,0x2d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v2, v2 :: v_dual_lshrrev_b32 v7, v3, v3 +// GFX1250: v_dual_min_num_f32 v255, v2, v2 :: v_dual_lshrrev_b32 v7, v3, v3 ; encoding: [0x02,0x51,0x2d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v3, v2 :: v_dual_lshrrev_b32 v7, v4, v3 +// GFX1250: v_dual_min_num_f32 v255, v3, v2 :: v_dual_lshrrev_b32 v7, v4, v3 ; encoding: [0x03,0x51,0x2d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s105, v2 :: v_dual_lshrrev_b32 v7, s1, v3 +// GFX1250: v_dual_min_num_f32 v255, s105, v2 :: v_dual_lshrrev_b32 v7, s1, v3 ; encoding: [0x69,0x50,0x2d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s1, v2 :: v_dual_lshrrev_b32 v7, s105, v3 +// GFX1250: v_dual_min_num_f32 v255, s1, v2 :: v_dual_lshrrev_b32 v7, s105, v3 ; encoding: [0x01,0x50,0x2d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v7, vcc_lo, v3 +// GFX1250: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v7, vcc_lo, v3 ; encoding: 
[0x7b,0x50,0x2d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v7, vcc_hi, v3 +// GFX1250: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v7, vcc_hi, v3 ; encoding: [0x7f,0x50,0x2d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_lshrrev_b32 v7, ttmp15, v3 +// GFX1250: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_lshrrev_b32 v7, ttmp15, v3 ; encoding: [0x7e,0x50,0x2d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, m0, v2 :: v_dual_lshrrev_b32 v7, m0, v3 +// GFX1250: v_dual_min_num_f32 v255, m0, v2 :: v_dual_lshrrev_b32 v7, m0, v3 ; encoding: [0x7d,0x50,0x2d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v7, exec_lo, v3 +// GFX1250: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v7, exec_lo, v3 ; encoding: [0x6b,0x50,0x2d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v7, exec_hi, v3 +// GFX1250: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x2d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v7, -1, v3 +// GFX1250: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v7, -1, v3 ; encoding: [0xfd,0x50,0x2d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0.5, v3 :: 
v_dual_lshrrev_b32 v7, 0.5, v2 +// GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x2d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, -1, v4 :: v_dual_lshrrev_b32 v7, src_scc, v5 +// GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_lshrrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x2d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v4 :: v_dual_lshrrev_b32 v7, v1, v255 +// GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_lshrrev_b32 v7, v1, v255 ; encoding: [0x04,0x51,0x21,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v1 :: v_dual_lshrrev_b32 v7, v255, v255 +// GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_lshrrev_b32 v7, v255, v255 ; encoding: [0x01,0x51,0x21,0xcf,0xff,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v255 :: v_dual_lshrrev_b32 v7, v2, v255 +// GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_lshrrev_b32 v7, v2, v255 ; encoding: [0xff,0x51,0x21,0xcf,0x02,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v2 :: v_dual_lshrrev_b32 v7, v3, v255 +// GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_lshrrev_b32 v7, v3, v255 ; encoding: [0x02,0x51,0x21,0xcf,0x03,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v3 :: v_dual_lshrrev_b32 v7, v4, v255 +// GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_lshrrev_b32 v7, v4, v255 ; encoding: [0x03,0x51,0x21,0xcf,0x04,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s105 :: v_dual_lshrrev_b32 
v7, s1, v255 +// GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_lshrrev_b32 v7, s1, v255 ; encoding: [0x69,0x50,0x21,0xcf,0x01,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s1 :: v_dual_lshrrev_b32 v7, s105, v255 +// GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_lshrrev_b32 v7, s105, v255 ; encoding: [0x01,0x50,0x21,0xcf,0x69,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, ttmp15 :: v_dual_lshrrev_b32 v7, vcc_lo, v255 +// GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_lshrrev_b32 v7, vcc_lo, v255 ; encoding: [0x7b,0x50,0x21,0xcf,0x6a,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_hi :: v_dual_lshrrev_b32 v7, vcc_hi, v255 +// GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_lshrrev_b32 v7, vcc_hi, v255 ; encoding: [0x7f,0x50,0x21,0xcf,0x6b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_lo :: v_dual_lshrrev_b32 v7, ttmp15, v255 +// GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_lshrrev_b32 v7, ttmp15, v255 ; encoding: [0x7e,0x50,0x21,0xcf,0x7b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, m0 :: v_dual_lshrrev_b32 v7, m0, v255 +// GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_lshrrev_b32 v7, m0, v255 ; encoding: [0x7d,0x50,0x21,0xcf,0x7d,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_hi :: v_dual_lshrrev_b32 v7, exec_lo, v255 +// GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_lshrrev_b32 v7, exec_lo, v255 ; encoding: [0x6b,0x50,0x21,0xcf,0x7e,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_lo :: 
v_dual_lshrrev_b32 v7, exec_hi, v255 +// GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_lshrrev_b32 v7, exec_hi, v255 ; encoding: [0x6a,0x50,0x21,0xcf,0x7f,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, src_scc :: v_dual_lshrrev_b32 v7, -1, v255 +// GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_lshrrev_b32 v7, -1, v255 ; encoding: [0xfd,0x50,0x21,0xcf,0xc1,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 0.5 :: v_dual_lshrrev_b32 v7, 0.5, v3 +// GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_lshrrev_b32 v7, 0.5, v3 ; encoding: [0xf0,0x50,0x21,0xcf,0xf0,0x00,0x00,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, -1 :: v_dual_lshrrev_b32 v7, src_scc, v4 +// GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_lshrrev_b32 v7, src_scc, v4 ; encoding: [0xc1,0x50,0x21,0xcf,0xfd,0x00,0x00,0x00,0xff,0x04,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_lshrrev_b32 v7, v1, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_lshrrev_b32 v7, v1, v3 ; encoding: [0x04,0x51,0x1d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_lshrrev_b32 v7, v255, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_lshrrev_b32 v7, v255, v3 ; encoding: [0x01,0x51,0x1d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_lshrrev_b32 v7, v2, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_lshrrev_b32 v7, v2, v3 ; encoding: [0xff,0x51,0x1d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction 
requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_lshrrev_b32 v7, v3, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_lshrrev_b32 v7, v3, v3 ; encoding: [0x02,0x51,0x1d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_lshrrev_b32 v7, v4, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_lshrrev_b32 v7, v4, v3 ; encoding: [0x03,0x51,0x1d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_lshrrev_b32 v7, s1, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_lshrrev_b32 v7, s1, v3 ; encoding: [0x69,0x50,0x1d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_lshrrev_b32 v7, s105, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_lshrrev_b32 v7, s105, v3 ; encoding: [0x01,0x50,0x1d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v7, vcc_lo, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v7, vcc_lo, v3 ; encoding: [0x7b,0x50,0x1d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v7, vcc_hi, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v7, vcc_hi, v3 ; encoding: [0x7f,0x50,0x1d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_lshrrev_b32 v7, ttmp15, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: 
v_dual_lshrrev_b32 v7, ttmp15, v3 ; encoding: [0x7e,0x50,0x1d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_lshrrev_b32 v7, m0, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_lshrrev_b32 v7, m0, v3 ; encoding: [0x7d,0x50,0x1d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v7, exec_lo, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v7, exec_lo, v3 ; encoding: [0x6b,0x50,0x1d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v7, exec_hi, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x1d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v7, -1, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v7, -1, v3 ; encoding: [0xfd,0x50,0x1d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v7, 0.5, v2 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x1d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_lshrrev_b32 v7, src_scc, v5 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_lshrrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x1d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_mul_f32 v255, v4, v2 :: v_dual_lshrrev_b32 v7, v1, v3 +// GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_lshrrev_b32 v7, v1, v3 ; encoding: [0x04,0x51,0x0d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v1, v2 :: v_dual_lshrrev_b32 v7, v255, v3 +// GFX1250: v_dual_mul_f32 v255, v1, v2 :: v_dual_lshrrev_b32 v7, v255, v3 ; encoding: [0x01,0x51,0x0d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v255, v2 :: v_dual_lshrrev_b32 v7, v2, v3 +// GFX1250: v_dual_mul_f32 v255, v255, v2 :: v_dual_lshrrev_b32 v7, v2, v3 ; encoding: [0xff,0x51,0x0d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v2, v2 :: v_dual_lshrrev_b32 v7, v3, v3 +// GFX1250: v_dual_mul_f32 v255, v2, v2 :: v_dual_lshrrev_b32 v7, v3, v3 ; encoding: [0x02,0x51,0x0d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v3, v2 :: v_dual_lshrrev_b32 v7, v4, v3 +// GFX1250: v_dual_mul_f32 v255, v3, v2 :: v_dual_lshrrev_b32 v7, v4, v3 ; encoding: [0x03,0x51,0x0d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s105, v2 :: v_dual_lshrrev_b32 v7, s1, v3 +// GFX1250: v_dual_mul_f32 v255, s105, v2 :: v_dual_lshrrev_b32 v7, s1, v3 ; encoding: [0x69,0x50,0x0d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s1, v2 :: v_dual_lshrrev_b32 v7, s105, v3 +// GFX1250: v_dual_mul_f32 v255, s1, v2 :: v_dual_lshrrev_b32 v7, s105, v3 ; encoding: [0x01,0x50,0x0d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction 
requires wavesize=32 + +v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v7, vcc_lo, v3 +// GFX1250: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v7, vcc_lo, v3 ; encoding: [0x7b,0x50,0x0d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v7, vcc_hi, v3 +// GFX1250: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v7, vcc_hi, v3 ; encoding: [0x7f,0x50,0x0d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_lshrrev_b32 v7, ttmp15, v3 +// GFX1250: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_lshrrev_b32 v7, ttmp15, v3 ; encoding: [0x7e,0x50,0x0d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, m0, v2 :: v_dual_lshrrev_b32 v7, m0, v3 +// GFX1250: v_dual_mul_f32 v255, m0, v2 :: v_dual_lshrrev_b32 v7, m0, v3 ; encoding: [0x7d,0x50,0x0d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v7, exec_lo, v3 +// GFX1250: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v7, exec_lo, v3 ; encoding: [0x6b,0x50,0x0d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v7, exec_hi, v3 +// GFX1250: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x0d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v7, -1, v3 +// GFX1250: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v7, -1, v3 ; encoding: 
[0xfd,0x50,0x0d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v7, 0.5, v2 +// GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x0d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, -1, v4 :: v_dual_lshrrev_b32 v7, src_scc, v5 +// GFX1250: v_dual_mul_f32 v255, -1, v4 :: v_dual_lshrrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x0d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v4, v2 :: v_dual_lshrrev_b32 v7, v1, v3 +// GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_lshrrev_b32 v7, v1, v3 ; encoding: [0x04,0x51,0x15,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v1, v2 :: v_dual_lshrrev_b32 v7, v255, v3 +// GFX1250: v_dual_sub_f32 v255, v1, v2 :: v_dual_lshrrev_b32 v7, v255, v3 ; encoding: [0x01,0x51,0x15,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v255, v2 :: v_dual_lshrrev_b32 v7, v2, v3 +// GFX1250: v_dual_sub_f32 v255, v255, v2 :: v_dual_lshrrev_b32 v7, v2, v3 ; encoding: [0xff,0x51,0x15,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v2, v2 :: v_dual_lshrrev_b32 v7, v3, v3 +// GFX1250: v_dual_sub_f32 v255, v2, v2 :: v_dual_lshrrev_b32 v7, v3, v3 ; encoding: [0x02,0x51,0x15,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v3, v2 :: v_dual_lshrrev_b32 v7, v4, v3 +// GFX1250: v_dual_sub_f32 v255, v3, v2 :: v_dual_lshrrev_b32 v7, v4, v3 ; encoding: 
[0x03,0x51,0x15,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s105, v2 :: v_dual_lshrrev_b32 v7, s1, v3 +// GFX1250: v_dual_sub_f32 v255, s105, v2 :: v_dual_lshrrev_b32 v7, s1, v3 ; encoding: [0x69,0x50,0x15,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s1, v2 :: v_dual_lshrrev_b32 v7, s105, v3 +// GFX1250: v_dual_sub_f32 v255, s1, v2 :: v_dual_lshrrev_b32 v7, s105, v3 ; encoding: [0x01,0x50,0x15,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v7, vcc_lo, v3 +// GFX1250: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v7, vcc_lo, v3 ; encoding: [0x7b,0x50,0x15,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v7, vcc_hi, v3 +// GFX1250: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v7, vcc_hi, v3 ; encoding: [0x7f,0x50,0x15,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_lshrrev_b32 v7, ttmp15, v3 +// GFX1250: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_lshrrev_b32 v7, ttmp15, v3 ; encoding: [0x7e,0x50,0x15,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, m0, v2 :: v_dual_lshrrev_b32 v7, m0, v3 +// GFX1250: v_dual_sub_f32 v255, m0, v2 :: v_dual_lshrrev_b32 v7, m0, v3 ; encoding: [0x7d,0x50,0x15,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v7, exec_lo, v3 +// GFX1250: v_dual_sub_f32 v255, vcc_hi, v2 :: 
v_dual_lshrrev_b32 v7, exec_lo, v3 ; encoding: [0x6b,0x50,0x15,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v7, exec_hi, v3 +// GFX1250: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x15,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v7, -1, v3 +// GFX1250: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v7, -1, v3 ; encoding: [0xfd,0x50,0x15,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v7, 0.5, v2 +// GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x15,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, -1, v4 :: v_dual_lshrrev_b32 v7, src_scc, v5 +// GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_lshrrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x15,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v4, v2 :: v_dual_lshrrev_b32 v7, v1, v3 +// GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_lshrrev_b32 v7, v1, v3 ; encoding: [0x04,0x51,0x19,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v1, v2 :: v_dual_lshrrev_b32 v7, v255, v3 +// GFX1250: v_dual_subrev_f32 v255, v1, v2 :: v_dual_lshrrev_b32 v7, v255, v3 ; encoding: [0x01,0x51,0x19,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v255, v2 :: v_dual_lshrrev_b32 v7, v2, v3 +// 
GFX1250: v_dual_subrev_f32 v255, v255, v2 :: v_dual_lshrrev_b32 v7, v2, v3 ; encoding: [0xff,0x51,0x19,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v2, v2 :: v_dual_lshrrev_b32 v7, v3, v3 +// GFX1250: v_dual_subrev_f32 v255, v2, v2 :: v_dual_lshrrev_b32 v7, v3, v3 ; encoding: [0x02,0x51,0x19,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v3, v2 :: v_dual_lshrrev_b32 v7, v4, v3 +// GFX1250: v_dual_subrev_f32 v255, v3, v2 :: v_dual_lshrrev_b32 v7, v4, v3 ; encoding: [0x03,0x51,0x19,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s105, v2 :: v_dual_lshrrev_b32 v7, s1, v3 +// GFX1250: v_dual_subrev_f32 v255, s105, v2 :: v_dual_lshrrev_b32 v7, s1, v3 ; encoding: [0x69,0x50,0x19,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s1, v2 :: v_dual_lshrrev_b32 v7, s105, v3 +// GFX1250: v_dual_subrev_f32 v255, s1, v2 :: v_dual_lshrrev_b32 v7, s105, v3 ; encoding: [0x01,0x50,0x19,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v7, vcc_lo, v3 +// GFX1250: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v7, vcc_lo, v3 ; encoding: [0x7b,0x50,0x19,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v7, vcc_hi, v3 +// GFX1250: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v7, vcc_hi, v3 ; encoding: [0x7f,0x50,0x19,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_lshrrev_b32 v7, ttmp15, v3 +// GFX1250: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_lshrrev_b32 v7, ttmp15, v3 ; encoding: [0x7e,0x50,0x19,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, m0, v2 :: v_dual_lshrrev_b32 v7, m0, v3 +// GFX1250: v_dual_subrev_f32 v255, m0, v2 :: v_dual_lshrrev_b32 v7, m0, v3 ; encoding: [0x7d,0x50,0x19,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v7, exec_lo, v3 +// GFX1250: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v7, exec_lo, v3 ; encoding: [0x6b,0x50,0x19,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v7, exec_hi, v3 +// GFX1250: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x19,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v7, -1, v3 +// GFX1250: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v7, -1, v3 ; encoding: [0xfd,0x50,0x19,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v7, 0.5, v2 +// GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x19,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, -1, v4 :: v_dual_lshrrev_b32 v7, src_scc, v5 +// GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_lshrrev_b32 v7, src_scc, v5 ; encoding: 
[0xc1,0x50,0x19,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v4, v2 :: v_dual_ashrrev_i32 v7, v1, v3 +// GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_ashrrev_i32 v7, v1, v3 ; encoding: [0x04,0x61,0x11,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v1, v2 :: v_dual_ashrrev_i32 v7, v255, v3 +// GFX1250: v_dual_add_f32 v255, v1, v2 :: v_dual_ashrrev_i32 v7, v255, v3 ; encoding: [0x01,0x61,0x11,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v255, v2 :: v_dual_ashrrev_i32 v7, v2, v3 +// GFX1250: v_dual_add_f32 v255, v255, v2 :: v_dual_ashrrev_i32 v7, v2, v3 ; encoding: [0xff,0x61,0x11,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v2, v2 :: v_dual_ashrrev_i32 v7, v3, v3 +// GFX1250: v_dual_add_f32 v255, v2, v2 :: v_dual_ashrrev_i32 v7, v3, v3 ; encoding: [0x02,0x61,0x11,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v3, v2 :: v_dual_ashrrev_i32 v7, v4, v3 +// GFX1250: v_dual_add_f32 v255, v3, v2 :: v_dual_ashrrev_i32 v7, v4, v3 ; encoding: [0x03,0x61,0x11,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s105, v2 :: v_dual_ashrrev_i32 v7, s1, v3 +// GFX1250: v_dual_add_f32 v255, s105, v2 :: v_dual_ashrrev_i32 v7, s1, v3 ; encoding: [0x69,0x60,0x11,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, s1, v2 :: v_dual_ashrrev_i32 v7, s105, v3 +// GFX1250: v_dual_add_f32 v255, s1, v2 :: v_dual_ashrrev_i32 v7, s105, v3 ; encoding: 
[0x01,0x60,0x11,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v7, vcc_lo, v3 +// GFX1250: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v7, vcc_lo, v3 ; encoding: [0x7b,0x60,0x11,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v7, vcc_hi, v3 +// GFX1250: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v7, vcc_hi, v3 ; encoding: [0x7f,0x60,0x11,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v7, ttmp15, v3 +// GFX1250: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v7, ttmp15, v3 ; encoding: [0x7e,0x60,0x11,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, m0, v2 :: v_dual_ashrrev_i32 v7, m0, v3 +// GFX1250: v_dual_add_f32 v255, m0, v2 :: v_dual_ashrrev_i32 v7, m0, v3 ; encoding: [0x7d,0x60,0x11,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v7, exec_lo, v3 +// GFX1250: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v7, exec_lo, v3 ; encoding: [0x6b,0x60,0x11,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 v7, exec_hi, v3 +// GFX1250: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x11,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v7, -1, v3 +// GFX1250: 
v_dual_add_f32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v7, -1, v3 ; encoding: [0xfd,0x60,0x11,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v7, 0.5, v2 +// GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x11,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, -1, v4 :: v_dual_ashrrev_i32 v7, src_scc, v5 +// GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_ashrrev_i32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x11,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_ashrrev_i32 v7, v1, v3 +// GFX1250: v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_ashrrev_i32 v7, v1, v3 ; encoding: [0x04,0x61,0x25,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v1, v2, vcc_lo :: v_dual_ashrrev_i32 v7, v255, v3 +// GFX1250: v_dual_cndmask_b32 v255, v1, v2, vcc_lo :: v_dual_ashrrev_i32 v7, v255, v3 ; encoding: [0x01,0x61,0x25,0xcf,0xff,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v255, v2, vcc_lo :: v_dual_ashrrev_i32 v7, v2, v3 +// GFX1250: v_dual_cndmask_b32 v255, v255, v2, vcc_lo :: v_dual_ashrrev_i32 v7, v2, v3 ; encoding: [0xff,0x61,0x25,0xcf,0x02,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v2, v2, vcc_lo :: v_dual_ashrrev_i32 v7, v3, v3 +// GFX1250: v_dual_cndmask_b32 v255, v2, v2, vcc_lo :: v_dual_ashrrev_i32 v7, v3, v3 ; encoding: [0x02,0x61,0x25,0xcf,0x03,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction 
requires wavesize=32 + +v_dual_cndmask_b32 v255, v3, v2, vcc_lo :: v_dual_ashrrev_i32 v7, v4, v3 +// GFX1250: v_dual_cndmask_b32 v255, v3, v2, vcc_lo :: v_dual_ashrrev_i32 v7, v4, v3 ; encoding: [0x03,0x61,0x25,0xcf,0x04,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s105, v2, vcc_lo :: v_dual_ashrrev_i32 v7, s105, v3 +// GFX1250: v_dual_cndmask_b32 v255, s105, v2, vcc_lo :: v_dual_ashrrev_i32 v7, s105, v3 ; encoding: [0x69,0x60,0x25,0xcf,0x69,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, s1, v2, vcc_lo :: v_dual_ashrrev_i32 v7, s1, v3 +// GFX1250: v_dual_cndmask_b32 v255, s1, v2, vcc_lo :: v_dual_ashrrev_i32 v7, s1, v3 ; encoding: [0x01,0x60,0x25,0xcf,0x01,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, ttmp15, v2, vcc_lo :: v_dual_ashrrev_i32 v7, ttmp15, v3 +// GFX1250: v_dual_cndmask_b32 v255, ttmp15, v2, vcc_lo :: v_dual_ashrrev_i32 v7, ttmp15, v3 ; encoding: [0x7b,0x60,0x25,0xcf,0x7b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_hi, v2, vcc_lo :: v_dual_ashrrev_i32 v7, exec_hi, v3 +// GFX1250: v_dual_cndmask_b32 v255, exec_hi, v2, vcc_lo :: v_dual_ashrrev_i32 v7, exec_hi, v3 ; encoding: [0x7f,0x60,0x25,0xcf,0x7f,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, exec_lo, v2, vcc_lo :: v_dual_ashrrev_i32 v7, exec_lo, v3 +// GFX1250: v_dual_cndmask_b32 v255, exec_lo, v2, vcc_lo :: v_dual_ashrrev_i32 v7, exec_lo, v3 ; encoding: [0x7e,0x60,0x25,0xcf,0x7e,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, m0, v2, vcc_lo :: v_dual_ashrrev_i32 v7, m0, v3 +// 
GFX1250: v_dual_cndmask_b32 v255, m0, v2, vcc_lo :: v_dual_ashrrev_i32 v7, m0, v3 ; encoding: [0x7d,0x60,0x25,0xcf,0x7d,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_hi, v2, vcc_lo :: v_dual_ashrrev_i32 v7, vcc_hi, v3 +// GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v2, vcc_lo :: v_dual_ashrrev_i32 v7, vcc_hi, v3 ; encoding: [0x6b,0x60,0x25,0xcf,0x6b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, vcc_lo, v2, vcc_lo :: v_dual_ashrrev_i32 v7, vcc_lo, v3 +// GFX1250: v_dual_cndmask_b32 v255, vcc_lo, v2, vcc_lo :: v_dual_ashrrev_i32 v7, vcc_lo, v3 ; encoding: [0x6a,0x60,0x25,0xcf,0x6a,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, src_scc, v2, vcc_lo :: v_dual_ashrrev_i32 v7, -1, v3 +// GFX1250: v_dual_cndmask_b32 v255, src_scc, v2, vcc_lo :: v_dual_ashrrev_i32 v7, -1, v3 ; encoding: [0xfd,0x60,0x25,0xcf,0xc1,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_ashrrev_i32 v7, 0.5, v2 +// GFX1250: v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_ashrrev_i32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x25,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_ashrrev_i32 v7, src_scc, v5 +// GFX1250: v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_ashrrev_i32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x25,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v4, v2 :: v_dual_ashrrev_i32 v7, v1, v3 +// GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_ashrrev_i32 v7, v1, v3 ; encoding: 
[0x04,0x61,0x01,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v1, v2 :: v_dual_ashrrev_i32 v7, v255, v3 +// GFX1250: v_dual_fmac_f32 v255, v1, v2 :: v_dual_ashrrev_i32 v7, v255, v3 ; encoding: [0x01,0x61,0x01,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v255, v2 :: v_dual_ashrrev_i32 v7, v2, v3 +// GFX1250: v_dual_fmac_f32 v255, v255, v2 :: v_dual_ashrrev_i32 v7, v2, v3 ; encoding: [0xff,0x61,0x01,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v2, v2 :: v_dual_ashrrev_i32 v7, v3, v3 +// GFX1250: v_dual_fmac_f32 v255, v2, v2 :: v_dual_ashrrev_i32 v7, v3, v3 ; encoding: [0x02,0x61,0x01,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v3, v2 :: v_dual_ashrrev_i32 v7, v4, v3 +// GFX1250: v_dual_fmac_f32 v255, v3, v2 :: v_dual_ashrrev_i32 v7, v4, v3 ; encoding: [0x03,0x61,0x01,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s105, v2 :: v_dual_ashrrev_i32 v7, s1, v3 +// GFX1250: v_dual_fmac_f32 v255, s105, v2 :: v_dual_ashrrev_i32 v7, s1, v3 ; encoding: [0x69,0x60,0x01,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, s1, v2 :: v_dual_ashrrev_i32 v7, s105, v3 +// GFX1250: v_dual_fmac_f32 v255, s1, v2 :: v_dual_ashrrev_i32 v7, s105, v3 ; encoding: [0x01,0x60,0x01,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v7, vcc_lo, v3 +// GFX1250: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v7, vcc_lo, 
v3 ; encoding: [0x7b,0x60,0x01,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v7, vcc_hi, v3 +// GFX1250: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v7, vcc_hi, v3 ; encoding: [0x7f,0x60,0x01,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v7, ttmp15, v3 +// GFX1250: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v7, ttmp15, v3 ; encoding: [0x7e,0x60,0x01,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, m0, v2 :: v_dual_ashrrev_i32 v7, m0, v3 +// GFX1250: v_dual_fmac_f32 v255, m0, v2 :: v_dual_ashrrev_i32 v7, m0, v3 ; encoding: [0x7d,0x60,0x01,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v7, exec_lo, v3 +// GFX1250: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v7, exec_lo, v3 ; encoding: [0x6b,0x60,0x01,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 v7, exec_hi, v3 +// GFX1250: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x01,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v7, -1, v3 +// GFX1250: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v7, -1, v3 ; encoding: [0xfd,0x60,0x01,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v7, 0.5, v2 +// 
GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x01,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, -1, v4 :: v_dual_ashrrev_i32 v7, src_scc, v5 +// GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_ashrrev_i32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x01,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v4, v2 :: v_dual_ashrrev_i32 v7, v1, v3 +// GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_ashrrev_i32 v7, v1, v3 ; encoding: [0x04,0x61,0x29,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v1, v2 :: v_dual_ashrrev_i32 v7, v255, v3 +// GFX1250: v_dual_max_num_f32 v255, v1, v2 :: v_dual_ashrrev_i32 v7, v255, v3 ; encoding: [0x01,0x61,0x29,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v255, v2 :: v_dual_ashrrev_i32 v7, v2, v3 +// GFX1250: v_dual_max_num_f32 v255, v255, v2 :: v_dual_ashrrev_i32 v7, v2, v3 ; encoding: [0xff,0x61,0x29,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v2, v2 :: v_dual_ashrrev_i32 v7, v3, v3 +// GFX1250: v_dual_max_num_f32 v255, v2, v2 :: v_dual_ashrrev_i32 v7, v3, v3 ; encoding: [0x02,0x61,0x29,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v3, v2 :: v_dual_ashrrev_i32 v7, v4, v3 +// GFX1250: v_dual_max_num_f32 v255, v3, v2 :: v_dual_ashrrev_i32 v7, v4, v3 ; encoding: [0x03,0x61,0x29,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s105, v2 
:: v_dual_ashrrev_i32 v7, s1, v3 +// GFX1250: v_dual_max_num_f32 v255, s105, v2 :: v_dual_ashrrev_i32 v7, s1, v3 ; encoding: [0x69,0x60,0x29,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, s1, v2 :: v_dual_ashrrev_i32 v7, s105, v3 +// GFX1250: v_dual_max_num_f32 v255, s1, v2 :: v_dual_ashrrev_i32 v7, s105, v3 ; encoding: [0x01,0x60,0x29,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v7, vcc_lo, v3 +// GFX1250: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v7, vcc_lo, v3 ; encoding: [0x7b,0x60,0x29,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v7, vcc_hi, v3 +// GFX1250: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v7, vcc_hi, v3 ; encoding: [0x7f,0x60,0x29,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v7, ttmp15, v3 +// GFX1250: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v7, ttmp15, v3 ; encoding: [0x7e,0x60,0x29,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, m0, v2 :: v_dual_ashrrev_i32 v7, m0, v3 +// GFX1250: v_dual_max_num_f32 v255, m0, v2 :: v_dual_ashrrev_i32 v7, m0, v3 ; encoding: [0x7d,0x60,0x29,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v7, exec_lo, v3 +// GFX1250: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v7, exec_lo, v3 ; encoding: [0x6b,0x60,0x29,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// 
W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 v7, exec_hi, v3 +// GFX1250: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x29,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v7, -1, v3 +// GFX1250: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v7, -1, v3 ; encoding: [0xfd,0x60,0x29,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v7, 0.5, v2 +// GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x29,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, -1, v4 :: v_dual_ashrrev_i32 v7, src_scc, v5 +// GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_ashrrev_i32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x29,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v4, v2 :: v_dual_ashrrev_i32 v7, v1, v3 +// GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_ashrrev_i32 v7, v1, v3 ; encoding: [0x04,0x61,0x2d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v1, v2 :: v_dual_ashrrev_i32 v7, v255, v3 +// GFX1250: v_dual_min_num_f32 v255, v1, v2 :: v_dual_ashrrev_i32 v7, v255, v3 ; encoding: [0x01,0x61,0x2d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v255, v2 :: v_dual_ashrrev_i32 v7, v2, v3 +// GFX1250: v_dual_min_num_f32 v255, v255, v2 :: v_dual_ashrrev_i32 v7, v2, v3 ; 
encoding: [0xff,0x61,0x2d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v2, v2 :: v_dual_ashrrev_i32 v7, v3, v3 +// GFX1250: v_dual_min_num_f32 v255, v2, v2 :: v_dual_ashrrev_i32 v7, v3, v3 ; encoding: [0x02,0x61,0x2d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v3, v2 :: v_dual_ashrrev_i32 v7, v4, v3 +// GFX1250: v_dual_min_num_f32 v255, v3, v2 :: v_dual_ashrrev_i32 v7, v4, v3 ; encoding: [0x03,0x61,0x2d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s105, v2 :: v_dual_ashrrev_i32 v7, s1, v3 +// GFX1250: v_dual_min_num_f32 v255, s105, v2 :: v_dual_ashrrev_i32 v7, s1, v3 ; encoding: [0x69,0x60,0x2d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, s1, v2 :: v_dual_ashrrev_i32 v7, s105, v3 +// GFX1250: v_dual_min_num_f32 v255, s1, v2 :: v_dual_ashrrev_i32 v7, s105, v3 ; encoding: [0x01,0x60,0x2d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v7, vcc_lo, v3 +// GFX1250: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v7, vcc_lo, v3 ; encoding: [0x7b,0x60,0x2d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v7, vcc_hi, v3 +// GFX1250: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v7, vcc_hi, v3 ; encoding: [0x7f,0x60,0x2d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v7, ttmp15, v3 +// 
GFX1250: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v7, ttmp15, v3 ; encoding: [0x7e,0x60,0x2d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, m0, v2 :: v_dual_ashrrev_i32 v7, m0, v3 +// GFX1250: v_dual_min_num_f32 v255, m0, v2 :: v_dual_ashrrev_i32 v7, m0, v3 ; encoding: [0x7d,0x60,0x2d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v7, exec_lo, v3 +// GFX1250: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v7, exec_lo, v3 ; encoding: [0x6b,0x60,0x2d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 v7, exec_hi, v3 +// GFX1250: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x2d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v7, -1, v3 +// GFX1250: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v7, -1, v3 ; encoding: [0xfd,0x60,0x2d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v7, 0.5, v2 +// GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x2d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, -1, v4 :: v_dual_ashrrev_i32 v7, src_scc, v5 +// GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_ashrrev_i32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x2d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction 
requires wavesize=32 + +v_dual_mov_b32 v255, v4 :: v_dual_ashrrev_i32 v7, v1, v255 +// GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_ashrrev_i32 v7, v1, v255 ; encoding: [0x04,0x61,0x21,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v1 :: v_dual_ashrrev_i32 v7, v255, v255 +// GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_ashrrev_i32 v7, v255, v255 ; encoding: [0x01,0x61,0x21,0xcf,0xff,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v255 :: v_dual_ashrrev_i32 v7, v2, v255 +// GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_ashrrev_i32 v7, v2, v255 ; encoding: [0xff,0x61,0x21,0xcf,0x02,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v2 :: v_dual_ashrrev_i32 v7, v3, v255 +// GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_ashrrev_i32 v7, v3, v255 ; encoding: [0x02,0x61,0x21,0xcf,0x03,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v3 :: v_dual_ashrrev_i32 v7, v4, v255 +// GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_ashrrev_i32 v7, v4, v255 ; encoding: [0x03,0x61,0x21,0xcf,0x04,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s105 :: v_dual_ashrrev_i32 v7, s1, v255 +// GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_ashrrev_i32 v7, s1, v255 ; encoding: [0x69,0x60,0x21,0xcf,0x01,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, s1 :: v_dual_ashrrev_i32 v7, s105, v255 +// GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_ashrrev_i32 v7, s105, v255 ; encoding: [0x01,0x60,0x21,0xcf,0x69,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, 
ttmp15 :: v_dual_ashrrev_i32 v7, vcc_lo, v255 +// GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_ashrrev_i32 v7, vcc_lo, v255 ; encoding: [0x7b,0x60,0x21,0xcf,0x6a,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_hi :: v_dual_ashrrev_i32 v7, vcc_hi, v255 +// GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_ashrrev_i32 v7, vcc_hi, v255 ; encoding: [0x7f,0x60,0x21,0xcf,0x6b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, exec_lo :: v_dual_ashrrev_i32 v7, ttmp15, v255 +// GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_ashrrev_i32 v7, ttmp15, v255 ; encoding: [0x7e,0x60,0x21,0xcf,0x7b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, m0 :: v_dual_ashrrev_i32 v7, m0, v255 +// GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_ashrrev_i32 v7, m0, v255 ; encoding: [0x7d,0x60,0x21,0xcf,0x7d,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_hi :: v_dual_ashrrev_i32 v7, exec_lo, v255 +// GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_ashrrev_i32 v7, exec_lo, v255 ; encoding: [0x6b,0x60,0x21,0xcf,0x7e,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, vcc_lo :: v_dual_ashrrev_i32 v7, exec_hi, v255 +// GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_ashrrev_i32 v7, exec_hi, v255 ; encoding: [0x6a,0x60,0x21,0xcf,0x7f,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, src_scc :: v_dual_ashrrev_i32 v7, -1, v255 +// GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_ashrrev_i32 v7, -1, v255 ; encoding: [0xfd,0x60,0x21,0xcf,0xc1,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction 
requires wavesize=32 + +v_dual_mov_b32 v255, 0.5 :: v_dual_ashrrev_i32 v7, 0.5, v3 +// GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_ashrrev_i32 v7, 0.5, v3 ; encoding: [0xf0,0x60,0x21,0xcf,0xf0,0x00,0x00,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, -1 :: v_dual_ashrrev_i32 v7, src_scc, v4 +// GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_ashrrev_i32 v7, src_scc, v4 ; encoding: [0xc1,0x60,0x21,0xcf,0xfd,0x00,0x00,0x00,0xff,0x04,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_ashrrev_i32 v7, v1, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_ashrrev_i32 v7, v1, v3 ; encoding: [0x04,0x61,0x1d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_ashrrev_i32 v7, v255, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_ashrrev_i32 v7, v255, v3 ; encoding: [0x01,0x61,0x1d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_ashrrev_i32 v7, v2, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_ashrrev_i32 v7, v2, v3 ; encoding: [0xff,0x61,0x1d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_ashrrev_i32 v7, v3, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_ashrrev_i32 v7, v3, v3 ; encoding: [0x02,0x61,0x1d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_ashrrev_i32 v7, v4, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_ashrrev_i32 v7, v4, v3 ; encoding: 
[0x03,0x61,0x1d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_ashrrev_i32 v7, s1, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_ashrrev_i32 v7, s1, v3 ; encoding: [0x69,0x60,0x1d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_ashrrev_i32 v7, s105, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_ashrrev_i32 v7, s105, v3 ; encoding: [0x01,0x60,0x1d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v7, vcc_lo, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v7, vcc_lo, v3 ; encoding: [0x7b,0x60,0x1d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v7, vcc_hi, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v7, vcc_hi, v3 ; encoding: [0x7f,0x60,0x1d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v7, ttmp15, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v7, ttmp15, v3 ; encoding: [0x7e,0x60,0x1d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_ashrrev_i32 v7, m0, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_ashrrev_i32 v7, m0, v3 ; encoding: [0x7d,0x60,0x1d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v7, exec_lo, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v7, exec_lo, v3 ; encoding: [0x6b,0x60,0x1d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 v7, exec_hi, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x1d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v7, -1, v3 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v7, -1, v3 ; encoding: [0xfd,0x60,0x1d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v7, 0.5, v2 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x1d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_ashrrev_i32 v7, src_scc, v5 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_ashrrev_i32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x1d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v4, v2 :: v_dual_ashrrev_i32 v7, v1, v3 +// GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_ashrrev_i32 v7, v1, v3 ; encoding: [0x04,0x61,0x0d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v1, v2 :: v_dual_ashrrev_i32 v7, v255, v3 +// GFX1250: v_dual_mul_f32 v255, v1, v2 :: v_dual_ashrrev_i32 v7, v255, v3 ; encoding: 
[0x01,0x61,0x0d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v255, v2 :: v_dual_ashrrev_i32 v7, v2, v3 +// GFX1250: v_dual_mul_f32 v255, v255, v2 :: v_dual_ashrrev_i32 v7, v2, v3 ; encoding: [0xff,0x61,0x0d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v2, v2 :: v_dual_ashrrev_i32 v7, v3, v3 +// GFX1250: v_dual_mul_f32 v255, v2, v2 :: v_dual_ashrrev_i32 v7, v3, v3 ; encoding: [0x02,0x61,0x0d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v3, v2 :: v_dual_ashrrev_i32 v7, v4, v3 +// GFX1250: v_dual_mul_f32 v255, v3, v2 :: v_dual_ashrrev_i32 v7, v4, v3 ; encoding: [0x03,0x61,0x0d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s105, v2 :: v_dual_ashrrev_i32 v7, s1, v3 +// GFX1250: v_dual_mul_f32 v255, s105, v2 :: v_dual_ashrrev_i32 v7, s1, v3 ; encoding: [0x69,0x60,0x0d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, s1, v2 :: v_dual_ashrrev_i32 v7, s105, v3 +// GFX1250: v_dual_mul_f32 v255, s1, v2 :: v_dual_ashrrev_i32 v7, s105, v3 ; encoding: [0x01,0x60,0x0d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v7, vcc_lo, v3 +// GFX1250: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v7, vcc_lo, v3 ; encoding: [0x7b,0x60,0x0d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v7, vcc_hi, v3 +// GFX1250: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v7, vcc_hi, 
v3 ; encoding: [0x7f,0x60,0x0d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v7, ttmp15, v3 +// GFX1250: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v7, ttmp15, v3 ; encoding: [0x7e,0x60,0x0d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, m0, v2 :: v_dual_ashrrev_i32 v7, m0, v3 +// GFX1250: v_dual_mul_f32 v255, m0, v2 :: v_dual_ashrrev_i32 v7, m0, v3 ; encoding: [0x7d,0x60,0x0d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v7, exec_lo, v3 +// GFX1250: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v7, exec_lo, v3 ; encoding: [0x6b,0x60,0x0d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 v7, exec_hi, v3 +// GFX1250: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x0d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v7, -1, v3 +// GFX1250: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v7, -1, v3 ; encoding: [0xfd,0x60,0x0d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v7, 0.5, v2 +// GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x0d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, -1, v4 :: v_dual_ashrrev_i32 v7, src_scc, v5 +// GFX1250: v_dual_mul_f32 
v255, -1, v4 :: v_dual_ashrrev_i32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x0d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v4, v2 :: v_dual_ashrrev_i32 v7, v1, v3 +// GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_ashrrev_i32 v7, v1, v3 ; encoding: [0x04,0x61,0x15,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v1, v2 :: v_dual_ashrrev_i32 v7, v255, v3 +// GFX1250: v_dual_sub_f32 v255, v1, v2 :: v_dual_ashrrev_i32 v7, v255, v3 ; encoding: [0x01,0x61,0x15,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v255, v2 :: v_dual_ashrrev_i32 v7, v2, v3 +// GFX1250: v_dual_sub_f32 v255, v255, v2 :: v_dual_ashrrev_i32 v7, v2, v3 ; encoding: [0xff,0x61,0x15,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v2, v2 :: v_dual_ashrrev_i32 v7, v3, v3 +// GFX1250: v_dual_sub_f32 v255, v2, v2 :: v_dual_ashrrev_i32 v7, v3, v3 ; encoding: [0x02,0x61,0x15,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, v3, v2 :: v_dual_ashrrev_i32 v7, v4, v3 +// GFX1250: v_dual_sub_f32 v255, v3, v2 :: v_dual_ashrrev_i32 v7, v4, v3 ; encoding: [0x03,0x61,0x15,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s105, v2 :: v_dual_ashrrev_i32 v7, s1, v3 +// GFX1250: v_dual_sub_f32 v255, s105, v2 :: v_dual_ashrrev_i32 v7, s1, v3 ; encoding: [0x69,0x60,0x15,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, s1, v2 :: v_dual_ashrrev_i32 v7, s105, v3 +// GFX1250: v_dual_sub_f32 v255, s1, v2 
:: v_dual_ashrrev_i32 v7, s105, v3 ; encoding: [0x01,0x60,0x15,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v7, vcc_lo, v3 +// GFX1250: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v7, vcc_lo, v3 ; encoding: [0x7b,0x60,0x15,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v7, vcc_hi, v3 +// GFX1250: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v7, vcc_hi, v3 ; encoding: [0x7f,0x60,0x15,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v7, ttmp15, v3 +// GFX1250: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v7, ttmp15, v3 ; encoding: [0x7e,0x60,0x15,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, m0, v2 :: v_dual_ashrrev_i32 v7, m0, v3 +// GFX1250: v_dual_sub_f32 v255, m0, v2 :: v_dual_ashrrev_i32 v7, m0, v3 ; encoding: [0x7d,0x60,0x15,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v7, exec_lo, v3 +// GFX1250: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v7, exec_lo, v3 ; encoding: [0x6b,0x60,0x15,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 v7, exec_hi, v3 +// GFX1250: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x15,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, src_scc, v2 :: 
v_dual_ashrrev_i32 v7, -1, v3 +// GFX1250: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v7, -1, v3 ; encoding: [0xfd,0x60,0x15,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v7, 0.5, v2 +// GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x15,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v255, -1, v4 :: v_dual_ashrrev_i32 v7, src_scc, v5 +// GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_ashrrev_i32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x15,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v4, v2 :: v_dual_ashrrev_i32 v7, v1, v3 +// GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_ashrrev_i32 v7, v1, v3 ; encoding: [0x04,0x61,0x19,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v1, v2 :: v_dual_ashrrev_i32 v7, v255, v3 +// GFX1250: v_dual_subrev_f32 v255, v1, v2 :: v_dual_ashrrev_i32 v7, v255, v3 ; encoding: [0x01,0x61,0x19,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v255, v2 :: v_dual_ashrrev_i32 v7, v2, v3 +// GFX1250: v_dual_subrev_f32 v255, v255, v2 :: v_dual_ashrrev_i32 v7, v2, v3 ; encoding: [0xff,0x61,0x19,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v2, v2 :: v_dual_ashrrev_i32 v7, v3, v3 +// GFX1250: v_dual_subrev_f32 v255, v2, v2 :: v_dual_ashrrev_i32 v7, v3, v3 ; encoding: [0x02,0x61,0x19,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_subrev_f32 v255, v3, v2 :: v_dual_ashrrev_i32 v7, v4, v3 +// GFX1250: v_dual_subrev_f32 v255, v3, v2 :: v_dual_ashrrev_i32 v7, v4, v3 ; encoding: [0x03,0x61,0x19,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s105, v2 :: v_dual_ashrrev_i32 v7, s1, v3 +// GFX1250: v_dual_subrev_f32 v255, s105, v2 :: v_dual_ashrrev_i32 v7, s1, v3 ; encoding: [0x69,0x60,0x19,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, s1, v2 :: v_dual_ashrrev_i32 v7, s105, v3 +// GFX1250: v_dual_subrev_f32 v255, s1, v2 :: v_dual_ashrrev_i32 v7, s105, v3 ; encoding: [0x01,0x60,0x19,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v7, vcc_lo, v3 +// GFX1250: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v7, vcc_lo, v3 ; encoding: [0x7b,0x60,0x19,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v7, vcc_hi, v3 +// GFX1250: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v7, vcc_hi, v3 ; encoding: [0x7f,0x60,0x19,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v7, ttmp15, v3 +// GFX1250: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v7, ttmp15, v3 ; encoding: [0x7e,0x60,0x19,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, m0, v2 :: v_dual_ashrrev_i32 v7, m0, v3 +// GFX1250: v_dual_subrev_f32 v255, m0, v2 :: v_dual_ashrrev_i32 v7, m0, v3 ; encoding: [0x7d,0x60,0x19,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v7, exec_lo, v3 +// GFX1250: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v7, exec_lo, v3 ; encoding: [0x6b,0x60,0x19,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 v7, exec_hi, v3 +// GFX1250: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x19,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v7, -1, v3 +// GFX1250: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v7, -1, v3 ; encoding: [0xfd,0x60,0x19,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v7, 0.5, v2 +// GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x19,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, -1, v4 :: v_dual_ashrrev_i32 v7, src_scc, v5 +// GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_ashrrev_i32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x19,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v254, v4, v2 :: v_dual_add_f32 v7, v1, v3 +// GFX1250: v_dual_add_nc_u32 v254, v4, v2 :: v_dual_add_f32 v7, v1, v3 ; encoding: [0x04,0x41,0x40,0xcf,0x01,0x01,0x02,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v1, v2 :: v_dual_add_f32 v7, v255, v3 +// GFX1250: v_dual_add_nc_u32 v255, v1, v2 :: v_dual_add_f32 v7, v255, v3 ; encoding: 
[0x01,0x41,0x40,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v255, v2 :: v_dual_add_f32 v7, v2, v3 +// GFX1250: v_dual_add_nc_u32 v255, v255, v2 :: v_dual_add_f32 v7, v2, v3 ; encoding: [0xff,0x41,0x40,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v2, v2 :: v_dual_add_f32 v7, v3, v3 +// GFX1250: v_dual_add_nc_u32 v255, v2, v2 :: v_dual_add_f32 v7, v3, v3 ; encoding: [0x02,0x41,0x40,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v3, v2 :: v_dual_add_f32 v7, v4, v3 +// GFX1250: v_dual_add_nc_u32 v255, v3, v2 :: v_dual_add_f32 v7, v4, v3 ; encoding: [0x03,0x41,0x40,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, s105, v2 :: v_dual_add_f32 v7, s1, v3 +// GFX1250: v_dual_add_nc_u32 v255, s105, v2 :: v_dual_add_f32 v7, s1, v3 ; encoding: [0x69,0x40,0x40,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, s1, v2 :: v_dual_add_f32 v7, s105, v3 +// GFX1250: v_dual_add_nc_u32 v255, s1, v2 :: v_dual_add_f32 v7, s105, v3 ; encoding: [0x01,0x40,0x40,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, ttmp15, v2 :: v_dual_add_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_add_nc_u32 v255, ttmp15, v2 :: v_dual_add_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x40,0x40,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, exec_hi, v2 :: v_dual_add_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_add_nc_u32 v255, exec_hi, v2 :: v_dual_add_f32 v7, vcc_hi, v3 ; encoding: 
[0x7f,0x40,0x40,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, exec_lo, v2 :: v_dual_add_f32 v7, ttmp15, v3 +// GFX1250: v_dual_add_nc_u32 v255, exec_lo, v2 :: v_dual_add_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x40,0x40,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, m0, v2 :: v_dual_add_f32 v7, m0, v3 +// GFX1250: v_dual_add_nc_u32 v255, m0, v2 :: v_dual_add_f32 v7, m0, v3 ; encoding: [0x7d,0x40,0x40,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, vcc_hi, v2 :: v_dual_add_f32 v7, exec_lo, v3 +// GFX1250: v_dual_add_nc_u32 v255, vcc_hi, v2 :: v_dual_add_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x40,0x40,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, vcc_lo, v2 :: v_dual_add_f32 v7, exec_hi, v3 +// GFX1250: v_dual_add_nc_u32 v255, vcc_lo, v2 :: v_dual_add_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x40,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, src_scc, v2 :: v_dual_add_f32 v7, -1, v3 +// GFX1250: v_dual_add_nc_u32 v255, src_scc, v2 :: v_dual_add_f32 v7, -1, v3 ; encoding: [0xfd,0x40,0x40,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, 0.5, v3 :: v_dual_add_f32 v7, 0.5, v2 +// GFX1250: v_dual_add_nc_u32 v255, 0.5, v3 :: v_dual_add_f32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x40,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, -1, v4 :: v_dual_add_f32 v7, src_scc, v5 +// GFX1250: v_dual_add_nc_u32 v255, -1, v4 :: 
v_dual_add_f32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x40,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v4, v2 :: v_dual_add_nc_u32 v7, v1, v3 +// GFX1250: v_dual_add_nc_u32 v255, v4, v2 :: v_dual_add_nc_u32 v7, v1, v3 ; encoding: [0x04,0x01,0x41,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v1, v2 :: v_dual_add_nc_u32 v7, v255, v3 +// GFX1250: v_dual_add_nc_u32 v255, v1, v2 :: v_dual_add_nc_u32 v7, v255, v3 ; encoding: [0x01,0x01,0x41,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v255, v2 :: v_dual_add_nc_u32 v7, v2, v3 +// GFX1250: v_dual_add_nc_u32 v255, v255, v2 :: v_dual_add_nc_u32 v7, v2, v3 ; encoding: [0xff,0x01,0x41,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v2, v2 :: v_dual_add_nc_u32 v7, v3, v3 +// GFX1250: v_dual_add_nc_u32 v255, v2, v2 :: v_dual_add_nc_u32 v7, v3, v3 ; encoding: [0x02,0x01,0x41,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v3, v2 :: v_dual_add_nc_u32 v7, v4, v3 +// GFX1250: v_dual_add_nc_u32 v255, v3, v2 :: v_dual_add_nc_u32 v7, v4, v3 ; encoding: [0x03,0x01,0x41,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, s105, v2 :: v_dual_add_nc_u32 v7, s1, v3 +// GFX1250: v_dual_add_nc_u32 v255, s105, v2 :: v_dual_add_nc_u32 v7, s1, v3 ; encoding: [0x69,0x00,0x41,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, s1, v2 :: v_dual_add_nc_u32 v7, s105, v3 +// GFX1250: v_dual_add_nc_u32 
v255, s1, v2 :: v_dual_add_nc_u32 v7, s105, v3 ; encoding: [0x01,0x00,0x41,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v7, vcc_lo, v3 +// GFX1250: v_dual_add_nc_u32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x41,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v7, vcc_hi, v3 +// GFX1250: v_dual_add_nc_u32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x41,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v7, ttmp15, v3 +// GFX1250: v_dual_add_nc_u32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x41,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, m0, v2 :: v_dual_add_nc_u32 v7, m0, v3 +// GFX1250: v_dual_add_nc_u32 v255, m0, v2 :: v_dual_add_nc_u32 v7, m0, v3 ; encoding: [0x7d,0x00,0x41,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v7, exec_lo, v3 +// GFX1250: v_dual_add_nc_u32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x41,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v7, exec_hi, v3 +// GFX1250: v_dual_add_nc_u32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x41,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_add_nc_u32 v255, src_scc, v2 :: v_dual_add_nc_u32 v7, -1, v3 +// GFX1250: v_dual_add_nc_u32 v255, src_scc, v2 :: v_dual_add_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x00,0x41,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, 0.5, v3 :: v_dual_add_nc_u32 v7, 0.5, v2 +// GFX1250: v_dual_add_nc_u32 v255, 0.5, v3 :: v_dual_add_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x41,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, -1, v4 :: v_dual_add_nc_u32 v7, src_scc, v5 +// GFX1250: v_dual_add_nc_u32 v255, -1, v4 :: v_dual_add_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x41,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo +// GFX1250: v_dual_add_nc_u32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo ; encoding: [0x04,0x91,0x40,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v1, v2 :: v_dual_cndmask_b32 v7, v255, v3, vcc_lo +// GFX1250: v_dual_add_nc_u32 v255, v1, v2 :: v_dual_cndmask_b32 v7, v255, v3, vcc_lo ; encoding: [0x01,0x91,0x40,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v255, v2 :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo +// GFX1250: v_dual_add_nc_u32 v255, v255, v2 :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo ; encoding: [0xff,0x91,0x40,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v2, v2 :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo +// GFX1250: v_dual_add_nc_u32 v255, v2, v2 :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo ; encoding: 
[0x02,0x91,0x40,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v3, v2 :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo +// GFX1250: v_dual_add_nc_u32 v255, v3, v2 :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo ; encoding: [0x03,0x91,0x40,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, s105, v2 :: v_dual_cndmask_b32 v7, s105, v3, vcc_lo +// GFX1250: v_dual_add_nc_u32 v255, s105, v2 :: v_dual_cndmask_b32 v7, s105, v3, vcc_lo ; encoding: [0x69,0x90,0x40,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, s1, v2 :: v_dual_cndmask_b32 v7, s1, v3, vcc_lo +// GFX1250: v_dual_add_nc_u32 v255, s1, v2 :: v_dual_cndmask_b32 v7, s1, v3, vcc_lo ; encoding: [0x01,0x90,0x40,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v7, ttmp15, v3, vcc_lo +// GFX1250: v_dual_add_nc_u32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v7, ttmp15, v3, vcc_lo ; encoding: [0x7b,0x90,0x40,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v7, exec_hi, v3, vcc_lo +// GFX1250: v_dual_add_nc_u32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v7, exec_hi, v3, vcc_lo ; encoding: [0x7f,0x90,0x40,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v7, exec_lo, v3, vcc_lo +// GFX1250: v_dual_add_nc_u32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v7, exec_lo, v3, vcc_lo ; encoding: [0x7e,0x90,0x40,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction 
requires wavesize=32 + +v_dual_add_nc_u32 v255, m0, v2 :: v_dual_cndmask_b32 v7, m0, v3, vcc_lo +// GFX1250: v_dual_add_nc_u32 v255, m0, v2 :: v_dual_cndmask_b32 v7, m0, v3, vcc_lo ; encoding: [0x7d,0x90,0x40,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v7, vcc_hi, v3, vcc_lo +// GFX1250: v_dual_add_nc_u32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v7, vcc_hi, v3, vcc_lo ; encoding: [0x6b,0x90,0x40,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo +// GFX1250: v_dual_add_nc_u32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo ; encoding: [0x6a,0x90,0x40,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, src_scc, v2 :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo +// GFX1250: v_dual_add_nc_u32 v255, src_scc, v2 :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo ; encoding: [0xfd,0x90,0x40,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, 0.5, v3 :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo +// GFX1250: v_dual_add_nc_u32 v255, 0.5, v3 :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo ; encoding: [0xf0,0x90,0x40,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, -1, v4 :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo +// GFX1250: v_dual_add_nc_u32 v255, -1, v4 :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo ; encoding: [0xc1,0x90,0x40,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v4, v2 :: v_dual_fmac_f32 v7, v1, v3 +// GFX1250: v_dual_add_nc_u32 v255, v4, 
v2 :: v_dual_fmac_f32 v7, v1, v3 ; encoding: [0x04,0x01,0x40,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v1, v2 :: v_dual_fmac_f32 v7, v255, v3 +// GFX1250: v_dual_add_nc_u32 v255, v1, v2 :: v_dual_fmac_f32 v7, v255, v3 ; encoding: [0x01,0x01,0x40,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v255, v2 :: v_dual_fmac_f32 v7, v2, v3 +// GFX1250: v_dual_add_nc_u32 v255, v255, v2 :: v_dual_fmac_f32 v7, v2, v3 ; encoding: [0xff,0x01,0x40,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v2, v2 :: v_dual_fmac_f32 v7, v3, v3 +// GFX1250: v_dual_add_nc_u32 v255, v2, v2 :: v_dual_fmac_f32 v7, v3, v3 ; encoding: [0x02,0x01,0x40,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v3, v2 :: v_dual_fmac_f32 v7, v4, v3 +// GFX1250: v_dual_add_nc_u32 v255, v3, v2 :: v_dual_fmac_f32 v7, v4, v3 ; encoding: [0x03,0x01,0x40,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, s105, v2 :: v_dual_fmac_f32 v7, s1, v3 +// GFX1250: v_dual_add_nc_u32 v255, s105, v2 :: v_dual_fmac_f32 v7, s1, v3 ; encoding: [0x69,0x00,0x40,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, s1, v2 :: v_dual_fmac_f32 v7, s105, v3 +// GFX1250: v_dual_add_nc_u32 v255, s1, v2 :: v_dual_fmac_f32 v7, s105, v3 ; encoding: [0x01,0x00,0x40,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, ttmp15, v2 :: v_dual_fmac_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_add_nc_u32 v255, ttmp15, v2 
:: v_dual_fmac_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x40,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, exec_hi, v2 :: v_dual_fmac_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_add_nc_u32 v255, exec_hi, v2 :: v_dual_fmac_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x40,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, exec_lo, v2 :: v_dual_fmac_f32 v7, ttmp15, v3 +// GFX1250: v_dual_add_nc_u32 v255, exec_lo, v2 :: v_dual_fmac_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x40,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, m0, v2 :: v_dual_fmac_f32 v7, m0, v3 +// GFX1250: v_dual_add_nc_u32 v255, m0, v2 :: v_dual_fmac_f32 v7, m0, v3 ; encoding: [0x7d,0x00,0x40,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v7, exec_lo, v3 +// GFX1250: v_dual_add_nc_u32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x40,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v7, exec_hi, v3 +// GFX1250: v_dual_add_nc_u32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x40,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, src_scc, v2 :: v_dual_fmac_f32 v7, -1, v3 +// GFX1250: v_dual_add_nc_u32 v255, src_scc, v2 :: v_dual_fmac_f32 v7, -1, v3 ; encoding: [0xfd,0x00,0x40,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, 0.5, v3 :: v_dual_fmac_f32 
v7, 0.5, v2 +// GFX1250: v_dual_add_nc_u32 v255, 0.5, v3 :: v_dual_fmac_f32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x40,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, -1, v4 :: v_dual_fmac_f32 v7, src_scc, v5 +// GFX1250: v_dual_add_nc_u32 v255, -1, v4 :: v_dual_fmac_f32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x40,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v4, v2 :: v_dual_lshlrev_b32 v7, v1, v3 +// GFX1250: v_dual_add_nc_u32 v255, v4, v2 :: v_dual_lshlrev_b32 v7, v1, v3 ; encoding: [0x04,0x11,0x41,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v1, v2 :: v_dual_lshlrev_b32 v7, v255, v3 +// GFX1250: v_dual_add_nc_u32 v255, v1, v2 :: v_dual_lshlrev_b32 v7, v255, v3 ; encoding: [0x01,0x11,0x41,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v255, v2 :: v_dual_lshlrev_b32 v7, v2, v3 +// GFX1250: v_dual_add_nc_u32 v255, v255, v2 :: v_dual_lshlrev_b32 v7, v2, v3 ; encoding: [0xff,0x11,0x41,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v2, v2 :: v_dual_lshlrev_b32 v7, v3, v3 +// GFX1250: v_dual_add_nc_u32 v255, v2, v2 :: v_dual_lshlrev_b32 v7, v3, v3 ; encoding: [0x02,0x11,0x41,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v3, v2 :: v_dual_lshlrev_b32 v7, v4, v3 +// GFX1250: v_dual_add_nc_u32 v255, v3, v2 :: v_dual_lshlrev_b32 v7, v4, v3 ; encoding: [0x03,0x11,0x41,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, s105, 
v2 :: v_dual_lshlrev_b32 v7, s1, v3 +// GFX1250: v_dual_add_nc_u32 v255, s105, v2 :: v_dual_lshlrev_b32 v7, s1, v3 ; encoding: [0x69,0x10,0x41,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, s1, v2 :: v_dual_lshlrev_b32 v7, s105, v3 +// GFX1250: v_dual_add_nc_u32 v255, s1, v2 :: v_dual_lshlrev_b32 v7, s105, v3 ; encoding: [0x01,0x10,0x41,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v7, vcc_lo, v3 +// GFX1250: v_dual_add_nc_u32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v7, vcc_lo, v3 ; encoding: [0x7b,0x10,0x41,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v7, vcc_hi, v3 +// GFX1250: v_dual_add_nc_u32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v7, vcc_hi, v3 ; encoding: [0x7f,0x10,0x41,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v7, ttmp15, v3 +// GFX1250: v_dual_add_nc_u32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v7, ttmp15, v3 ; encoding: [0x7e,0x10,0x41,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, m0, v2 :: v_dual_lshlrev_b32 v7, m0, v3 +// GFX1250: v_dual_add_nc_u32 v255, m0, v2 :: v_dual_lshlrev_b32 v7, m0, v3 ; encoding: [0x7d,0x10,0x41,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v7, exec_lo, v3 +// GFX1250: v_dual_add_nc_u32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v7, exec_lo, v3 ; encoding: [0x6b,0x10,0x41,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v7, exec_hi, v3 +// GFX1250: v_dual_add_nc_u32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x10,0x41,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v7, -1, v3 +// GFX1250: v_dual_add_nc_u32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v7, -1, v3 ; encoding: [0xfd,0x10,0x41,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v7, 0.5, v2 +// GFX1250: v_dual_add_nc_u32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x10,0x41,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, -1, v4 :: v_dual_lshlrev_b32 v7, src_scc, v5 +// GFX1250: v_dual_add_nc_u32 v255, -1, v4 :: v_dual_lshlrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x10,0x41,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v4, v2 :: v_dual_max_num_f32 v7, v1, v3 +// GFX1250: v_dual_add_nc_u32 v255, v4, v2 :: v_dual_max_num_f32 v7, v1, v3 ; encoding: [0x04,0xa1,0x40,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v1, v2 :: v_dual_max_num_f32 v7, v255, v3 +// GFX1250: v_dual_add_nc_u32 v255, v1, v2 :: v_dual_max_num_f32 v7, v255, v3 ; encoding: [0x01,0xa1,0x40,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v255, v2 :: v_dual_max_num_f32 v7, v2, v3 +// GFX1250: v_dual_add_nc_u32 v255, v255, v2 :: v_dual_max_num_f32 v7, v2, v3 ; encoding: 
[0xff,0xa1,0x40,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v2, v2 :: v_dual_max_num_f32 v7, v3, v3 +// GFX1250: v_dual_add_nc_u32 v255, v2, v2 :: v_dual_max_num_f32 v7, v3, v3 ; encoding: [0x02,0xa1,0x40,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v3, v2 :: v_dual_max_num_f32 v7, v4, v3 +// GFX1250: v_dual_add_nc_u32 v255, v3, v2 :: v_dual_max_num_f32 v7, v4, v3 ; encoding: [0x03,0xa1,0x40,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, s105, v2 :: v_dual_max_num_f32 v7, s1, v3 +// GFX1250: v_dual_add_nc_u32 v255, s105, v2 :: v_dual_max_num_f32 v7, s1, v3 ; encoding: [0x69,0xa0,0x40,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, s1, v2 :: v_dual_max_num_f32 v7, s105, v3 +// GFX1250: v_dual_add_nc_u32 v255, s1, v2 :: v_dual_max_num_f32 v7, s105, v3 ; encoding: [0x01,0xa0,0x40,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, ttmp15, v2 :: v_dual_max_num_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_add_nc_u32 v255, ttmp15, v2 :: v_dual_max_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xa0,0x40,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, exec_hi, v2 :: v_dual_max_num_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_add_nc_u32 v255, exec_hi, v2 :: v_dual_max_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xa0,0x40,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, exec_lo, v2 :: v_dual_max_num_f32 v7, ttmp15, v3 +// GFX1250: 
v_dual_add_nc_u32 v255, exec_lo, v2 :: v_dual_max_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xa0,0x40,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, m0, v2 :: v_dual_max_num_f32 v7, m0, v3 +// GFX1250: v_dual_add_nc_u32 v255, m0, v2 :: v_dual_max_num_f32 v7, m0, v3 ; encoding: [0x7d,0xa0,0x40,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v7, exec_lo, v3 +// GFX1250: v_dual_add_nc_u32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xa0,0x40,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v7, exec_hi, v3 +// GFX1250: v_dual_add_nc_u32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xa0,0x40,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, src_scc, v2 :: v_dual_max_num_f32 v7, -1, v3 +// GFX1250: v_dual_add_nc_u32 v255, src_scc, v2 :: v_dual_max_num_f32 v7, -1, v3 ; encoding: [0xfd,0xa0,0x40,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, 0.5, v3 :: v_dual_max_num_f32 v7, 0.5, v2 +// GFX1250: v_dual_add_nc_u32 v255, 0.5, v3 :: v_dual_max_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xa0,0x40,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, -1, v4 :: v_dual_max_num_f32 v7, src_scc, v5 +// GFX1250: v_dual_add_nc_u32 v255, -1, v4 :: v_dual_max_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xa0,0x40,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_add_nc_u32 v255, v4, v2 :: v_dual_min_num_f32 v7, v1, v3 +// GFX1250: v_dual_add_nc_u32 v255, v4, v2 :: v_dual_min_num_f32 v7, v1, v3 ; encoding: [0x04,0xb1,0x40,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v1, v2 :: v_dual_min_num_f32 v7, v255, v3 +// GFX1250: v_dual_add_nc_u32 v255, v1, v2 :: v_dual_min_num_f32 v7, v255, v3 ; encoding: [0x01,0xb1,0x40,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v255, v2 :: v_dual_min_num_f32 v7, v2, v3 +// GFX1250: v_dual_add_nc_u32 v255, v255, v2 :: v_dual_min_num_f32 v7, v2, v3 ; encoding: [0xff,0xb1,0x40,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v2, v2 :: v_dual_min_num_f32 v7, v3, v3 +// GFX1250: v_dual_add_nc_u32 v255, v2, v2 :: v_dual_min_num_f32 v7, v3, v3 ; encoding: [0x02,0xb1,0x40,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v3, v2 :: v_dual_min_num_f32 v7, v4, v3 +// GFX1250: v_dual_add_nc_u32 v255, v3, v2 :: v_dual_min_num_f32 v7, v4, v3 ; encoding: [0x03,0xb1,0x40,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, s105, v2 :: v_dual_min_num_f32 v7, s1, v3 +// GFX1250: v_dual_add_nc_u32 v255, s105, v2 :: v_dual_min_num_f32 v7, s1, v3 ; encoding: [0x69,0xb0,0x40,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, s1, v2 :: v_dual_min_num_f32 v7, s105, v3 +// GFX1250: v_dual_add_nc_u32 v255, s1, v2 :: v_dual_min_num_f32 v7, s105, v3 ; encoding: [0x01,0xb0,0x40,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, ttmp15, v2 :: v_dual_min_num_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_add_nc_u32 v255, ttmp15, v2 :: v_dual_min_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xb0,0x40,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, exec_hi, v2 :: v_dual_min_num_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_add_nc_u32 v255, exec_hi, v2 :: v_dual_min_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xb0,0x40,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, exec_lo, v2 :: v_dual_min_num_f32 v7, ttmp15, v3 +// GFX1250: v_dual_add_nc_u32 v255, exec_lo, v2 :: v_dual_min_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xb0,0x40,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, m0, v2 :: v_dual_min_num_f32 v7, m0, v3 +// GFX1250: v_dual_add_nc_u32 v255, m0, v2 :: v_dual_min_num_f32 v7, m0, v3 ; encoding: [0x7d,0xb0,0x40,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v7, exec_lo, v3 +// GFX1250: v_dual_add_nc_u32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xb0,0x40,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v7, exec_hi, v3 +// GFX1250: v_dual_add_nc_u32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xb0,0x40,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, src_scc, v2 :: v_dual_min_num_f32 v7, -1, v3 +// GFX1250: v_dual_add_nc_u32 v255, src_scc, v2 :: v_dual_min_num_f32 v7, -1, v3 ; 
encoding: [0xfd,0xb0,0x40,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, 0.5, v3 :: v_dual_min_num_f32 v7, 0.5, v2 +// GFX1250: v_dual_add_nc_u32 v255, 0.5, v3 :: v_dual_min_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xb0,0x40,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, -1, v4 :: v_dual_min_num_f32 v7, src_scc, v5 +// GFX1250: v_dual_add_nc_u32 v255, -1, v4 :: v_dual_min_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xb0,0x40,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v4, v255 :: v_dual_mov_b32 v7, v1 +// GFX1250: v_dual_add_nc_u32 v255, v4, v255 :: v_dual_mov_b32 v7, v1 ; encoding: [0x04,0x81,0x40,0xcf,0x01,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v1, v255 :: v_dual_mov_b32 v7, v255 +// GFX1250: v_dual_add_nc_u32 v255, v1, v255 :: v_dual_mov_b32 v7, v255 ; encoding: [0x01,0x81,0x40,0xcf,0xff,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v255, v255 :: v_dual_mov_b32 v7, v2 +// GFX1250: v_dual_add_nc_u32 v255, v255, v255 :: v_dual_mov_b32 v7, v2 ; encoding: [0xff,0x81,0x40,0xcf,0x02,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v2, v255 :: v_dual_mov_b32 v7, v3 +// GFX1250: v_dual_add_nc_u32 v255, v2, v255 :: v_dual_mov_b32 v7, v3 ; encoding: [0x02,0x81,0x40,0xcf,0x03,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v3, v255 :: v_dual_mov_b32 v7, v4 +// GFX1250: v_dual_add_nc_u32 v255, v3, v255 :: v_dual_mov_b32 v7, v4 ; encoding: 
[0x03,0x81,0x40,0xcf,0x04,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, s105, v255 :: v_dual_mov_b32 v7, s1 +// GFX1250: v_dual_add_nc_u32 v255, s105, v255 :: v_dual_mov_b32 v7, s1 ; encoding: [0x69,0x80,0x40,0xcf,0x01,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, s1, v255 :: v_dual_mov_b32 v7, s105 +// GFX1250: v_dual_add_nc_u32 v255, s1, v255 :: v_dual_mov_b32 v7, s105 ; encoding: [0x01,0x80,0x40,0xcf,0x69,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, ttmp15, v255 :: v_dual_mov_b32 v7, vcc_lo +// GFX1250: v_dual_add_nc_u32 v255, ttmp15, v255 :: v_dual_mov_b32 v7, vcc_lo ; encoding: [0x7b,0x80,0x40,0xcf,0x6a,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, exec_hi, v255 :: v_dual_mov_b32 v7, vcc_hi +// GFX1250: v_dual_add_nc_u32 v255, exec_hi, v255 :: v_dual_mov_b32 v7, vcc_hi ; encoding: [0x7f,0x80,0x40,0xcf,0x6b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, exec_lo, v255 :: v_dual_mov_b32 v7, ttmp15 +// GFX1250: v_dual_add_nc_u32 v255, exec_lo, v255 :: v_dual_mov_b32 v7, ttmp15 ; encoding: [0x7e,0x80,0x40,0xcf,0x7b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, m0, v255 :: v_dual_mov_b32 v7, m0 +// GFX1250: v_dual_add_nc_u32 v255, m0, v255 :: v_dual_mov_b32 v7, m0 ; encoding: [0x7d,0x80,0x40,0xcf,0x7d,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, vcc_hi, v255 :: v_dual_mov_b32 v7, exec_lo +// GFX1250: v_dual_add_nc_u32 v255, vcc_hi, v255 :: v_dual_mov_b32 v7, exec_lo ; 
encoding: [0x6b,0x80,0x40,0xcf,0x7e,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, vcc_lo, v255 :: v_dual_mov_b32 v7, exec_hi +// GFX1250: v_dual_add_nc_u32 v255, vcc_lo, v255 :: v_dual_mov_b32 v7, exec_hi ; encoding: [0x6a,0x80,0x40,0xcf,0x7f,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, src_scc, v255 :: v_dual_mov_b32 v7, -1 +// GFX1250: v_dual_add_nc_u32 v255, src_scc, v255 :: v_dual_mov_b32 v7, -1 ; encoding: [0xfd,0x80,0x40,0xcf,0xc1,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, 0.5, v3 :: v_dual_mov_b32 v7, 0.5 +// GFX1250: v_dual_add_nc_u32 v255, 0.5, v3 :: v_dual_mov_b32 v7, 0.5 ; encoding: [0xf0,0x80,0x40,0xcf,0xf0,0x00,0x03,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, -1, v4 :: v_dual_mov_b32 v7, src_scc +// GFX1250: v_dual_add_nc_u32 v255, -1, v4 :: v_dual_mov_b32 v7, src_scc ; encoding: [0xc1,0x80,0x40,0xcf,0xfd,0x00,0x04,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v7, v1, v3 +// GFX1250: v_dual_add_nc_u32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v7, v1, v3 ; encoding: [0x04,0x71,0x40,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v7, v255, v3 +// GFX1250: v_dual_add_nc_u32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v7, v255, v3 ; encoding: [0x01,0x71,0x40,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v7, v2, v3 +// GFX1250: v_dual_add_nc_u32 v255, v255, v2 :: 
v_dual_mul_dx9_zero_f32 v7, v2, v3 ; encoding: [0xff,0x71,0x40,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v7, v3, v3 +// GFX1250: v_dual_add_nc_u32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v7, v3, v3 ; encoding: [0x02,0x71,0x40,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v7, v4, v3 +// GFX1250: v_dual_add_nc_u32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v7, v4, v3 ; encoding: [0x03,0x71,0x40,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v7, s1, v3 +// GFX1250: v_dual_add_nc_u32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v7, s1, v3 ; encoding: [0x69,0x70,0x40,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v7, s105, v3 +// GFX1250: v_dual_add_nc_u32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v7, s105, v3 ; encoding: [0x01,0x70,0x40,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_add_nc_u32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x70,0x40,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_add_nc_u32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x70,0x40,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_add_nc_u32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, ttmp15, v3 +// GFX1250: v_dual_add_nc_u32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x70,0x40,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v7, m0, v3 +// GFX1250: v_dual_add_nc_u32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v7, m0, v3 ; encoding: [0x7d,0x70,0x40,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_lo, v3 +// GFX1250: v_dual_add_nc_u32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x70,0x40,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 +// GFX1250: v_dual_add_nc_u32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x40,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v7, -1, v3 +// GFX1250: v_dual_add_nc_u32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v7, -1, v3 ; encoding: [0xfd,0x70,0x40,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 +// GFX1250: v_dual_add_nc_u32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x40,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 +// GFX1250: v_dual_add_nc_u32 v255, -1, v4 :: 
v_dual_mul_dx9_zero_f32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x40,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v4, v2 :: v_dual_mul_f32 v7, v1, v3 +// GFX1250: v_dual_add_nc_u32 v255, v4, v2 :: v_dual_mul_f32 v7, v1, v3 ; encoding: [0x04,0x31,0x40,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v1, v2 :: v_dual_mul_f32 v7, v255, v3 +// GFX1250: v_dual_add_nc_u32 v255, v1, v2 :: v_dual_mul_f32 v7, v255, v3 ; encoding: [0x01,0x31,0x40,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v255, v2 :: v_dual_mul_f32 v7, v2, v3 +// GFX1250: v_dual_add_nc_u32 v255, v255, v2 :: v_dual_mul_f32 v7, v2, v3 ; encoding: [0xff,0x31,0x40,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v2, v2 :: v_dual_mul_f32 v7, v3, v3 +// GFX1250: v_dual_add_nc_u32 v255, v2, v2 :: v_dual_mul_f32 v7, v3, v3 ; encoding: [0x02,0x31,0x40,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v3, v2 :: v_dual_mul_f32 v7, v4, v3 +// GFX1250: v_dual_add_nc_u32 v255, v3, v2 :: v_dual_mul_f32 v7, v4, v3 ; encoding: [0x03,0x31,0x40,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, s105, v2 :: v_dual_mul_f32 v7, s1, v3 +// GFX1250: v_dual_add_nc_u32 v255, s105, v2 :: v_dual_mul_f32 v7, s1, v3 ; encoding: [0x69,0x30,0x40,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, s1, v2 :: v_dual_mul_f32 v7, s105, v3 +// GFX1250: v_dual_add_nc_u32 v255, s1, v2 :: v_dual_mul_f32 v7, 
s105, v3 ; encoding: [0x01,0x30,0x40,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, ttmp15, v2 :: v_dual_mul_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_add_nc_u32 v255, ttmp15, v2 :: v_dual_mul_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x30,0x40,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, exec_hi, v2 :: v_dual_mul_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_add_nc_u32 v255, exec_hi, v2 :: v_dual_mul_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x30,0x40,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, exec_lo, v2 :: v_dual_mul_f32 v7, ttmp15, v3 +// GFX1250: v_dual_add_nc_u32 v255, exec_lo, v2 :: v_dual_mul_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x30,0x40,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, m0, v2 :: v_dual_mul_f32 v7, m0, v3 +// GFX1250: v_dual_add_nc_u32 v255, m0, v2 :: v_dual_mul_f32 v7, m0, v3 ; encoding: [0x7d,0x30,0x40,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, vcc_hi, v2 :: v_dual_mul_f32 v7, exec_lo, v3 +// GFX1250: v_dual_add_nc_u32 v255, vcc_hi, v2 :: v_dual_mul_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x30,0x40,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, vcc_lo, v2 :: v_dual_mul_f32 v7, exec_hi, v3 +// GFX1250: v_dual_add_nc_u32 v255, vcc_lo, v2 :: v_dual_mul_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x30,0x40,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, src_scc, v2 :: v_dual_mul_f32 v7, -1, v3 +// GFX1250: 
v_dual_add_nc_u32 v255, src_scc, v2 :: v_dual_mul_f32 v7, -1, v3 ; encoding: [0xfd,0x30,0x40,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, 0.5, v3 :: v_dual_mul_f32 v7, 0.5, v2 +// GFX1250: v_dual_add_nc_u32 v255, 0.5, v3 :: v_dual_mul_f32 v7, 0.5, v2 ; encoding: [0xf0,0x30,0x40,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, -1, v4 :: v_dual_mul_f32 v7, src_scc, v5 +// GFX1250: v_dual_add_nc_u32 v255, -1, v4 :: v_dual_mul_f32 v7, src_scc, v5 ; encoding: [0xc1,0x30,0x40,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v4, v2 :: v_dual_sub_f32 v7, v1, v3 +// GFX1250: v_dual_add_nc_u32 v255, v4, v2 :: v_dual_sub_f32 v7, v1, v3 ; encoding: [0x04,0x51,0x40,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v1, v2 :: v_dual_sub_f32 v7, v255, v3 +// GFX1250: v_dual_add_nc_u32 v255, v1, v2 :: v_dual_sub_f32 v7, v255, v3 ; encoding: [0x01,0x51,0x40,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v255, v2 :: v_dual_sub_f32 v7, v2, v3 +// GFX1250: v_dual_add_nc_u32 v255, v255, v2 :: v_dual_sub_f32 v7, v2, v3 ; encoding: [0xff,0x51,0x40,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v2, v2 :: v_dual_sub_f32 v7, v3, v3 +// GFX1250: v_dual_add_nc_u32 v255, v2, v2 :: v_dual_sub_f32 v7, v3, v3 ; encoding: [0x02,0x51,0x40,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v3, v2 :: v_dual_sub_f32 v7, v4, v3 +// GFX1250: v_dual_add_nc_u32 
v255, v3, v2 :: v_dual_sub_f32 v7, v4, v3 ; encoding: [0x03,0x51,0x40,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, s105, v2 :: v_dual_sub_f32 v7, s1, v3 +// GFX1250: v_dual_add_nc_u32 v255, s105, v2 :: v_dual_sub_f32 v7, s1, v3 ; encoding: [0x69,0x50,0x40,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, s1, v2 :: v_dual_sub_f32 v7, s105, v3 +// GFX1250: v_dual_add_nc_u32 v255, s1, v2 :: v_dual_sub_f32 v7, s105, v3 ; encoding: [0x01,0x50,0x40,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, ttmp15, v2 :: v_dual_sub_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_add_nc_u32 v255, ttmp15, v2 :: v_dual_sub_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x50,0x40,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, exec_hi, v2 :: v_dual_sub_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_add_nc_u32 v255, exec_hi, v2 :: v_dual_sub_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x50,0x40,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, exec_lo, v2 :: v_dual_sub_f32 v7, ttmp15, v3 +// GFX1250: v_dual_add_nc_u32 v255, exec_lo, v2 :: v_dual_sub_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x50,0x40,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, m0, v2 :: v_dual_sub_f32 v7, m0, v3 +// GFX1250: v_dual_add_nc_u32 v255, m0, v2 :: v_dual_sub_f32 v7, m0, v3 ; encoding: [0x7d,0x50,0x40,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, vcc_hi, v2 :: v_dual_sub_f32 v7, exec_lo, v3 +// 
GFX1250: v_dual_add_nc_u32 v255, vcc_hi, v2 :: v_dual_sub_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x50,0x40,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, vcc_lo, v2 :: v_dual_sub_f32 v7, exec_hi, v3 +// GFX1250: v_dual_add_nc_u32 v255, vcc_lo, v2 :: v_dual_sub_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x40,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, src_scc, v2 :: v_dual_sub_f32 v7, -1, v3 +// GFX1250: v_dual_add_nc_u32 v255, src_scc, v2 :: v_dual_sub_f32 v7, -1, v3 ; encoding: [0xfd,0x50,0x40,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, 0.5, v3 :: v_dual_sub_f32 v7, 0.5, v2 +// GFX1250: v_dual_add_nc_u32 v255, 0.5, v3 :: v_dual_sub_f32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x40,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, -1, v4 :: v_dual_sub_f32 v7, src_scc, v5 +// GFX1250: v_dual_add_nc_u32 v255, -1, v4 :: v_dual_sub_f32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x40,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v4, v2 :: v_dual_subrev_f32 v7, v1, v3 +// GFX1250: v_dual_add_nc_u32 v255, v4, v2 :: v_dual_subrev_f32 v7, v1, v3 ; encoding: [0x04,0x61,0x40,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v1, v2 :: v_dual_subrev_f32 v7, v255, v3 +// GFX1250: v_dual_add_nc_u32 v255, v1, v2 :: v_dual_subrev_f32 v7, v255, v3 ; encoding: [0x01,0x61,0x40,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v255, v2 :: 
v_dual_subrev_f32 v7, v2, v3 +// GFX1250: v_dual_add_nc_u32 v255, v255, v2 :: v_dual_subrev_f32 v7, v2, v3 ; encoding: [0xff,0x61,0x40,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v2, v2 :: v_dual_subrev_f32 v7, v3, v3 +// GFX1250: v_dual_add_nc_u32 v255, v2, v2 :: v_dual_subrev_f32 v7, v3, v3 ; encoding: [0x02,0x61,0x40,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v3, v2 :: v_dual_subrev_f32 v7, v4, v3 +// GFX1250: v_dual_add_nc_u32 v255, v3, v2 :: v_dual_subrev_f32 v7, v4, v3 ; encoding: [0x03,0x61,0x40,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, s105, v2 :: v_dual_subrev_f32 v7, s1, v3 +// GFX1250: v_dual_add_nc_u32 v255, s105, v2 :: v_dual_subrev_f32 v7, s1, v3 ; encoding: [0x69,0x60,0x40,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, s1, v2 :: v_dual_subrev_f32 v7, s105, v3 +// GFX1250: v_dual_add_nc_u32 v255, s1, v2 :: v_dual_subrev_f32 v7, s105, v3 ; encoding: [0x01,0x60,0x40,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, ttmp15, v2 :: v_dual_subrev_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_add_nc_u32 v255, ttmp15, v2 :: v_dual_subrev_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x60,0x40,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, exec_hi, v2 :: v_dual_subrev_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_add_nc_u32 v255, exec_hi, v2 :: v_dual_subrev_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x60,0x40,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_add_nc_u32 v255, exec_lo, v2 :: v_dual_subrev_f32 v7, ttmp15, v3 +// GFX1250: v_dual_add_nc_u32 v255, exec_lo, v2 :: v_dual_subrev_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x60,0x40,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, m0, v2 :: v_dual_subrev_f32 v7, m0, v3 +// GFX1250: v_dual_add_nc_u32 v255, m0, v2 :: v_dual_subrev_f32 v7, m0, v3 ; encoding: [0x7d,0x60,0x40,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v7, exec_lo, v3 +// GFX1250: v_dual_add_nc_u32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x60,0x40,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v7, exec_hi, v3 +// GFX1250: v_dual_add_nc_u32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x40,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, src_scc, v2 :: v_dual_subrev_f32 v7, -1, v3 +// GFX1250: v_dual_add_nc_u32 v255, src_scc, v2 :: v_dual_subrev_f32 v7, -1, v3 ; encoding: [0xfd,0x60,0x40,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, 0.5, v3 :: v_dual_subrev_f32 v7, 0.5, v2 +// GFX1250: v_dual_add_nc_u32 v255, 0.5, v3 :: v_dual_subrev_f32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x40,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, -1, v4 :: v_dual_subrev_f32 v7, src_scc, v5 +// GFX1250: v_dual_add_nc_u32 v255, -1, v4 :: v_dual_subrev_f32 v7, src_scc, v5 ; encoding: 
[0xc1,0x60,0x40,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v254, v4, v2 :: v_dual_fma_f32 v7, v1, v3, v4 +// GFX1250: v_dual_add_nc_u32 v254, v4, v2 :: v_dual_fma_f32 v7, v1, v3, v4 ; encoding: [0x04,0x31,0x41,0xcf,0x01,0x01,0x02,0x00,0xfe,0x03,0x04,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v254, v4, v2 :: v_dual_sub_nc_u32 v9, v1, v13 +// GFX1250: v_dual_add_nc_u32 v254, v4, v2 :: v_dual_sub_nc_u32 v9, v1, v13 ; encoding: [0x04,0x41,0x41,0xcf,0x01,0x01,0x02,0x00,0xfe,0x0d,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v254, v4, v2 :: v_dual_lshrrev_b32 v9, v1, v13 +// GFX1250: v_dual_add_nc_u32 v254, v4, v2 :: v_dual_lshrrev_b32 v9, v1, v13 ; encoding: [0x04,0x51,0x41,0xcf,0x01,0x01,0x02,0x00,0xfe,0x0d,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v254, v4, v2 :: v_dual_ashrrev_i32 v9, v1, v13 +// GFX1250: v_dual_add_nc_u32 v254, v4, v2 :: v_dual_ashrrev_i32 v9, v1, v13 ; encoding: [0x04,0x61,0x41,0xcf,0x01,0x01,0x02,0x00,0xfe,0x0d,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v254, v4, v2 :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0x84 +// GFX1250: v_dual_add_nc_u32 v254, v4, v2 :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0x84 ; encoding: [0x04,0x21,0x41,0xcf,0x01,0x01,0x02,0x00,0xfe,0x03,0x84,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v254, v4, v2 :: v_dual_add_f32 v7, v1, v3 +// GFX1250: v_dual_lshlrev_b32 v254, v4, v2 :: v_dual_add_f32 v7, v1, v3 ; encoding: [0x04,0x41,0x44,0xcf,0x01,0x01,0x02,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v1, v2 :: v_dual_add_f32 v7, v255, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v1, v2 :: 
v_dual_add_f32 v7, v255, v3 ; encoding: [0x01,0x41,0x44,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v255, v2 :: v_dual_add_f32 v7, v2, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v255, v2 :: v_dual_add_f32 v7, v2, v3 ; encoding: [0xff,0x41,0x44,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v2, v2 :: v_dual_add_f32 v7, v3, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v2, v2 :: v_dual_add_f32 v7, v3, v3 ; encoding: [0x02,0x41,0x44,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v3, v2 :: v_dual_add_f32 v7, v4, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v3, v2 :: v_dual_add_f32 v7, v4, v3 ; encoding: [0x03,0x41,0x44,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, s105, v2 :: v_dual_add_f32 v7, s1, v3 +// GFX1250: v_dual_lshlrev_b32 v255, s105, v2 :: v_dual_add_f32 v7, s1, v3 ; encoding: [0x69,0x40,0x44,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, s1, v2 :: v_dual_add_f32 v7, s105, v3 +// GFX1250: v_dual_lshlrev_b32 v255, s1, v2 :: v_dual_add_f32 v7, s105, v3 ; encoding: [0x01,0x40,0x44,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, ttmp15, v2 :: v_dual_add_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_lshlrev_b32 v255, ttmp15, v2 :: v_dual_add_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x40,0x44,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, exec_hi, v2 :: v_dual_add_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_lshlrev_b32 v255, 
exec_hi, v2 :: v_dual_add_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x40,0x44,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, exec_lo, v2 :: v_dual_add_f32 v7, ttmp15, v3 +// GFX1250: v_dual_lshlrev_b32 v255, exec_lo, v2 :: v_dual_add_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x40,0x44,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, m0, v2 :: v_dual_add_f32 v7, m0, v3 +// GFX1250: v_dual_lshlrev_b32 v255, m0, v2 :: v_dual_add_f32 v7, m0, v3 ; encoding: [0x7d,0x40,0x44,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, vcc_hi, v2 :: v_dual_add_f32 v7, exec_lo, v3 +// GFX1250: v_dual_lshlrev_b32 v255, vcc_hi, v2 :: v_dual_add_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x40,0x44,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, vcc_lo, v2 :: v_dual_add_f32 v7, exec_hi, v3 +// GFX1250: v_dual_lshlrev_b32 v255, vcc_lo, v2 :: v_dual_add_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x44,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, src_scc, v2 :: v_dual_add_f32 v7, -1, v3 +// GFX1250: v_dual_lshlrev_b32 v255, src_scc, v2 :: v_dual_add_f32 v7, -1, v3 ; encoding: [0xfd,0x40,0x44,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, 0.5, v3 :: v_dual_add_f32 v7, 0.5, v2 +// GFX1250: v_dual_lshlrev_b32 v255, 0.5, v3 :: v_dual_add_f32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x44,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, -1, v4 :: v_dual_add_f32 v7, 
src_scc, v5 +// GFX1250: v_dual_lshlrev_b32 v255, -1, v4 :: v_dual_add_f32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x44,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v4, v2 :: v_dual_add_nc_u32 v7, v1, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v4, v2 :: v_dual_add_nc_u32 v7, v1, v3 ; encoding: [0x04,0x01,0x45,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v1, v2 :: v_dual_add_nc_u32 v7, v255, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v1, v2 :: v_dual_add_nc_u32 v7, v255, v3 ; encoding: [0x01,0x01,0x45,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v255, v2 :: v_dual_add_nc_u32 v7, v2, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v255, v2 :: v_dual_add_nc_u32 v7, v2, v3 ; encoding: [0xff,0x01,0x45,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v2, v2 :: v_dual_add_nc_u32 v7, v3, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v2, v2 :: v_dual_add_nc_u32 v7, v3, v3 ; encoding: [0x02,0x01,0x45,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v3, v2 :: v_dual_add_nc_u32 v7, v4, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v3, v2 :: v_dual_add_nc_u32 v7, v4, v3 ; encoding: [0x03,0x01,0x45,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, s105, v2 :: v_dual_add_nc_u32 v7, s1, v3 +// GFX1250: v_dual_lshlrev_b32 v255, s105, v2 :: v_dual_add_nc_u32 v7, s1, v3 ; encoding: [0x69,0x00,0x45,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, s1, 
v2 :: v_dual_add_nc_u32 v7, s105, v3 +// GFX1250: v_dual_lshlrev_b32 v255, s1, v2 :: v_dual_add_nc_u32 v7, s105, v3 ; encoding: [0x01,0x00,0x45,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v7, vcc_lo, v3 +// GFX1250: v_dual_lshlrev_b32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x45,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v7, vcc_hi, v3 +// GFX1250: v_dual_lshlrev_b32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x45,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v7, ttmp15, v3 +// GFX1250: v_dual_lshlrev_b32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x45,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, m0, v2 :: v_dual_add_nc_u32 v7, m0, v3 +// GFX1250: v_dual_lshlrev_b32 v255, m0, v2 :: v_dual_add_nc_u32 v7, m0, v3 ; encoding: [0x7d,0x00,0x45,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v7, exec_lo, v3 +// GFX1250: v_dual_lshlrev_b32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x45,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v7, exec_hi, v3 +// GFX1250: v_dual_lshlrev_b32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x45,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, src_scc, v2 :: v_dual_add_nc_u32 v7, -1, v3 +// GFX1250: v_dual_lshlrev_b32 v255, src_scc, v2 :: v_dual_add_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x00,0x45,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, 0.5, v3 :: v_dual_add_nc_u32 v7, 0.5, v2 +// GFX1250: v_dual_lshlrev_b32 v255, 0.5, v3 :: v_dual_add_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x45,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, -1, v4 :: v_dual_add_nc_u32 v7, src_scc, v5 +// GFX1250: v_dual_lshlrev_b32 v255, -1, v4 :: v_dual_add_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x45,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo +// GFX1250: v_dual_lshlrev_b32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo ; encoding: [0x04,0x91,0x44,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v1, v2 :: v_dual_cndmask_b32 v7, v255, v3, vcc_lo +// GFX1250: v_dual_lshlrev_b32 v255, v1, v2 :: v_dual_cndmask_b32 v7, v255, v3, vcc_lo ; encoding: [0x01,0x91,0x44,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v255, v2 :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo +// GFX1250: v_dual_lshlrev_b32 v255, v255, v2 :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo ; encoding: [0xff,0x91,0x44,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v2, v2 :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo +// GFX1250: v_dual_lshlrev_b32 v255, v2, v2 
:: v_dual_cndmask_b32 v7, v3, v3, vcc_lo ; encoding: [0x02,0x91,0x44,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v3, v2 :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo +// GFX1250: v_dual_lshlrev_b32 v255, v3, v2 :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo ; encoding: [0x03,0x91,0x44,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, s105, v2 :: v_dual_cndmask_b32 v7, s105, v3, vcc_lo +// GFX1250: v_dual_lshlrev_b32 v255, s105, v2 :: v_dual_cndmask_b32 v7, s105, v3, vcc_lo ; encoding: [0x69,0x90,0x44,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, s1, v2 :: v_dual_cndmask_b32 v7, s1, v3, vcc_lo +// GFX1250: v_dual_lshlrev_b32 v255, s1, v2 :: v_dual_cndmask_b32 v7, s1, v3, vcc_lo ; encoding: [0x01,0x90,0x44,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v7, ttmp15, v3, vcc_lo +// GFX1250: v_dual_lshlrev_b32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v7, ttmp15, v3, vcc_lo ; encoding: [0x7b,0x90,0x44,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v7, exec_hi, v3, vcc_lo +// GFX1250: v_dual_lshlrev_b32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v7, exec_hi, v3, vcc_lo ; encoding: [0x7f,0x90,0x44,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v7, exec_lo, v3, vcc_lo +// GFX1250: v_dual_lshlrev_b32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v7, exec_lo, v3, vcc_lo ; encoding: 
[0x7e,0x90,0x44,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, m0, v2 :: v_dual_cndmask_b32 v7, m0, v3, vcc_lo +// GFX1250: v_dual_lshlrev_b32 v255, m0, v2 :: v_dual_cndmask_b32 v7, m0, v3, vcc_lo ; encoding: [0x7d,0x90,0x44,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v7, vcc_hi, v3, vcc_lo +// GFX1250: v_dual_lshlrev_b32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v7, vcc_hi, v3, vcc_lo ; encoding: [0x6b,0x90,0x44,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo +// GFX1250: v_dual_lshlrev_b32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo ; encoding: [0x6a,0x90,0x44,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, src_scc, v2 :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo +// GFX1250: v_dual_lshlrev_b32 v255, src_scc, v2 :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo ; encoding: [0xfd,0x90,0x44,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, 0.5, v3 :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo +// GFX1250: v_dual_lshlrev_b32 v255, 0.5, v3 :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo ; encoding: [0xf0,0x90,0x44,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, -1, v4 :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo +// GFX1250: v_dual_lshlrev_b32 v255, -1, v4 :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo ; encoding: [0xc1,0x90,0x44,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction 
requires wavesize=32 + +v_dual_lshlrev_b32 v255, v4, v2 :: v_dual_fmac_f32 v7, v1, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v4, v2 :: v_dual_fmac_f32 v7, v1, v3 ; encoding: [0x04,0x01,0x44,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v1, v2 :: v_dual_fmac_f32 v7, v255, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v1, v2 :: v_dual_fmac_f32 v7, v255, v3 ; encoding: [0x01,0x01,0x44,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v255, v2 :: v_dual_fmac_f32 v7, v2, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v255, v2 :: v_dual_fmac_f32 v7, v2, v3 ; encoding: [0xff,0x01,0x44,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v2, v2 :: v_dual_fmac_f32 v7, v3, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v2, v2 :: v_dual_fmac_f32 v7, v3, v3 ; encoding: [0x02,0x01,0x44,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v3, v2 :: v_dual_fmac_f32 v7, v4, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v3, v2 :: v_dual_fmac_f32 v7, v4, v3 ; encoding: [0x03,0x01,0x44,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, s105, v2 :: v_dual_fmac_f32 v7, s1, v3 +// GFX1250: v_dual_lshlrev_b32 v255, s105, v2 :: v_dual_fmac_f32 v7, s1, v3 ; encoding: [0x69,0x00,0x44,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, s1, v2 :: v_dual_fmac_f32 v7, s105, v3 +// GFX1250: v_dual_lshlrev_b32 v255, s1, v2 :: v_dual_fmac_f32 v7, s105, v3 ; encoding: [0x01,0x00,0x44,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction 
requires wavesize=32 + +v_dual_lshlrev_b32 v255, ttmp15, v2 :: v_dual_fmac_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_lshlrev_b32 v255, ttmp15, v2 :: v_dual_fmac_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x44,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, exec_hi, v2 :: v_dual_fmac_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_lshlrev_b32 v255, exec_hi, v2 :: v_dual_fmac_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x44,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, exec_lo, v2 :: v_dual_fmac_f32 v7, ttmp15, v3 +// GFX1250: v_dual_lshlrev_b32 v255, exec_lo, v2 :: v_dual_fmac_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x44,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, m0, v2 :: v_dual_fmac_f32 v7, m0, v3 +// GFX1250: v_dual_lshlrev_b32 v255, m0, v2 :: v_dual_fmac_f32 v7, m0, v3 ; encoding: [0x7d,0x00,0x44,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v7, exec_lo, v3 +// GFX1250: v_dual_lshlrev_b32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x44,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v7, exec_hi, v3 +// GFX1250: v_dual_lshlrev_b32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x44,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, src_scc, v2 :: v_dual_fmac_f32 v7, -1, v3 +// GFX1250: v_dual_lshlrev_b32 v255, src_scc, v2 :: v_dual_fmac_f32 v7, -1, v3 ; encoding: 
[0xfd,0x00,0x44,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, 0.5, v3 :: v_dual_fmac_f32 v7, 0.5, v2 +// GFX1250: v_dual_lshlrev_b32 v255, 0.5, v3 :: v_dual_fmac_f32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x44,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, -1, v4 :: v_dual_fmac_f32 v7, src_scc, v5 +// GFX1250: v_dual_lshlrev_b32 v255, -1, v4 :: v_dual_fmac_f32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x44,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v4, v2 :: v_dual_lshlrev_b32 v7, v1, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v4, v2 :: v_dual_lshlrev_b32 v7, v1, v3 ; encoding: [0x04,0x11,0x45,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v1, v2 :: v_dual_lshlrev_b32 v7, v255, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v1, v2 :: v_dual_lshlrev_b32 v7, v255, v3 ; encoding: [0x01,0x11,0x45,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v255, v2 :: v_dual_lshlrev_b32 v7, v2, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v255, v2 :: v_dual_lshlrev_b32 v7, v2, v3 ; encoding: [0xff,0x11,0x45,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v2, v2 :: v_dual_lshlrev_b32 v7, v3, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v2, v2 :: v_dual_lshlrev_b32 v7, v3, v3 ; encoding: [0x02,0x11,0x45,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v3, v2 :: v_dual_lshlrev_b32 v7, v4, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v3, v2 :: 
v_dual_lshlrev_b32 v7, v4, v3 ; encoding: [0x03,0x11,0x45,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, s105, v2 :: v_dual_lshlrev_b32 v7, s1, v3 +// GFX1250: v_dual_lshlrev_b32 v255, s105, v2 :: v_dual_lshlrev_b32 v7, s1, v3 ; encoding: [0x69,0x10,0x45,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, s1, v2 :: v_dual_lshlrev_b32 v7, s105, v3 +// GFX1250: v_dual_lshlrev_b32 v255, s1, v2 :: v_dual_lshlrev_b32 v7, s105, v3 ; encoding: [0x01,0x10,0x45,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v7, vcc_lo, v3 +// GFX1250: v_dual_lshlrev_b32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v7, vcc_lo, v3 ; encoding: [0x7b,0x10,0x45,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v7, vcc_hi, v3 +// GFX1250: v_dual_lshlrev_b32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v7, vcc_hi, v3 ; encoding: [0x7f,0x10,0x45,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v7, ttmp15, v3 +// GFX1250: v_dual_lshlrev_b32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v7, ttmp15, v3 ; encoding: [0x7e,0x10,0x45,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, m0, v2 :: v_dual_lshlrev_b32 v7, m0, v3 +// GFX1250: v_dual_lshlrev_b32 v255, m0, v2 :: v_dual_lshlrev_b32 v7, m0, v3 ; encoding: [0x7d,0x10,0x45,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, 
vcc_hi, v2 :: v_dual_lshlrev_b32 v7, exec_lo, v3 +// GFX1250: v_dual_lshlrev_b32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v7, exec_lo, v3 ; encoding: [0x6b,0x10,0x45,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v7, exec_hi, v3 +// GFX1250: v_dual_lshlrev_b32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x10,0x45,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v7, -1, v3 +// GFX1250: v_dual_lshlrev_b32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v7, -1, v3 ; encoding: [0xfd,0x10,0x45,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v7, 0.5, v2 +// GFX1250: v_dual_lshlrev_b32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x10,0x45,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, -1, v4 :: v_dual_lshlrev_b32 v7, src_scc, v5 +// GFX1250: v_dual_lshlrev_b32 v255, -1, v4 :: v_dual_lshlrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x10,0x45,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v4, v2 :: v_dual_max_num_f32 v7, v1, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v4, v2 :: v_dual_max_num_f32 v7, v1, v3 ; encoding: [0x04,0xa1,0x44,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v1, v2 :: v_dual_max_num_f32 v7, v255, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v1, v2 :: v_dual_max_num_f32 v7, v255, v3 ; encoding: [0x01,0xa1,0x44,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// 
W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v255, v2 :: v_dual_max_num_f32 v7, v2, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v255, v2 :: v_dual_max_num_f32 v7, v2, v3 ; encoding: [0xff,0xa1,0x44,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v2, v2 :: v_dual_max_num_f32 v7, v3, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v2, v2 :: v_dual_max_num_f32 v7, v3, v3 ; encoding: [0x02,0xa1,0x44,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v3, v2 :: v_dual_max_num_f32 v7, v4, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v3, v2 :: v_dual_max_num_f32 v7, v4, v3 ; encoding: [0x03,0xa1,0x44,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, s105, v2 :: v_dual_max_num_f32 v7, s1, v3 +// GFX1250: v_dual_lshlrev_b32 v255, s105, v2 :: v_dual_max_num_f32 v7, s1, v3 ; encoding: [0x69,0xa0,0x44,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, s1, v2 :: v_dual_max_num_f32 v7, s105, v3 +// GFX1250: v_dual_lshlrev_b32 v255, s1, v2 :: v_dual_max_num_f32 v7, s105, v3 ; encoding: [0x01,0xa0,0x44,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, ttmp15, v2 :: v_dual_max_num_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_lshlrev_b32 v255, ttmp15, v2 :: v_dual_max_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xa0,0x44,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, exec_hi, v2 :: v_dual_max_num_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_lshlrev_b32 v255, exec_hi, v2 :: v_dual_max_num_f32 v7, vcc_hi, v3 ; 
encoding: [0x7f,0xa0,0x44,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, exec_lo, v2 :: v_dual_max_num_f32 v7, ttmp15, v3 +// GFX1250: v_dual_lshlrev_b32 v255, exec_lo, v2 :: v_dual_max_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xa0,0x44,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, m0, v2 :: v_dual_max_num_f32 v7, m0, v3 +// GFX1250: v_dual_lshlrev_b32 v255, m0, v2 :: v_dual_max_num_f32 v7, m0, v3 ; encoding: [0x7d,0xa0,0x44,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v7, exec_lo, v3 +// GFX1250: v_dual_lshlrev_b32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xa0,0x44,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v7, exec_hi, v3 +// GFX1250: v_dual_lshlrev_b32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xa0,0x44,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, src_scc, v2 :: v_dual_max_num_f32 v7, -1, v3 +// GFX1250: v_dual_lshlrev_b32 v255, src_scc, v2 :: v_dual_max_num_f32 v7, -1, v3 ; encoding: [0xfd,0xa0,0x44,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, 0.5, v3 :: v_dual_max_num_f32 v7, 0.5, v2 +// GFX1250: v_dual_lshlrev_b32 v255, 0.5, v3 :: v_dual_max_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xa0,0x44,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, -1, v4 :: v_dual_max_num_f32 
v7, src_scc, v5 +// GFX1250: v_dual_lshlrev_b32 v255, -1, v4 :: v_dual_max_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xa0,0x44,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v4, v2 :: v_dual_min_num_f32 v7, v1, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v4, v2 :: v_dual_min_num_f32 v7, v1, v3 ; encoding: [0x04,0xb1,0x44,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v1, v2 :: v_dual_min_num_f32 v7, v255, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v1, v2 :: v_dual_min_num_f32 v7, v255, v3 ; encoding: [0x01,0xb1,0x44,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v255, v2 :: v_dual_min_num_f32 v7, v2, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v255, v2 :: v_dual_min_num_f32 v7, v2, v3 ; encoding: [0xff,0xb1,0x44,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v2, v2 :: v_dual_min_num_f32 v7, v3, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v2, v2 :: v_dual_min_num_f32 v7, v3, v3 ; encoding: [0x02,0xb1,0x44,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v3, v2 :: v_dual_min_num_f32 v7, v4, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v3, v2 :: v_dual_min_num_f32 v7, v4, v3 ; encoding: [0x03,0xb1,0x44,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, s105, v2 :: v_dual_min_num_f32 v7, s1, v3 +// GFX1250: v_dual_lshlrev_b32 v255, s105, v2 :: v_dual_min_num_f32 v7, s1, v3 ; encoding: [0x69,0xb0,0x44,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_lshlrev_b32 v255, s1, v2 :: v_dual_min_num_f32 v7, s105, v3 +// GFX1250: v_dual_lshlrev_b32 v255, s1, v2 :: v_dual_min_num_f32 v7, s105, v3 ; encoding: [0x01,0xb0,0x44,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, ttmp15, v2 :: v_dual_min_num_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_lshlrev_b32 v255, ttmp15, v2 :: v_dual_min_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xb0,0x44,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, exec_hi, v2 :: v_dual_min_num_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_lshlrev_b32 v255, exec_hi, v2 :: v_dual_min_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xb0,0x44,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, exec_lo, v2 :: v_dual_min_num_f32 v7, ttmp15, v3 +// GFX1250: v_dual_lshlrev_b32 v255, exec_lo, v2 :: v_dual_min_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xb0,0x44,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, m0, v2 :: v_dual_min_num_f32 v7, m0, v3 +// GFX1250: v_dual_lshlrev_b32 v255, m0, v2 :: v_dual_min_num_f32 v7, m0, v3 ; encoding: [0x7d,0xb0,0x44,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v7, exec_lo, v3 +// GFX1250: v_dual_lshlrev_b32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xb0,0x44,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v7, exec_hi, v3 +// GFX1250: v_dual_lshlrev_b32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v7, exec_hi, v3 ; encoding: 
[0x6a,0xb0,0x44,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, src_scc, v2 :: v_dual_min_num_f32 v7, -1, v3 +// GFX1250: v_dual_lshlrev_b32 v255, src_scc, v2 :: v_dual_min_num_f32 v7, -1, v3 ; encoding: [0xfd,0xb0,0x44,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, 0.5, v3 :: v_dual_min_num_f32 v7, 0.5, v2 +// GFX1250: v_dual_lshlrev_b32 v255, 0.5, v3 :: v_dual_min_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xb0,0x44,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, -1, v4 :: v_dual_min_num_f32 v7, src_scc, v5 +// GFX1250: v_dual_lshlrev_b32 v255, -1, v4 :: v_dual_min_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xb0,0x44,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v4, v255 :: v_dual_mov_b32 v7, v1 +// GFX1250: v_dual_lshlrev_b32 v255, v4, v255 :: v_dual_mov_b32 v7, v1 ; encoding: [0x04,0x81,0x44,0xcf,0x01,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v1, v255 :: v_dual_mov_b32 v7, v255 +// GFX1250: v_dual_lshlrev_b32 v255, v1, v255 :: v_dual_mov_b32 v7, v255 ; encoding: [0x01,0x81,0x44,0xcf,0xff,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v255, v255 :: v_dual_mov_b32 v7, v2 +// GFX1250: v_dual_lshlrev_b32 v255, v255, v255 :: v_dual_mov_b32 v7, v2 ; encoding: [0xff,0x81,0x44,0xcf,0x02,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v2, v255 :: v_dual_mov_b32 v7, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v2, v255 :: v_dual_mov_b32 v7, v3 ; 
encoding: [0x02,0x81,0x44,0xcf,0x03,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v3, v255 :: v_dual_mov_b32 v7, v4 +// GFX1250: v_dual_lshlrev_b32 v255, v3, v255 :: v_dual_mov_b32 v7, v4 ; encoding: [0x03,0x81,0x44,0xcf,0x04,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, s105, v255 :: v_dual_mov_b32 v7, s1 +// GFX1250: v_dual_lshlrev_b32 v255, s105, v255 :: v_dual_mov_b32 v7, s1 ; encoding: [0x69,0x80,0x44,0xcf,0x01,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, s1, v255 :: v_dual_mov_b32 v7, s105 +// GFX1250: v_dual_lshlrev_b32 v255, s1, v255 :: v_dual_mov_b32 v7, s105 ; encoding: [0x01,0x80,0x44,0xcf,0x69,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, ttmp15, v255 :: v_dual_mov_b32 v7, vcc_lo +// GFX1250: v_dual_lshlrev_b32 v255, ttmp15, v255 :: v_dual_mov_b32 v7, vcc_lo ; encoding: [0x7b,0x80,0x44,0xcf,0x6a,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, exec_hi, v255 :: v_dual_mov_b32 v7, vcc_hi +// GFX1250: v_dual_lshlrev_b32 v255, exec_hi, v255 :: v_dual_mov_b32 v7, vcc_hi ; encoding: [0x7f,0x80,0x44,0xcf,0x6b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, exec_lo, v255 :: v_dual_mov_b32 v7, ttmp15 +// GFX1250: v_dual_lshlrev_b32 v255, exec_lo, v255 :: v_dual_mov_b32 v7, ttmp15 ; encoding: [0x7e,0x80,0x44,0xcf,0x7b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, m0, v255 :: v_dual_mov_b32 v7, m0 +// GFX1250: v_dual_lshlrev_b32 v255, m0, v255 :: v_dual_mov_b32 v7, m0 ; 
encoding: [0x7d,0x80,0x44,0xcf,0x7d,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, vcc_hi, v255 :: v_dual_mov_b32 v7, exec_lo +// GFX1250: v_dual_lshlrev_b32 v255, vcc_hi, v255 :: v_dual_mov_b32 v7, exec_lo ; encoding: [0x6b,0x80,0x44,0xcf,0x7e,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, vcc_lo, v255 :: v_dual_mov_b32 v7, exec_hi +// GFX1250: v_dual_lshlrev_b32 v255, vcc_lo, v255 :: v_dual_mov_b32 v7, exec_hi ; encoding: [0x6a,0x80,0x44,0xcf,0x7f,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, src_scc, v255 :: v_dual_mov_b32 v7, -1 +// GFX1250: v_dual_lshlrev_b32 v255, src_scc, v255 :: v_dual_mov_b32 v7, -1 ; encoding: [0xfd,0x80,0x44,0xcf,0xc1,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, 0.5, v3 :: v_dual_mov_b32 v7, 0.5 +// GFX1250: v_dual_lshlrev_b32 v255, 0.5, v3 :: v_dual_mov_b32 v7, 0.5 ; encoding: [0xf0,0x80,0x44,0xcf,0xf0,0x00,0x03,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, -1, v4 :: v_dual_mov_b32 v7, src_scc +// GFX1250: v_dual_lshlrev_b32 v255, -1, v4 :: v_dual_mov_b32 v7, src_scc ; encoding: [0xc1,0x80,0x44,0xcf,0xfd,0x00,0x04,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v7, v1, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v7, v1, v3 ; encoding: [0x04,0x71,0x44,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v7, v255, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v1, v2 :: 
v_dual_mul_dx9_zero_f32 v7, v255, v3 ; encoding: [0x01,0x71,0x44,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v7, v2, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v7, v2, v3 ; encoding: [0xff,0x71,0x44,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v7, v3, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v7, v3, v3 ; encoding: [0x02,0x71,0x44,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v7, v4, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v7, v4, v3 ; encoding: [0x03,0x71,0x44,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v7, s1, v3 +// GFX1250: v_dual_lshlrev_b32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v7, s1, v3 ; encoding: [0x69,0x70,0x44,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v7, s105, v3 +// GFX1250: v_dual_lshlrev_b32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v7, s105, v3 ; encoding: [0x01,0x70,0x44,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_lshlrev_b32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x70,0x44,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_lshlrev_b32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_lshlrev_b32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x70,0x44,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, ttmp15, v3 +// GFX1250: v_dual_lshlrev_b32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x70,0x44,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v7, m0, v3 +// GFX1250: v_dual_lshlrev_b32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v7, m0, v3 ; encoding: [0x7d,0x70,0x44,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_lo, v3 +// GFX1250: v_dual_lshlrev_b32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x70,0x44,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 +// GFX1250: v_dual_lshlrev_b32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x44,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v7, -1, v3 +// GFX1250: v_dual_lshlrev_b32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v7, -1, v3 ; encoding: [0xfd,0x70,0x44,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 +// GFX1250: v_dual_lshlrev_b32 
v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x44,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 +// GFX1250: v_dual_lshlrev_b32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x44,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v4, v2 :: v_dual_mul_f32 v7, v1, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v4, v2 :: v_dual_mul_f32 v7, v1, v3 ; encoding: [0x04,0x31,0x44,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v1, v2 :: v_dual_mul_f32 v7, v255, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v1, v2 :: v_dual_mul_f32 v7, v255, v3 ; encoding: [0x01,0x31,0x44,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v255, v2 :: v_dual_mul_f32 v7, v2, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v255, v2 :: v_dual_mul_f32 v7, v2, v3 ; encoding: [0xff,0x31,0x44,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v2, v2 :: v_dual_mul_f32 v7, v3, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v2, v2 :: v_dual_mul_f32 v7, v3, v3 ; encoding: [0x02,0x31,0x44,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v3, v2 :: v_dual_mul_f32 v7, v4, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v3, v2 :: v_dual_mul_f32 v7, v4, v3 ; encoding: [0x03,0x31,0x44,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, s105, v2 :: v_dual_mul_f32 v7, s1, v3 +// GFX1250: 
v_dual_lshlrev_b32 v255, s105, v2 :: v_dual_mul_f32 v7, s1, v3 ; encoding: [0x69,0x30,0x44,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, s1, v2 :: v_dual_mul_f32 v7, s105, v3 +// GFX1250: v_dual_lshlrev_b32 v255, s1, v2 :: v_dual_mul_f32 v7, s105, v3 ; encoding: [0x01,0x30,0x44,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, ttmp15, v2 :: v_dual_mul_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_lshlrev_b32 v255, ttmp15, v2 :: v_dual_mul_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x30,0x44,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, exec_hi, v2 :: v_dual_mul_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_lshlrev_b32 v255, exec_hi, v2 :: v_dual_mul_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x30,0x44,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, exec_lo, v2 :: v_dual_mul_f32 v7, ttmp15, v3 +// GFX1250: v_dual_lshlrev_b32 v255, exec_lo, v2 :: v_dual_mul_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x30,0x44,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, m0, v2 :: v_dual_mul_f32 v7, m0, v3 +// GFX1250: v_dual_lshlrev_b32 v255, m0, v2 :: v_dual_mul_f32 v7, m0, v3 ; encoding: [0x7d,0x30,0x44,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, vcc_hi, v2 :: v_dual_mul_f32 v7, exec_lo, v3 +// GFX1250: v_dual_lshlrev_b32 v255, vcc_hi, v2 :: v_dual_mul_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x30,0x44,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, 
vcc_lo, v2 :: v_dual_mul_f32 v7, exec_hi, v3 +// GFX1250: v_dual_lshlrev_b32 v255, vcc_lo, v2 :: v_dual_mul_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x30,0x44,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, src_scc, v2 :: v_dual_mul_f32 v7, -1, v3 +// GFX1250: v_dual_lshlrev_b32 v255, src_scc, v2 :: v_dual_mul_f32 v7, -1, v3 ; encoding: [0xfd,0x30,0x44,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, 0.5, v3 :: v_dual_mul_f32 v7, 0.5, v2 +// GFX1250: v_dual_lshlrev_b32 v255, 0.5, v3 :: v_dual_mul_f32 v7, 0.5, v2 ; encoding: [0xf0,0x30,0x44,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, -1, v4 :: v_dual_mul_f32 v7, src_scc, v5 +// GFX1250: v_dual_lshlrev_b32 v255, -1, v4 :: v_dual_mul_f32 v7, src_scc, v5 ; encoding: [0xc1,0x30,0x44,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v4, v2 :: v_dual_sub_f32 v7, v1, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v4, v2 :: v_dual_sub_f32 v7, v1, v3 ; encoding: [0x04,0x51,0x44,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v1, v2 :: v_dual_sub_f32 v7, v255, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v1, v2 :: v_dual_sub_f32 v7, v255, v3 ; encoding: [0x01,0x51,0x44,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v255, v2 :: v_dual_sub_f32 v7, v2, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v255, v2 :: v_dual_sub_f32 v7, v2, v3 ; encoding: [0xff,0x51,0x44,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_lshlrev_b32 v255, v2, v2 :: v_dual_sub_f32 v7, v3, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v2, v2 :: v_dual_sub_f32 v7, v3, v3 ; encoding: [0x02,0x51,0x44,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v3, v2 :: v_dual_sub_f32 v7, v4, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v3, v2 :: v_dual_sub_f32 v7, v4, v3 ; encoding: [0x03,0x51,0x44,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, s105, v2 :: v_dual_sub_f32 v7, s1, v3 +// GFX1250: v_dual_lshlrev_b32 v255, s105, v2 :: v_dual_sub_f32 v7, s1, v3 ; encoding: [0x69,0x50,0x44,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, s1, v2 :: v_dual_sub_f32 v7, s105, v3 +// GFX1250: v_dual_lshlrev_b32 v255, s1, v2 :: v_dual_sub_f32 v7, s105, v3 ; encoding: [0x01,0x50,0x44,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, ttmp15, v2 :: v_dual_sub_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_lshlrev_b32 v255, ttmp15, v2 :: v_dual_sub_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x50,0x44,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, exec_hi, v2 :: v_dual_sub_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_lshlrev_b32 v255, exec_hi, v2 :: v_dual_sub_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x50,0x44,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, exec_lo, v2 :: v_dual_sub_f32 v7, ttmp15, v3 +// GFX1250: v_dual_lshlrev_b32 v255, exec_lo, v2 :: v_dual_sub_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x50,0x44,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, m0, v2 :: v_dual_sub_f32 v7, m0, v3 +// GFX1250: v_dual_lshlrev_b32 v255, m0, v2 :: v_dual_sub_f32 v7, m0, v3 ; encoding: [0x7d,0x50,0x44,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, vcc_hi, v2 :: v_dual_sub_f32 v7, exec_lo, v3 +// GFX1250: v_dual_lshlrev_b32 v255, vcc_hi, v2 :: v_dual_sub_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x50,0x44,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, vcc_lo, v2 :: v_dual_sub_f32 v7, exec_hi, v3 +// GFX1250: v_dual_lshlrev_b32 v255, vcc_lo, v2 :: v_dual_sub_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x44,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, src_scc, v2 :: v_dual_sub_f32 v7, -1, v3 +// GFX1250: v_dual_lshlrev_b32 v255, src_scc, v2 :: v_dual_sub_f32 v7, -1, v3 ; encoding: [0xfd,0x50,0x44,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, 0.5, v3 :: v_dual_sub_f32 v7, 0.5, v2 +// GFX1250: v_dual_lshlrev_b32 v255, 0.5, v3 :: v_dual_sub_f32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x44,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, -1, v4 :: v_dual_sub_f32 v7, src_scc, v5 +// GFX1250: v_dual_lshlrev_b32 v255, -1, v4 :: v_dual_sub_f32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x44,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v4, v2 :: v_dual_subrev_f32 v7, v1, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v4, v2 :: v_dual_subrev_f32 v7, v1, v3 ; encoding: [0x04,0x61,0x44,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] 
+// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v1, v2 :: v_dual_subrev_f32 v7, v255, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v1, v2 :: v_dual_subrev_f32 v7, v255, v3 ; encoding: [0x01,0x61,0x44,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v255, v2 :: v_dual_subrev_f32 v7, v2, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v255, v2 :: v_dual_subrev_f32 v7, v2, v3 ; encoding: [0xff,0x61,0x44,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v2, v2 :: v_dual_subrev_f32 v7, v3, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v2, v2 :: v_dual_subrev_f32 v7, v3, v3 ; encoding: [0x02,0x61,0x44,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v3, v2 :: v_dual_subrev_f32 v7, v4, v3 +// GFX1250: v_dual_lshlrev_b32 v255, v3, v2 :: v_dual_subrev_f32 v7, v4, v3 ; encoding: [0x03,0x61,0x44,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, s105, v2 :: v_dual_subrev_f32 v7, s1, v3 +// GFX1250: v_dual_lshlrev_b32 v255, s105, v2 :: v_dual_subrev_f32 v7, s1, v3 ; encoding: [0x69,0x60,0x44,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, s1, v2 :: v_dual_subrev_f32 v7, s105, v3 +// GFX1250: v_dual_lshlrev_b32 v255, s1, v2 :: v_dual_subrev_f32 v7, s105, v3 ; encoding: [0x01,0x60,0x44,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, ttmp15, v2 :: v_dual_subrev_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_lshlrev_b32 v255, ttmp15, v2 :: v_dual_subrev_f32 v7, vcc_lo, v3 ; encoding: 
[0x7b,0x60,0x44,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, exec_hi, v2 :: v_dual_subrev_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_lshlrev_b32 v255, exec_hi, v2 :: v_dual_subrev_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x60,0x44,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, exec_lo, v2 :: v_dual_subrev_f32 v7, ttmp15, v3 +// GFX1250: v_dual_lshlrev_b32 v255, exec_lo, v2 :: v_dual_subrev_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x60,0x44,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, m0, v2 :: v_dual_subrev_f32 v7, m0, v3 +// GFX1250: v_dual_lshlrev_b32 v255, m0, v2 :: v_dual_subrev_f32 v7, m0, v3 ; encoding: [0x7d,0x60,0x44,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v7, exec_lo, v3 +// GFX1250: v_dual_lshlrev_b32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x60,0x44,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v7, exec_hi, v3 +// GFX1250: v_dual_lshlrev_b32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x44,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, src_scc, v2 :: v_dual_subrev_f32 v7, -1, v3 +// GFX1250: v_dual_lshlrev_b32 v255, src_scc, v2 :: v_dual_subrev_f32 v7, -1, v3 ; encoding: [0xfd,0x60,0x44,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, 0.5, v3 :: v_dual_subrev_f32 v7, 
0.5, v2 +// GFX1250: v_dual_lshlrev_b32 v255, 0.5, v3 :: v_dual_subrev_f32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x44,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, -1, v4 :: v_dual_subrev_f32 v7, src_scc, v5 +// GFX1250: v_dual_lshlrev_b32 v255, -1, v4 :: v_dual_subrev_f32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x44,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v254, v4, v2 :: v_dual_fma_f32 v7, v1, v3, v4 +// GFX1250: v_dual_lshlrev_b32 v254, v4, v2 :: v_dual_fma_f32 v7, v1, v3, v4 ; encoding: [0x04,0x31,0x45,0xcf,0x01,0x01,0x02,0x00,0xfe,0x03,0x04,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v254, v4, v2 :: v_dual_sub_nc_u32 v9, v1, v13 +// GFX1250: v_dual_lshlrev_b32 v254, v4, v2 :: v_dual_sub_nc_u32 v9, v1, v13 ; encoding: [0x04,0x41,0x45,0xcf,0x01,0x01,0x02,0x00,0xfe,0x0d,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v254, v4, v2 :: v_dual_lshrrev_b32 v9, v1, v13 +// GFX1250: v_dual_lshlrev_b32 v254, v4, v2 :: v_dual_lshrrev_b32 v9, v1, v13 ; encoding: [0x04,0x51,0x45,0xcf,0x01,0x01,0x02,0x00,0xfe,0x0d,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v254, v4, v2 :: v_dual_ashrrev_i32 v9, v1, v13 +// GFX1250: v_dual_lshlrev_b32 v254, v4, v2 :: v_dual_ashrrev_i32 v9, v1, v13 ; encoding: [0x04,0x61,0x45,0xcf,0x01,0x01,0x02,0x00,0xfe,0x0d,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v254, v4, v2 :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0x85 +// GFX1250: v_dual_lshlrev_b32 v254, v4, v2 :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0x85 ; encoding: [0x04,0x21,0x45,0xcf,0x01,0x01,0x02,0x00,0xfe,0x03,0x85,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_sub_nc_u32 v254, v4, v2 :: v_dual_add_f32 v7, v1, v3 +// GFX1250: v_dual_sub_nc_u32 v254, v4, v2 :: v_dual_add_f32 v7, v1, v3 ; encoding: [0x04,0x41,0x50,0xcf,0x01,0x01,0x02,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v1, v2 :: v_dual_add_f32 v7, v255, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v1, v2 :: v_dual_add_f32 v7, v255, v3 ; encoding: [0x01,0x41,0x50,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v255, v2 :: v_dual_add_f32 v7, v2, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v255, v2 :: v_dual_add_f32 v7, v2, v3 ; encoding: [0xff,0x41,0x50,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v2, v2 :: v_dual_add_f32 v7, v3, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v2, v2 :: v_dual_add_f32 v7, v3, v3 ; encoding: [0x02,0x41,0x50,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v3, v2 :: v_dual_add_f32 v7, v4, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v3, v2 :: v_dual_add_f32 v7, v4, v3 ; encoding: [0x03,0x41,0x50,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, s105, v2 :: v_dual_add_f32 v7, s1, v3 +// GFX1250: v_dual_sub_nc_u32 v255, s105, v2 :: v_dual_add_f32 v7, s1, v3 ; encoding: [0x69,0x40,0x50,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, s1, v2 :: v_dual_add_f32 v7, s105, v3 +// GFX1250: v_dual_sub_nc_u32 v255, s1, v2 :: v_dual_add_f32 v7, s105, v3 ; encoding: [0x01,0x40,0x50,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, 
ttmp15, v2 :: v_dual_add_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_sub_nc_u32 v255, ttmp15, v2 :: v_dual_add_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x40,0x50,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, exec_hi, v2 :: v_dual_add_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_sub_nc_u32 v255, exec_hi, v2 :: v_dual_add_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x40,0x50,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, exec_lo, v2 :: v_dual_add_f32 v7, ttmp15, v3 +// GFX1250: v_dual_sub_nc_u32 v255, exec_lo, v2 :: v_dual_add_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x40,0x50,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, m0, v2 :: v_dual_add_f32 v7, m0, v3 +// GFX1250: v_dual_sub_nc_u32 v255, m0, v2 :: v_dual_add_f32 v7, m0, v3 ; encoding: [0x7d,0x40,0x50,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, vcc_hi, v2 :: v_dual_add_f32 v7, exec_lo, v3 +// GFX1250: v_dual_sub_nc_u32 v255, vcc_hi, v2 :: v_dual_add_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x40,0x50,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, vcc_lo, v2 :: v_dual_add_f32 v7, exec_hi, v3 +// GFX1250: v_dual_sub_nc_u32 v255, vcc_lo, v2 :: v_dual_add_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x50,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, src_scc, v2 :: v_dual_add_f32 v7, -1, v3 +// GFX1250: v_dual_sub_nc_u32 v255, src_scc, v2 :: v_dual_add_f32 v7, -1, v3 ; encoding: [0xfd,0x40,0x50,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, 0.5, v3 :: v_dual_add_f32 v7, 0.5, v2 +// GFX1250: v_dual_sub_nc_u32 v255, 0.5, v3 :: v_dual_add_f32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x50,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, -1, v4 :: v_dual_add_f32 v7, src_scc, v5 +// GFX1250: v_dual_sub_nc_u32 v255, -1, v4 :: v_dual_add_f32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x50,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v4, v2 :: v_dual_add_nc_u32 v7, v1, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v4, v2 :: v_dual_add_nc_u32 v7, v1, v3 ; encoding: [0x04,0x01,0x51,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v1, v2 :: v_dual_add_nc_u32 v7, v255, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v1, v2 :: v_dual_add_nc_u32 v7, v255, v3 ; encoding: [0x01,0x01,0x51,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v255, v2 :: v_dual_add_nc_u32 v7, v2, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v255, v2 :: v_dual_add_nc_u32 v7, v2, v3 ; encoding: [0xff,0x01,0x51,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v2, v2 :: v_dual_add_nc_u32 v7, v3, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v2, v2 :: v_dual_add_nc_u32 v7, v3, v3 ; encoding: [0x02,0x01,0x51,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v3, v2 :: v_dual_add_nc_u32 v7, v4, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v3, v2 :: v_dual_add_nc_u32 v7, v4, v3 ; encoding: [0x03,0x01,0x51,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: 
error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, s105, v2 :: v_dual_add_nc_u32 v7, s1, v3 +// GFX1250: v_dual_sub_nc_u32 v255, s105, v2 :: v_dual_add_nc_u32 v7, s1, v3 ; encoding: [0x69,0x00,0x51,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, s1, v2 :: v_dual_add_nc_u32 v7, s105, v3 +// GFX1250: v_dual_sub_nc_u32 v255, s1, v2 :: v_dual_add_nc_u32 v7, s105, v3 ; encoding: [0x01,0x00,0x51,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v7, vcc_lo, v3 +// GFX1250: v_dual_sub_nc_u32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x51,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v7, vcc_hi, v3 +// GFX1250: v_dual_sub_nc_u32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x51,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v7, ttmp15, v3 +// GFX1250: v_dual_sub_nc_u32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x51,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, m0, v2 :: v_dual_add_nc_u32 v7, m0, v3 +// GFX1250: v_dual_sub_nc_u32 v255, m0, v2 :: v_dual_add_nc_u32 v7, m0, v3 ; encoding: [0x7d,0x00,0x51,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v7, exec_lo, v3 +// GFX1250: v_dual_sub_nc_u32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v7, exec_lo, v3 ; encoding: 
[0x6b,0x00,0x51,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v7, exec_hi, v3 +// GFX1250: v_dual_sub_nc_u32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x51,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, src_scc, v2 :: v_dual_add_nc_u32 v7, -1, v3 +// GFX1250: v_dual_sub_nc_u32 v255, src_scc, v2 :: v_dual_add_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x00,0x51,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, 0.5, v3 :: v_dual_add_nc_u32 v7, 0.5, v2 +// GFX1250: v_dual_sub_nc_u32 v255, 0.5, v3 :: v_dual_add_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x51,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, -1, v4 :: v_dual_add_nc_u32 v7, src_scc, v5 +// GFX1250: v_dual_sub_nc_u32 v255, -1, v4 :: v_dual_add_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x51,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo +// GFX1250: v_dual_sub_nc_u32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo ; encoding: [0x04,0x91,0x50,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v1, v2 :: v_dual_cndmask_b32 v7, v255, v3, vcc_lo +// GFX1250: v_dual_sub_nc_u32 v255, v1, v2 :: v_dual_cndmask_b32 v7, v255, v3, vcc_lo ; encoding: [0x01,0x91,0x50,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v255, v2 :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo 
+// GFX1250: v_dual_sub_nc_u32 v255, v255, v2 :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo ; encoding: [0xff,0x91,0x50,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v2, v2 :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo +// GFX1250: v_dual_sub_nc_u32 v255, v2, v2 :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo ; encoding: [0x02,0x91,0x50,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v3, v2 :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo +// GFX1250: v_dual_sub_nc_u32 v255, v3, v2 :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo ; encoding: [0x03,0x91,0x50,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, s105, v2 :: v_dual_cndmask_b32 v7, s105, v3, vcc_lo +// GFX1250: v_dual_sub_nc_u32 v255, s105, v2 :: v_dual_cndmask_b32 v7, s105, v3, vcc_lo ; encoding: [0x69,0x90,0x50,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, s1, v2 :: v_dual_cndmask_b32 v7, s1, v3, vcc_lo +// GFX1250: v_dual_sub_nc_u32 v255, s1, v2 :: v_dual_cndmask_b32 v7, s1, v3, vcc_lo ; encoding: [0x01,0x90,0x50,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v7, ttmp15, v3, vcc_lo +// GFX1250: v_dual_sub_nc_u32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v7, ttmp15, v3, vcc_lo ; encoding: [0x7b,0x90,0x50,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v7, exec_hi, v3, vcc_lo +// GFX1250: v_dual_sub_nc_u32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v7, exec_hi, v3, vcc_lo ; encoding: 
[0x7f,0x90,0x50,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v7, exec_lo, v3, vcc_lo +// GFX1250: v_dual_sub_nc_u32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v7, exec_lo, v3, vcc_lo ; encoding: [0x7e,0x90,0x50,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, m0, v2 :: v_dual_cndmask_b32 v7, m0, v3, vcc_lo +// GFX1250: v_dual_sub_nc_u32 v255, m0, v2 :: v_dual_cndmask_b32 v7, m0, v3, vcc_lo ; encoding: [0x7d,0x90,0x50,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v7, vcc_hi, v3, vcc_lo +// GFX1250: v_dual_sub_nc_u32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v7, vcc_hi, v3, vcc_lo ; encoding: [0x6b,0x90,0x50,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo +// GFX1250: v_dual_sub_nc_u32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo ; encoding: [0x6a,0x90,0x50,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, src_scc, v2 :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo +// GFX1250: v_dual_sub_nc_u32 v255, src_scc, v2 :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo ; encoding: [0xfd,0x90,0x50,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, 0.5, v3 :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo +// GFX1250: v_dual_sub_nc_u32 v255, 0.5, v3 :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo ; encoding: [0xf0,0x90,0x50,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction 
requires wavesize=32 + +v_dual_sub_nc_u32 v255, -1, v4 :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo +// GFX1250: v_dual_sub_nc_u32 v255, -1, v4 :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo ; encoding: [0xc1,0x90,0x50,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v4, v2 :: v_dual_fmac_f32 v7, v1, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v4, v2 :: v_dual_fmac_f32 v7, v1, v3 ; encoding: [0x04,0x01,0x50,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v1, v2 :: v_dual_fmac_f32 v7, v255, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v1, v2 :: v_dual_fmac_f32 v7, v255, v3 ; encoding: [0x01,0x01,0x50,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v255, v2 :: v_dual_fmac_f32 v7, v2, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v255, v2 :: v_dual_fmac_f32 v7, v2, v3 ; encoding: [0xff,0x01,0x50,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v2, v2 :: v_dual_fmac_f32 v7, v3, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v2, v2 :: v_dual_fmac_f32 v7, v3, v3 ; encoding: [0x02,0x01,0x50,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v3, v2 :: v_dual_fmac_f32 v7, v4, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v3, v2 :: v_dual_fmac_f32 v7, v4, v3 ; encoding: [0x03,0x01,0x50,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, s105, v2 :: v_dual_fmac_f32 v7, s1, v3 +// GFX1250: v_dual_sub_nc_u32 v255, s105, v2 :: v_dual_fmac_f32 v7, s1, v3 ; encoding: [0x69,0x00,0x50,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, s1, v2 :: v_dual_fmac_f32 v7, s105, v3 +// GFX1250: v_dual_sub_nc_u32 v255, s1, v2 :: v_dual_fmac_f32 v7, s105, v3 ; encoding: [0x01,0x00,0x50,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, ttmp15, v2 :: v_dual_fmac_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_sub_nc_u32 v255, ttmp15, v2 :: v_dual_fmac_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x50,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, exec_hi, v2 :: v_dual_fmac_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_sub_nc_u32 v255, exec_hi, v2 :: v_dual_fmac_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x50,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, exec_lo, v2 :: v_dual_fmac_f32 v7, ttmp15, v3 +// GFX1250: v_dual_sub_nc_u32 v255, exec_lo, v2 :: v_dual_fmac_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x50,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, m0, v2 :: v_dual_fmac_f32 v7, m0, v3 +// GFX1250: v_dual_sub_nc_u32 v255, m0, v2 :: v_dual_fmac_f32 v7, m0, v3 ; encoding: [0x7d,0x00,0x50,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v7, exec_lo, v3 +// GFX1250: v_dual_sub_nc_u32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x50,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v7, exec_hi, v3 +// GFX1250: v_dual_sub_nc_u32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v7, exec_hi, v3 ; encoding: 
[0x6a,0x00,0x50,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, src_scc, v2 :: v_dual_fmac_f32 v7, -1, v3 +// GFX1250: v_dual_sub_nc_u32 v255, src_scc, v2 :: v_dual_fmac_f32 v7, -1, v3 ; encoding: [0xfd,0x00,0x50,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, 0.5, v3 :: v_dual_fmac_f32 v7, 0.5, v2 +// GFX1250: v_dual_sub_nc_u32 v255, 0.5, v3 :: v_dual_fmac_f32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x50,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, -1, v4 :: v_dual_fmac_f32 v7, src_scc, v5 +// GFX1250: v_dual_sub_nc_u32 v255, -1, v4 :: v_dual_fmac_f32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x50,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v4, v2 :: v_dual_lshlrev_b32 v7, v1, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v4, v2 :: v_dual_lshlrev_b32 v7, v1, v3 ; encoding: [0x04,0x11,0x51,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v1, v2 :: v_dual_lshlrev_b32 v7, v255, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v1, v2 :: v_dual_lshlrev_b32 v7, v255, v3 ; encoding: [0x01,0x11,0x51,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v255, v2 :: v_dual_lshlrev_b32 v7, v2, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v255, v2 :: v_dual_lshlrev_b32 v7, v2, v3 ; encoding: [0xff,0x11,0x51,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v2, v2 :: v_dual_lshlrev_b32 v7, v3, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v2, v2 :: v_dual_lshlrev_b32 
v7, v3, v3 ; encoding: [0x02,0x11,0x51,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v3, v2 :: v_dual_lshlrev_b32 v7, v4, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v3, v2 :: v_dual_lshlrev_b32 v7, v4, v3 ; encoding: [0x03,0x11,0x51,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, s105, v2 :: v_dual_lshlrev_b32 v7, s1, v3 +// GFX1250: v_dual_sub_nc_u32 v255, s105, v2 :: v_dual_lshlrev_b32 v7, s1, v3 ; encoding: [0x69,0x10,0x51,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, s1, v2 :: v_dual_lshlrev_b32 v7, s105, v3 +// GFX1250: v_dual_sub_nc_u32 v255, s1, v2 :: v_dual_lshlrev_b32 v7, s105, v3 ; encoding: [0x01,0x10,0x51,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v7, vcc_lo, v3 +// GFX1250: v_dual_sub_nc_u32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v7, vcc_lo, v3 ; encoding: [0x7b,0x10,0x51,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v7, vcc_hi, v3 +// GFX1250: v_dual_sub_nc_u32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v7, vcc_hi, v3 ; encoding: [0x7f,0x10,0x51,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v7, ttmp15, v3 +// GFX1250: v_dual_sub_nc_u32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v7, ttmp15, v3 ; encoding: [0x7e,0x10,0x51,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, m0, v2 :: v_dual_lshlrev_b32 v7, m0, 
v3 +// GFX1250: v_dual_sub_nc_u32 v255, m0, v2 :: v_dual_lshlrev_b32 v7, m0, v3 ; encoding: [0x7d,0x10,0x51,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v7, exec_lo, v3 +// GFX1250: v_dual_sub_nc_u32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v7, exec_lo, v3 ; encoding: [0x6b,0x10,0x51,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v7, exec_hi, v3 +// GFX1250: v_dual_sub_nc_u32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x10,0x51,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v7, -1, v3 +// GFX1250: v_dual_sub_nc_u32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v7, -1, v3 ; encoding: [0xfd,0x10,0x51,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v7, 0.5, v2 +// GFX1250: v_dual_sub_nc_u32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x10,0x51,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, -1, v4 :: v_dual_lshlrev_b32 v7, src_scc, v5 +// GFX1250: v_dual_sub_nc_u32 v255, -1, v4 :: v_dual_lshlrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x10,0x51,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v4, v2 :: v_dual_max_num_f32 v7, v1, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v4, v2 :: v_dual_max_num_f32 v7, v1, v3 ; encoding: [0x04,0xa1,0x50,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_sub_nc_u32 v255, v1, v2 :: v_dual_max_num_f32 v7, v255, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v1, v2 :: v_dual_max_num_f32 v7, v255, v3 ; encoding: [0x01,0xa1,0x50,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v255, v2 :: v_dual_max_num_f32 v7, v2, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v255, v2 :: v_dual_max_num_f32 v7, v2, v3 ; encoding: [0xff,0xa1,0x50,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v2, v2 :: v_dual_max_num_f32 v7, v3, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v2, v2 :: v_dual_max_num_f32 v7, v3, v3 ; encoding: [0x02,0xa1,0x50,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v3, v2 :: v_dual_max_num_f32 v7, v4, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v3, v2 :: v_dual_max_num_f32 v7, v4, v3 ; encoding: [0x03,0xa1,0x50,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, s105, v2 :: v_dual_max_num_f32 v7, s1, v3 +// GFX1250: v_dual_sub_nc_u32 v255, s105, v2 :: v_dual_max_num_f32 v7, s1, v3 ; encoding: [0x69,0xa0,0x50,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, s1, v2 :: v_dual_max_num_f32 v7, s105, v3 +// GFX1250: v_dual_sub_nc_u32 v255, s1, v2 :: v_dual_max_num_f32 v7, s105, v3 ; encoding: [0x01,0xa0,0x50,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, ttmp15, v2 :: v_dual_max_num_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_sub_nc_u32 v255, ttmp15, v2 :: v_dual_max_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xa0,0x50,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, exec_hi, v2 :: v_dual_max_num_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_sub_nc_u32 v255, exec_hi, v2 :: v_dual_max_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xa0,0x50,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, exec_lo, v2 :: v_dual_max_num_f32 v7, ttmp15, v3 +// GFX1250: v_dual_sub_nc_u32 v255, exec_lo, v2 :: v_dual_max_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xa0,0x50,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, m0, v2 :: v_dual_max_num_f32 v7, m0, v3 +// GFX1250: v_dual_sub_nc_u32 v255, m0, v2 :: v_dual_max_num_f32 v7, m0, v3 ; encoding: [0x7d,0xa0,0x50,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v7, exec_lo, v3 +// GFX1250: v_dual_sub_nc_u32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xa0,0x50,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v7, exec_hi, v3 +// GFX1250: v_dual_sub_nc_u32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xa0,0x50,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, src_scc, v2 :: v_dual_max_num_f32 v7, -1, v3 +// GFX1250: v_dual_sub_nc_u32 v255, src_scc, v2 :: v_dual_max_num_f32 v7, -1, v3 ; encoding: [0xfd,0xa0,0x50,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, 0.5, v3 :: v_dual_max_num_f32 v7, 0.5, v2 +// GFX1250: v_dual_sub_nc_u32 v255, 0.5, v3 :: v_dual_max_num_f32 v7, 
0.5, v2 ; encoding: [0xf0,0xa0,0x50,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, -1, v4 :: v_dual_max_num_f32 v7, src_scc, v5 +// GFX1250: v_dual_sub_nc_u32 v255, -1, v4 :: v_dual_max_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xa0,0x50,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v4, v2 :: v_dual_min_num_f32 v7, v1, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v4, v2 :: v_dual_min_num_f32 v7, v1, v3 ; encoding: [0x04,0xb1,0x50,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v1, v2 :: v_dual_min_num_f32 v7, v255, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v1, v2 :: v_dual_min_num_f32 v7, v255, v3 ; encoding: [0x01,0xb1,0x50,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v255, v2 :: v_dual_min_num_f32 v7, v2, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v255, v2 :: v_dual_min_num_f32 v7, v2, v3 ; encoding: [0xff,0xb1,0x50,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v2, v2 :: v_dual_min_num_f32 v7, v3, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v2, v2 :: v_dual_min_num_f32 v7, v3, v3 ; encoding: [0x02,0xb1,0x50,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v3, v2 :: v_dual_min_num_f32 v7, v4, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v3, v2 :: v_dual_min_num_f32 v7, v4, v3 ; encoding: [0x03,0xb1,0x50,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, s105, v2 :: v_dual_min_num_f32 v7, s1, v3 +// GFX1250: v_dual_sub_nc_u32 v255, 
s105, v2 :: v_dual_min_num_f32 v7, s1, v3 ; encoding: [0x69,0xb0,0x50,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, s1, v2 :: v_dual_min_num_f32 v7, s105, v3 +// GFX1250: v_dual_sub_nc_u32 v255, s1, v2 :: v_dual_min_num_f32 v7, s105, v3 ; encoding: [0x01,0xb0,0x50,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, ttmp15, v2 :: v_dual_min_num_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_sub_nc_u32 v255, ttmp15, v2 :: v_dual_min_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xb0,0x50,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, exec_hi, v2 :: v_dual_min_num_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_sub_nc_u32 v255, exec_hi, v2 :: v_dual_min_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xb0,0x50,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, exec_lo, v2 :: v_dual_min_num_f32 v7, ttmp15, v3 +// GFX1250: v_dual_sub_nc_u32 v255, exec_lo, v2 :: v_dual_min_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xb0,0x50,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, m0, v2 :: v_dual_min_num_f32 v7, m0, v3 +// GFX1250: v_dual_sub_nc_u32 v255, m0, v2 :: v_dual_min_num_f32 v7, m0, v3 ; encoding: [0x7d,0xb0,0x50,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v7, exec_lo, v3 +// GFX1250: v_dual_sub_nc_u32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xb0,0x50,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_sub_nc_u32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v7, exec_hi, v3 +// GFX1250: v_dual_sub_nc_u32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xb0,0x50,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, src_scc, v2 :: v_dual_min_num_f32 v7, -1, v3 +// GFX1250: v_dual_sub_nc_u32 v255, src_scc, v2 :: v_dual_min_num_f32 v7, -1, v3 ; encoding: [0xfd,0xb0,0x50,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, 0.5, v3 :: v_dual_min_num_f32 v7, 0.5, v2 +// GFX1250: v_dual_sub_nc_u32 v255, 0.5, v3 :: v_dual_min_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xb0,0x50,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, -1, v4 :: v_dual_min_num_f32 v7, src_scc, v5 +// GFX1250: v_dual_sub_nc_u32 v255, -1, v4 :: v_dual_min_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xb0,0x50,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v4, v255 :: v_dual_mov_b32 v7, v1 +// GFX1250: v_dual_sub_nc_u32 v255, v4, v255 :: v_dual_mov_b32 v7, v1 ; encoding: [0x04,0x81,0x50,0xcf,0x01,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v1, v255 :: v_dual_mov_b32 v7, v255 +// GFX1250: v_dual_sub_nc_u32 v255, v1, v255 :: v_dual_mov_b32 v7, v255 ; encoding: [0x01,0x81,0x50,0xcf,0xff,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v255, v255 :: v_dual_mov_b32 v7, v2 +// GFX1250: v_dual_sub_nc_u32 v255, v255, v255 :: v_dual_mov_b32 v7, v2 ; encoding: [0xff,0x81,0x50,0xcf,0x02,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction 
requires wavesize=32 + +v_dual_sub_nc_u32 v255, v2, v255 :: v_dual_mov_b32 v7, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v2, v255 :: v_dual_mov_b32 v7, v3 ; encoding: [0x02,0x81,0x50,0xcf,0x03,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v3, v255 :: v_dual_mov_b32 v7, v4 +// GFX1250: v_dual_sub_nc_u32 v255, v3, v255 :: v_dual_mov_b32 v7, v4 ; encoding: [0x03,0x81,0x50,0xcf,0x04,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, s105, v255 :: v_dual_mov_b32 v7, s1 +// GFX1250: v_dual_sub_nc_u32 v255, s105, v255 :: v_dual_mov_b32 v7, s1 ; encoding: [0x69,0x80,0x50,0xcf,0x01,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, s1, v255 :: v_dual_mov_b32 v7, s105 +// GFX1250: v_dual_sub_nc_u32 v255, s1, v255 :: v_dual_mov_b32 v7, s105 ; encoding: [0x01,0x80,0x50,0xcf,0x69,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, ttmp15, v255 :: v_dual_mov_b32 v7, vcc_lo +// GFX1250: v_dual_sub_nc_u32 v255, ttmp15, v255 :: v_dual_mov_b32 v7, vcc_lo ; encoding: [0x7b,0x80,0x50,0xcf,0x6a,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, exec_hi, v255 :: v_dual_mov_b32 v7, vcc_hi +// GFX1250: v_dual_sub_nc_u32 v255, exec_hi, v255 :: v_dual_mov_b32 v7, vcc_hi ; encoding: [0x7f,0x80,0x50,0xcf,0x6b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, exec_lo, v255 :: v_dual_mov_b32 v7, ttmp15 +// GFX1250: v_dual_sub_nc_u32 v255, exec_lo, v255 :: v_dual_mov_b32 v7, ttmp15 ; encoding: [0x7e,0x80,0x50,0xcf,0x7b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_sub_nc_u32 v255, m0, v255 :: v_dual_mov_b32 v7, m0 +// GFX1250: v_dual_sub_nc_u32 v255, m0, v255 :: v_dual_mov_b32 v7, m0 ; encoding: [0x7d,0x80,0x50,0xcf,0x7d,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, vcc_hi, v255 :: v_dual_mov_b32 v7, exec_lo +// GFX1250: v_dual_sub_nc_u32 v255, vcc_hi, v255 :: v_dual_mov_b32 v7, exec_lo ; encoding: [0x6b,0x80,0x50,0xcf,0x7e,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, vcc_lo, v255 :: v_dual_mov_b32 v7, exec_hi +// GFX1250: v_dual_sub_nc_u32 v255, vcc_lo, v255 :: v_dual_mov_b32 v7, exec_hi ; encoding: [0x6a,0x80,0x50,0xcf,0x7f,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, src_scc, v255 :: v_dual_mov_b32 v7, -1 +// GFX1250: v_dual_sub_nc_u32 v255, src_scc, v255 :: v_dual_mov_b32 v7, -1 ; encoding: [0xfd,0x80,0x50,0xcf,0xc1,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, 0.5, v3 :: v_dual_mov_b32 v7, 0.5 +// GFX1250: v_dual_sub_nc_u32 v255, 0.5, v3 :: v_dual_mov_b32 v7, 0.5 ; encoding: [0xf0,0x80,0x50,0xcf,0xf0,0x00,0x03,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, -1, v4 :: v_dual_mov_b32 v7, src_scc +// GFX1250: v_dual_sub_nc_u32 v255, -1, v4 :: v_dual_mov_b32 v7, src_scc ; encoding: [0xc1,0x80,0x50,0xcf,0xfd,0x00,0x04,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v7, v1, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v7, v1, v3 ; encoding: [0x04,0x71,0x50,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_sub_nc_u32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v7, v255, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v7, v255, v3 ; encoding: [0x01,0x71,0x50,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v7, v2, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v7, v2, v3 ; encoding: [0xff,0x71,0x50,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v7, v3, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v7, v3, v3 ; encoding: [0x02,0x71,0x50,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v7, v4, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v7, v4, v3 ; encoding: [0x03,0x71,0x50,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v7, s1, v3 +// GFX1250: v_dual_sub_nc_u32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v7, s1, v3 ; encoding: [0x69,0x70,0x50,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v7, s105, v3 +// GFX1250: v_dual_sub_nc_u32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v7, s105, v3 ; encoding: [0x01,0x70,0x50,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_sub_nc_u32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 ; encoding: 
[0x7b,0x70,0x50,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_sub_nc_u32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x70,0x50,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, ttmp15, v3 +// GFX1250: v_dual_sub_nc_u32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x70,0x50,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v7, m0, v3 +// GFX1250: v_dual_sub_nc_u32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v7, m0, v3 ; encoding: [0x7d,0x70,0x50,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_lo, v3 +// GFX1250: v_dual_sub_nc_u32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x70,0x50,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 +// GFX1250: v_dual_sub_nc_u32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x50,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v7, -1, v3 +// GFX1250: v_dual_sub_nc_u32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v7, -1, v3 ; encoding: [0xfd,0x70,0x50,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_sub_nc_u32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 +// GFX1250: v_dual_sub_nc_u32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x50,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 +// GFX1250: v_dual_sub_nc_u32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x50,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v4, v2 :: v_dual_mul_f32 v7, v1, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v4, v2 :: v_dual_mul_f32 v7, v1, v3 ; encoding: [0x04,0x31,0x50,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v1, v2 :: v_dual_mul_f32 v7, v255, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v1, v2 :: v_dual_mul_f32 v7, v255, v3 ; encoding: [0x01,0x31,0x50,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v255, v2 :: v_dual_mul_f32 v7, v2, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v255, v2 :: v_dual_mul_f32 v7, v2, v3 ; encoding: [0xff,0x31,0x50,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v2, v2 :: v_dual_mul_f32 v7, v3, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v2, v2 :: v_dual_mul_f32 v7, v3, v3 ; encoding: [0x02,0x31,0x50,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v3, v2 :: v_dual_mul_f32 v7, v4, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v3, v2 :: v_dual_mul_f32 v7, v4, v3 ; encoding: [0x03,0x31,0x50,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_sub_nc_u32 v255, s105, v2 :: v_dual_mul_f32 v7, s1, v3 +// GFX1250: v_dual_sub_nc_u32 v255, s105, v2 :: v_dual_mul_f32 v7, s1, v3 ; encoding: [0x69,0x30,0x50,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, s1, v2 :: v_dual_mul_f32 v7, s105, v3 +// GFX1250: v_dual_sub_nc_u32 v255, s1, v2 :: v_dual_mul_f32 v7, s105, v3 ; encoding: [0x01,0x30,0x50,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, ttmp15, v2 :: v_dual_mul_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_sub_nc_u32 v255, ttmp15, v2 :: v_dual_mul_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x30,0x50,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, exec_hi, v2 :: v_dual_mul_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_sub_nc_u32 v255, exec_hi, v2 :: v_dual_mul_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x30,0x50,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, exec_lo, v2 :: v_dual_mul_f32 v7, ttmp15, v3 +// GFX1250: v_dual_sub_nc_u32 v255, exec_lo, v2 :: v_dual_mul_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x30,0x50,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, m0, v2 :: v_dual_mul_f32 v7, m0, v3 +// GFX1250: v_dual_sub_nc_u32 v255, m0, v2 :: v_dual_mul_f32 v7, m0, v3 ; encoding: [0x7d,0x30,0x50,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, vcc_hi, v2 :: v_dual_mul_f32 v7, exec_lo, v3 +// GFX1250: v_dual_sub_nc_u32 v255, vcc_hi, v2 :: v_dual_mul_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x30,0x50,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, vcc_lo, v2 :: v_dual_mul_f32 v7, exec_hi, v3 +// GFX1250: v_dual_sub_nc_u32 v255, vcc_lo, v2 :: v_dual_mul_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x30,0x50,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, src_scc, v2 :: v_dual_mul_f32 v7, -1, v3 +// GFX1250: v_dual_sub_nc_u32 v255, src_scc, v2 :: v_dual_mul_f32 v7, -1, v3 ; encoding: [0xfd,0x30,0x50,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, 0.5, v3 :: v_dual_mul_f32 v7, 0.5, v2 +// GFX1250: v_dual_sub_nc_u32 v255, 0.5, v3 :: v_dual_mul_f32 v7, 0.5, v2 ; encoding: [0xf0,0x30,0x50,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, -1, v4 :: v_dual_mul_f32 v7, src_scc, v5 +// GFX1250: v_dual_sub_nc_u32 v255, -1, v4 :: v_dual_mul_f32 v7, src_scc, v5 ; encoding: [0xc1,0x30,0x50,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v4, v2 :: v_dual_sub_f32 v7, v1, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v4, v2 :: v_dual_sub_f32 v7, v1, v3 ; encoding: [0x04,0x51,0x50,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v1, v2 :: v_dual_sub_f32 v7, v255, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v1, v2 :: v_dual_sub_f32 v7, v255, v3 ; encoding: [0x01,0x51,0x50,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v255, v2 :: v_dual_sub_f32 v7, v2, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v255, v2 :: v_dual_sub_f32 v7, v2, v3 ; encoding: [0xff,0x51,0x50,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// 
W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v2, v2 :: v_dual_sub_f32 v7, v3, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v2, v2 :: v_dual_sub_f32 v7, v3, v3 ; encoding: [0x02,0x51,0x50,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v3, v2 :: v_dual_sub_f32 v7, v4, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v3, v2 :: v_dual_sub_f32 v7, v4, v3 ; encoding: [0x03,0x51,0x50,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, s105, v2 :: v_dual_sub_f32 v7, s1, v3 +// GFX1250: v_dual_sub_nc_u32 v255, s105, v2 :: v_dual_sub_f32 v7, s1, v3 ; encoding: [0x69,0x50,0x50,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, s1, v2 :: v_dual_sub_f32 v7, s105, v3 +// GFX1250: v_dual_sub_nc_u32 v255, s1, v2 :: v_dual_sub_f32 v7, s105, v3 ; encoding: [0x01,0x50,0x50,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, ttmp15, v2 :: v_dual_sub_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_sub_nc_u32 v255, ttmp15, v2 :: v_dual_sub_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x50,0x50,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, exec_hi, v2 :: v_dual_sub_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_sub_nc_u32 v255, exec_hi, v2 :: v_dual_sub_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x50,0x50,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, exec_lo, v2 :: v_dual_sub_f32 v7, ttmp15, v3 +// GFX1250: v_dual_sub_nc_u32 v255, exec_lo, v2 :: v_dual_sub_f32 v7, ttmp15, v3 ; encoding: 
[0x7e,0x50,0x50,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, m0, v2 :: v_dual_sub_f32 v7, m0, v3 +// GFX1250: v_dual_sub_nc_u32 v255, m0, v2 :: v_dual_sub_f32 v7, m0, v3 ; encoding: [0x7d,0x50,0x50,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, vcc_hi, v2 :: v_dual_sub_f32 v7, exec_lo, v3 +// GFX1250: v_dual_sub_nc_u32 v255, vcc_hi, v2 :: v_dual_sub_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x50,0x50,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, vcc_lo, v2 :: v_dual_sub_f32 v7, exec_hi, v3 +// GFX1250: v_dual_sub_nc_u32 v255, vcc_lo, v2 :: v_dual_sub_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x50,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, src_scc, v2 :: v_dual_sub_f32 v7, -1, v3 +// GFX1250: v_dual_sub_nc_u32 v255, src_scc, v2 :: v_dual_sub_f32 v7, -1, v3 ; encoding: [0xfd,0x50,0x50,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, 0.5, v3 :: v_dual_sub_f32 v7, 0.5, v2 +// GFX1250: v_dual_sub_nc_u32 v255, 0.5, v3 :: v_dual_sub_f32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x50,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, -1, v4 :: v_dual_sub_f32 v7, src_scc, v5 +// GFX1250: v_dual_sub_nc_u32 v255, -1, v4 :: v_dual_sub_f32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x50,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v4, v2 :: v_dual_subrev_f32 v7, v1, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v4, v2 :: v_dual_subrev_f32 v7, 
v1, v3 ; encoding: [0x04,0x61,0x50,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v1, v2 :: v_dual_subrev_f32 v7, v255, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v1, v2 :: v_dual_subrev_f32 v7, v255, v3 ; encoding: [0x01,0x61,0x50,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v255, v2 :: v_dual_subrev_f32 v7, v2, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v255, v2 :: v_dual_subrev_f32 v7, v2, v3 ; encoding: [0xff,0x61,0x50,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v2, v2 :: v_dual_subrev_f32 v7, v3, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v2, v2 :: v_dual_subrev_f32 v7, v3, v3 ; encoding: [0x02,0x61,0x50,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v3, v2 :: v_dual_subrev_f32 v7, v4, v3 +// GFX1250: v_dual_sub_nc_u32 v255, v3, v2 :: v_dual_subrev_f32 v7, v4, v3 ; encoding: [0x03,0x61,0x50,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, s105, v2 :: v_dual_subrev_f32 v7, s1, v3 +// GFX1250: v_dual_sub_nc_u32 v255, s105, v2 :: v_dual_subrev_f32 v7, s1, v3 ; encoding: [0x69,0x60,0x50,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, s1, v2 :: v_dual_subrev_f32 v7, s105, v3 +// GFX1250: v_dual_sub_nc_u32 v255, s1, v2 :: v_dual_subrev_f32 v7, s105, v3 ; encoding: [0x01,0x60,0x50,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, ttmp15, v2 :: v_dual_subrev_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_sub_nc_u32 v255, ttmp15, v2 
:: v_dual_subrev_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x60,0x50,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, exec_hi, v2 :: v_dual_subrev_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_sub_nc_u32 v255, exec_hi, v2 :: v_dual_subrev_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x60,0x50,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, exec_lo, v2 :: v_dual_subrev_f32 v7, ttmp15, v3 +// GFX1250: v_dual_sub_nc_u32 v255, exec_lo, v2 :: v_dual_subrev_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x60,0x50,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, m0, v2 :: v_dual_subrev_f32 v7, m0, v3 +// GFX1250: v_dual_sub_nc_u32 v255, m0, v2 :: v_dual_subrev_f32 v7, m0, v3 ; encoding: [0x7d,0x60,0x50,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v7, exec_lo, v3 +// GFX1250: v_dual_sub_nc_u32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x60,0x50,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v7, exec_hi, v3 +// GFX1250: v_dual_sub_nc_u32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x50,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, src_scc, v2 :: v_dual_subrev_f32 v7, -1, v3 +// GFX1250: v_dual_sub_nc_u32 v255, src_scc, v2 :: v_dual_subrev_f32 v7, -1, v3 ; encoding: [0xfd,0x60,0x50,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, 
0.5, v3 :: v_dual_subrev_f32 v7, 0.5, v2 +// GFX1250: v_dual_sub_nc_u32 v255, 0.5, v3 :: v_dual_subrev_f32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x50,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, -1, v4 :: v_dual_subrev_f32 v7, src_scc, v5 +// GFX1250: v_dual_sub_nc_u32 v255, -1, v4 :: v_dual_subrev_f32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x50,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v254, v4, v2 :: v_dual_fma_f32 v7, v1, v3, v4 +// GFX1250: v_dual_sub_nc_u32 v254, v4, v2 :: v_dual_fma_f32 v7, v1, v3, v4 ; encoding: [0x04,0x31,0x51,0xcf,0x01,0x01,0x02,0x00,0xfe,0x03,0x04,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v254, v4, v2 :: v_dual_sub_nc_u32 v9, v1, v13 +// GFX1250: v_dual_sub_nc_u32 v254, v4, v2 :: v_dual_sub_nc_u32 v9, v1, v13 ; encoding: [0x04,0x41,0x51,0xcf,0x01,0x01,0x02,0x00,0xfe,0x0d,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v254, v4, v2 :: v_dual_lshrrev_b32 v9, v1, v13 +// GFX1250: v_dual_sub_nc_u32 v254, v4, v2 :: v_dual_lshrrev_b32 v9, v1, v13 ; encoding: [0x04,0x51,0x51,0xcf,0x01,0x01,0x02,0x00,0xfe,0x0d,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v254, v4, v2 :: v_dual_ashrrev_i32 v9, v1, v13 +// GFX1250: v_dual_sub_nc_u32 v254, v4, v2 :: v_dual_ashrrev_i32 v9, v1, v13 ; encoding: [0x04,0x61,0x51,0xcf,0x01,0x01,0x02,0x00,0xfe,0x0d,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v254, v4, v2 :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0x86 +// GFX1250: v_dual_sub_nc_u32 v254, v4, v2 :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0x86 ; encoding: [0x04,0x21,0x51,0xcf,0x01,0x01,0x02,0x00,0xfe,0x03,0x86,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction 
requires wavesize=32 + +v_dual_ashrrev_i32 v254, v4, v2 :: v_dual_add_f32 v7, v1, v3 +// GFX1250: v_dual_ashrrev_i32 v254, v4, v2 :: v_dual_add_f32 v7, v1, v3 ; encoding: [0x04,0x41,0x58,0xcf,0x01,0x01,0x02,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v1, v2 :: v_dual_add_f32 v7, v255, v3 +// GFX1250: v_dual_ashrrev_i32 v255, v1, v2 :: v_dual_add_f32 v7, v255, v3 ; encoding: [0x01,0x41,0x58,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v255, v2 :: v_dual_add_f32 v7, v2, v3 +// GFX1250: v_dual_ashrrev_i32 v255, v255, v2 :: v_dual_add_f32 v7, v2, v3 ; encoding: [0xff,0x41,0x58,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v2, v2 :: v_dual_add_f32 v7, v3, v3 +// GFX1250: v_dual_ashrrev_i32 v255, v2, v2 :: v_dual_add_f32 v7, v3, v3 ; encoding: [0x02,0x41,0x58,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v3, v2 :: v_dual_add_f32 v7, v4, v3 +// GFX1250: v_dual_ashrrev_i32 v255, v3, v2 :: v_dual_add_f32 v7, v4, v3 ; encoding: [0x03,0x41,0x58,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, s105, v2 :: v_dual_add_f32 v7, s1, v3 +// GFX1250: v_dual_ashrrev_i32 v255, s105, v2 :: v_dual_add_f32 v7, s1, v3 ; encoding: [0x69,0x40,0x58,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, s1, v2 :: v_dual_add_f32 v7, s105, v3 +// GFX1250: v_dual_ashrrev_i32 v255, s1, v2 :: v_dual_add_f32 v7, s105, v3 ; encoding: [0x01,0x40,0x58,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_ashrrev_i32 v255, ttmp15, v2 :: v_dual_add_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_ashrrev_i32 v255, ttmp15, v2 :: v_dual_add_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x40,0x58,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, exec_hi, v2 :: v_dual_add_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_ashrrev_i32 v255, exec_hi, v2 :: v_dual_add_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x40,0x58,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, exec_lo, v2 :: v_dual_add_f32 v7, ttmp15, v3 +// GFX1250: v_dual_ashrrev_i32 v255, exec_lo, v2 :: v_dual_add_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x40,0x58,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, m0, v2 :: v_dual_add_f32 v7, m0, v3 +// GFX1250: v_dual_ashrrev_i32 v255, m0, v2 :: v_dual_add_f32 v7, m0, v3 ; encoding: [0x7d,0x40,0x58,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, vcc_hi, v2 :: v_dual_add_f32 v7, exec_lo, v3 +// GFX1250: v_dual_ashrrev_i32 v255, vcc_hi, v2 :: v_dual_add_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x40,0x58,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, vcc_lo, v2 :: v_dual_add_f32 v7, exec_hi, v3 +// GFX1250: v_dual_ashrrev_i32 v255, vcc_lo, v2 :: v_dual_add_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x58,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, src_scc, v2 :: v_dual_add_f32 v7, -1, v3 +// GFX1250: v_dual_ashrrev_i32 v255, src_scc, v2 :: v_dual_add_f32 v7, -1, v3 ; encoding: 
[0xfd,0x40,0x58,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, 0.5, v3 :: v_dual_add_f32 v7, 0.5, v2 +// GFX1250: v_dual_ashrrev_i32 v255, 0.5, v3 :: v_dual_add_f32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x58,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, -1, v4 :: v_dual_add_f32 v7, src_scc, v5 +// GFX1250: v_dual_ashrrev_i32 v255, -1, v4 :: v_dual_add_f32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x58,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v4, v2 :: v_dual_add_nc_u32 v7, v1, v3 +// GFX1250: v_dual_ashrrev_i32 v255, v4, v2 :: v_dual_add_nc_u32 v7, v1, v3 ; encoding: [0x04,0x01,0x59,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v1, v2 :: v_dual_add_nc_u32 v7, v255, v3 +// GFX1250: v_dual_ashrrev_i32 v255, v1, v2 :: v_dual_add_nc_u32 v7, v255, v3 ; encoding: [0x01,0x01,0x59,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v255, v2 :: v_dual_add_nc_u32 v7, v2, v3 +// GFX1250: v_dual_ashrrev_i32 v255, v255, v2 :: v_dual_add_nc_u32 v7, v2, v3 ; encoding: [0xff,0x01,0x59,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v2, v2 :: v_dual_add_nc_u32 v7, v3, v3 +// GFX1250: v_dual_ashrrev_i32 v255, v2, v2 :: v_dual_add_nc_u32 v7, v3, v3 ; encoding: [0x02,0x01,0x59,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v3, v2 :: v_dual_add_nc_u32 v7, v4, v3 +// GFX1250: v_dual_ashrrev_i32 v255, v3, v2 :: v_dual_add_nc_u32 v7, 
v4, v3 ; encoding: [0x03,0x01,0x59,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, s105, v2 :: v_dual_add_nc_u32 v7, s1, v3 +// GFX1250: v_dual_ashrrev_i32 v255, s105, v2 :: v_dual_add_nc_u32 v7, s1, v3 ; encoding: [0x69,0x00,0x59,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, s1, v2 :: v_dual_add_nc_u32 v7, s105, v3 +// GFX1250: v_dual_ashrrev_i32 v255, s1, v2 :: v_dual_add_nc_u32 v7, s105, v3 ; encoding: [0x01,0x00,0x59,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v7, vcc_lo, v3 +// GFX1250: v_dual_ashrrev_i32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x59,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v7, vcc_hi, v3 +// GFX1250: v_dual_ashrrev_i32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x59,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v7, ttmp15, v3 +// GFX1250: v_dual_ashrrev_i32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x59,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, m0, v2 :: v_dual_add_nc_u32 v7, m0, v3 +// GFX1250: v_dual_ashrrev_i32 v255, m0, v2 :: v_dual_add_nc_u32 v7, m0, v3 ; encoding: [0x7d,0x00,0x59,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v7, 
exec_lo, v3 +// GFX1250: v_dual_ashrrev_i32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x59,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v7, exec_hi, v3 +// GFX1250: v_dual_ashrrev_i32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x59,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, src_scc, v2 :: v_dual_add_nc_u32 v7, -1, v3 +// GFX1250: v_dual_ashrrev_i32 v255, src_scc, v2 :: v_dual_add_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x00,0x59,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, 0.5, v3 :: v_dual_add_nc_u32 v7, 0.5, v2 +// GFX1250: v_dual_ashrrev_i32 v255, 0.5, v3 :: v_dual_add_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x59,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, -1, v4 :: v_dual_add_nc_u32 v7, src_scc, v5 +// GFX1250: v_dual_ashrrev_i32 v255, -1, v4 :: v_dual_add_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x59,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo +// GFX1250: v_dual_ashrrev_i32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo ; encoding: [0x04,0x91,0x58,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v1, v2 :: v_dual_cndmask_b32 v7, v255, v3, vcc_lo +// GFX1250: v_dual_ashrrev_i32 v255, v1, v2 :: v_dual_cndmask_b32 v7, v255, v3, vcc_lo ; encoding: [0x01,0x91,0x58,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v255, v2 :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo +// GFX1250: v_dual_ashrrev_i32 v255, v255, v2 :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo ; encoding: [0xff,0x91,0x58,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v2, v2 :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo +// GFX1250: v_dual_ashrrev_i32 v255, v2, v2 :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo ; encoding: [0x02,0x91,0x58,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v3, v2 :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo +// GFX1250: v_dual_ashrrev_i32 v255, v3, v2 :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo ; encoding: [0x03,0x91,0x58,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, s105, v2 :: v_dual_cndmask_b32 v7, s105, v3, vcc_lo +// GFX1250: v_dual_ashrrev_i32 v255, s105, v2 :: v_dual_cndmask_b32 v7, s105, v3, vcc_lo ; encoding: [0x69,0x90,0x58,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, s1, v2 :: v_dual_cndmask_b32 v7, s1, v3, vcc_lo +// GFX1250: v_dual_ashrrev_i32 v255, s1, v2 :: v_dual_cndmask_b32 v7, s1, v3, vcc_lo ; encoding: [0x01,0x90,0x58,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v7, ttmp15, v3, vcc_lo +// GFX1250: v_dual_ashrrev_i32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v7, ttmp15, v3, vcc_lo ; encoding: [0x7b,0x90,0x58,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v7, exec_hi, v3, vcc_lo 
+// GFX1250: v_dual_ashrrev_i32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v7, exec_hi, v3, vcc_lo ; encoding: [0x7f,0x90,0x58,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v7, exec_lo, v3, vcc_lo +// GFX1250: v_dual_ashrrev_i32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v7, exec_lo, v3, vcc_lo ; encoding: [0x7e,0x90,0x58,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, m0, v2 :: v_dual_cndmask_b32 v7, m0, v3, vcc_lo +// GFX1250: v_dual_ashrrev_i32 v255, m0, v2 :: v_dual_cndmask_b32 v7, m0, v3, vcc_lo ; encoding: [0x7d,0x90,0x58,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v7, vcc_hi, v3, vcc_lo +// GFX1250: v_dual_ashrrev_i32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v7, vcc_hi, v3, vcc_lo ; encoding: [0x6b,0x90,0x58,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo +// GFX1250: v_dual_ashrrev_i32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo ; encoding: [0x6a,0x90,0x58,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, src_scc, v2 :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo +// GFX1250: v_dual_ashrrev_i32 v255, src_scc, v2 :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo ; encoding: [0xfd,0x90,0x58,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, 0.5, v3 :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo +// GFX1250: v_dual_ashrrev_i32 v255, 0.5, v3 :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo ; 
encoding: [0xf0,0x90,0x58,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, -1, v4 :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo +// GFX1250: v_dual_ashrrev_i32 v255, -1, v4 :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo ; encoding: [0xc1,0x90,0x58,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v4, v2 :: v_dual_fmac_f32 v7, v1, v3 +// GFX1250: v_dual_ashrrev_i32 v255, v4, v2 :: v_dual_fmac_f32 v7, v1, v3 ; encoding: [0x04,0x01,0x58,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v1, v2 :: v_dual_fmac_f32 v7, v255, v3 +// GFX1250: v_dual_ashrrev_i32 v255, v1, v2 :: v_dual_fmac_f32 v7, v255, v3 ; encoding: [0x01,0x01,0x58,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v255, v2 :: v_dual_fmac_f32 v7, v2, v3 +// GFX1250: v_dual_ashrrev_i32 v255, v255, v2 :: v_dual_fmac_f32 v7, v2, v3 ; encoding: [0xff,0x01,0x58,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v2, v2 :: v_dual_fmac_f32 v7, v3, v3 +// GFX1250: v_dual_ashrrev_i32 v255, v2, v2 :: v_dual_fmac_f32 v7, v3, v3 ; encoding: [0x02,0x01,0x58,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v3, v2 :: v_dual_fmac_f32 v7, v4, v3 +// GFX1250: v_dual_ashrrev_i32 v255, v3, v2 :: v_dual_fmac_f32 v7, v4, v3 ; encoding: [0x03,0x01,0x58,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, s105, v2 :: v_dual_fmac_f32 v7, s1, v3 +// GFX1250: v_dual_ashrrev_i32 v255, s105, v2 :: 
v_dual_fmac_f32 v7, s1, v3 ; encoding: [0x69,0x00,0x58,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, s1, v2 :: v_dual_fmac_f32 v7, s105, v3 +// GFX1250: v_dual_ashrrev_i32 v255, s1, v2 :: v_dual_fmac_f32 v7, s105, v3 ; encoding: [0x01,0x00,0x58,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, ttmp15, v2 :: v_dual_fmac_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_ashrrev_i32 v255, ttmp15, v2 :: v_dual_fmac_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x58,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, exec_hi, v2 :: v_dual_fmac_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_ashrrev_i32 v255, exec_hi, v2 :: v_dual_fmac_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x58,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, exec_lo, v2 :: v_dual_fmac_f32 v7, ttmp15, v3 +// GFX1250: v_dual_ashrrev_i32 v255, exec_lo, v2 :: v_dual_fmac_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x58,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, m0, v2 :: v_dual_fmac_f32 v7, m0, v3 +// GFX1250: v_dual_ashrrev_i32 v255, m0, v2 :: v_dual_fmac_f32 v7, m0, v3 ; encoding: [0x7d,0x00,0x58,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v7, exec_lo, v3 +// GFX1250: v_dual_ashrrev_i32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x58,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, vcc_lo, v2 :: v_dual_fmac_f32 
v7, exec_hi, v3 +// GFX1250: v_dual_ashrrev_i32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x58,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, src_scc, v2 :: v_dual_fmac_f32 v7, -1, v3 +// GFX1250: v_dual_ashrrev_i32 v255, src_scc, v2 :: v_dual_fmac_f32 v7, -1, v3 ; encoding: [0xfd,0x00,0x58,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, 0.5, v3 :: v_dual_fmac_f32 v7, 0.5, v2 +// GFX1250: v_dual_ashrrev_i32 v255, 0.5, v3 :: v_dual_fmac_f32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x58,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, -1, v4 :: v_dual_fmac_f32 v7, src_scc, v5 +// GFX1250: v_dual_ashrrev_i32 v255, -1, v4 :: v_dual_fmac_f32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x58,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v4, v2 :: v_dual_lshlrev_b32 v7, v1, v3 +// GFX1250: v_dual_ashrrev_i32 v255, v4, v2 :: v_dual_lshlrev_b32 v7, v1, v3 ; encoding: [0x04,0x11,0x59,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v1, v2 :: v_dual_lshlrev_b32 v7, v255, v3 +// GFX1250: v_dual_ashrrev_i32 v255, v1, v2 :: v_dual_lshlrev_b32 v7, v255, v3 ; encoding: [0x01,0x11,0x59,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v255, v2 :: v_dual_lshlrev_b32 v7, v2, v3 +// GFX1250: v_dual_ashrrev_i32 v255, v255, v2 :: v_dual_lshlrev_b32 v7, v2, v3 ; encoding: [0xff,0x11,0x59,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_ashrrev_i32 v255, v2, v2 :: v_dual_lshlrev_b32 v7, v3, v3 +// GFX1250: v_dual_ashrrev_i32 v255, v2, v2 :: v_dual_lshlrev_b32 v7, v3, v3 ; encoding: [0x02,0x11,0x59,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v3, v2 :: v_dual_lshlrev_b32 v7, v4, v3 +// GFX1250: v_dual_ashrrev_i32 v255, v3, v2 :: v_dual_lshlrev_b32 v7, v4, v3 ; encoding: [0x03,0x11,0x59,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, s105, v2 :: v_dual_lshlrev_b32 v7, s1, v3 +// GFX1250: v_dual_ashrrev_i32 v255, s105, v2 :: v_dual_lshlrev_b32 v7, s1, v3 ; encoding: [0x69,0x10,0x59,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, s1, v2 :: v_dual_lshlrev_b32 v7, s105, v3 +// GFX1250: v_dual_ashrrev_i32 v255, s1, v2 :: v_dual_lshlrev_b32 v7, s105, v3 ; encoding: [0x01,0x10,0x59,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v7, vcc_lo, v3 +// GFX1250: v_dual_ashrrev_i32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v7, vcc_lo, v3 ; encoding: [0x7b,0x10,0x59,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v7, vcc_hi, v3 +// GFX1250: v_dual_ashrrev_i32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v7, vcc_hi, v3 ; encoding: [0x7f,0x10,0x59,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v7, ttmp15, v3 +// GFX1250: v_dual_ashrrev_i32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v7, ttmp15, v3 ; encoding: 
[0x7e,0x10,0x59,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, m0, v2 :: v_dual_lshlrev_b32 v7, m0, v3 +// GFX1250: v_dual_ashrrev_i32 v255, m0, v2 :: v_dual_lshlrev_b32 v7, m0, v3 ; encoding: [0x7d,0x10,0x59,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v7, exec_lo, v3 +// GFX1250: v_dual_ashrrev_i32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v7, exec_lo, v3 ; encoding: [0x6b,0x10,0x59,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v7, exec_hi, v3 +// GFX1250: v_dual_ashrrev_i32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x10,0x59,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v7, -1, v3 +// GFX1250: v_dual_ashrrev_i32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v7, -1, v3 ; encoding: [0xfd,0x10,0x59,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v7, 0.5, v2 +// GFX1250: v_dual_ashrrev_i32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x10,0x59,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, -1, v4 :: v_dual_lshlrev_b32 v7, src_scc, v5 +// GFX1250: v_dual_ashrrev_i32 v255, -1, v4 :: v_dual_lshlrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x10,0x59,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v4, v2 :: v_dual_max_num_f32 v7, v1, v3 +// 
GFX1250: v_dual_ashrrev_i32 v255, v4, v2 :: v_dual_max_num_f32 v7, v1, v3 ; encoding: [0x04,0xa1,0x58,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v1, v2 :: v_dual_max_num_f32 v7, v255, v3 +// GFX1250: v_dual_ashrrev_i32 v255, v1, v2 :: v_dual_max_num_f32 v7, v255, v3 ; encoding: [0x01,0xa1,0x58,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v255, v2 :: v_dual_max_num_f32 v7, v2, v3 +// GFX1250: v_dual_ashrrev_i32 v255, v255, v2 :: v_dual_max_num_f32 v7, v2, v3 ; encoding: [0xff,0xa1,0x58,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v2, v2 :: v_dual_max_num_f32 v7, v3, v3 +// GFX1250: v_dual_ashrrev_i32 v255, v2, v2 :: v_dual_max_num_f32 v7, v3, v3 ; encoding: [0x02,0xa1,0x58,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v3, v2 :: v_dual_max_num_f32 v7, v4, v3 +// GFX1250: v_dual_ashrrev_i32 v255, v3, v2 :: v_dual_max_num_f32 v7, v4, v3 ; encoding: [0x03,0xa1,0x58,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, s105, v2 :: v_dual_max_num_f32 v7, s1, v3 +// GFX1250: v_dual_ashrrev_i32 v255, s105, v2 :: v_dual_max_num_f32 v7, s1, v3 ; encoding: [0x69,0xa0,0x58,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, s1, v2 :: v_dual_max_num_f32 v7, s105, v3 +// GFX1250: v_dual_ashrrev_i32 v255, s1, v2 :: v_dual_max_num_f32 v7, s105, v3 ; encoding: [0x01,0xa0,0x58,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, 
ttmp15, v2 :: v_dual_max_num_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_ashrrev_i32 v255, ttmp15, v2 :: v_dual_max_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xa0,0x58,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, exec_hi, v2 :: v_dual_max_num_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_ashrrev_i32 v255, exec_hi, v2 :: v_dual_max_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xa0,0x58,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, exec_lo, v2 :: v_dual_max_num_f32 v7, ttmp15, v3 +// GFX1250: v_dual_ashrrev_i32 v255, exec_lo, v2 :: v_dual_max_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xa0,0x58,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, m0, v2 :: v_dual_max_num_f32 v7, m0, v3 +// GFX1250: v_dual_ashrrev_i32 v255, m0, v2 :: v_dual_max_num_f32 v7, m0, v3 ; encoding: [0x7d,0xa0,0x58,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v7, exec_lo, v3 +// GFX1250: v_dual_ashrrev_i32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xa0,0x58,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v7, exec_hi, v3 +// GFX1250: v_dual_ashrrev_i32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xa0,0x58,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, src_scc, v2 :: v_dual_max_num_f32 v7, -1, v3 +// GFX1250: v_dual_ashrrev_i32 v255, src_scc, v2 :: v_dual_max_num_f32 v7, -1, v3 ; encoding: 
[0xfd,0xa0,0x58,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, 0.5, v3 :: v_dual_max_num_f32 v7, 0.5, v2 +// GFX1250: v_dual_ashrrev_i32 v255, 0.5, v3 :: v_dual_max_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xa0,0x58,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, -1, v4 :: v_dual_max_num_f32 v7, src_scc, v5 +// GFX1250: v_dual_ashrrev_i32 v255, -1, v4 :: v_dual_max_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xa0,0x58,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v4, v2 :: v_dual_min_num_f32 v7, v1, v3 +// GFX1250: v_dual_ashrrev_i32 v255, v4, v2 :: v_dual_min_num_f32 v7, v1, v3 ; encoding: [0x04,0xb1,0x58,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v1, v2 :: v_dual_min_num_f32 v7, v255, v3 +// GFX1250: v_dual_ashrrev_i32 v255, v1, v2 :: v_dual_min_num_f32 v7, v255, v3 ; encoding: [0x01,0xb1,0x58,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v255, v2 :: v_dual_min_num_f32 v7, v2, v3 +// GFX1250: v_dual_ashrrev_i32 v255, v255, v2 :: v_dual_min_num_f32 v7, v2, v3 ; encoding: [0xff,0xb1,0x58,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v2, v2 :: v_dual_min_num_f32 v7, v3, v3 +// GFX1250: v_dual_ashrrev_i32 v255, v2, v2 :: v_dual_min_num_f32 v7, v3, v3 ; encoding: [0x02,0xb1,0x58,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v3, v2 :: v_dual_min_num_f32 v7, v4, v3 +// GFX1250: v_dual_ashrrev_i32 v255, v3, v2 
:: v_dual_min_num_f32 v7, v4, v3 ; encoding: [0x03,0xb1,0x58,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, s105, v2 :: v_dual_min_num_f32 v7, s1, v3 +// GFX1250: v_dual_ashrrev_i32 v255, s105, v2 :: v_dual_min_num_f32 v7, s1, v3 ; encoding: [0x69,0xb0,0x58,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, s1, v2 :: v_dual_min_num_f32 v7, s105, v3 +// GFX1250: v_dual_ashrrev_i32 v255, s1, v2 :: v_dual_min_num_f32 v7, s105, v3 ; encoding: [0x01,0xb0,0x58,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, ttmp15, v2 :: v_dual_min_num_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_ashrrev_i32 v255, ttmp15, v2 :: v_dual_min_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xb0,0x58,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, exec_hi, v2 :: v_dual_min_num_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_ashrrev_i32 v255, exec_hi, v2 :: v_dual_min_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xb0,0x58,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, exec_lo, v2 :: v_dual_min_num_f32 v7, ttmp15, v3 +// GFX1250: v_dual_ashrrev_i32 v255, exec_lo, v2 :: v_dual_min_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xb0,0x58,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, m0, v2 :: v_dual_min_num_f32 v7, m0, v3 +// GFX1250: v_dual_ashrrev_i32 v255, m0, v2 :: v_dual_min_num_f32 v7, m0, v3 ; encoding: [0x7d,0xb0,0x58,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, 
vcc_hi, v2 :: v_dual_min_num_f32 v7, exec_lo, v3 +// GFX1250: v_dual_ashrrev_i32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xb0,0x58,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v7, exec_hi, v3 +// GFX1250: v_dual_ashrrev_i32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xb0,0x58,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, src_scc, v2 :: v_dual_min_num_f32 v7, -1, v3 +// GFX1250: v_dual_ashrrev_i32 v255, src_scc, v2 :: v_dual_min_num_f32 v7, -1, v3 ; encoding: [0xfd,0xb0,0x58,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, 0.5, v3 :: v_dual_min_num_f32 v7, 0.5, v2 +// GFX1250: v_dual_ashrrev_i32 v255, 0.5, v3 :: v_dual_min_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xb0,0x58,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, -1, v4 :: v_dual_min_num_f32 v7, src_scc, v5 +// GFX1250: v_dual_ashrrev_i32 v255, -1, v4 :: v_dual_min_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xb0,0x58,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v4, v255 :: v_dual_mov_b32 v7, v1 +// GFX1250: v_dual_ashrrev_i32 v255, v4, v255 :: v_dual_mov_b32 v7, v1 ; encoding: [0x04,0x81,0x58,0xcf,0x01,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v1, v255 :: v_dual_mov_b32 v7, v255 +// GFX1250: v_dual_ashrrev_i32 v255, v1, v255 :: v_dual_mov_b32 v7, v255 ; encoding: [0x01,0x81,0x58,0xcf,0xff,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: 
error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v255, v255 :: v_dual_mov_b32 v7, v2 +// GFX1250: v_dual_ashrrev_i32 v255, v255, v255 :: v_dual_mov_b32 v7, v2 ; encoding: [0xff,0x81,0x58,0xcf,0x02,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v2, v255 :: v_dual_mov_b32 v7, v3 +// GFX1250: v_dual_ashrrev_i32 v255, v2, v255 :: v_dual_mov_b32 v7, v3 ; encoding: [0x02,0x81,0x58,0xcf,0x03,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v3, v255 :: v_dual_mov_b32 v7, v4 +// GFX1250: v_dual_ashrrev_i32 v255, v3, v255 :: v_dual_mov_b32 v7, v4 ; encoding: [0x03,0x81,0x58,0xcf,0x04,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, s105, v255 :: v_dual_mov_b32 v7, s1 +// GFX1250: v_dual_ashrrev_i32 v255, s105, v255 :: v_dual_mov_b32 v7, s1 ; encoding: [0x69,0x80,0x58,0xcf,0x01,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, s1, v255 :: v_dual_mov_b32 v7, s105 +// GFX1250: v_dual_ashrrev_i32 v255, s1, v255 :: v_dual_mov_b32 v7, s105 ; encoding: [0x01,0x80,0x58,0xcf,0x69,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, ttmp15, v255 :: v_dual_mov_b32 v7, vcc_lo +// GFX1250: v_dual_ashrrev_i32 v255, ttmp15, v255 :: v_dual_mov_b32 v7, vcc_lo ; encoding: [0x7b,0x80,0x58,0xcf,0x6a,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, exec_hi, v255 :: v_dual_mov_b32 v7, vcc_hi +// GFX1250: v_dual_ashrrev_i32 v255, exec_hi, v255 :: v_dual_mov_b32 v7, vcc_hi ; encoding: [0x7f,0x80,0x58,0xcf,0x6b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, exec_lo, v255 :: v_dual_mov_b32 v7, ttmp15 +// GFX1250: v_dual_ashrrev_i32 v255, exec_lo, v255 :: v_dual_mov_b32 v7, ttmp15 ; encoding: [0x7e,0x80,0x58,0xcf,0x7b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, m0, v255 :: v_dual_mov_b32 v7, m0 +// GFX1250: v_dual_ashrrev_i32 v255, m0, v255 :: v_dual_mov_b32 v7, m0 ; encoding: [0x7d,0x80,0x58,0xcf,0x7d,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, vcc_hi, v255 :: v_dual_mov_b32 v7, exec_lo +// GFX1250: v_dual_ashrrev_i32 v255, vcc_hi, v255 :: v_dual_mov_b32 v7, exec_lo ; encoding: [0x6b,0x80,0x58,0xcf,0x7e,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, vcc_lo, v255 :: v_dual_mov_b32 v7, exec_hi +// GFX1250: v_dual_ashrrev_i32 v255, vcc_lo, v255 :: v_dual_mov_b32 v7, exec_hi ; encoding: [0x6a,0x80,0x58,0xcf,0x7f,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, src_scc, v255 :: v_dual_mov_b32 v7, -1 +// GFX1250: v_dual_ashrrev_i32 v255, src_scc, v255 :: v_dual_mov_b32 v7, -1 ; encoding: [0xfd,0x80,0x58,0xcf,0xc1,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, 0.5, v3 :: v_dual_mov_b32 v7, 0.5 +// GFX1250: v_dual_ashrrev_i32 v255, 0.5, v3 :: v_dual_mov_b32 v7, 0.5 ; encoding: [0xf0,0x80,0x58,0xcf,0xf0,0x00,0x03,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, -1, v4 :: v_dual_mov_b32 v7, src_scc +// GFX1250: v_dual_ashrrev_i32 v255, -1, v4 :: v_dual_mov_b32 v7, src_scc ; encoding: [0xc1,0x80,0x58,0xcf,0xfd,0x00,0x04,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v7, v1, v3 +// GFX1250: v_dual_ashrrev_i32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v7, v1, v3 ; encoding: [0x04,0x71,0x58,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v7, v255, v3 +// GFX1250: v_dual_ashrrev_i32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v7, v255, v3 ; encoding: [0x01,0x71,0x58,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v7, v2, v3 +// GFX1250: v_dual_ashrrev_i32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v7, v2, v3 ; encoding: [0xff,0x71,0x58,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v7, v3, v3 +// GFX1250: v_dual_ashrrev_i32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v7, v3, v3 ; encoding: [0x02,0x71,0x58,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v7, v4, v3 +// GFX1250: v_dual_ashrrev_i32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v7, v4, v3 ; encoding: [0x03,0x71,0x58,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v7, s1, v3 +// GFX1250: v_dual_ashrrev_i32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v7, s1, v3 ; encoding: [0x69,0x70,0x58,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v7, s105, v3 +// GFX1250: v_dual_ashrrev_i32 v255, s1, v2 :: 
v_dual_mul_dx9_zero_f32 v7, s105, v3 ; encoding: [0x01,0x70,0x58,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_ashrrev_i32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x70,0x58,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_ashrrev_i32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x70,0x58,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, ttmp15, v3 +// GFX1250: v_dual_ashrrev_i32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x70,0x58,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v7, m0, v3 +// GFX1250: v_dual_ashrrev_i32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v7, m0, v3 ; encoding: [0x7d,0x70,0x58,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_lo, v3 +// GFX1250: v_dual_ashrrev_i32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x70,0x58,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 +// GFX1250: v_dual_ashrrev_i32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x58,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// 
W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v7, -1, v3 +// GFX1250: v_dual_ashrrev_i32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v7, -1, v3 ; encoding: [0xfd,0x70,0x58,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 +// GFX1250: v_dual_ashrrev_i32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x58,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 +// GFX1250: v_dual_ashrrev_i32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x58,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v4, v2 :: v_dual_mul_f32 v7, v1, v3 +// GFX1250: v_dual_ashrrev_i32 v255, v4, v2 :: v_dual_mul_f32 v7, v1, v3 ; encoding: [0x04,0x31,0x58,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v1, v2 :: v_dual_mul_f32 v7, v255, v3 +// GFX1250: v_dual_ashrrev_i32 v255, v1, v2 :: v_dual_mul_f32 v7, v255, v3 ; encoding: [0x01,0x31,0x58,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v255, v2 :: v_dual_mul_f32 v7, v2, v3 +// GFX1250: v_dual_ashrrev_i32 v255, v255, v2 :: v_dual_mul_f32 v7, v2, v3 ; encoding: [0xff,0x31,0x58,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v2, v2 :: v_dual_mul_f32 v7, v3, v3 +// GFX1250: v_dual_ashrrev_i32 v255, v2, v2 :: v_dual_mul_f32 v7, v3, v3 ; encoding: 
[0x02,0x31,0x58,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v3, v2 :: v_dual_mul_f32 v7, v4, v3 +// GFX1250: v_dual_ashrrev_i32 v255, v3, v2 :: v_dual_mul_f32 v7, v4, v3 ; encoding: [0x03,0x31,0x58,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, s105, v2 :: v_dual_mul_f32 v7, s1, v3 +// GFX1250: v_dual_ashrrev_i32 v255, s105, v2 :: v_dual_mul_f32 v7, s1, v3 ; encoding: [0x69,0x30,0x58,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, s1, v2 :: v_dual_mul_f32 v7, s105, v3 +// GFX1250: v_dual_ashrrev_i32 v255, s1, v2 :: v_dual_mul_f32 v7, s105, v3 ; encoding: [0x01,0x30,0x58,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, ttmp15, v2 :: v_dual_mul_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_ashrrev_i32 v255, ttmp15, v2 :: v_dual_mul_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x30,0x58,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, exec_hi, v2 :: v_dual_mul_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_ashrrev_i32 v255, exec_hi, v2 :: v_dual_mul_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x30,0x58,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, exec_lo, v2 :: v_dual_mul_f32 v7, ttmp15, v3 +// GFX1250: v_dual_ashrrev_i32 v255, exec_lo, v2 :: v_dual_mul_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x30,0x58,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, m0, v2 :: v_dual_mul_f32 v7, m0, v3 +// GFX1250: v_dual_ashrrev_i32 v255, m0, v2 :: 
v_dual_mul_f32 v7, m0, v3 ; encoding: [0x7d,0x30,0x58,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, vcc_hi, v2 :: v_dual_mul_f32 v7, exec_lo, v3 +// GFX1250: v_dual_ashrrev_i32 v255, vcc_hi, v2 :: v_dual_mul_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x30,0x58,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, vcc_lo, v2 :: v_dual_mul_f32 v7, exec_hi, v3 +// GFX1250: v_dual_ashrrev_i32 v255, vcc_lo, v2 :: v_dual_mul_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x30,0x58,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, src_scc, v2 :: v_dual_mul_f32 v7, -1, v3 +// GFX1250: v_dual_ashrrev_i32 v255, src_scc, v2 :: v_dual_mul_f32 v7, -1, v3 ; encoding: [0xfd,0x30,0x58,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, 0.5, v3 :: v_dual_mul_f32 v7, 0.5, v2 +// GFX1250: v_dual_ashrrev_i32 v255, 0.5, v3 :: v_dual_mul_f32 v7, 0.5, v2 ; encoding: [0xf0,0x30,0x58,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, -1, v4 :: v_dual_mul_f32 v7, src_scc, v5 +// GFX1250: v_dual_ashrrev_i32 v255, -1, v4 :: v_dual_mul_f32 v7, src_scc, v5 ; encoding: [0xc1,0x30,0x58,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v4, v2 :: v_dual_sub_f32 v7, v1, v3 +// GFX1250: v_dual_ashrrev_i32 v255, v4, v2 :: v_dual_sub_f32 v7, v1, v3 ; encoding: [0x04,0x51,0x58,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v1, v2 :: v_dual_sub_f32 v7, v255, v3 +// GFX1250: 
v_dual_ashrrev_i32 v255, v1, v2 :: v_dual_sub_f32 v7, v255, v3 ; encoding: [0x01,0x51,0x58,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v255, v2 :: v_dual_sub_f32 v7, v2, v3 +// GFX1250: v_dual_ashrrev_i32 v255, v255, v2 :: v_dual_sub_f32 v7, v2, v3 ; encoding: [0xff,0x51,0x58,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v2, v2 :: v_dual_sub_f32 v7, v3, v3 +// GFX1250: v_dual_ashrrev_i32 v255, v2, v2 :: v_dual_sub_f32 v7, v3, v3 ; encoding: [0x02,0x51,0x58,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v3, v2 :: v_dual_sub_f32 v7, v4, v3 +// GFX1250: v_dual_ashrrev_i32 v255, v3, v2 :: v_dual_sub_f32 v7, v4, v3 ; encoding: [0x03,0x51,0x58,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, s105, v2 :: v_dual_sub_f32 v7, s1, v3 +// GFX1250: v_dual_ashrrev_i32 v255, s105, v2 :: v_dual_sub_f32 v7, s1, v3 ; encoding: [0x69,0x50,0x58,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, s1, v2 :: v_dual_sub_f32 v7, s105, v3 +// GFX1250: v_dual_ashrrev_i32 v255, s1, v2 :: v_dual_sub_f32 v7, s105, v3 ; encoding: [0x01,0x50,0x58,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, ttmp15, v2 :: v_dual_sub_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_ashrrev_i32 v255, ttmp15, v2 :: v_dual_sub_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x50,0x58,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, exec_hi, v2 :: v_dual_sub_f32 v7, vcc_hi, v3 +// 
GFX1250: v_dual_ashrrev_i32 v255, exec_hi, v2 :: v_dual_sub_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x50,0x58,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, exec_lo, v2 :: v_dual_sub_f32 v7, ttmp15, v3 +// GFX1250: v_dual_ashrrev_i32 v255, exec_lo, v2 :: v_dual_sub_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x50,0x58,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, m0, v2 :: v_dual_sub_f32 v7, m0, v3 +// GFX1250: v_dual_ashrrev_i32 v255, m0, v2 :: v_dual_sub_f32 v7, m0, v3 ; encoding: [0x7d,0x50,0x58,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, vcc_hi, v2 :: v_dual_sub_f32 v7, exec_lo, v3 +// GFX1250: v_dual_ashrrev_i32 v255, vcc_hi, v2 :: v_dual_sub_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x50,0x58,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, vcc_lo, v2 :: v_dual_sub_f32 v7, exec_hi, v3 +// GFX1250: v_dual_ashrrev_i32 v255, vcc_lo, v2 :: v_dual_sub_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x58,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, src_scc, v2 :: v_dual_sub_f32 v7, -1, v3 +// GFX1250: v_dual_ashrrev_i32 v255, src_scc, v2 :: v_dual_sub_f32 v7, -1, v3 ; encoding: [0xfd,0x50,0x58,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, 0.5, v3 :: v_dual_sub_f32 v7, 0.5, v2 +// GFX1250: v_dual_ashrrev_i32 v255, 0.5, v3 :: v_dual_sub_f32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x58,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 
v255, -1, v4 :: v_dual_sub_f32 v7, src_scc, v5 +// GFX1250: v_dual_ashrrev_i32 v255, -1, v4 :: v_dual_sub_f32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x58,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v4, v2 :: v_dual_subrev_f32 v7, v1, v3 +// GFX1250: v_dual_ashrrev_i32 v255, v4, v2 :: v_dual_subrev_f32 v7, v1, v3 ; encoding: [0x04,0x61,0x58,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v1, v2 :: v_dual_subrev_f32 v7, v255, v3 +// GFX1250: v_dual_ashrrev_i32 v255, v1, v2 :: v_dual_subrev_f32 v7, v255, v3 ; encoding: [0x01,0x61,0x58,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v255, v2 :: v_dual_subrev_f32 v7, v2, v3 +// GFX1250: v_dual_ashrrev_i32 v255, v255, v2 :: v_dual_subrev_f32 v7, v2, v3 ; encoding: [0xff,0x61,0x58,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v2, v2 :: v_dual_subrev_f32 v7, v3, v3 +// GFX1250: v_dual_ashrrev_i32 v255, v2, v2 :: v_dual_subrev_f32 v7, v3, v3 ; encoding: [0x02,0x61,0x58,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v3, v2 :: v_dual_subrev_f32 v7, v4, v3 +// GFX1250: v_dual_ashrrev_i32 v255, v3, v2 :: v_dual_subrev_f32 v7, v4, v3 ; encoding: [0x03,0x61,0x58,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, s105, v2 :: v_dual_subrev_f32 v7, s1, v3 +// GFX1250: v_dual_ashrrev_i32 v255, s105, v2 :: v_dual_subrev_f32 v7, s1, v3 ; encoding: [0x69,0x60,0x58,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_ashrrev_i32 v255, s1, v2 :: v_dual_subrev_f32 v7, s105, v3 +// GFX1250: v_dual_ashrrev_i32 v255, s1, v2 :: v_dual_subrev_f32 v7, s105, v3 ; encoding: [0x01,0x60,0x58,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, ttmp15, v2 :: v_dual_subrev_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_ashrrev_i32 v255, ttmp15, v2 :: v_dual_subrev_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x60,0x58,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, exec_hi, v2 :: v_dual_subrev_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_ashrrev_i32 v255, exec_hi, v2 :: v_dual_subrev_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x60,0x58,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, exec_lo, v2 :: v_dual_subrev_f32 v7, ttmp15, v3 +// GFX1250: v_dual_ashrrev_i32 v255, exec_lo, v2 :: v_dual_subrev_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x60,0x58,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, m0, v2 :: v_dual_subrev_f32 v7, m0, v3 +// GFX1250: v_dual_ashrrev_i32 v255, m0, v2 :: v_dual_subrev_f32 v7, m0, v3 ; encoding: [0x7d,0x60,0x58,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v7, exec_lo, v3 +// GFX1250: v_dual_ashrrev_i32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x60,0x58,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v7, exec_hi, v3 +// GFX1250: v_dual_ashrrev_i32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v7, exec_hi, v3 ; encoding: 
[0x6a,0x60,0x58,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, src_scc, v2 :: v_dual_subrev_f32 v7, -1, v3 +// GFX1250: v_dual_ashrrev_i32 v255, src_scc, v2 :: v_dual_subrev_f32 v7, -1, v3 ; encoding: [0xfd,0x60,0x58,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, 0.5, v3 :: v_dual_subrev_f32 v7, 0.5, v2 +// GFX1250: v_dual_ashrrev_i32 v255, 0.5, v3 :: v_dual_subrev_f32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x58,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, -1, v4 :: v_dual_subrev_f32 v7, src_scc, v5 +// GFX1250: v_dual_ashrrev_i32 v255, -1, v4 :: v_dual_subrev_f32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x58,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v254, v4, v2 :: v_dual_fma_f32 v7, v1, v3, v4 +// GFX1250: v_dual_ashrrev_i32 v254, v4, v2 :: v_dual_fma_f32 v7, v1, v3, v4 ; encoding: [0x04,0x31,0x59,0xcf,0x01,0x01,0x02,0x00,0xfe,0x03,0x04,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v254, v4, v2 :: v_dual_sub_nc_u32 v9, v1, v13 +// GFX1250: v_dual_ashrrev_i32 v254, v4, v2 :: v_dual_sub_nc_u32 v9, v1, v13 ; encoding: [0x04,0x41,0x59,0xcf,0x01,0x01,0x02,0x00,0xfe,0x0d,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v254, v4, v2 :: v_dual_lshrrev_b32 v9, v1, v13 +// GFX1250: v_dual_ashrrev_i32 v254, v4, v2 :: v_dual_lshrrev_b32 v9, v1, v13 ; encoding: [0x04,0x51,0x59,0xcf,0x01,0x01,0x02,0x00,0xfe,0x0d,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v254, v4, v2 :: v_dual_ashrrev_i32 v9, v1, v13 +// GFX1250: v_dual_ashrrev_i32 v254, v4, v2 
:: v_dual_ashrrev_i32 v9, v1, v13 ; encoding: [0x04,0x61,0x59,0xcf,0x01,0x01,0x02,0x00,0xfe,0x0d,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v254, v4, v2 :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0x87 +// GFX1250: v_dual_ashrrev_i32 v254, v4, v2 :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0x87 ; encoding: [0x04,0x21,0x59,0xcf,0x01,0x01,0x02,0x00,0xfe,0x03,0x87,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v254, v4, v2 :: v_dual_add_f32 v7, v1, v3 +// GFX1250: v_dual_lshrrev_b32 v254, v4, v2 :: v_dual_add_f32 v7, v1, v3 ; encoding: [0x04,0x41,0x54,0xcf,0x01,0x01,0x02,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v1, v2 :: v_dual_add_f32 v7, v255, v3 +// GFX1250: v_dual_lshrrev_b32 v255, v1, v2 :: v_dual_add_f32 v7, v255, v3 ; encoding: [0x01,0x41,0x54,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v255, v2 :: v_dual_add_f32 v7, v2, v3 +// GFX1250: v_dual_lshrrev_b32 v255, v255, v2 :: v_dual_add_f32 v7, v2, v3 ; encoding: [0xff,0x41,0x54,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v2, v2 :: v_dual_add_f32 v7, v3, v3 +// GFX1250: v_dual_lshrrev_b32 v255, v2, v2 :: v_dual_add_f32 v7, v3, v3 ; encoding: [0x02,0x41,0x54,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v3, v2 :: v_dual_add_f32 v7, v4, v3 +// GFX1250: v_dual_lshrrev_b32 v255, v3, v2 :: v_dual_add_f32 v7, v4, v3 ; encoding: [0x03,0x41,0x54,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, s105, v2 :: v_dual_add_f32 v7, s1, v3 +// GFX1250: v_dual_lshrrev_b32 
v255, s105, v2 :: v_dual_add_f32 v7, s1, v3 ; encoding: [0x69,0x40,0x54,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, s1, v2 :: v_dual_add_f32 v7, s105, v3 +// GFX1250: v_dual_lshrrev_b32 v255, s1, v2 :: v_dual_add_f32 v7, s105, v3 ; encoding: [0x01,0x40,0x54,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, ttmp15, v2 :: v_dual_add_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_lshrrev_b32 v255, ttmp15, v2 :: v_dual_add_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x40,0x54,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, exec_hi, v2 :: v_dual_add_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_lshrrev_b32 v255, exec_hi, v2 :: v_dual_add_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x40,0x54,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, exec_lo, v2 :: v_dual_add_f32 v7, ttmp15, v3 +// GFX1250: v_dual_lshrrev_b32 v255, exec_lo, v2 :: v_dual_add_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x40,0x54,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, m0, v2 :: v_dual_add_f32 v7, m0, v3 +// GFX1250: v_dual_lshrrev_b32 v255, m0, v2 :: v_dual_add_f32 v7, m0, v3 ; encoding: [0x7d,0x40,0x54,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, vcc_hi, v2 :: v_dual_add_f32 v7, exec_lo, v3 +// GFX1250: v_dual_lshrrev_b32 v255, vcc_hi, v2 :: v_dual_add_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x40,0x54,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, vcc_lo, v2 :: 
v_dual_add_f32 v7, exec_hi, v3 +// GFX1250: v_dual_lshrrev_b32 v255, vcc_lo, v2 :: v_dual_add_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x54,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, src_scc, v2 :: v_dual_add_f32 v7, -1, v3 +// GFX1250: v_dual_lshrrev_b32 v255, src_scc, v2 :: v_dual_add_f32 v7, -1, v3 ; encoding: [0xfd,0x40,0x54,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, 0.5, v3 :: v_dual_add_f32 v7, 0.5, v2 +// GFX1250: v_dual_lshrrev_b32 v255, 0.5, v3 :: v_dual_add_f32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x54,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, -1, v4 :: v_dual_add_f32 v7, src_scc, v5 +// GFX1250: v_dual_lshrrev_b32 v255, -1, v4 :: v_dual_add_f32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x54,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v4, v2 :: v_dual_add_nc_u32 v7, v1, v3 +// GFX1250: v_dual_lshrrev_b32 v255, v4, v2 :: v_dual_add_nc_u32 v7, v1, v3 ; encoding: [0x04,0x01,0x55,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v1, v2 :: v_dual_add_nc_u32 v7, v255, v3 +// GFX1250: v_dual_lshrrev_b32 v255, v1, v2 :: v_dual_add_nc_u32 v7, v255, v3 ; encoding: [0x01,0x01,0x55,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v255, v2 :: v_dual_add_nc_u32 v7, v2, v3 +// GFX1250: v_dual_lshrrev_b32 v255, v255, v2 :: v_dual_add_nc_u32 v7, v2, v3 ; encoding: [0xff,0x01,0x55,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_lshrrev_b32 v255, v2, v2 :: v_dual_add_nc_u32 v7, v3, v3 +// GFX1250: v_dual_lshrrev_b32 v255, v2, v2 :: v_dual_add_nc_u32 v7, v3, v3 ; encoding: [0x02,0x01,0x55,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v3, v2 :: v_dual_add_nc_u32 v7, v4, v3 +// GFX1250: v_dual_lshrrev_b32 v255, v3, v2 :: v_dual_add_nc_u32 v7, v4, v3 ; encoding: [0x03,0x01,0x55,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, s105, v2 :: v_dual_add_nc_u32 v7, s1, v3 +// GFX1250: v_dual_lshrrev_b32 v255, s105, v2 :: v_dual_add_nc_u32 v7, s1, v3 ; encoding: [0x69,0x00,0x55,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, s1, v2 :: v_dual_add_nc_u32 v7, s105, v3 +// GFX1250: v_dual_lshrrev_b32 v255, s1, v2 :: v_dual_add_nc_u32 v7, s105, v3 ; encoding: [0x01,0x00,0x55,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v7, vcc_lo, v3 +// GFX1250: v_dual_lshrrev_b32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x55,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v7, vcc_hi, v3 +// GFX1250: v_dual_lshrrev_b32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x55,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v7, ttmp15, v3 +// GFX1250: v_dual_lshrrev_b32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x55,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, m0, v2 :: v_dual_add_nc_u32 v7, m0, v3 +// GFX1250: v_dual_lshrrev_b32 v255, m0, v2 :: v_dual_add_nc_u32 v7, m0, v3 ; encoding: [0x7d,0x00,0x55,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v7, exec_lo, v3 +// GFX1250: v_dual_lshrrev_b32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x55,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v7, exec_hi, v3 +// GFX1250: v_dual_lshrrev_b32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x55,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, src_scc, v2 :: v_dual_add_nc_u32 v7, -1, v3 +// GFX1250: v_dual_lshrrev_b32 v255, src_scc, v2 :: v_dual_add_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x00,0x55,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, 0.5, v3 :: v_dual_add_nc_u32 v7, 0.5, v2 +// GFX1250: v_dual_lshrrev_b32 v255, 0.5, v3 :: v_dual_add_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x55,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, -1, v4 :: v_dual_add_nc_u32 v7, src_scc, v5 +// GFX1250: v_dual_lshrrev_b32 v255, -1, v4 :: v_dual_add_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x55,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo +// GFX1250: v_dual_lshrrev_b32 v255, v4, v2 :: v_dual_cndmask_b32 v7, 
v1, v3, vcc_lo ; encoding: [0x04,0x91,0x54,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v1, v2 :: v_dual_cndmask_b32 v7, v255, v3, vcc_lo +// GFX1250: v_dual_lshrrev_b32 v255, v1, v2 :: v_dual_cndmask_b32 v7, v255, v3, vcc_lo ; encoding: [0x01,0x91,0x54,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v255, v2 :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo +// GFX1250: v_dual_lshrrev_b32 v255, v255, v2 :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo ; encoding: [0xff,0x91,0x54,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v2, v2 :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo +// GFX1250: v_dual_lshrrev_b32 v255, v2, v2 :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo ; encoding: [0x02,0x91,0x54,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v3, v2 :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo +// GFX1250: v_dual_lshrrev_b32 v255, v3, v2 :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo ; encoding: [0x03,0x91,0x54,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, s105, v2 :: v_dual_cndmask_b32 v7, s105, v3, vcc_lo +// GFX1250: v_dual_lshrrev_b32 v255, s105, v2 :: v_dual_cndmask_b32 v7, s105, v3, vcc_lo ; encoding: [0x69,0x90,0x54,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, s1, v2 :: v_dual_cndmask_b32 v7, s1, v3, vcc_lo +// GFX1250: v_dual_lshrrev_b32 v255, s1, v2 :: v_dual_cndmask_b32 v7, s1, v3, vcc_lo ; encoding: [0x01,0x90,0x54,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_lshrrev_b32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v7, ttmp15, v3, vcc_lo +// GFX1250: v_dual_lshrrev_b32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v7, ttmp15, v3, vcc_lo ; encoding: [0x7b,0x90,0x54,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v7, exec_hi, v3, vcc_lo +// GFX1250: v_dual_lshrrev_b32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v7, exec_hi, v3, vcc_lo ; encoding: [0x7f,0x90,0x54,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v7, exec_lo, v3, vcc_lo +// GFX1250: v_dual_lshrrev_b32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v7, exec_lo, v3, vcc_lo ; encoding: [0x7e,0x90,0x54,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, m0, v2 :: v_dual_cndmask_b32 v7, m0, v3, vcc_lo +// GFX1250: v_dual_lshrrev_b32 v255, m0, v2 :: v_dual_cndmask_b32 v7, m0, v3, vcc_lo ; encoding: [0x7d,0x90,0x54,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v7, vcc_hi, v3, vcc_lo +// GFX1250: v_dual_lshrrev_b32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v7, vcc_hi, v3, vcc_lo ; encoding: [0x6b,0x90,0x54,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo +// GFX1250: v_dual_lshrrev_b32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo ; encoding: [0x6a,0x90,0x54,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, src_scc, v2 :: v_dual_cndmask_b32 v7, -1, 
v3, vcc_lo +// GFX1250: v_dual_lshrrev_b32 v255, src_scc, v2 :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo ; encoding: [0xfd,0x90,0x54,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, 0.5, v3 :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo +// GFX1250: v_dual_lshrrev_b32 v255, 0.5, v3 :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo ; encoding: [0xf0,0x90,0x54,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, -1, v4 :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo +// GFX1250: v_dual_lshrrev_b32 v255, -1, v4 :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo ; encoding: [0xc1,0x90,0x54,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v4, v2 :: v_dual_fmac_f32 v7, v1, v3 +// GFX1250: v_dual_lshrrev_b32 v255, v4, v2 :: v_dual_fmac_f32 v7, v1, v3 ; encoding: [0x04,0x01,0x54,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v1, v2 :: v_dual_fmac_f32 v7, v255, v3 +// GFX1250: v_dual_lshrrev_b32 v255, v1, v2 :: v_dual_fmac_f32 v7, v255, v3 ; encoding: [0x01,0x01,0x54,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v255, v2 :: v_dual_fmac_f32 v7, v2, v3 +// GFX1250: v_dual_lshrrev_b32 v255, v255, v2 :: v_dual_fmac_f32 v7, v2, v3 ; encoding: [0xff,0x01,0x54,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v2, v2 :: v_dual_fmac_f32 v7, v3, v3 +// GFX1250: v_dual_lshrrev_b32 v255, v2, v2 :: v_dual_fmac_f32 v7, v3, v3 ; encoding: [0x02,0x01,0x54,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_lshrrev_b32 v255, v3, v2 :: v_dual_fmac_f32 v7, v4, v3 +// GFX1250: v_dual_lshrrev_b32 v255, v3, v2 :: v_dual_fmac_f32 v7, v4, v3 ; encoding: [0x03,0x01,0x54,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, s105, v2 :: v_dual_fmac_f32 v7, s1, v3 +// GFX1250: v_dual_lshrrev_b32 v255, s105, v2 :: v_dual_fmac_f32 v7, s1, v3 ; encoding: [0x69,0x00,0x54,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, s1, v2 :: v_dual_fmac_f32 v7, s105, v3 +// GFX1250: v_dual_lshrrev_b32 v255, s1, v2 :: v_dual_fmac_f32 v7, s105, v3 ; encoding: [0x01,0x00,0x54,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, ttmp15, v2 :: v_dual_fmac_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_lshrrev_b32 v255, ttmp15, v2 :: v_dual_fmac_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x54,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, exec_hi, v2 :: v_dual_fmac_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_lshrrev_b32 v255, exec_hi, v2 :: v_dual_fmac_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x54,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, exec_lo, v2 :: v_dual_fmac_f32 v7, ttmp15, v3 +// GFX1250: v_dual_lshrrev_b32 v255, exec_lo, v2 :: v_dual_fmac_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x54,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, m0, v2 :: v_dual_fmac_f32 v7, m0, v3 +// GFX1250: v_dual_lshrrev_b32 v255, m0, v2 :: v_dual_fmac_f32 v7, m0, v3 ; encoding: [0x7d,0x00,0x54,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v7, exec_lo, v3 +// GFX1250: v_dual_lshrrev_b32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x54,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v7, exec_hi, v3 +// GFX1250: v_dual_lshrrev_b32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x54,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, src_scc, v2 :: v_dual_fmac_f32 v7, -1, v3 +// GFX1250: v_dual_lshrrev_b32 v255, src_scc, v2 :: v_dual_fmac_f32 v7, -1, v3 ; encoding: [0xfd,0x00,0x54,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, 0.5, v3 :: v_dual_fmac_f32 v7, 0.5, v2 +// GFX1250: v_dual_lshrrev_b32 v255, 0.5, v3 :: v_dual_fmac_f32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x54,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, -1, v4 :: v_dual_fmac_f32 v7, src_scc, v5 +// GFX1250: v_dual_lshrrev_b32 v255, -1, v4 :: v_dual_fmac_f32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x54,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v4, v2 :: v_dual_lshlrev_b32 v7, v1, v3 +// GFX1250: v_dual_lshrrev_b32 v255, v4, v2 :: v_dual_lshlrev_b32 v7, v1, v3 ; encoding: [0x04,0x11,0x55,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v1, v2 :: v_dual_lshlrev_b32 v7, v255, v3 +// GFX1250: v_dual_lshrrev_b32 v255, v1, v2 :: v_dual_lshlrev_b32 v7, v255, v3 ; encoding: 
[0x01,0x11,0x55,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v255, v2 :: v_dual_lshlrev_b32 v7, v2, v3 +// GFX1250: v_dual_lshrrev_b32 v255, v255, v2 :: v_dual_lshlrev_b32 v7, v2, v3 ; encoding: [0xff,0x11,0x55,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v2, v2 :: v_dual_lshlrev_b32 v7, v3, v3 +// GFX1250: v_dual_lshrrev_b32 v255, v2, v2 :: v_dual_lshlrev_b32 v7, v3, v3 ; encoding: [0x02,0x11,0x55,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v3, v2 :: v_dual_lshlrev_b32 v7, v4, v3 +// GFX1250: v_dual_lshrrev_b32 v255, v3, v2 :: v_dual_lshlrev_b32 v7, v4, v3 ; encoding: [0x03,0x11,0x55,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, s105, v2 :: v_dual_lshlrev_b32 v7, s1, v3 +// GFX1250: v_dual_lshrrev_b32 v255, s105, v2 :: v_dual_lshlrev_b32 v7, s1, v3 ; encoding: [0x69,0x10,0x55,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, s1, v2 :: v_dual_lshlrev_b32 v7, s105, v3 +// GFX1250: v_dual_lshrrev_b32 v255, s1, v2 :: v_dual_lshlrev_b32 v7, s105, v3 ; encoding: [0x01,0x10,0x55,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v7, vcc_lo, v3 +// GFX1250: v_dual_lshrrev_b32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v7, vcc_lo, v3 ; encoding: [0x7b,0x10,0x55,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v7, vcc_hi, v3 +// GFX1250: 
v_dual_lshrrev_b32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v7, vcc_hi, v3 ; encoding: [0x7f,0x10,0x55,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v7, ttmp15, v3 +// GFX1250: v_dual_lshrrev_b32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v7, ttmp15, v3 ; encoding: [0x7e,0x10,0x55,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, m0, v2 :: v_dual_lshlrev_b32 v7, m0, v3 +// GFX1250: v_dual_lshrrev_b32 v255, m0, v2 :: v_dual_lshlrev_b32 v7, m0, v3 ; encoding: [0x7d,0x10,0x55,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v7, exec_lo, v3 +// GFX1250: v_dual_lshrrev_b32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v7, exec_lo, v3 ; encoding: [0x6b,0x10,0x55,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v7, exec_hi, v3 +// GFX1250: v_dual_lshrrev_b32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x10,0x55,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v7, -1, v3 +// GFX1250: v_dual_lshrrev_b32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v7, -1, v3 ; encoding: [0xfd,0x10,0x55,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v7, 0.5, v2 +// GFX1250: v_dual_lshrrev_b32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x10,0x55,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction 
requires wavesize=32 + +v_dual_lshrrev_b32 v255, -1, v4 :: v_dual_lshlrev_b32 v7, src_scc, v5 +// GFX1250: v_dual_lshrrev_b32 v255, -1, v4 :: v_dual_lshlrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x10,0x55,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v4, v2 :: v_dual_max_num_f32 v7, v1, v3 +// GFX1250: v_dual_lshrrev_b32 v255, v4, v2 :: v_dual_max_num_f32 v7, v1, v3 ; encoding: [0x04,0xa1,0x54,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v1, v2 :: v_dual_max_num_f32 v7, v255, v3 +// GFX1250: v_dual_lshrrev_b32 v255, v1, v2 :: v_dual_max_num_f32 v7, v255, v3 ; encoding: [0x01,0xa1,0x54,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v255, v2 :: v_dual_max_num_f32 v7, v2, v3 +// GFX1250: v_dual_lshrrev_b32 v255, v255, v2 :: v_dual_max_num_f32 v7, v2, v3 ; encoding: [0xff,0xa1,0x54,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v2, v2 :: v_dual_max_num_f32 v7, v3, v3 +// GFX1250: v_dual_lshrrev_b32 v255, v2, v2 :: v_dual_max_num_f32 v7, v3, v3 ; encoding: [0x02,0xa1,0x54,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v3, v2 :: v_dual_max_num_f32 v7, v4, v3 +// GFX1250: v_dual_lshrrev_b32 v255, v3, v2 :: v_dual_max_num_f32 v7, v4, v3 ; encoding: [0x03,0xa1,0x54,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, s105, v2 :: v_dual_max_num_f32 v7, s1, v3 +// GFX1250: v_dual_lshrrev_b32 v255, s105, v2 :: v_dual_max_num_f32 v7, s1, v3 ; encoding: [0x69,0xa0,0x54,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, s1, v2 :: v_dual_max_num_f32 v7, s105, v3 +// GFX1250: v_dual_lshrrev_b32 v255, s1, v2 :: v_dual_max_num_f32 v7, s105, v3 ; encoding: [0x01,0xa0,0x54,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, ttmp15, v2 :: v_dual_max_num_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_lshrrev_b32 v255, ttmp15, v2 :: v_dual_max_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xa0,0x54,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, exec_hi, v2 :: v_dual_max_num_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_lshrrev_b32 v255, exec_hi, v2 :: v_dual_max_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xa0,0x54,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, exec_lo, v2 :: v_dual_max_num_f32 v7, ttmp15, v3 +// GFX1250: v_dual_lshrrev_b32 v255, exec_lo, v2 :: v_dual_max_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xa0,0x54,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, m0, v2 :: v_dual_max_num_f32 v7, m0, v3 +// GFX1250: v_dual_lshrrev_b32 v255, m0, v2 :: v_dual_max_num_f32 v7, m0, v3 ; encoding: [0x7d,0xa0,0x54,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v7, exec_lo, v3 +// GFX1250: v_dual_lshrrev_b32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xa0,0x54,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v7, exec_hi, v3 +// GFX1250: v_dual_lshrrev_b32 v255, vcc_lo, 
v2 :: v_dual_max_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xa0,0x54,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, src_scc, v2 :: v_dual_max_num_f32 v7, -1, v3 +// GFX1250: v_dual_lshrrev_b32 v255, src_scc, v2 :: v_dual_max_num_f32 v7, -1, v3 ; encoding: [0xfd,0xa0,0x54,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, 0.5, v3 :: v_dual_max_num_f32 v7, 0.5, v2 +// GFX1250: v_dual_lshrrev_b32 v255, 0.5, v3 :: v_dual_max_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xa0,0x54,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, -1, v4 :: v_dual_max_num_f32 v7, src_scc, v5 +// GFX1250: v_dual_lshrrev_b32 v255, -1, v4 :: v_dual_max_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xa0,0x54,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v4, v2 :: v_dual_min_num_f32 v7, v1, v3 +// GFX1250: v_dual_lshrrev_b32 v255, v4, v2 :: v_dual_min_num_f32 v7, v1, v3 ; encoding: [0x04,0xb1,0x54,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v1, v2 :: v_dual_min_num_f32 v7, v255, v3 +// GFX1250: v_dual_lshrrev_b32 v255, v1, v2 :: v_dual_min_num_f32 v7, v255, v3 ; encoding: [0x01,0xb1,0x54,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v255, v2 :: v_dual_min_num_f32 v7, v2, v3 +// GFX1250: v_dual_lshrrev_b32 v255, v255, v2 :: v_dual_min_num_f32 v7, v2, v3 ; encoding: [0xff,0xb1,0x54,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v2, v2 :: 
v_dual_min_num_f32 v7, v3, v3 +// GFX1250: v_dual_lshrrev_b32 v255, v2, v2 :: v_dual_min_num_f32 v7, v3, v3 ; encoding: [0x02,0xb1,0x54,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v3, v2 :: v_dual_min_num_f32 v7, v4, v3 +// GFX1250: v_dual_lshrrev_b32 v255, v3, v2 :: v_dual_min_num_f32 v7, v4, v3 ; encoding: [0x03,0xb1,0x54,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, s105, v2 :: v_dual_min_num_f32 v7, s1, v3 +// GFX1250: v_dual_lshrrev_b32 v255, s105, v2 :: v_dual_min_num_f32 v7, s1, v3 ; encoding: [0x69,0xb0,0x54,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, s1, v2 :: v_dual_min_num_f32 v7, s105, v3 +// GFX1250: v_dual_lshrrev_b32 v255, s1, v2 :: v_dual_min_num_f32 v7, s105, v3 ; encoding: [0x01,0xb0,0x54,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, ttmp15, v2 :: v_dual_min_num_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_lshrrev_b32 v255, ttmp15, v2 :: v_dual_min_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xb0,0x54,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, exec_hi, v2 :: v_dual_min_num_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_lshrrev_b32 v255, exec_hi, v2 :: v_dual_min_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xb0,0x54,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, exec_lo, v2 :: v_dual_min_num_f32 v7, ttmp15, v3 +// GFX1250: v_dual_lshrrev_b32 v255, exec_lo, v2 :: v_dual_min_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xb0,0x54,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, m0, v2 :: v_dual_min_num_f32 v7, m0, v3 +// GFX1250: v_dual_lshrrev_b32 v255, m0, v2 :: v_dual_min_num_f32 v7, m0, v3 ; encoding: [0x7d,0xb0,0x54,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v7, exec_lo, v3 +// GFX1250: v_dual_lshrrev_b32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xb0,0x54,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v7, exec_hi, v3 +// GFX1250: v_dual_lshrrev_b32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xb0,0x54,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, src_scc, v2 :: v_dual_min_num_f32 v7, -1, v3 +// GFX1250: v_dual_lshrrev_b32 v255, src_scc, v2 :: v_dual_min_num_f32 v7, -1, v3 ; encoding: [0xfd,0xb0,0x54,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, 0.5, v3 :: v_dual_min_num_f32 v7, 0.5, v2 +// GFX1250: v_dual_lshrrev_b32 v255, 0.5, v3 :: v_dual_min_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xb0,0x54,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, -1, v4 :: v_dual_min_num_f32 v7, src_scc, v5 +// GFX1250: v_dual_lshrrev_b32 v255, -1, v4 :: v_dual_min_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xb0,0x54,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v4, v255 :: v_dual_mov_b32 v7, v1 +// GFX1250: v_dual_lshrrev_b32 v255, v4, v255 :: v_dual_mov_b32 v7, v1 ; encoding: 
[0x04,0x81,0x54,0xcf,0x01,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v1, v255 :: v_dual_mov_b32 v7, v255 +// GFX1250: v_dual_lshrrev_b32 v255, v1, v255 :: v_dual_mov_b32 v7, v255 ; encoding: [0x01,0x81,0x54,0xcf,0xff,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v255, v255 :: v_dual_mov_b32 v7, v2 +// GFX1250: v_dual_lshrrev_b32 v255, v255, v255 :: v_dual_mov_b32 v7, v2 ; encoding: [0xff,0x81,0x54,0xcf,0x02,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v2, v255 :: v_dual_mov_b32 v7, v3 +// GFX1250: v_dual_lshrrev_b32 v255, v2, v255 :: v_dual_mov_b32 v7, v3 ; encoding: [0x02,0x81,0x54,0xcf,0x03,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v3, v255 :: v_dual_mov_b32 v7, v4 +// GFX1250: v_dual_lshrrev_b32 v255, v3, v255 :: v_dual_mov_b32 v7, v4 ; encoding: [0x03,0x81,0x54,0xcf,0x04,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, s105, v255 :: v_dual_mov_b32 v7, s1 +// GFX1250: v_dual_lshrrev_b32 v255, s105, v255 :: v_dual_mov_b32 v7, s1 ; encoding: [0x69,0x80,0x54,0xcf,0x01,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, s1, v255 :: v_dual_mov_b32 v7, s105 +// GFX1250: v_dual_lshrrev_b32 v255, s1, v255 :: v_dual_mov_b32 v7, s105 ; encoding: [0x01,0x80,0x54,0xcf,0x69,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, ttmp15, v255 :: v_dual_mov_b32 v7, vcc_lo +// GFX1250: v_dual_lshrrev_b32 v255, ttmp15, v255 :: v_dual_mov_b32 v7, vcc_lo ; encoding: 
[0x7b,0x80,0x54,0xcf,0x6a,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, exec_hi, v255 :: v_dual_mov_b32 v7, vcc_hi +// GFX1250: v_dual_lshrrev_b32 v255, exec_hi, v255 :: v_dual_mov_b32 v7, vcc_hi ; encoding: [0x7f,0x80,0x54,0xcf,0x6b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, exec_lo, v255 :: v_dual_mov_b32 v7, ttmp15 +// GFX1250: v_dual_lshrrev_b32 v255, exec_lo, v255 :: v_dual_mov_b32 v7, ttmp15 ; encoding: [0x7e,0x80,0x54,0xcf,0x7b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, m0, v255 :: v_dual_mov_b32 v7, m0 +// GFX1250: v_dual_lshrrev_b32 v255, m0, v255 :: v_dual_mov_b32 v7, m0 ; encoding: [0x7d,0x80,0x54,0xcf,0x7d,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, vcc_hi, v255 :: v_dual_mov_b32 v7, exec_lo +// GFX1250: v_dual_lshrrev_b32 v255, vcc_hi, v255 :: v_dual_mov_b32 v7, exec_lo ; encoding: [0x6b,0x80,0x54,0xcf,0x7e,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, vcc_lo, v255 :: v_dual_mov_b32 v7, exec_hi +// GFX1250: v_dual_lshrrev_b32 v255, vcc_lo, v255 :: v_dual_mov_b32 v7, exec_hi ; encoding: [0x6a,0x80,0x54,0xcf,0x7f,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, src_scc, v255 :: v_dual_mov_b32 v7, -1 +// GFX1250: v_dual_lshrrev_b32 v255, src_scc, v255 :: v_dual_mov_b32 v7, -1 ; encoding: [0xfd,0x80,0x54,0xcf,0xc1,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, 0.5, v3 :: v_dual_mov_b32 v7, 0.5 +// GFX1250: v_dual_lshrrev_b32 v255, 0.5, v3 :: 
v_dual_mov_b32 v7, 0.5 ; encoding: [0xf0,0x80,0x54,0xcf,0xf0,0x00,0x03,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, -1, v4 :: v_dual_mov_b32 v7, src_scc +// GFX1250: v_dual_lshrrev_b32 v255, -1, v4 :: v_dual_mov_b32 v7, src_scc ; encoding: [0xc1,0x80,0x54,0xcf,0xfd,0x00,0x04,0x00,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v7, v1, v3 +// GFX1250: v_dual_lshrrev_b32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v7, v1, v3 ; encoding: [0x04,0x71,0x54,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v7, v255, v3 +// GFX1250: v_dual_lshrrev_b32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v7, v255, v3 ; encoding: [0x01,0x71,0x54,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v7, v2, v3 +// GFX1250: v_dual_lshrrev_b32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v7, v2, v3 ; encoding: [0xff,0x71,0x54,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v7, v3, v3 +// GFX1250: v_dual_lshrrev_b32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v7, v3, v3 ; encoding: [0x02,0x71,0x54,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v7, v4, v3 +// GFX1250: v_dual_lshrrev_b32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v7, v4, v3 ; encoding: [0x03,0x71,0x54,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, s105, v2 :: 
v_dual_mul_dx9_zero_f32 v7, s1, v3 +// GFX1250: v_dual_lshrrev_b32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v7, s1, v3 ; encoding: [0x69,0x70,0x54,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v7, s105, v3 +// GFX1250: v_dual_lshrrev_b32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v7, s105, v3 ; encoding: [0x01,0x70,0x54,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_lshrrev_b32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x70,0x54,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_lshrrev_b32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x70,0x54,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, ttmp15, v3 +// GFX1250: v_dual_lshrrev_b32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x70,0x54,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v7, m0, v3 +// GFX1250: v_dual_lshrrev_b32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v7, m0, v3 ; encoding: [0x7d,0x70,0x54,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_lo, v3 +// GFX1250: v_dual_lshrrev_b32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_lo, v3 ; encoding: 
[0x6b,0x70,0x54,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 +// GFX1250: v_dual_lshrrev_b32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x54,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v7, -1, v3 +// GFX1250: v_dual_lshrrev_b32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v7, -1, v3 ; encoding: [0xfd,0x70,0x54,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 +// GFX1250: v_dual_lshrrev_b32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x54,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 +// GFX1250: v_dual_lshrrev_b32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x54,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v4, v2 :: v_dual_mul_f32 v7, v1, v3 +// GFX1250: v_dual_lshrrev_b32 v255, v4, v2 :: v_dual_mul_f32 v7, v1, v3 ; encoding: [0x04,0x31,0x54,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v1, v2 :: v_dual_mul_f32 v7, v255, v3 +// GFX1250: v_dual_lshrrev_b32 v255, v1, v2 :: v_dual_mul_f32 v7, v255, v3 ; encoding: [0x01,0x31,0x54,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v255, v2 :: v_dual_mul_f32 v7, v2, 
v3 +// GFX1250: v_dual_lshrrev_b32 v255, v255, v2 :: v_dual_mul_f32 v7, v2, v3 ; encoding: [0xff,0x31,0x54,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v2, v2 :: v_dual_mul_f32 v7, v3, v3 +// GFX1250: v_dual_lshrrev_b32 v255, v2, v2 :: v_dual_mul_f32 v7, v3, v3 ; encoding: [0x02,0x31,0x54,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v3, v2 :: v_dual_mul_f32 v7, v4, v3 +// GFX1250: v_dual_lshrrev_b32 v255, v3, v2 :: v_dual_mul_f32 v7, v4, v3 ; encoding: [0x03,0x31,0x54,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, s105, v2 :: v_dual_mul_f32 v7, s1, v3 +// GFX1250: v_dual_lshrrev_b32 v255, s105, v2 :: v_dual_mul_f32 v7, s1, v3 ; encoding: [0x69,0x30,0x54,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, s1, v2 :: v_dual_mul_f32 v7, s105, v3 +// GFX1250: v_dual_lshrrev_b32 v255, s1, v2 :: v_dual_mul_f32 v7, s105, v3 ; encoding: [0x01,0x30,0x54,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, ttmp15, v2 :: v_dual_mul_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_lshrrev_b32 v255, ttmp15, v2 :: v_dual_mul_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x30,0x54,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, exec_hi, v2 :: v_dual_mul_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_lshrrev_b32 v255, exec_hi, v2 :: v_dual_mul_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x30,0x54,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, exec_lo, v2 :: 
v_dual_mul_f32 v7, ttmp15, v3 +// GFX1250: v_dual_lshrrev_b32 v255, exec_lo, v2 :: v_dual_mul_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x30,0x54,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, m0, v2 :: v_dual_mul_f32 v7, m0, v3 +// GFX1250: v_dual_lshrrev_b32 v255, m0, v2 :: v_dual_mul_f32 v7, m0, v3 ; encoding: [0x7d,0x30,0x54,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, vcc_hi, v2 :: v_dual_mul_f32 v7, exec_lo, v3 +// GFX1250: v_dual_lshrrev_b32 v255, vcc_hi, v2 :: v_dual_mul_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x30,0x54,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, vcc_lo, v2 :: v_dual_mul_f32 v7, exec_hi, v3 +// GFX1250: v_dual_lshrrev_b32 v255, vcc_lo, v2 :: v_dual_mul_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x30,0x54,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, src_scc, v2 :: v_dual_mul_f32 v7, -1, v3 +// GFX1250: v_dual_lshrrev_b32 v255, src_scc, v2 :: v_dual_mul_f32 v7, -1, v3 ; encoding: [0xfd,0x30,0x54,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, 0.5, v3 :: v_dual_mul_f32 v7, 0.5, v2 +// GFX1250: v_dual_lshrrev_b32 v255, 0.5, v3 :: v_dual_mul_f32 v7, 0.5, v2 ; encoding: [0xf0,0x30,0x54,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, -1, v4 :: v_dual_mul_f32 v7, src_scc, v5 +// GFX1250: v_dual_lshrrev_b32 v255, -1, v4 :: v_dual_mul_f32 v7, src_scc, v5 ; encoding: [0xc1,0x30,0x54,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_lshrrev_b32 v255, v4, v2 :: v_dual_sub_f32 v7, v1, v3 +// GFX1250: v_dual_lshrrev_b32 v255, v4, v2 :: v_dual_sub_f32 v7, v1, v3 ; encoding: [0x04,0x51,0x54,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v1, v2 :: v_dual_sub_f32 v7, v255, v3 +// GFX1250: v_dual_lshrrev_b32 v255, v1, v2 :: v_dual_sub_f32 v7, v255, v3 ; encoding: [0x01,0x51,0x54,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v255, v2 :: v_dual_sub_f32 v7, v2, v3 +// GFX1250: v_dual_lshrrev_b32 v255, v255, v2 :: v_dual_sub_f32 v7, v2, v3 ; encoding: [0xff,0x51,0x54,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v2, v2 :: v_dual_sub_f32 v7, v3, v3 +// GFX1250: v_dual_lshrrev_b32 v255, v2, v2 :: v_dual_sub_f32 v7, v3, v3 ; encoding: [0x02,0x51,0x54,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v3, v2 :: v_dual_sub_f32 v7, v4, v3 +// GFX1250: v_dual_lshrrev_b32 v255, v3, v2 :: v_dual_sub_f32 v7, v4, v3 ; encoding: [0x03,0x51,0x54,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, s105, v2 :: v_dual_sub_f32 v7, s1, v3 +// GFX1250: v_dual_lshrrev_b32 v255, s105, v2 :: v_dual_sub_f32 v7, s1, v3 ; encoding: [0x69,0x50,0x54,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, s1, v2 :: v_dual_sub_f32 v7, s105, v3 +// GFX1250: v_dual_lshrrev_b32 v255, s1, v2 :: v_dual_sub_f32 v7, s105, v3 ; encoding: [0x01,0x50,0x54,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_lshrrev_b32 v255, ttmp15, v2 :: v_dual_sub_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_lshrrev_b32 v255, ttmp15, v2 :: v_dual_sub_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x50,0x54,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, exec_hi, v2 :: v_dual_sub_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_lshrrev_b32 v255, exec_hi, v2 :: v_dual_sub_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x50,0x54,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, exec_lo, v2 :: v_dual_sub_f32 v7, ttmp15, v3 +// GFX1250: v_dual_lshrrev_b32 v255, exec_lo, v2 :: v_dual_sub_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x50,0x54,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, m0, v2 :: v_dual_sub_f32 v7, m0, v3 +// GFX1250: v_dual_lshrrev_b32 v255, m0, v2 :: v_dual_sub_f32 v7, m0, v3 ; encoding: [0x7d,0x50,0x54,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, vcc_hi, v2 :: v_dual_sub_f32 v7, exec_lo, v3 +// GFX1250: v_dual_lshrrev_b32 v255, vcc_hi, v2 :: v_dual_sub_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x50,0x54,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, vcc_lo, v2 :: v_dual_sub_f32 v7, exec_hi, v3 +// GFX1250: v_dual_lshrrev_b32 v255, vcc_lo, v2 :: v_dual_sub_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x54,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, src_scc, v2 :: v_dual_sub_f32 v7, -1, v3 +// GFX1250: v_dual_lshrrev_b32 v255, src_scc, v2 :: v_dual_sub_f32 v7, -1, v3 ; encoding: [0xfd,0x50,0x54,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// 
W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, 0.5, v3 :: v_dual_sub_f32 v7, 0.5, v2 +// GFX1250: v_dual_lshrrev_b32 v255, 0.5, v3 :: v_dual_sub_f32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x54,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, -1, v4 :: v_dual_sub_f32 v7, src_scc, v5 +// GFX1250: v_dual_lshrrev_b32 v255, -1, v4 :: v_dual_sub_f32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x54,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v4, v2 :: v_dual_subrev_f32 v7, v1, v3 +// GFX1250: v_dual_lshrrev_b32 v255, v4, v2 :: v_dual_subrev_f32 v7, v1, v3 ; encoding: [0x04,0x61,0x54,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v1, v2 :: v_dual_subrev_f32 v7, v255, v3 +// GFX1250: v_dual_lshrrev_b32 v255, v1, v2 :: v_dual_subrev_f32 v7, v255, v3 ; encoding: [0x01,0x61,0x54,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v255, v2 :: v_dual_subrev_f32 v7, v2, v3 +// GFX1250: v_dual_lshrrev_b32 v255, v255, v2 :: v_dual_subrev_f32 v7, v2, v3 ; encoding: [0xff,0x61,0x54,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v2, v2 :: v_dual_subrev_f32 v7, v3, v3 +// GFX1250: v_dual_lshrrev_b32 v255, v2, v2 :: v_dual_subrev_f32 v7, v3, v3 ; encoding: [0x02,0x61,0x54,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v3, v2 :: v_dual_subrev_f32 v7, v4, v3 +// GFX1250: v_dual_lshrrev_b32 v255, v3, v2 :: v_dual_subrev_f32 v7, v4, v3 ; encoding: 
[0x03,0x61,0x54,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, s105, v2 :: v_dual_subrev_f32 v7, s1, v3 +// GFX1250: v_dual_lshrrev_b32 v255, s105, v2 :: v_dual_subrev_f32 v7, s1, v3 ; encoding: [0x69,0x60,0x54,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, s1, v2 :: v_dual_subrev_f32 v7, s105, v3 +// GFX1250: v_dual_lshrrev_b32 v255, s1, v2 :: v_dual_subrev_f32 v7, s105, v3 ; encoding: [0x01,0x60,0x54,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, ttmp15, v2 :: v_dual_subrev_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_lshrrev_b32 v255, ttmp15, v2 :: v_dual_subrev_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x60,0x54,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, exec_hi, v2 :: v_dual_subrev_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_lshrrev_b32 v255, exec_hi, v2 :: v_dual_subrev_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x60,0x54,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, exec_lo, v2 :: v_dual_subrev_f32 v7, ttmp15, v3 +// GFX1250: v_dual_lshrrev_b32 v255, exec_lo, v2 :: v_dual_subrev_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x60,0x54,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, m0, v2 :: v_dual_subrev_f32 v7, m0, v3 +// GFX1250: v_dual_lshrrev_b32 v255, m0, v2 :: v_dual_subrev_f32 v7, m0, v3 ; encoding: [0x7d,0x60,0x54,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v7, exec_lo, v3 +// 
GFX1250: v_dual_lshrrev_b32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x60,0x54,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v7, exec_hi, v3 +// GFX1250: v_dual_lshrrev_b32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x54,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, src_scc, v2 :: v_dual_subrev_f32 v7, -1, v3 +// GFX1250: v_dual_lshrrev_b32 v255, src_scc, v2 :: v_dual_subrev_f32 v7, -1, v3 ; encoding: [0xfd,0x60,0x54,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, 0.5, v3 :: v_dual_subrev_f32 v7, 0.5, v2 +// GFX1250: v_dual_lshrrev_b32 v255, 0.5, v3 :: v_dual_subrev_f32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x54,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, -1, v4 :: v_dual_subrev_f32 v7, src_scc, v5 +// GFX1250: v_dual_lshrrev_b32 v255, -1, v4 :: v_dual_subrev_f32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x54,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v254, v4, v2 :: v_dual_fma_f32 v7, v1, v3, v4 +// GFX1250: v_dual_lshrrev_b32 v254, v4, v2 :: v_dual_fma_f32 v7, v1, v3, v4 ; encoding: [0x04,0x31,0x55,0xcf,0x01,0x01,0x02,0x00,0xfe,0x03,0x04,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v1, v4, v2 :: v_dual_sub_nc_u32 v9, v1, v13 +// GFX1250: v_dual_lshrrev_b32 v1, v4, v2 :: v_dual_sub_nc_u32 v9, v1, v13 ; encoding: [0x04,0x41,0x55,0xcf,0x01,0x01,0x02,0x00,0x01,0x0d,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_lshrrev_b32 v1, v4, v2 :: v_dual_lshrrev_b32 v9, v1, v13 +// GFX1250: v_dual_lshrrev_b32 v1, v4, v2 :: v_dual_lshrrev_b32 v9, v1, v13 ; encoding: [0x04,0x51,0x55,0xcf,0x01,0x01,0x02,0x00,0x01,0x0d,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v1, v4, v2 :: v_dual_ashrrev_i32 v9, v1, v13 +// GFX1250: v_dual_lshrrev_b32 v1, v4, v2 :: v_dual_ashrrev_i32 v9, v1, v13 ; encoding: [0x04,0x61,0x55,0xcf,0x01,0x01,0x02,0x00,0x01,0x0d,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v254, v4, v2 :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0x88 +// GFX1250: v_dual_lshrrev_b32 v254, v4, v2 :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0x88 ; encoding: [0x04,0x21,0x55,0xcf,0x01,0x01,0x02,0x00,0xfe,0x03,0x88,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v254, v4, v2, v10 :: v_dual_add_f32 v7, v1, v3 +// GFX1250: v_dual_fma_f32 v254, v4, v2, v10 :: v_dual_add_f32 v7, v1, v3 ; encoding: [0x04,0x41,0x4c,0xcf,0x01,0x01,0x02,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v1, v2, v10 :: v_dual_add_f32 v7, v255, v3 +// GFX1250: v_dual_fma_f32 v255, v1, v2, v10 :: v_dual_add_f32 v7, v255, v3 ; encoding: [0x01,0x41,0x4c,0xcf,0xff,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v255, v2, v10 :: v_dual_add_f32 v7, v2, v3 +// GFX1250: v_dual_fma_f32 v255, v255, v2, v10 :: v_dual_add_f32 v7, v2, v3 ; encoding: [0xff,0x41,0x4c,0xcf,0x02,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v2, v2, v10 :: v_dual_add_f32 v7, v3, v3 +// GFX1250: v_dual_fma_f32 v255, v2, v2, v10 :: v_dual_add_f32 v7, v3, v3 ; encoding: [0x02,0x41,0x4c,0xcf,0x03,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_fma_f32 v255, v3, v2, v10 :: v_dual_add_f32 v7, v4, v3 +// GFX1250: v_dual_fma_f32 v255, v3, v2, v10 :: v_dual_add_f32 v7, v4, v3 ; encoding: [0x03,0x41,0x4c,0xcf,0x04,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, s105, v2, v10 :: v_dual_add_f32 v7, s1, v3 +// GFX1250: v_dual_fma_f32 v255, s105, v2, v10 :: v_dual_add_f32 v7, s1, v3 ; encoding: [0x69,0x40,0x4c,0xcf,0x01,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, s1, v2, v10 :: v_dual_add_f32 v7, s105, v3 +// GFX1250: v_dual_fma_f32 v255, s1, v2, v10 :: v_dual_add_f32 v7, s105, v3 ; encoding: [0x01,0x40,0x4c,0xcf,0x69,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, ttmp15, v2, v10 :: v_dual_add_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_fma_f32 v255, ttmp15, v2, v10 :: v_dual_add_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x40,0x4c,0xcf,0x6a,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, exec_hi, v2, v10 :: v_dual_add_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_fma_f32 v255, exec_hi, v2, v10 :: v_dual_add_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x40,0x4c,0xcf,0x6b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, exec_lo, v2, v10 :: v_dual_add_f32 v7, ttmp15, v3 +// GFX1250: v_dual_fma_f32 v255, exec_lo, v2, v10 :: v_dual_add_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x40,0x4c,0xcf,0x7b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, m0, v2, v10 :: v_dual_add_f32 v7, m0, v3 +// GFX1250: v_dual_fma_f32 v255, m0, v2, v10 :: v_dual_add_f32 v7, m0, v3 ; encoding: 
[0x7d,0x40,0x4c,0xcf,0x7d,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, vcc_hi, v2, v10 :: v_dual_add_f32 v7, exec_lo, v3 +// GFX1250: v_dual_fma_f32 v255, vcc_hi, v2, v10 :: v_dual_add_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x40,0x4c,0xcf,0x7e,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, vcc_lo, v2, v10 :: v_dual_add_f32 v7, exec_hi, v3 +// GFX1250: v_dual_fma_f32 v255, vcc_lo, v2, v10 :: v_dual_add_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x4c,0xcf,0x7f,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, src_scc, v2, v10 :: v_dual_add_f32 v7, -1, v3 +// GFX1250: v_dual_fma_f32 v255, src_scc, v2, v10 :: v_dual_add_f32 v7, -1, v3 ; encoding: [0xfd,0x40,0x4c,0xcf,0xc1,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, 0.5, v3, v10 :: v_dual_add_f32 v7, 0.5, v2 +// GFX1250: v_dual_fma_f32 v255, 0.5, v3, v10 :: v_dual_add_f32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x4c,0xcf,0xf0,0x00,0x03,0x0a,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, -1, v4, v10 :: v_dual_add_f32 v7, src_scc, v5 +// GFX1250: v_dual_fma_f32 v255, -1, v4, v10 :: v_dual_add_f32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x4c,0xcf,0xfd,0x00,0x04,0x0a,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v4, v2, v10 :: v_dual_add_nc_u32 v7, v1, v3 +// GFX1250: v_dual_fma_f32 v255, v4, v2, v10 :: v_dual_add_nc_u32 v7, v1, v3 ; encoding: [0x04,0x01,0x4d,0xcf,0x01,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v1, v2, v10 :: v_dual_add_nc_u32 v7, v255, v3 +// GFX1250: v_dual_fma_f32 v255, 
v1, v2, v10 :: v_dual_add_nc_u32 v7, v255, v3 ; encoding: [0x01,0x01,0x4d,0xcf,0xff,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v255, v2, v10 :: v_dual_add_nc_u32 v7, v2, v3 +// GFX1250: v_dual_fma_f32 v255, v255, v2, v10 :: v_dual_add_nc_u32 v7, v2, v3 ; encoding: [0xff,0x01,0x4d,0xcf,0x02,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v2, v2, v10 :: v_dual_add_nc_u32 v7, v3, v3 +// GFX1250: v_dual_fma_f32 v255, v2, v2, v10 :: v_dual_add_nc_u32 v7, v3, v3 ; encoding: [0x02,0x01,0x4d,0xcf,0x03,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v3, v2, v10 :: v_dual_add_nc_u32 v7, v4, v3 +// GFX1250: v_dual_fma_f32 v255, v3, v2, v10 :: v_dual_add_nc_u32 v7, v4, v3 ; encoding: [0x03,0x01,0x4d,0xcf,0x04,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, s105, v2, v10 :: v_dual_add_nc_u32 v7, s1, v3 +// GFX1250: v_dual_fma_f32 v255, s105, v2, v10 :: v_dual_add_nc_u32 v7, s1, v3 ; encoding: [0x69,0x00,0x4d,0xcf,0x01,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, s1, v2, v10 :: v_dual_add_nc_u32 v7, s105, v3 +// GFX1250: v_dual_fma_f32 v255, s1, v2, v10 :: v_dual_add_nc_u32 v7, s105, v3 ; encoding: [0x01,0x00,0x4d,0xcf,0x69,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, ttmp15, v2, v10 :: v_dual_add_nc_u32 v7, vcc_lo, v3 +// GFX1250: v_dual_fma_f32 v255, ttmp15, v2, v10 :: v_dual_add_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x4d,0xcf,0x6a,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, exec_hi, v2, v10 :: 
v_dual_add_nc_u32 v7, vcc_hi, v3 +// GFX1250: v_dual_fma_f32 v255, exec_hi, v2, v10 :: v_dual_add_nc_u32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x4d,0xcf,0x6b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, exec_lo, v2, v10 :: v_dual_add_nc_u32 v7, ttmp15, v3 +// GFX1250: v_dual_fma_f32 v255, exec_lo, v2, v10 :: v_dual_add_nc_u32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x4d,0xcf,0x7b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, m0, v2, v10 :: v_dual_add_nc_u32 v7, m0, v3 +// GFX1250: v_dual_fma_f32 v255, m0, v2, v10 :: v_dual_add_nc_u32 v7, m0, v3 ; encoding: [0x7d,0x00,0x4d,0xcf,0x7d,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, vcc_hi, v2, v10 :: v_dual_add_nc_u32 v7, exec_lo, v3 +// GFX1250: v_dual_fma_f32 v255, vcc_hi, v2, v10 :: v_dual_add_nc_u32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x4d,0xcf,0x7e,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, vcc_lo, v2, v10 :: v_dual_add_nc_u32 v7, exec_hi, v3 +// GFX1250: v_dual_fma_f32 v255, vcc_lo, v2, v10 :: v_dual_add_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x4d,0xcf,0x7f,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, src_scc, v2, v10 :: v_dual_add_nc_u32 v7, -1, v3 +// GFX1250: v_dual_fma_f32 v255, src_scc, v2, v10 :: v_dual_add_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x00,0x4d,0xcf,0xc1,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, 0.5, v3, v10 :: v_dual_add_nc_u32 v7, 0.5, v2 +// GFX1250: v_dual_fma_f32 v255, 0.5, v3, v10 :: v_dual_add_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x4d,0xcf,0xf0,0x00,0x03,0x0a,0xff,0x02,0x00,0x07] +// 
W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, -1, v4, v10 :: v_dual_add_nc_u32 v7, src_scc, v5 +// GFX1250: v_dual_fma_f32 v255, -1, v4, v10 :: v_dual_add_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x4d,0xcf,0xfd,0x00,0x04,0x0a,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v4, v2, v10 :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo +// GFX1250: v_dual_fma_f32 v255, v4, v2, v10 :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo ; encoding: [0x04,0x91,0x4c,0xcf,0x01,0x01,0x02,0x0a,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v1, v2, v10 :: v_dual_cndmask_b32 v7, v255, v3, vcc_lo +// GFX1250: v_dual_fma_f32 v255, v1, v2, v10 :: v_dual_cndmask_b32 v7, v255, v3, vcc_lo ; encoding: [0x01,0x91,0x4c,0xcf,0xff,0x01,0x02,0x0a,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v255, v2, v10 :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo +// GFX1250: v_dual_fma_f32 v255, v255, v2, v10 :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo ; encoding: [0xff,0x91,0x4c,0xcf,0x02,0x01,0x02,0x0a,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v2, v2, v10 :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo +// GFX1250: v_dual_fma_f32 v255, v2, v2, v10 :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo ; encoding: [0x02,0x91,0x4c,0xcf,0x03,0x01,0x02,0x0a,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v3, v2, v10 :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo +// GFX1250: v_dual_fma_f32 v255, v3, v2, v10 :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo ; encoding: [0x03,0x91,0x4c,0xcf,0x04,0x01,0x02,0x0a,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, s105, v2, v10 :: v_dual_cndmask_b32 v7, s105, v3, vcc_lo +// 
GFX1250: v_dual_fma_f32 v255, s105, v2, v10 :: v_dual_cndmask_b32 v7, s105, v3, vcc_lo ; encoding: [0x69,0x90,0x4c,0xcf,0x69,0x00,0x02,0x0a,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, s1, v2, v10 :: v_dual_cndmask_b32 v7, s1, v3, vcc_lo +// GFX1250: v_dual_fma_f32 v255, s1, v2, v10 :: v_dual_cndmask_b32 v7, s1, v3, vcc_lo ; encoding: [0x01,0x90,0x4c,0xcf,0x01,0x00,0x02,0x0a,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, ttmp15, v2, v10 :: v_dual_cndmask_b32 v7, ttmp15, v3, vcc_lo +// GFX1250: v_dual_fma_f32 v255, ttmp15, v2, v10 :: v_dual_cndmask_b32 v7, ttmp15, v3, vcc_lo ; encoding: [0x7b,0x90,0x4c,0xcf,0x7b,0x00,0x02,0x0a,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, exec_hi, v2, v10 :: v_dual_cndmask_b32 v7, exec_hi, v3, vcc_lo +// GFX1250: v_dual_fma_f32 v255, exec_hi, v2, v10 :: v_dual_cndmask_b32 v7, exec_hi, v3, vcc_lo ; encoding: [0x7f,0x90,0x4c,0xcf,0x7f,0x00,0x02,0x0a,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, exec_lo, v2, v10 :: v_dual_cndmask_b32 v7, exec_lo, v3, vcc_lo +// GFX1250: v_dual_fma_f32 v255, exec_lo, v2, v10 :: v_dual_cndmask_b32 v7, exec_lo, v3, vcc_lo ; encoding: [0x7e,0x90,0x4c,0xcf,0x7e,0x00,0x02,0x0a,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, m0, v2, v10 :: v_dual_cndmask_b32 v7, m0, v3, vcc_lo +// GFX1250: v_dual_fma_f32 v255, m0, v2, v10 :: v_dual_cndmask_b32 v7, m0, v3, vcc_lo ; encoding: [0x7d,0x90,0x4c,0xcf,0x7d,0x00,0x02,0x0a,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, vcc_hi, v2, v10 :: v_dual_cndmask_b32 v7, vcc_hi, v3, vcc_lo +// GFX1250: v_dual_fma_f32 v255, vcc_hi, v2, v10 :: v_dual_cndmask_b32 v7, vcc_hi, v3, 
vcc_lo ; encoding: [0x6b,0x90,0x4c,0xcf,0x6b,0x00,0x02,0x0a,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, vcc_lo, v2, v10 :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo +// GFX1250: v_dual_fma_f32 v255, vcc_lo, v2, v10 :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo ; encoding: [0x6a,0x90,0x4c,0xcf,0x6a,0x00,0x02,0x0a,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, src_scc, v2, v10 :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo +// GFX1250: v_dual_fma_f32 v255, src_scc, v2, v10 :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo ; encoding: [0xfd,0x90,0x4c,0xcf,0xc1,0x00,0x02,0x0a,0xff,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, 0.5, v3, v10 :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo +// GFX1250: v_dual_fma_f32 v255, 0.5, v3, v10 :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo ; encoding: [0xf0,0x90,0x4c,0xcf,0xf0,0x00,0x03,0x0a,0xff,0x02,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, -1, v4, v10 :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo +// GFX1250: v_dual_fma_f32 v255, -1, v4, v10 :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo ; encoding: [0xc1,0x90,0x4c,0xcf,0xfd,0x00,0x04,0x0a,0xff,0x05,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v4, v2, v10 :: v_dual_fmac_f32 v7, v1, v3 +// GFX1250: v_dual_fma_f32 v255, v4, v2, v10 :: v_dual_fmac_f32 v7, v1, v3 ; encoding: [0x04,0x01,0x4c,0xcf,0x01,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v1, v2, v10 :: v_dual_fmac_f32 v7, v255, v3 +// GFX1250: v_dual_fma_f32 v255, v1, v2, v10 :: v_dual_fmac_f32 v7, v255, v3 ; encoding: [0x01,0x01,0x4c,0xcf,0xff,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_fma_f32 v255, v255, v2, v10 :: v_dual_fmac_f32 v7, v2, v3 +// GFX1250: v_dual_fma_f32 v255, v255, v2, v10 :: v_dual_fmac_f32 v7, v2, v3 ; encoding: [0xff,0x01,0x4c,0xcf,0x02,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v2, v2, v10 :: v_dual_fmac_f32 v7, v3, v3 +// GFX1250: v_dual_fma_f32 v255, v2, v2, v10 :: v_dual_fmac_f32 v7, v3, v3 ; encoding: [0x02,0x01,0x4c,0xcf,0x03,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v3, v2, v10 :: v_dual_fmac_f32 v7, v4, v3 +// GFX1250: v_dual_fma_f32 v255, v3, v2, v10 :: v_dual_fmac_f32 v7, v4, v3 ; encoding: [0x03,0x01,0x4c,0xcf,0x04,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, s105, v2, v10 :: v_dual_fmac_f32 v7, s1, v3 +// GFX1250: v_dual_fma_f32 v255, s105, v2, v10 :: v_dual_fmac_f32 v7, s1, v3 ; encoding: [0x69,0x00,0x4c,0xcf,0x01,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, s1, v2, v10 :: v_dual_fmac_f32 v7, s105, v3 +// GFX1250: v_dual_fma_f32 v255, s1, v2, v10 :: v_dual_fmac_f32 v7, s105, v3 ; encoding: [0x01,0x00,0x4c,0xcf,0x69,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, ttmp15, v2, v10 :: v_dual_fmac_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_fma_f32 v255, ttmp15, v2, v10 :: v_dual_fmac_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x4c,0xcf,0x6a,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, exec_hi, v2, v10 :: v_dual_fmac_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_fma_f32 v255, exec_hi, v2, v10 :: v_dual_fmac_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x4c,0xcf,0x6b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: 
error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, exec_lo, v2, v10 :: v_dual_fmac_f32 v7, ttmp15, v3 +// GFX1250: v_dual_fma_f32 v255, exec_lo, v2, v10 :: v_dual_fmac_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x4c,0xcf,0x7b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, m0, v2, v10 :: v_dual_fmac_f32 v7, m0, v3 +// GFX1250: v_dual_fma_f32 v255, m0, v2, v10 :: v_dual_fmac_f32 v7, m0, v3 ; encoding: [0x7d,0x00,0x4c,0xcf,0x7d,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, vcc_hi, v2, v10 :: v_dual_fmac_f32 v7, exec_lo, v3 +// GFX1250: v_dual_fma_f32 v255, vcc_hi, v2, v10 :: v_dual_fmac_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x4c,0xcf,0x7e,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, vcc_lo, v2, v10 :: v_dual_fmac_f32 v7, exec_hi, v3 +// GFX1250: v_dual_fma_f32 v255, vcc_lo, v2, v10 :: v_dual_fmac_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x4c,0xcf,0x7f,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, src_scc, v2, v10 :: v_dual_fmac_f32 v7, -1, v3 +// GFX1250: v_dual_fma_f32 v255, src_scc, v2, v10 :: v_dual_fmac_f32 v7, -1, v3 ; encoding: [0xfd,0x00,0x4c,0xcf,0xc1,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, 0.5, v3, v10 :: v_dual_fmac_f32 v7, 0.5, v2 +// GFX1250: v_dual_fma_f32 v255, 0.5, v3, v10 :: v_dual_fmac_f32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x4c,0xcf,0xf0,0x00,0x03,0x0a,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, -1, v4, v10 :: v_dual_fmac_f32 v7, src_scc, v5 +// GFX1250: v_dual_fma_f32 v255, -1, v4, v10 :: v_dual_fmac_f32 v7, src_scc, v5 ; encoding: 
[0xc1,0x00,0x4c,0xcf,0xfd,0x00,0x04,0x0a,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v4, v2, v10 :: v_dual_lshlrev_b32 v7, v1, v3 +// GFX1250: v_dual_fma_f32 v255, v4, v2, v10 :: v_dual_lshlrev_b32 v7, v1, v3 ; encoding: [0x04,0x11,0x4d,0xcf,0x01,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v1, v2, v10 :: v_dual_lshlrev_b32 v7, v255, v3 +// GFX1250: v_dual_fma_f32 v255, v1, v2, v10 :: v_dual_lshlrev_b32 v7, v255, v3 ; encoding: [0x01,0x11,0x4d,0xcf,0xff,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v255, v2, v10 :: v_dual_lshlrev_b32 v7, v2, v3 +// GFX1250: v_dual_fma_f32 v255, v255, v2, v10 :: v_dual_lshlrev_b32 v7, v2, v3 ; encoding: [0xff,0x11,0x4d,0xcf,0x02,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v2, v2, v10 :: v_dual_lshlrev_b32 v7, v3, v3 +// GFX1250: v_dual_fma_f32 v255, v2, v2, v10 :: v_dual_lshlrev_b32 v7, v3, v3 ; encoding: [0x02,0x11,0x4d,0xcf,0x03,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v3, v2, v10 :: v_dual_lshlrev_b32 v7, v4, v3 +// GFX1250: v_dual_fma_f32 v255, v3, v2, v10 :: v_dual_lshlrev_b32 v7, v4, v3 ; encoding: [0x03,0x11,0x4d,0xcf,0x04,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, s105, v2, v10 :: v_dual_lshlrev_b32 v7, s1, v3 +// GFX1250: v_dual_fma_f32 v255, s105, v2, v10 :: v_dual_lshlrev_b32 v7, s1, v3 ; encoding: [0x69,0x10,0x4d,0xcf,0x01,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, s1, v2, v10 :: v_dual_lshlrev_b32 v7, s105, v3 +// GFX1250: v_dual_fma_f32 v255, s1, 
v2, v10 :: v_dual_lshlrev_b32 v7, s105, v3 ; encoding: [0x01,0x10,0x4d,0xcf,0x69,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, ttmp15, v2, v10 :: v_dual_lshlrev_b32 v7, vcc_lo, v3 +// GFX1250: v_dual_fma_f32 v255, ttmp15, v2, v10 :: v_dual_lshlrev_b32 v7, vcc_lo, v3 ; encoding: [0x7b,0x10,0x4d,0xcf,0x6a,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, exec_hi, v2, v10 :: v_dual_lshlrev_b32 v7, vcc_hi, v3 +// GFX1250: v_dual_fma_f32 v255, exec_hi, v2, v10 :: v_dual_lshlrev_b32 v7, vcc_hi, v3 ; encoding: [0x7f,0x10,0x4d,0xcf,0x6b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, exec_lo, v2, v10 :: v_dual_lshlrev_b32 v7, ttmp15, v3 +// GFX1250: v_dual_fma_f32 v255, exec_lo, v2, v10 :: v_dual_lshlrev_b32 v7, ttmp15, v3 ; encoding: [0x7e,0x10,0x4d,0xcf,0x7b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, m0, v2, v10 :: v_dual_lshlrev_b32 v7, m0, v3 +// GFX1250: v_dual_fma_f32 v255, m0, v2, v10 :: v_dual_lshlrev_b32 v7, m0, v3 ; encoding: [0x7d,0x10,0x4d,0xcf,0x7d,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, vcc_hi, v2, v10 :: v_dual_lshlrev_b32 v7, exec_lo, v3 +// GFX1250: v_dual_fma_f32 v255, vcc_hi, v2, v10 :: v_dual_lshlrev_b32 v7, exec_lo, v3 ; encoding: [0x6b,0x10,0x4d,0xcf,0x7e,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, vcc_lo, v2, v10 :: v_dual_lshlrev_b32 v7, exec_hi, v3 +// GFX1250: v_dual_fma_f32 v255, vcc_lo, v2, v10 :: v_dual_lshlrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x10,0x4d,0xcf,0x7f,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction 
requires wavesize=32 + +v_dual_fma_f32 v255, src_scc, v2, v10 :: v_dual_lshlrev_b32 v7, -1, v3 +// GFX1250: v_dual_fma_f32 v255, src_scc, v2, v10 :: v_dual_lshlrev_b32 v7, -1, v3 ; encoding: [0xfd,0x10,0x4d,0xcf,0xc1,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, 0.5, v3, v10 :: v_dual_lshlrev_b32 v7, 0.5, v2 +// GFX1250: v_dual_fma_f32 v255, 0.5, v3, v10 :: v_dual_lshlrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x10,0x4d,0xcf,0xf0,0x00,0x03,0x0a,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, -1, v4, v10 :: v_dual_lshlrev_b32 v7, src_scc, v5 +// GFX1250: v_dual_fma_f32 v255, -1, v4, v10 :: v_dual_lshlrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x10,0x4d,0xcf,0xfd,0x00,0x04,0x0a,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v4, v2, v10 :: v_dual_max_num_f32 v7, v1, v3 +// GFX1250: v_dual_fma_f32 v255, v4, v2, v10 :: v_dual_max_num_f32 v7, v1, v3 ; encoding: [0x04,0xa1,0x4c,0xcf,0x01,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v1, v2, v10 :: v_dual_max_num_f32 v7, v255, v3 +// GFX1250: v_dual_fma_f32 v255, v1, v2, v10 :: v_dual_max_num_f32 v7, v255, v3 ; encoding: [0x01,0xa1,0x4c,0xcf,0xff,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v255, v2, v10 :: v_dual_max_num_f32 v7, v2, v3 +// GFX1250: v_dual_fma_f32 v255, v255, v2, v10 :: v_dual_max_num_f32 v7, v2, v3 ; encoding: [0xff,0xa1,0x4c,0xcf,0x02,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v2, v2, v10 :: v_dual_max_num_f32 v7, v3, v3 +// GFX1250: v_dual_fma_f32 v255, v2, v2, v10 :: v_dual_max_num_f32 v7, v3, v3 ; encoding: 
[0x02,0xa1,0x4c,0xcf,0x03,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v3, v2, v10 :: v_dual_max_num_f32 v7, v4, v3 +// GFX1250: v_dual_fma_f32 v255, v3, v2, v10 :: v_dual_max_num_f32 v7, v4, v3 ; encoding: [0x03,0xa1,0x4c,0xcf,0x04,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, s105, v2, v10 :: v_dual_max_num_f32 v7, s1, v3 +// GFX1250: v_dual_fma_f32 v255, s105, v2, v10 :: v_dual_max_num_f32 v7, s1, v3 ; encoding: [0x69,0xa0,0x4c,0xcf,0x01,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, s1, v2, v10 :: v_dual_max_num_f32 v7, s105, v3 +// GFX1250: v_dual_fma_f32 v255, s1, v2, v10 :: v_dual_max_num_f32 v7, s105, v3 ; encoding: [0x01,0xa0,0x4c,0xcf,0x69,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, ttmp15, v2, v10 :: v_dual_max_num_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_fma_f32 v255, ttmp15, v2, v10 :: v_dual_max_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xa0,0x4c,0xcf,0x6a,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, exec_hi, v2, v10 :: v_dual_max_num_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_fma_f32 v255, exec_hi, v2, v10 :: v_dual_max_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xa0,0x4c,0xcf,0x6b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, exec_lo, v2, v10 :: v_dual_max_num_f32 v7, ttmp15, v3 +// GFX1250: v_dual_fma_f32 v255, exec_lo, v2, v10 :: v_dual_max_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xa0,0x4c,0xcf,0x7b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, m0, v2, v10 :: v_dual_max_num_f32 v7, 
m0, v3 +// GFX1250: v_dual_fma_f32 v255, m0, v2, v10 :: v_dual_max_num_f32 v7, m0, v3 ; encoding: [0x7d,0xa0,0x4c,0xcf,0x7d,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, vcc_hi, v2, v10 :: v_dual_max_num_f32 v7, exec_lo, v3 +// GFX1250: v_dual_fma_f32 v255, vcc_hi, v2, v10 :: v_dual_max_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xa0,0x4c,0xcf,0x7e,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, vcc_lo, v2, v10 :: v_dual_max_num_f32 v7, exec_hi, v3 +// GFX1250: v_dual_fma_f32 v255, vcc_lo, v2, v10 :: v_dual_max_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xa0,0x4c,0xcf,0x7f,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, src_scc, v2, v10 :: v_dual_max_num_f32 v7, -1, v3 +// GFX1250: v_dual_fma_f32 v255, src_scc, v2, v10 :: v_dual_max_num_f32 v7, -1, v3 ; encoding: [0xfd,0xa0,0x4c,0xcf,0xc1,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, 0.5, v3, v10 :: v_dual_max_num_f32 v7, 0.5, v2 +// GFX1250: v_dual_fma_f32 v255, 0.5, v3, v10 :: v_dual_max_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xa0,0x4c,0xcf,0xf0,0x00,0x03,0x0a,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, -1, v4, v10 :: v_dual_max_num_f32 v7, src_scc, v5 +// GFX1250: v_dual_fma_f32 v255, -1, v4, v10 :: v_dual_max_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xa0,0x4c,0xcf,0xfd,0x00,0x04,0x0a,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v4, v2, v10 :: v_dual_min_num_f32 v7, v1, v3 +// GFX1250: v_dual_fma_f32 v255, v4, v2, v10 :: v_dual_min_num_f32 v7, v1, v3 ; encoding: [0x04,0xb1,0x4c,0xcf,0x01,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: 
error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v1, v2, v10 :: v_dual_min_num_f32 v7, v255, v3 +// GFX1250: v_dual_fma_f32 v255, v1, v2, v10 :: v_dual_min_num_f32 v7, v255, v3 ; encoding: [0x01,0xb1,0x4c,0xcf,0xff,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v255, v2, v10 :: v_dual_min_num_f32 v7, v2, v3 +// GFX1250: v_dual_fma_f32 v255, v255, v2, v10 :: v_dual_min_num_f32 v7, v2, v3 ; encoding: [0xff,0xb1,0x4c,0xcf,0x02,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v2, v2, v10 :: v_dual_min_num_f32 v7, v3, v3 +// GFX1250: v_dual_fma_f32 v255, v2, v2, v10 :: v_dual_min_num_f32 v7, v3, v3 ; encoding: [0x02,0xb1,0x4c,0xcf,0x03,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v3, v2, v10 :: v_dual_min_num_f32 v7, v4, v3 +// GFX1250: v_dual_fma_f32 v255, v3, v2, v10 :: v_dual_min_num_f32 v7, v4, v3 ; encoding: [0x03,0xb1,0x4c,0xcf,0x04,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, s105, v2, v10 :: v_dual_min_num_f32 v7, s1, v3 +// GFX1250: v_dual_fma_f32 v255, s105, v2, v10 :: v_dual_min_num_f32 v7, s1, v3 ; encoding: [0x69,0xb0,0x4c,0xcf,0x01,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, s1, v2, v10 :: v_dual_min_num_f32 v7, s105, v3 +// GFX1250: v_dual_fma_f32 v255, s1, v2, v10 :: v_dual_min_num_f32 v7, s105, v3 ; encoding: [0x01,0xb0,0x4c,0xcf,0x69,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, ttmp15, v2, v10 :: v_dual_min_num_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_fma_f32 v255, ttmp15, v2, v10 :: v_dual_min_num_f32 v7, vcc_lo, v3 ; encoding: 
[0x7b,0xb0,0x4c,0xcf,0x6a,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, exec_hi, v2, v10 :: v_dual_min_num_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_fma_f32 v255, exec_hi, v2, v10 :: v_dual_min_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xb0,0x4c,0xcf,0x6b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, exec_lo, v2, v10 :: v_dual_min_num_f32 v7, ttmp15, v3 +// GFX1250: v_dual_fma_f32 v255, exec_lo, v2, v10 :: v_dual_min_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xb0,0x4c,0xcf,0x7b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, m0, v2, v10 :: v_dual_min_num_f32 v7, m0, v3 +// GFX1250: v_dual_fma_f32 v255, m0, v2, v10 :: v_dual_min_num_f32 v7, m0, v3 ; encoding: [0x7d,0xb0,0x4c,0xcf,0x7d,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, vcc_hi, v2, v10 :: v_dual_min_num_f32 v7, exec_lo, v3 +// GFX1250: v_dual_fma_f32 v255, vcc_hi, v2, v10 :: v_dual_min_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xb0,0x4c,0xcf,0x7e,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, vcc_lo, v2, v10 :: v_dual_min_num_f32 v7, exec_hi, v3 +// GFX1250: v_dual_fma_f32 v255, vcc_lo, v2, v10 :: v_dual_min_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xb0,0x4c,0xcf,0x7f,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, src_scc, v2, v10 :: v_dual_min_num_f32 v7, -1, v3 +// GFX1250: v_dual_fma_f32 v255, src_scc, v2, v10 :: v_dual_min_num_f32 v7, -1, v3 ; encoding: [0xfd,0xb0,0x4c,0xcf,0xc1,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, 0.5, v3, v10 :: 
v_dual_min_num_f32 v7, 0.5, v2 +// GFX1250: v_dual_fma_f32 v255, 0.5, v3, v10 :: v_dual_min_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xb0,0x4c,0xcf,0xf0,0x00,0x03,0x0a,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, -1, v4, v10 :: v_dual_min_num_f32 v7, src_scc, v5 +// GFX1250: v_dual_fma_f32 v255, -1, v4, v10 :: v_dual_min_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xb0,0x4c,0xcf,0xfd,0x00,0x04,0x0a,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v4, v255, v10 :: v_dual_mov_b32 v7, v1 +// GFX1250: v_dual_fma_f32 v255, v4, v255, v10 :: v_dual_mov_b32 v7, v1 ; encoding: [0x04,0x81,0x4c,0xcf,0x01,0x01,0xff,0x0a,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v1, v255, v10 :: v_dual_mov_b32 v7, v255 +// GFX1250: v_dual_fma_f32 v255, v1, v255, v10 :: v_dual_mov_b32 v7, v255 ; encoding: [0x01,0x81,0x4c,0xcf,0xff,0x01,0xff,0x0a,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v255, v255, v10 :: v_dual_mov_b32 v7, v2 +// GFX1250: v_dual_fma_f32 v255, v255, v255, v10 :: v_dual_mov_b32 v7, v2 ; encoding: [0xff,0x81,0x4c,0xcf,0x02,0x01,0xff,0x0a,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v2, v255, v10 :: v_dual_mov_b32 v7, v3 +// GFX1250: v_dual_fma_f32 v255, v2, v255, v10 :: v_dual_mov_b32 v7, v3 ; encoding: [0x02,0x81,0x4c,0xcf,0x03,0x01,0xff,0x0a,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v3, v255, v10 :: v_dual_mov_b32 v7, v4 +// GFX1250: v_dual_fma_f32 v255, v3, v255, v10 :: v_dual_mov_b32 v7, v4 ; encoding: [0x03,0x81,0x4c,0xcf,0x04,0x01,0xff,0x0a,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, s105, v255, v10 
:: v_dual_mov_b32 v7, s1 +// GFX1250: v_dual_fma_f32 v255, s105, v255, v10 :: v_dual_mov_b32 v7, s1 ; encoding: [0x69,0x80,0x4c,0xcf,0x01,0x00,0xff,0x0a,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, s1, v255, v10 :: v_dual_mov_b32 v7, s105 +// GFX1250: v_dual_fma_f32 v255, s1, v255, v10 :: v_dual_mov_b32 v7, s105 ; encoding: [0x01,0x80,0x4c,0xcf,0x69,0x00,0xff,0x0a,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, ttmp15, v255, v10 :: v_dual_mov_b32 v7, vcc_lo +// GFX1250: v_dual_fma_f32 v255, ttmp15, v255, v10 :: v_dual_mov_b32 v7, vcc_lo ; encoding: [0x7b,0x80,0x4c,0xcf,0x6a,0x00,0xff,0x0a,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, exec_hi, v255, v10 :: v_dual_mov_b32 v7, vcc_hi +// GFX1250: v_dual_fma_f32 v255, exec_hi, v255, v10 :: v_dual_mov_b32 v7, vcc_hi ; encoding: [0x7f,0x80,0x4c,0xcf,0x6b,0x00,0xff,0x0a,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, exec_lo, v255, v10 :: v_dual_mov_b32 v7, ttmp15 +// GFX1250: v_dual_fma_f32 v255, exec_lo, v255, v10 :: v_dual_mov_b32 v7, ttmp15 ; encoding: [0x7e,0x80,0x4c,0xcf,0x7b,0x00,0xff,0x0a,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, m0, v255, v10 :: v_dual_mov_b32 v7, m0 +// GFX1250: v_dual_fma_f32 v255, m0, v255, v10 :: v_dual_mov_b32 v7, m0 ; encoding: [0x7d,0x80,0x4c,0xcf,0x7d,0x00,0xff,0x0a,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, vcc_hi, v255, v10 :: v_dual_mov_b32 v7, exec_lo +// GFX1250: v_dual_fma_f32 v255, vcc_hi, v255, v10 :: v_dual_mov_b32 v7, exec_lo ; encoding: [0x6b,0x80,0x4c,0xcf,0x7e,0x00,0xff,0x0a,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_fma_f32 v255, vcc_lo, v255, v10 :: v_dual_mov_b32 v7, exec_hi +// GFX1250: v_dual_fma_f32 v255, vcc_lo, v255, v10 :: v_dual_mov_b32 v7, exec_hi ; encoding: [0x6a,0x80,0x4c,0xcf,0x7f,0x00,0xff,0x0a,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, src_scc, v255, v10 :: v_dual_mov_b32 v7, -1 +// GFX1250: v_dual_fma_f32 v255, src_scc, v255, v10 :: v_dual_mov_b32 v7, -1 ; encoding: [0xfd,0x80,0x4c,0xcf,0xc1,0x00,0xff,0x0a,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, 0.5, v3, v10 :: v_dual_mov_b32 v7, 0.5 +// GFX1250: v_dual_fma_f32 v255, 0.5, v3, v10 :: v_dual_mov_b32 v7, 0.5 ; encoding: [0xf0,0x80,0x4c,0xcf,0xf0,0x00,0x03,0x0a,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, -1, v4, v10 :: v_dual_mov_b32 v7, src_scc +// GFX1250: v_dual_fma_f32 v255, -1, v4, v10 :: v_dual_mov_b32 v7, src_scc ; encoding: [0xc1,0x80,0x4c,0xcf,0xfd,0x00,0x04,0x0a,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v4, v2, v10 :: v_dual_mul_dx9_zero_f32 v7, v1, v3 +// GFX1250: v_dual_fma_f32 v255, v4, v2, v10 :: v_dual_mul_dx9_zero_f32 v7, v1, v3 ; encoding: [0x04,0x71,0x4c,0xcf,0x01,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v1, v2, v10 :: v_dual_mul_dx9_zero_f32 v7, v255, v3 +// GFX1250: v_dual_fma_f32 v255, v1, v2, v10 :: v_dual_mul_dx9_zero_f32 v7, v255, v3 ; encoding: [0x01,0x71,0x4c,0xcf,0xff,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v255, v2, v10 :: v_dual_mul_dx9_zero_f32 v7, v2, v3 +// GFX1250: v_dual_fma_f32 v255, v255, v2, v10 :: v_dual_mul_dx9_zero_f32 v7, v2, v3 ; encoding: [0xff,0x71,0x4c,0xcf,0x02,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +// 
W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v2, v2, v10 :: v_dual_mul_dx9_zero_f32 v7, v3, v3 +// GFX1250: v_dual_fma_f32 v255, v2, v2, v10 :: v_dual_mul_dx9_zero_f32 v7, v3, v3 ; encoding: [0x02,0x71,0x4c,0xcf,0x03,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v3, v2, v10 :: v_dual_mul_dx9_zero_f32 v7, v4, v3 +// GFX1250: v_dual_fma_f32 v255, v3, v2, v10 :: v_dual_mul_dx9_zero_f32 v7, v4, v3 ; encoding: [0x03,0x71,0x4c,0xcf,0x04,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, s105, v2, v10 :: v_dual_mul_dx9_zero_f32 v7, s1, v3 +// GFX1250: v_dual_fma_f32 v255, s105, v2, v10 :: v_dual_mul_dx9_zero_f32 v7, s1, v3 ; encoding: [0x69,0x70,0x4c,0xcf,0x01,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, s1, v2, v10 :: v_dual_mul_dx9_zero_f32 v7, s105, v3 +// GFX1250: v_dual_fma_f32 v255, s1, v2, v10 :: v_dual_mul_dx9_zero_f32 v7, s105, v3 ; encoding: [0x01,0x70,0x4c,0xcf,0x69,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, ttmp15, v2, v10 :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_fma_f32 v255, ttmp15, v2, v10 :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x70,0x4c,0xcf,0x6a,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, exec_hi, v2, v10 :: v_dual_mul_dx9_zero_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_fma_f32 v255, exec_hi, v2, v10 :: v_dual_mul_dx9_zero_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x70,0x4c,0xcf,0x6b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, exec_lo, v2, v10 :: v_dual_mul_dx9_zero_f32 v7, ttmp15, v3 +// 
GFX1250: v_dual_fma_f32 v255, exec_lo, v2, v10 :: v_dual_mul_dx9_zero_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x70,0x4c,0xcf,0x7b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, m0, v2, v10 :: v_dual_mul_dx9_zero_f32 v7, m0, v3 +// GFX1250: v_dual_fma_f32 v255, m0, v2, v10 :: v_dual_mul_dx9_zero_f32 v7, m0, v3 ; encoding: [0x7d,0x70,0x4c,0xcf,0x7d,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, vcc_hi, v2, v10 :: v_dual_mul_dx9_zero_f32 v7, exec_lo, v3 +// GFX1250: v_dual_fma_f32 v255, vcc_hi, v2, v10 :: v_dual_mul_dx9_zero_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x70,0x4c,0xcf,0x7e,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, vcc_lo, v2, v10 :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 +// GFX1250: v_dual_fma_f32 v255, vcc_lo, v2, v10 :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x4c,0xcf,0x7f,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, src_scc, v2, v10 :: v_dual_mul_dx9_zero_f32 v7, -1, v3 +// GFX1250: v_dual_fma_f32 v255, src_scc, v2, v10 :: v_dual_mul_dx9_zero_f32 v7, -1, v3 ; encoding: [0xfd,0x70,0x4c,0xcf,0xc1,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, 0.5, v3, v10 :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 +// GFX1250: v_dual_fma_f32 v255, 0.5, v3, v10 :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x4c,0xcf,0xf0,0x00,0x03,0x0a,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, -1, v4, v10 :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 +// GFX1250: v_dual_fma_f32 v255, -1, v4, v10 :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 ; encoding: 
[0xc1,0x70,0x4c,0xcf,0xfd,0x00,0x04,0x0a,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v4, v2, v10 :: v_dual_mul_f32 v7, v1, v3 +// GFX1250: v_dual_fma_f32 v255, v4, v2, v10 :: v_dual_mul_f32 v7, v1, v3 ; encoding: [0x04,0x31,0x4c,0xcf,0x01,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v1, v2, v10 :: v_dual_mul_f32 v7, v255, v3 +// GFX1250: v_dual_fma_f32 v255, v1, v2, v10 :: v_dual_mul_f32 v7, v255, v3 ; encoding: [0x01,0x31,0x4c,0xcf,0xff,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v255, v2, v10 :: v_dual_mul_f32 v7, v2, v3 +// GFX1250: v_dual_fma_f32 v255, v255, v2, v10 :: v_dual_mul_f32 v7, v2, v3 ; encoding: [0xff,0x31,0x4c,0xcf,0x02,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v2, v2, v10 :: v_dual_mul_f32 v7, v3, v3 +// GFX1250: v_dual_fma_f32 v255, v2, v2, v10 :: v_dual_mul_f32 v7, v3, v3 ; encoding: [0x02,0x31,0x4c,0xcf,0x03,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v3, v2, v10 :: v_dual_mul_f32 v7, v4, v3 +// GFX1250: v_dual_fma_f32 v255, v3, v2, v10 :: v_dual_mul_f32 v7, v4, v3 ; encoding: [0x03,0x31,0x4c,0xcf,0x04,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, s105, v2, v10 :: v_dual_mul_f32 v7, s1, v3 +// GFX1250: v_dual_fma_f32 v255, s105, v2, v10 :: v_dual_mul_f32 v7, s1, v3 ; encoding: [0x69,0x30,0x4c,0xcf,0x01,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, s1, v2, v10 :: v_dual_mul_f32 v7, s105, v3 +// GFX1250: v_dual_fma_f32 v255, s1, v2, v10 :: v_dual_mul_f32 v7, s105, v3 ; encoding: 
[0x01,0x30,0x4c,0xcf,0x69,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, ttmp15, v2, v10 :: v_dual_mul_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_fma_f32 v255, ttmp15, v2, v10 :: v_dual_mul_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x30,0x4c,0xcf,0x6a,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, exec_hi, v2, v10 :: v_dual_mul_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_fma_f32 v255, exec_hi, v2, v10 :: v_dual_mul_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x30,0x4c,0xcf,0x6b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, exec_lo, v2, v10 :: v_dual_mul_f32 v7, ttmp15, v3 +// GFX1250: v_dual_fma_f32 v255, exec_lo, v2, v10 :: v_dual_mul_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x30,0x4c,0xcf,0x7b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, m0, v2, v10 :: v_dual_mul_f32 v7, m0, v3 +// GFX1250: v_dual_fma_f32 v255, m0, v2, v10 :: v_dual_mul_f32 v7, m0, v3 ; encoding: [0x7d,0x30,0x4c,0xcf,0x7d,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, vcc_hi, v2, v10 :: v_dual_mul_f32 v7, exec_lo, v3 +// GFX1250: v_dual_fma_f32 v255, vcc_hi, v2, v10 :: v_dual_mul_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x30,0x4c,0xcf,0x7e,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, vcc_lo, v2, v10 :: v_dual_mul_f32 v7, exec_hi, v3 +// GFX1250: v_dual_fma_f32 v255, vcc_lo, v2, v10 :: v_dual_mul_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x30,0x4c,0xcf,0x7f,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, src_scc, v2, v10 :: v_dual_mul_f32 v7, -1, v3 +// GFX1250: 
v_dual_fma_f32 v255, src_scc, v2, v10 :: v_dual_mul_f32 v7, -1, v3 ; encoding: [0xfd,0x30,0x4c,0xcf,0xc1,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, 0.5, v3, v10 :: v_dual_mul_f32 v7, 0.5, v2 +// GFX1250: v_dual_fma_f32 v255, 0.5, v3, v10 :: v_dual_mul_f32 v7, 0.5, v2 ; encoding: [0xf0,0x30,0x4c,0xcf,0xf0,0x00,0x03,0x0a,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, -1, v4, v10 :: v_dual_mul_f32 v7, src_scc, v5 +// GFX1250: v_dual_fma_f32 v255, -1, v4, v10 :: v_dual_mul_f32 v7, src_scc, v5 ; encoding: [0xc1,0x30,0x4c,0xcf,0xfd,0x00,0x04,0x0a,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v4, v2, v10 :: v_dual_sub_f32 v7, v1, v3 +// GFX1250: v_dual_fma_f32 v255, v4, v2, v10 :: v_dual_sub_f32 v7, v1, v3 ; encoding: [0x04,0x51,0x4c,0xcf,0x01,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v1, v2, v10 :: v_dual_sub_f32 v7, v255, v3 +// GFX1250: v_dual_fma_f32 v255, v1, v2, v10 :: v_dual_sub_f32 v7, v255, v3 ; encoding: [0x01,0x51,0x4c,0xcf,0xff,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v255, v2, v10 :: v_dual_sub_f32 v7, v2, v3 +// GFX1250: v_dual_fma_f32 v255, v255, v2, v10 :: v_dual_sub_f32 v7, v2, v3 ; encoding: [0xff,0x51,0x4c,0xcf,0x02,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v2, v2, v10 :: v_dual_sub_f32 v7, v3, v3 +// GFX1250: v_dual_fma_f32 v255, v2, v2, v10 :: v_dual_sub_f32 v7, v3, v3 ; encoding: [0x02,0x51,0x4c,0xcf,0x03,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v3, v2, v10 :: v_dual_sub_f32 v7, v4, v3 +// 
GFX1250: v_dual_fma_f32 v255, v3, v2, v10 :: v_dual_sub_f32 v7, v4, v3 ; encoding: [0x03,0x51,0x4c,0xcf,0x04,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, s105, v2, v10 :: v_dual_sub_f32 v7, s1, v3 +// GFX1250: v_dual_fma_f32 v255, s105, v2, v10 :: v_dual_sub_f32 v7, s1, v3 ; encoding: [0x69,0x50,0x4c,0xcf,0x01,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, s1, v2, v10 :: v_dual_sub_f32 v7, s105, v3 +// GFX1250: v_dual_fma_f32 v255, s1, v2, v10 :: v_dual_sub_f32 v7, s105, v3 ; encoding: [0x01,0x50,0x4c,0xcf,0x69,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, ttmp15, v2, v10 :: v_dual_sub_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_fma_f32 v255, ttmp15, v2, v10 :: v_dual_sub_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x50,0x4c,0xcf,0x6a,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, exec_hi, v2, v10 :: v_dual_sub_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_fma_f32 v255, exec_hi, v2, v10 :: v_dual_sub_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x50,0x4c,0xcf,0x6b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, exec_lo, v2, v10 :: v_dual_sub_f32 v7, ttmp15, v3 +// GFX1250: v_dual_fma_f32 v255, exec_lo, v2, v10 :: v_dual_sub_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x50,0x4c,0xcf,0x7b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, m0, v2, v10 :: v_dual_sub_f32 v7, m0, v3 +// GFX1250: v_dual_fma_f32 v255, m0, v2, v10 :: v_dual_sub_f32 v7, m0, v3 ; encoding: [0x7d,0x50,0x4c,0xcf,0x7d,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, 
vcc_hi, v2, v10 :: v_dual_sub_f32 v7, exec_lo, v3 +// GFX1250: v_dual_fma_f32 v255, vcc_hi, v2, v10 :: v_dual_sub_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x50,0x4c,0xcf,0x7e,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, vcc_lo, v2, v10 :: v_dual_sub_f32 v7, exec_hi, v3 +// GFX1250: v_dual_fma_f32 v255, vcc_lo, v2, v10 :: v_dual_sub_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x4c,0xcf,0x7f,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, src_scc, v2, v10 :: v_dual_sub_f32 v7, -1, v3 +// GFX1250: v_dual_fma_f32 v255, src_scc, v2, v10 :: v_dual_sub_f32 v7, -1, v3 ; encoding: [0xfd,0x50,0x4c,0xcf,0xc1,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, 0.5, v3, v10 :: v_dual_sub_f32 v7, 0.5, v2 +// GFX1250: v_dual_fma_f32 v255, 0.5, v3, v10 :: v_dual_sub_f32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x4c,0xcf,0xf0,0x00,0x03,0x0a,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, -1, v4, v10 :: v_dual_sub_f32 v7, src_scc, v5 +// GFX1250: v_dual_fma_f32 v255, -1, v4, v10 :: v_dual_sub_f32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x4c,0xcf,0xfd,0x00,0x04,0x0a,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v4, v2, v10 :: v_dual_subrev_f32 v7, v1, v3 +// GFX1250: v_dual_fma_f32 v255, v4, v2, v10 :: v_dual_subrev_f32 v7, v1, v3 ; encoding: [0x04,0x61,0x4c,0xcf,0x01,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v1, v2, v10 :: v_dual_subrev_f32 v7, v255, v3 +// GFX1250: v_dual_fma_f32 v255, v1, v2, v10 :: v_dual_subrev_f32 v7, v255, v3 ; encoding: [0x01,0x61,0x4c,0xcf,0xff,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_fma_f32 v255, v255, v2, v10 :: v_dual_subrev_f32 v7, v2, v3 +// GFX1250: v_dual_fma_f32 v255, v255, v2, v10 :: v_dual_subrev_f32 v7, v2, v3 ; encoding: [0xff,0x61,0x4c,0xcf,0x02,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v2, v2, v10 :: v_dual_subrev_f32 v7, v3, v3 +// GFX1250: v_dual_fma_f32 v255, v2, v2, v10 :: v_dual_subrev_f32 v7, v3, v3 ; encoding: [0x02,0x61,0x4c,0xcf,0x03,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, v3, v2, v10 :: v_dual_subrev_f32 v7, v4, v3 +// GFX1250: v_dual_fma_f32 v255, v3, v2, v10 :: v_dual_subrev_f32 v7, v4, v3 ; encoding: [0x03,0x61,0x4c,0xcf,0x04,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, s105, v2, v10 :: v_dual_subrev_f32 v7, s1, v3 +// GFX1250: v_dual_fma_f32 v255, s105, v2, v10 :: v_dual_subrev_f32 v7, s1, v3 ; encoding: [0x69,0x60,0x4c,0xcf,0x01,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, s1, v2, v10 :: v_dual_subrev_f32 v7, s105, v3 +// GFX1250: v_dual_fma_f32 v255, s1, v2, v10 :: v_dual_subrev_f32 v7, s105, v3 ; encoding: [0x01,0x60,0x4c,0xcf,0x69,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, ttmp15, v2, v10 :: v_dual_subrev_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_fma_f32 v255, ttmp15, v2, v10 :: v_dual_subrev_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x60,0x4c,0xcf,0x6a,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, exec_hi, v2, v10 :: v_dual_subrev_f32 v7, vcc_hi, v3 +// GFX1250: v_dual_fma_f32 v255, exec_hi, v2, v10 :: v_dual_subrev_f32 v7, vcc_hi, v3 ; encoding: 
[0x7f,0x60,0x4c,0xcf,0x6b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, exec_lo, v2, v10 :: v_dual_subrev_f32 v7, ttmp15, v3 +// GFX1250: v_dual_fma_f32 v255, exec_lo, v2, v10 :: v_dual_subrev_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x60,0x4c,0xcf,0x7b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, m0, v2, v10 :: v_dual_subrev_f32 v7, m0, v3 +// GFX1250: v_dual_fma_f32 v255, m0, v2, v10 :: v_dual_subrev_f32 v7, m0, v3 ; encoding: [0x7d,0x60,0x4c,0xcf,0x7d,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, vcc_hi, v2, v10 :: v_dual_subrev_f32 v7, exec_lo, v3 +// GFX1250: v_dual_fma_f32 v255, vcc_hi, v2, v10 :: v_dual_subrev_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x60,0x4c,0xcf,0x7e,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, vcc_lo, v2, v10 :: v_dual_subrev_f32 v7, exec_hi, v3 +// GFX1250: v_dual_fma_f32 v255, vcc_lo, v2, v10 :: v_dual_subrev_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x4c,0xcf,0x7f,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, src_scc, v2, v10 :: v_dual_subrev_f32 v7, -1, v3 +// GFX1250: v_dual_fma_f32 v255, src_scc, v2, v10 :: v_dual_subrev_f32 v7, -1, v3 ; encoding: [0xfd,0x60,0x4c,0xcf,0xc1,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, 0.5, v3, v10 :: v_dual_subrev_f32 v7, 0.5, v2 +// GFX1250: v_dual_fma_f32 v255, 0.5, v3, v10 :: v_dual_subrev_f32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x4c,0xcf,0xf0,0x00,0x03,0x0a,0xff,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v255, -1, v4, v10 :: v_dual_subrev_f32 v7, 
src_scc, v5 +// GFX1250: v_dual_fma_f32 v255, -1, v4, v10 :: v_dual_subrev_f32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x4c,0xcf,0xfd,0x00,0x04,0x0a,0xff,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v1, v4, v2, v10 :: v_dual_fma_f32 v7, v1, v3, v4 +// GFX1250: v_dual_fma_f32 v1, v4, v2, v10 :: v_dual_fma_f32 v7, v1, v3, v4 ; encoding: [0x04,0x31,0x4d,0xcf,0x01,0x01,0x02,0x0a,0x01,0x03,0x04,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v1, v4, v2, v10 :: v_dual_sub_nc_u32 v9, v1, v13 +// GFX1250: v_dual_fma_f32 v1, v4, v2, v10 :: v_dual_sub_nc_u32 v9, v1, v13 ; encoding: [0x04,0x41,0x4d,0xcf,0x01,0x01,0x02,0x0a,0x01,0x0d,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v1, v4, v2, v10 :: v_dual_lshrrev_b32 v9, v1, v13 +// GFX1250: v_dual_fma_f32 v1, v4, v2, v10 :: v_dual_lshrrev_b32 v9, v1, v13 ; encoding: [0x04,0x51,0x4d,0xcf,0x01,0x01,0x02,0x0a,0x01,0x0d,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v1, v4, v2, v10 :: v_dual_ashrrev_i32 v9, v1, v13 +// GFX1250: v_dual_fma_f32 v1, v4, v2, v10 :: v_dual_ashrrev_i32 v9, v1, v13 ; encoding: [0x04,0x61,0x4d,0xcf,0x01,0x01,0x02,0x0a,0x01,0x0d,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v1, v4, v2, v10 :: v_dual_bitop2_b32 v7, v1, v3 +// GFX1250: v_dual_fma_f32 v1, v4, v2, v10 :: v_dual_bitop2_b32 v7, v1, v3 ; encoding: [0x04,0x21,0x4d,0xcf,0x01,0x01,0x02,0x0a,0x01,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[252:253], v[6:7], v[4:5], v[10:11] :: v_dual_add_f32 v8, v1, v3 +// GFX1250: v_dual_fma_f64 v[252:253], v[6:7], v[4:5], v[10:11] :: v_dual_add_f32 v8, v1, v3 ; encoding: [0x06,0x41,0x80,0xcf,0x01,0x01,0x04,0x0a,0xfc,0x03,0x00,0x08] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_fma_f64 v[254:255], v[2:3], v[4:5], v[10:11] :: v_dual_add_f32 v7, v253, v3 +// GFX1250: v_dual_fma_f64 v[254:255], v[2:3], v[4:5], v[10:11] :: v_dual_add_f32 v7, v253, v3 ; encoding: [0x02,0x41,0x80,0xcf,0xfd,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[254:255], v[4:5], v[10:11] :: v_dual_add_f32 v7, v5, v3 +// GFX1250: v_dual_fma_f64 v[254:255], v[254:255], v[4:5], v[10:11] :: v_dual_add_f32 v7, v5, v3 ; encoding: [0xfe,0x41,0x80,0xcf,0x05,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_add_f32 v7, v3, v3 +// GFX1250: v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_add_f32 v7, v3, v3 ; encoding: [0x04,0x41,0x80,0xcf,0x03,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[6:7], v[4:5], v[10:11] :: v_dual_add_f32 v7, v4, v3 +// GFX1250: v_dual_fma_f64 v[254:255], v[6:7], v[4:5], v[10:11] :: v_dual_add_f32 v7, v4, v3 ; encoding: [0x06,0x41,0x80,0xcf,0x04,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], s[104:105], v[4:5], v[10:11] :: v_dual_add_f32 v7, v1, v3 +// GFX1250: v_dual_fma_f64 v[254:255], s[104:105], v[4:5], v[10:11] :: v_dual_add_f32 v7, v1, v3 ; encoding: [0x68,0x40,0x80,0xcf,0x01,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], ttmp[14:15], v[4:5], v[10:11] :: v_dual_add_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_fma_f64 v[254:255], ttmp[14:15], v[4:5], v[10:11] :: v_dual_add_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0x40,0x80,0xcf,0x6a,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], exec, 
v[4:5], v[10:11] :: v_dual_add_f32 v7, v15, v3 +// GFX1250: v_dual_fma_f64 v[254:255], exec, v[4:5], v[10:11] :: v_dual_add_f32 v7, v15, v3 ; encoding: [0x7e,0x40,0x80,0xcf,0x0f,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], vcc, v[4:5], v[10:11] :: v_dual_add_f32 v7, exec_hi, v3 +// GFX1250: v_dual_fma_f64 v[254:255], vcc, v[4:5], v[10:11] :: v_dual_add_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x80,0xcf,0x7f,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], src_scc, v[4:5], v[10:11] :: v_dual_add_f32 v7, -1, v3 +// GFX1250: v_dual_fma_f64 v[254:255], src_scc, v[4:5], v[10:11] :: v_dual_add_f32 v7, -1, v3 ; encoding: [0xfd,0x40,0x80,0xcf,0xc1,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], 0.5, v[8:9], v[10:11] :: v_dual_add_f32 v7, 0.5, v2 +// GFX1250: v_dual_fma_f64 v[254:255], 0.5, v[8:9], v[10:11] :: v_dual_add_f32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x80,0xcf,0xf0,0x00,0x08,0x0a,0xfe,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], -1, v[6:7], v[10:11] :: v_dual_add_f32 v7, src_scc, v5 +// GFX1250: v_dual_fma_f64 v[254:255], -1, v[6:7], v[10:11] :: v_dual_add_f32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x80,0xcf,0xfd,0x00,0x06,0x0a,0xfe,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_add_nc_u32 v7, v2, v3 +// GFX1250: v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_add_nc_u32 v7, v2, v3 ; encoding: [0x04,0x01,0x81,0xcf,0x02,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[2:3], v[4:5], v[10:11] :: v_dual_add_nc_u32 v7, v253, v3 +// GFX1250: 
v_dual_fma_f64 v[254:255], v[2:3], v[4:5], v[10:11] :: v_dual_add_nc_u32 v7, v253, v3 ; encoding: [0x02,0x01,0x81,0xcf,0xfd,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[254:255], v[4:5], v[10:11] :: v_dual_add_nc_u32 v7, v5, v3 +// GFX1250: v_dual_fma_f64 v[254:255], v[254:255], v[4:5], v[10:11] :: v_dual_add_nc_u32 v7, v5, v3 ; encoding: [0xfe,0x01,0x81,0xcf,0x05,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_add_nc_u32 v7, v3, v3 +// GFX1250: v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_add_nc_u32 v7, v3, v3 ; encoding: [0x04,0x01,0x81,0xcf,0x03,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[6:7], v[4:5], v[10:11] :: v_dual_add_nc_u32 v7, v4, v3 +// GFX1250: v_dual_fma_f64 v[254:255], v[6:7], v[4:5], v[10:11] :: v_dual_add_nc_u32 v7, v4, v3 ; encoding: [0x06,0x01,0x81,0xcf,0x04,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], s[104:105], v[4:5], v[10:11] :: v_dual_add_nc_u32 v7, v1, v3 +// GFX1250: v_dual_fma_f64 v[254:255], s[104:105], v[4:5], v[10:11] :: v_dual_add_nc_u32 v7, v1, v3 ; encoding: [0x68,0x00,0x81,0xcf,0x01,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], ttmp[14:15], v[4:5], v[10:11] :: v_dual_add_nc_u32 v7, vcc_lo, v3 +// GFX1250: v_dual_fma_f64 v[254:255], ttmp[14:15], v[4:5], v[10:11] :: v_dual_add_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7a,0x00,0x81,0xcf,0x6a,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], exec, v[4:5], v[10:11] :: v_dual_add_nc_u32 v7, v15, v3 +// GFX1250: 
v_dual_fma_f64 v[254:255], exec, v[4:5], v[10:11] :: v_dual_add_nc_u32 v7, v15, v3 ; encoding: [0x7e,0x00,0x81,0xcf,0x0f,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], vcc, v[4:5], v[10:11] :: v_dual_add_nc_u32 v7, exec_hi, v3 +// GFX1250: v_dual_fma_f64 v[254:255], vcc, v[4:5], v[10:11] :: v_dual_add_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x81,0xcf,0x7f,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], src_scc, v[4:5], v[10:11] :: v_dual_add_nc_u32 v7, -1, v3 +// GFX1250: v_dual_fma_f64 v[254:255], src_scc, v[4:5], v[10:11] :: v_dual_add_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x00,0x81,0xcf,0xc1,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], 0.5, v[8:9], v[10:11] :: v_dual_add_nc_u32 v7, 0.5, v2 +// GFX1250: v_dual_fma_f64 v[254:255], 0.5, v[8:9], v[10:11] :: v_dual_add_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x81,0xcf,0xf0,0x00,0x08,0x0a,0xfe,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], -1, v[6:7], v[10:11] :: v_dual_add_nc_u32 v7, src_scc, v5 +// GFX1250: v_dual_fma_f64 v[254:255], -1, v[6:7], v[10:11] :: v_dual_add_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x81,0xcf,0xfd,0x00,0x06,0x0a,0xfe,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo +// GFX1250: v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo ; encoding: [0x04,0x91,0x80,0xcf,0x02,0x01,0x04,0x0a,0xfe,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[2:3], v[4:5], v[10:11] :: v_dual_cndmask_b32 v7, v253, v3, vcc_lo +// GFX1250: 
v_dual_fma_f64 v[254:255], v[2:3], v[4:5], v[10:11] :: v_dual_cndmask_b32 v7, v253, v3, vcc_lo ; encoding: [0x02,0x91,0x80,0xcf,0xfd,0x01,0x04,0x0a,0xfe,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[254:255], v[4:5], v[10:11] :: v_dual_cndmask_b32 v7, v5, v3, vcc_lo +// GFX1250: v_dual_fma_f64 v[254:255], v[254:255], v[4:5], v[10:11] :: v_dual_cndmask_b32 v7, v5, v3, vcc_lo ; encoding: [0xfe,0x91,0x80,0xcf,0x05,0x01,0x04,0x0a,0xfe,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo +// GFX1250: v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo ; encoding: [0x04,0x91,0x80,0xcf,0x03,0x01,0x04,0x0a,0xfe,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[6:7], v[4:5], v[10:11] :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo +// GFX1250: v_dual_fma_f64 v[254:255], v[6:7], v[4:5], v[10:11] :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo ; encoding: [0x06,0x91,0x80,0xcf,0x04,0x01,0x04,0x0a,0xfe,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], s[2:3], v[4:5], v[10:11] :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo +// GFX1250: v_dual_fma_f64 v[254:255], s[2:3], v[4:5], v[10:11] :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo ; encoding: [0x02,0x90,0x80,0xcf,0x01,0x01,0x04,0x0a,0xfe,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], ttmp[14:15], v[4:5], v[10:11] :: v_dual_cndmask_b32 v7, v15, v3, vcc_lo +// GFX1250: v_dual_fma_f64 v[254:255], ttmp[14:15], v[4:5], v[10:11] :: v_dual_cndmask_b32 v7, v15, v3, vcc_lo ; encoding: [0x7a,0x90,0x80,0xcf,0x0f,0x01,0x04,0x0a,0xfe,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_fma_f64 v[254:255], exec, v[4:5], v[10:11] :: v_dual_cndmask_b32 v7, v10, v3, vcc_lo +// GFX1250: v_dual_fma_f64 v[254:255], exec, v[4:5], v[10:11] :: v_dual_cndmask_b32 v7, v10, v3, vcc_lo ; encoding: [0x7e,0x90,0x80,0xcf,0x0a,0x01,0x04,0x0a,0xfe,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], vcc, v[4:5], v[10:11] :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo +// GFX1250: v_dual_fma_f64 v[254:255], vcc, v[4:5], v[10:11] :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo ; encoding: [0x6a,0x90,0x80,0xcf,0x6a,0x00,0x04,0x0a,0xfe,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], src_scc, v[4:5], v[10:11] :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo +// GFX1250: v_dual_fma_f64 v[254:255], src_scc, v[4:5], v[10:11] :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo ; encoding: [0xfd,0x90,0x80,0xcf,0xc1,0x00,0x04,0x0a,0xfe,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], 0.5, v[8:9], v[10:11] :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo +// GFX1250: v_dual_fma_f64 v[254:255], 0.5, v[8:9], v[10:11] :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo ; encoding: [0xf0,0x90,0x80,0xcf,0xf0,0x00,0x08,0x0a,0xfe,0x02,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], -1, v[6:7], v[10:11] :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo +// GFX1250: v_dual_fma_f64 v[254:255], -1, v[6:7], v[10:11] :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo ; encoding: [0xc1,0x90,0x80,0xcf,0xfd,0x00,0x06,0x0a,0xfe,0x05,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_fmac_f32 v9, v2, v3 +// GFX1250: v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_fmac_f32 v9, v2, v3 ; encoding: [0x04,0x01,0x80,0xcf,0x02,0x01,0x04,0x0a,0xfe,0x03,0x00,0x09] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[2:3], v[4:5], v[10:11] :: v_dual_fmac_f32 v9, v253, v3 +// GFX1250: v_dual_fma_f64 v[254:255], v[2:3], v[4:5], v[10:11] :: v_dual_fmac_f32 v9, v253, v3 ; encoding: [0x02,0x01,0x80,0xcf,0xfd,0x01,0x04,0x0a,0xfe,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[254:255], v[4:5], v[10:11] :: v_dual_fmac_f32 v9, v5, v3 +// GFX1250: v_dual_fma_f64 v[254:255], v[254:255], v[4:5], v[10:11] :: v_dual_fmac_f32 v9, v5, v3 ; encoding: [0xfe,0x01,0x80,0xcf,0x05,0x01,0x04,0x0a,0xfe,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_fmac_f32 v9, v3, v3 +// GFX1250: v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_fmac_f32 v9, v3, v3 ; encoding: [0x04,0x01,0x80,0xcf,0x03,0x01,0x04,0x0a,0xfe,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[6:7], v[4:5], v[10:11] :: v_dual_fmac_f32 v9, v4, v3 +// GFX1250: v_dual_fma_f64 v[254:255], v[6:7], v[4:5], v[10:11] :: v_dual_fmac_f32 v9, v4, v3 ; encoding: [0x06,0x01,0x80,0xcf,0x04,0x01,0x04,0x0a,0xfe,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], s[104:105], v[4:5], v[10:11] :: v_dual_fmac_f32 v9, v1, v3 +// GFX1250: v_dual_fma_f64 v[254:255], s[104:105], v[4:5], v[10:11] :: v_dual_fmac_f32 v9, v1, v3 ; encoding: [0x68,0x00,0x80,0xcf,0x01,0x01,0x04,0x0a,0xfe,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], ttmp[14:15], v[4:5], v[10:11] :: v_dual_fmac_f32 v9, vcc_lo, v3 +// GFX1250: v_dual_fma_f64 v[254:255], ttmp[14:15], v[4:5], v[10:11] :: v_dual_fmac_f32 v9, vcc_lo, v3 ; encoding: [0x7a,0x00,0x80,0xcf,0x6a,0x00,0x04,0x0a,0xfe,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], exec, v[4:5], v[10:11] :: v_dual_fmac_f32 v9, v15, v3 +// GFX1250: v_dual_fma_f64 v[254:255], exec, v[4:5], v[10:11] :: v_dual_fmac_f32 v9, v15, v3 ; encoding: [0x7e,0x00,0x80,0xcf,0x0f,0x01,0x04,0x0a,0xfe,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], vcc, v[4:5], v[10:11] :: v_dual_fmac_f32 v9, exec_hi, v3 +// GFX1250: v_dual_fma_f64 v[254:255], vcc, v[4:5], v[10:11] :: v_dual_fmac_f32 v9, exec_hi, v3 ; encoding: [0x6a,0x00,0x80,0xcf,0x7f,0x00,0x04,0x0a,0xfe,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], src_scc, v[4:5], v[10:11] :: v_dual_fmac_f32 v9, -1, v3 +// GFX1250: v_dual_fma_f64 v[254:255], src_scc, v[4:5], v[10:11] :: v_dual_fmac_f32 v9, -1, v3 ; encoding: [0xfd,0x00,0x80,0xcf,0xc1,0x00,0x04,0x0a,0xfe,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], 0.5, v[8:9], v[10:11] :: v_dual_fmac_f32 v9, 0.5, v2 +// GFX1250: v_dual_fma_f64 v[254:255], 0.5, v[8:9], v[10:11] :: v_dual_fmac_f32 v9, 0.5, v2 ; encoding: [0xf0,0x00,0x80,0xcf,0xf0,0x00,0x08,0x0a,0xfe,0x02,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], -1, v[6:7], v[10:11] :: v_dual_fmac_f32 v9, src_scc, v5 +// GFX1250: v_dual_fma_f64 v[254:255], -1, v[6:7], v[10:11] :: v_dual_fmac_f32 v9, src_scc, v5 ; encoding: [0xc1,0x00,0x80,0xcf,0xfd,0x00,0x06,0x0a,0xfe,0x05,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_lshlrev_b32 v7, v2, v3 +// GFX1250: v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_lshlrev_b32 v7, v2, v3 ; encoding: [0x04,0x11,0x81,0xcf,0x02,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 
v[254:255], v[2:3], v[4:5], v[10:11] :: v_dual_lshlrev_b32 v7, v253, v3 +// GFX1250: v_dual_fma_f64 v[254:255], v[2:3], v[4:5], v[10:11] :: v_dual_lshlrev_b32 v7, v253, v3 ; encoding: [0x02,0x11,0x81,0xcf,0xfd,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[254:255], v[4:5], v[10:11] :: v_dual_lshlrev_b32 v7, v5, v3 +// GFX1250: v_dual_fma_f64 v[254:255], v[254:255], v[4:5], v[10:11] :: v_dual_lshlrev_b32 v7, v5, v3 ; encoding: [0xfe,0x11,0x81,0xcf,0x05,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_lshlrev_b32 v7, v3, v3 +// GFX1250: v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_lshlrev_b32 v7, v3, v3 ; encoding: [0x04,0x11,0x81,0xcf,0x03,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[6:7], v[4:5], v[10:11] :: v_dual_lshlrev_b32 v7, v4, v3 +// GFX1250: v_dual_fma_f64 v[254:255], v[6:7], v[4:5], v[10:11] :: v_dual_lshlrev_b32 v7, v4, v3 ; encoding: [0x06,0x11,0x81,0xcf,0x04,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], s[104:105], v[4:5], v[10:11] :: v_dual_lshlrev_b32 v7, v1, v3 +// GFX1250: v_dual_fma_f64 v[254:255], s[104:105], v[4:5], v[10:11] :: v_dual_lshlrev_b32 v7, v1, v3 ; encoding: [0x68,0x10,0x81,0xcf,0x01,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], ttmp[14:15], v[4:5], v[10:11] :: v_dual_lshlrev_b32 v7, vcc_lo, v3 +// GFX1250: v_dual_fma_f64 v[254:255], ttmp[14:15], v[4:5], v[10:11] :: v_dual_lshlrev_b32 v7, vcc_lo, v3 ; encoding: [0x7a,0x10,0x81,0xcf,0x6a,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_fma_f64 v[254:255], exec, v[4:5], v[10:11] :: v_dual_lshlrev_b32 v7, v15, v3 +// GFX1250: v_dual_fma_f64 v[254:255], exec, v[4:5], v[10:11] :: v_dual_lshlrev_b32 v7, v15, v3 ; encoding: [0x7e,0x10,0x81,0xcf,0x0f,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], vcc, v[4:5], v[10:11] :: v_dual_lshlrev_b32 v7, exec_hi, v3 +// GFX1250: v_dual_fma_f64 v[254:255], vcc, v[4:5], v[10:11] :: v_dual_lshlrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x10,0x81,0xcf,0x7f,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], src_scc, v[4:5], v[10:11] :: v_dual_lshlrev_b32 v7, -1, v3 +// GFX1250: v_dual_fma_f64 v[254:255], src_scc, v[4:5], v[10:11] :: v_dual_lshlrev_b32 v7, -1, v3 ; encoding: [0xfd,0x10,0x81,0xcf,0xc1,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], 0.5, v[8:9], v[10:11] :: v_dual_lshlrev_b32 v7, 0.5, v2 +// GFX1250: v_dual_fma_f64 v[254:255], 0.5, v[8:9], v[10:11] :: v_dual_lshlrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x10,0x81,0xcf,0xf0,0x00,0x08,0x0a,0xfe,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], -1, v[6:7], v[10:11] :: v_dual_lshlrev_b32 v7, src_scc, v5 +// GFX1250: v_dual_fma_f64 v[254:255], -1, v[6:7], v[10:11] :: v_dual_lshlrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x10,0x81,0xcf,0xfd,0x00,0x06,0x0a,0xfe,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_max_num_f32 v7, v2, v3 +// GFX1250: v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_max_num_f32 v7, v2, v3 ; encoding: [0x04,0xa1,0x80,0xcf,0x02,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 
v[254:255], v[2:3], v[4:5], v[10:11] :: v_dual_max_num_f32 v7, v253, v3 +// GFX1250: v_dual_fma_f64 v[254:255], v[2:3], v[4:5], v[10:11] :: v_dual_max_num_f32 v7, v253, v3 ; encoding: [0x02,0xa1,0x80,0xcf,0xfd,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[254:255], v[4:5], v[10:11] :: v_dual_max_num_f32 v7, v5, v3 +// GFX1250: v_dual_fma_f64 v[254:255], v[254:255], v[4:5], v[10:11] :: v_dual_max_num_f32 v7, v5, v3 ; encoding: [0xfe,0xa1,0x80,0xcf,0x05,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_max_num_f32 v7, v3, v3 +// GFX1250: v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_max_num_f32 v7, v3, v3 ; encoding: [0x04,0xa1,0x80,0xcf,0x03,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[6:7], v[4:5], v[10:11] :: v_dual_max_num_f32 v7, v4, v3 +// GFX1250: v_dual_fma_f64 v[254:255], v[6:7], v[4:5], v[10:11] :: v_dual_max_num_f32 v7, v4, v3 ; encoding: [0x06,0xa1,0x80,0xcf,0x04,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], s[104:105], v[4:5], v[10:11] :: v_dual_max_num_f32 v7, v1, v3 +// GFX1250: v_dual_fma_f64 v[254:255], s[104:105], v[4:5], v[10:11] :: v_dual_max_num_f32 v7, v1, v3 ; encoding: [0x68,0xa0,0x80,0xcf,0x01,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], ttmp[14:15], v[4:5], v[10:11] :: v_dual_max_num_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_fma_f64 v[254:255], ttmp[14:15], v[4:5], v[10:11] :: v_dual_max_num_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0xa0,0x80,0xcf,0x6a,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_fma_f64 v[254:255], exec, v[4:5], v[10:11] :: v_dual_max_num_f32 v7, v15, v3 +// GFX1250: v_dual_fma_f64 v[254:255], exec, v[4:5], v[10:11] :: v_dual_max_num_f32 v7, v15, v3 ; encoding: [0x7e,0xa0,0x80,0xcf,0x0f,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], vcc, v[4:5], v[10:11] :: v_dual_max_num_f32 v7, exec_hi, v3 +// GFX1250: v_dual_fma_f64 v[254:255], vcc, v[4:5], v[10:11] :: v_dual_max_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xa0,0x80,0xcf,0x7f,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], src_scc, v[4:5], v[10:11] :: v_dual_max_num_f32 v7, -1, v3 +// GFX1250: v_dual_fma_f64 v[254:255], src_scc, v[4:5], v[10:11] :: v_dual_max_num_f32 v7, -1, v3 ; encoding: [0xfd,0xa0,0x80,0xcf,0xc1,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], 0.5, v[8:9], v[10:11] :: v_dual_max_num_f32 v7, 0.5, v2 +// GFX1250: v_dual_fma_f64 v[254:255], 0.5, v[8:9], v[10:11] :: v_dual_max_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xa0,0x80,0xcf,0xf0,0x00,0x08,0x0a,0xfe,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], -1, v[6:7], v[10:11] :: v_dual_max_num_f32 v7, src_scc, v5 +// GFX1250: v_dual_fma_f64 v[254:255], -1, v[6:7], v[10:11] :: v_dual_max_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xa0,0x80,0xcf,0xfd,0x00,0x06,0x0a,0xfe,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_min_num_f32 v7, v2, v3 +// GFX1250: v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_min_num_f32 v7, v2, v3 ; encoding: [0x04,0xb1,0x80,0xcf,0x02,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 
v[254:255], v[2:3], v[4:5], v[10:11] :: v_dual_min_num_f32 v7, v253, v3 +// GFX1250: v_dual_fma_f64 v[254:255], v[2:3], v[4:5], v[10:11] :: v_dual_min_num_f32 v7, v253, v3 ; encoding: [0x02,0xb1,0x80,0xcf,0xfd,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[254:255], v[4:5], v[10:11] :: v_dual_min_num_f32 v7, v5, v3 +// GFX1250: v_dual_fma_f64 v[254:255], v[254:255], v[4:5], v[10:11] :: v_dual_min_num_f32 v7, v5, v3 ; encoding: [0xfe,0xb1,0x80,0xcf,0x05,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_min_num_f32 v7, v3, v3 +// GFX1250: v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_min_num_f32 v7, v3, v3 ; encoding: [0x04,0xb1,0x80,0xcf,0x03,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[6:7], v[4:5], v[10:11] :: v_dual_min_num_f32 v7, v4, v3 +// GFX1250: v_dual_fma_f64 v[254:255], v[6:7], v[4:5], v[10:11] :: v_dual_min_num_f32 v7, v4, v3 ; encoding: [0x06,0xb1,0x80,0xcf,0x04,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], s[104:105], v[4:5], v[10:11] :: v_dual_min_num_f32 v7, v1, v3 +// GFX1250: v_dual_fma_f64 v[254:255], s[104:105], v[4:5], v[10:11] :: v_dual_min_num_f32 v7, v1, v3 ; encoding: [0x68,0xb0,0x80,0xcf,0x01,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], ttmp[14:15], v[4:5], v[10:11] :: v_dual_min_num_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_fma_f64 v[254:255], ttmp[14:15], v[4:5], v[10:11] :: v_dual_min_num_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0xb0,0x80,0xcf,0x6a,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_fma_f64 v[254:255], exec, v[4:5], v[10:11] :: v_dual_min_num_f32 v7, v15, v3 +// GFX1250: v_dual_fma_f64 v[254:255], exec, v[4:5], v[10:11] :: v_dual_min_num_f32 v7, v15, v3 ; encoding: [0x7e,0xb0,0x80,0xcf,0x0f,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], vcc, v[4:5], v[10:11] :: v_dual_min_num_f32 v7, exec_hi, v3 +// GFX1250: v_dual_fma_f64 v[254:255], vcc, v[4:5], v[10:11] :: v_dual_min_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xb0,0x80,0xcf,0x7f,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], src_scc, v[4:5], v[10:11] :: v_dual_min_num_f32 v7, -1, v3 +// GFX1250: v_dual_fma_f64 v[254:255], src_scc, v[4:5], v[10:11] :: v_dual_min_num_f32 v7, -1, v3 ; encoding: [0xfd,0xb0,0x80,0xcf,0xc1,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], 0.5, v[8:9], v[10:11] :: v_dual_min_num_f32 v7, 0.5, v2 +// GFX1250: v_dual_fma_f64 v[254:255], 0.5, v[8:9], v[10:11] :: v_dual_min_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xb0,0x80,0xcf,0xf0,0x00,0x08,0x0a,0xfe,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], -1, v[6:7], v[10:11] :: v_dual_min_num_f32 v7, src_scc, v5 +// GFX1250: v_dual_fma_f64 v[254:255], -1, v[6:7], v[10:11] :: v_dual_min_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xb0,0x80,0xcf,0xfd,0x00,0x06,0x0a,0xfe,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[4:5], v[254:255], v[10:11] :: v_dual_mov_b32 v7, v3 +// GFX1250: v_dual_fma_f64 v[254:255], v[4:5], v[254:255], v[10:11] :: v_dual_mov_b32 v7, v3 ; encoding: [0x04,0x81,0x80,0xcf,0x03,0x01,0xfe,0x0a,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], 
v[2:3], v[254:255], v[10:11] :: v_dual_mov_b32 v7, v253 +// GFX1250: v_dual_fma_f64 v[254:255], v[2:3], v[254:255], v[10:11] :: v_dual_mov_b32 v7, v253 ; encoding: [0x02,0x81,0x80,0xcf,0xfd,0x01,0xfe,0x0a,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[254:255], v[254:255], v[10:11] :: v_dual_mov_b32 v7, v4 +// GFX1250: v_dual_fma_f64 v[254:255], v[254:255], v[254:255], v[10:11] :: v_dual_mov_b32 v7, v4 ; encoding: [0xfe,0x81,0x80,0xcf,0x04,0x01,0xfe,0x0a,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[4:5], v[254:255], v[10:11] :: v_dual_mov_b32 v7, v3 +// GFX1250: v_dual_fma_f64 v[254:255], v[4:5], v[254:255], v[10:11] :: v_dual_mov_b32 v7, v3 ; encoding: [0x04,0x81,0x80,0xcf,0x03,0x01,0xfe,0x0a,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[6:7], v[254:255], v[10:11] :: v_dual_mov_b32 v7, v4 +// GFX1250: v_dual_fma_f64 v[254:255], v[6:7], v[254:255], v[10:11] :: v_dual_mov_b32 v7, v4 ; encoding: [0x06,0x81,0x80,0xcf,0x04,0x01,0xfe,0x0a,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], s[104:105], v[254:255], v[10:11] :: v_dual_mov_b32 v7, s1 +// GFX1250: v_dual_fma_f64 v[254:255], s[104:105], v[254:255], v[10:11] :: v_dual_mov_b32 v7, s1 ; encoding: [0x68,0x80,0x80,0xcf,0x01,0x00,0xfe,0x0a,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], s[2:3], v[254:255], v[10:11] :: v_dual_mov_b32 v7, s105 +// GFX1250: v_dual_fma_f64 v[254:255], s[2:3], v[254:255], v[10:11] :: v_dual_mov_b32 v7, s105 ; encoding: [0x02,0x80,0x80,0xcf,0x69,0x00,0xfe,0x0a,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], ttmp[14:15], v[254:255], v[10:11] :: 
v_dual_mov_b32 v7, vcc_lo +// GFX1250: v_dual_fma_f64 v[254:255], ttmp[14:15], v[254:255], v[10:11] :: v_dual_mov_b32 v7, vcc_lo ; encoding: [0x7a,0x80,0x80,0xcf,0x6a,0x00,0xfe,0x0a,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], exec, v[254:255], v[10:11] :: v_dual_mov_b32 v7, ttmp15 +// GFX1250: v_dual_fma_f64 v[254:255], exec, v[254:255], v[10:11] :: v_dual_mov_b32 v7, ttmp15 ; encoding: [0x7e,0x80,0x80,0xcf,0x7b,0x00,0xfe,0x0a,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], vcc, v[254:255], v[10:11] :: v_dual_mov_b32 v7, exec_hi +// GFX1250: v_dual_fma_f64 v[254:255], vcc, v[254:255], v[10:11] :: v_dual_mov_b32 v7, exec_hi ; encoding: [0x6a,0x80,0x80,0xcf,0x7f,0x00,0xfe,0x0a,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], src_scc, v[254:255], v[10:11] :: v_dual_mov_b32 v7, -1 +// GFX1250: v_dual_fma_f64 v[254:255], src_scc, v[254:255], v[10:11] :: v_dual_mov_b32 v7, -1 ; encoding: [0xfd,0x80,0x80,0xcf,0xc1,0x00,0xfe,0x0a,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], 0.5, v[8:9], v[10:11] :: v_dual_mov_b32 v7, 0.5 +// GFX1250: v_dual_fma_f64 v[254:255], 0.5, v[8:9], v[10:11] :: v_dual_mov_b32 v7, 0.5 ; encoding: [0xf0,0x80,0x80,0xcf,0xf0,0x00,0x08,0x0a,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], -1, v[6:7], v[10:11] :: v_dual_mov_b32 v7, src_scc +// GFX1250: v_dual_fma_f64 v[254:255], -1, v[6:7], v[10:11] :: v_dual_mov_b32 v7, src_scc ; encoding: [0xc1,0x80,0x80,0xcf,0xfd,0x00,0x06,0x0a,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_mul_dx9_zero_f32 v7, v2, v3 +// GFX1250: v_dual_fma_f64 
v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_mul_dx9_zero_f32 v7, v2, v3 ; encoding: [0x04,0x71,0x80,0xcf,0x02,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[2:3], v[4:5], v[10:11] :: v_dual_mul_dx9_zero_f32 v7, v253, v3 +// GFX1250: v_dual_fma_f64 v[254:255], v[2:3], v[4:5], v[10:11] :: v_dual_mul_dx9_zero_f32 v7, v253, v3 ; encoding: [0x02,0x71,0x80,0xcf,0xfd,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[254:255], v[4:5], v[10:11] :: v_dual_mul_dx9_zero_f32 v7, v5, v3 +// GFX1250: v_dual_fma_f64 v[254:255], v[254:255], v[4:5], v[10:11] :: v_dual_mul_dx9_zero_f32 v7, v5, v3 ; encoding: [0xfe,0x71,0x80,0xcf,0x05,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_mul_dx9_zero_f32 v7, v3, v3 +// GFX1250: v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_mul_dx9_zero_f32 v7, v3, v3 ; encoding: [0x04,0x71,0x80,0xcf,0x03,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[6:7], v[4:5], v[10:11] :: v_dual_mul_dx9_zero_f32 v7, v4, v3 +// GFX1250: v_dual_fma_f64 v[254:255], v[6:7], v[4:5], v[10:11] :: v_dual_mul_dx9_zero_f32 v7, v4, v3 ; encoding: [0x06,0x71,0x80,0xcf,0x04,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], s[104:105], v[4:5], v[10:11] :: v_dual_mul_dx9_zero_f32 v7, v1, v3 +// GFX1250: v_dual_fma_f64 v[254:255], s[104:105], v[4:5], v[10:11] :: v_dual_mul_dx9_zero_f32 v7, v1, v3 ; encoding: [0x68,0x70,0x80,0xcf,0x01,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], ttmp[14:15], v[4:5], v[10:11] :: 
v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_fma_f64 v[254:255], ttmp[14:15], v[4:5], v[10:11] :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0x70,0x80,0xcf,0x6a,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], exec, v[4:5], v[10:11] :: v_dual_mul_dx9_zero_f32 v7, v15, v3 +// GFX1250: v_dual_fma_f64 v[254:255], exec, v[4:5], v[10:11] :: v_dual_mul_dx9_zero_f32 v7, v15, v3 ; encoding: [0x7e,0x70,0x80,0xcf,0x0f,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], vcc, v[4:5], v[10:11] :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 +// GFX1250: v_dual_fma_f64 v[254:255], vcc, v[4:5], v[10:11] :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x80,0xcf,0x7f,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], src_scc, v[4:5], v[10:11] :: v_dual_mul_dx9_zero_f32 v7, -1, v3 +// GFX1250: v_dual_fma_f64 v[254:255], src_scc, v[4:5], v[10:11] :: v_dual_mul_dx9_zero_f32 v7, -1, v3 ; encoding: [0xfd,0x70,0x80,0xcf,0xc1,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], 0.5, v[8:9], v[10:11] :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 +// GFX1250: v_dual_fma_f64 v[254:255], 0.5, v[8:9], v[10:11] :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x80,0xcf,0xf0,0x00,0x08,0x0a,0xfe,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], -1, v[6:7], v[10:11] :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 +// GFX1250: v_dual_fma_f64 v[254:255], -1, v[6:7], v[10:11] :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x80,0xcf,0xfd,0x00,0x06,0x0a,0xfe,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_mul_f32 v7, v2, v3 +// GFX1250: v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_mul_f32 v7, v2, v3 ; encoding: [0x04,0x31,0x80,0xcf,0x02,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[2:3], v[4:5], v[10:11] :: v_dual_mul_f32 v7, v253, v3 +// GFX1250: v_dual_fma_f64 v[254:255], v[2:3], v[4:5], v[10:11] :: v_dual_mul_f32 v7, v253, v3 ; encoding: [0x02,0x31,0x80,0xcf,0xfd,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[254:255], v[4:5], v[10:11] :: v_dual_mul_f32 v7, v5, v3 +// GFX1250: v_dual_fma_f64 v[254:255], v[254:255], v[4:5], v[10:11] :: v_dual_mul_f32 v7, v5, v3 ; encoding: [0xfe,0x31,0x80,0xcf,0x05,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_mul_f32 v7, v3, v3 +// GFX1250: v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_mul_f32 v7, v3, v3 ; encoding: [0x04,0x31,0x80,0xcf,0x03,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[6:7], v[4:5], v[10:11] :: v_dual_mul_f32 v7, v4, v3 +// GFX1250: v_dual_fma_f64 v[254:255], v[6:7], v[4:5], v[10:11] :: v_dual_mul_f32 v7, v4, v3 ; encoding: [0x06,0x31,0x80,0xcf,0x04,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], s[104:105], v[4:5], v[10:11] :: v_dual_mul_f32 v7, v1, v3 +// GFX1250: v_dual_fma_f64 v[254:255], s[104:105], v[4:5], v[10:11] :: v_dual_mul_f32 v7, v1, v3 ; encoding: [0x68,0x30,0x80,0xcf,0x01,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], ttmp[14:15], v[4:5], 
v[10:11] :: v_dual_mul_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_fma_f64 v[254:255], ttmp[14:15], v[4:5], v[10:11] :: v_dual_mul_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0x30,0x80,0xcf,0x6a,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], exec, v[4:5], v[10:11] :: v_dual_mul_f32 v7, v15, v3 +// GFX1250: v_dual_fma_f64 v[254:255], exec, v[4:5], v[10:11] :: v_dual_mul_f32 v7, v15, v3 ; encoding: [0x7e,0x30,0x80,0xcf,0x0f,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], vcc, v[4:5], v[10:11] :: v_dual_mul_f32 v7, exec_hi, v3 +// GFX1250: v_dual_fma_f64 v[254:255], vcc, v[4:5], v[10:11] :: v_dual_mul_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x30,0x80,0xcf,0x7f,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], src_scc, v[4:5], v[10:11] :: v_dual_mul_f32 v7, -1, v3 +// GFX1250: v_dual_fma_f64 v[254:255], src_scc, v[4:5], v[10:11] :: v_dual_mul_f32 v7, -1, v3 ; encoding: [0xfd,0x30,0x80,0xcf,0xc1,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], 0.5, v[8:9], v[10:11] :: v_dual_mul_f32 v7, 0.5, v2 +// GFX1250: v_dual_fma_f64 v[254:255], 0.5, v[8:9], v[10:11] :: v_dual_mul_f32 v7, 0.5, v2 ; encoding: [0xf0,0x30,0x80,0xcf,0xf0,0x00,0x08,0x0a,0xfe,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], -1, v[6:7], v[10:11] :: v_dual_mul_f32 v7, src_scc, v5 +// GFX1250: v_dual_fma_f64 v[254:255], -1, v[6:7], v[10:11] :: v_dual_mul_f32 v7, src_scc, v5 ; encoding: [0xc1,0x30,0x80,0xcf,0xfd,0x00,0x06,0x0a,0xfe,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_sub_f32 v7, v2, v3 +// GFX1250: 
v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_sub_f32 v7, v2, v3 ; encoding: [0x04,0x51,0x80,0xcf,0x02,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[2:3], v[4:5], v[10:11] :: v_dual_sub_f32 v7, v253, v3 +// GFX1250: v_dual_fma_f64 v[254:255], v[2:3], v[4:5], v[10:11] :: v_dual_sub_f32 v7, v253, v3 ; encoding: [0x02,0x51,0x80,0xcf,0xfd,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[254:255], v[4:5], v[10:11] :: v_dual_sub_f32 v7, v5, v3 +// GFX1250: v_dual_fma_f64 v[254:255], v[254:255], v[4:5], v[10:11] :: v_dual_sub_f32 v7, v5, v3 ; encoding: [0xfe,0x51,0x80,0xcf,0x05,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_sub_f32 v7, v3, v3 +// GFX1250: v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_sub_f32 v7, v3, v3 ; encoding: [0x04,0x51,0x80,0xcf,0x03,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[6:7], v[4:5], v[10:11] :: v_dual_sub_f32 v7, v4, v3 +// GFX1250: v_dual_fma_f64 v[254:255], v[6:7], v[4:5], v[10:11] :: v_dual_sub_f32 v7, v4, v3 ; encoding: [0x06,0x51,0x80,0xcf,0x04,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], s[104:105], v[4:5], v[10:11] :: v_dual_sub_f32 v7, v1, v3 +// GFX1250: v_dual_fma_f64 v[254:255], s[104:105], v[4:5], v[10:11] :: v_dual_sub_f32 v7, v1, v3 ; encoding: [0x68,0x50,0x80,0xcf,0x01,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], ttmp[14:15], v[4:5], v[10:11] :: v_dual_sub_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_fma_f64 v[254:255], ttmp[14:15], 
v[4:5], v[10:11] :: v_dual_sub_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0x50,0x80,0xcf,0x6a,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], exec, v[4:5], v[10:11] :: v_dual_sub_f32 v7, v15, v3 +// GFX1250: v_dual_fma_f64 v[254:255], exec, v[4:5], v[10:11] :: v_dual_sub_f32 v7, v15, v3 ; encoding: [0x7e,0x50,0x80,0xcf,0x0f,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], vcc, v[4:5], v[10:11] :: v_dual_sub_f32 v7, exec_hi, v3 +// GFX1250: v_dual_fma_f64 v[254:255], vcc, v[4:5], v[10:11] :: v_dual_sub_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x80,0xcf,0x7f,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], src_scc, v[4:5], v[10:11] :: v_dual_sub_f32 v7, -1, v3 +// GFX1250: v_dual_fma_f64 v[254:255], src_scc, v[4:5], v[10:11] :: v_dual_sub_f32 v7, -1, v3 ; encoding: [0xfd,0x50,0x80,0xcf,0xc1,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], 0.5, v[8:9], v[10:11] :: v_dual_sub_f32 v7, 0.5, v2 +// GFX1250: v_dual_fma_f64 v[254:255], 0.5, v[8:9], v[10:11] :: v_dual_sub_f32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x80,0xcf,0xf0,0x00,0x08,0x0a,0xfe,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], -1, v[6:7], v[10:11] :: v_dual_sub_f32 v7, src_scc, v5 +// GFX1250: v_dual_fma_f64 v[254:255], -1, v[6:7], v[10:11] :: v_dual_sub_f32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x80,0xcf,0xfd,0x00,0x06,0x0a,0xfe,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_subrev_f32 v7, v2, v3 +// GFX1250: v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_subrev_f32 v7, v2, v3 ; encoding: 
[0x04,0x61,0x80,0xcf,0x02,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[2:3], v[4:5], v[10:11] :: v_dual_subrev_f32 v7, v253, v3 +// GFX1250: v_dual_fma_f64 v[254:255], v[2:3], v[4:5], v[10:11] :: v_dual_subrev_f32 v7, v253, v3 ; encoding: [0x02,0x61,0x80,0xcf,0xfd,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[254:255], v[4:5], v[10:11] :: v_dual_subrev_f32 v7, v5, v3 +// GFX1250: v_dual_fma_f64 v[254:255], v[254:255], v[4:5], v[10:11] :: v_dual_subrev_f32 v7, v5, v3 ; encoding: [0xfe,0x61,0x80,0xcf,0x05,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_subrev_f32 v7, v3, v3 +// GFX1250: v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_subrev_f32 v7, v3, v3 ; encoding: [0x04,0x61,0x80,0xcf,0x03,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[6:7], v[4:5], v[10:11] :: v_dual_subrev_f32 v7, v4, v3 +// GFX1250: v_dual_fma_f64 v[254:255], v[6:7], v[4:5], v[10:11] :: v_dual_subrev_f32 v7, v4, v3 ; encoding: [0x06,0x61,0x80,0xcf,0x04,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], s[104:105], v[4:5], v[10:11] :: v_dual_subrev_f32 v7, v1, v3 +// GFX1250: v_dual_fma_f64 v[254:255], s[104:105], v[4:5], v[10:11] :: v_dual_subrev_f32 v7, v1, v3 ; encoding: [0x68,0x60,0x80,0xcf,0x01,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], ttmp[14:15], v[4:5], v[10:11] :: v_dual_subrev_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_fma_f64 v[254:255], ttmp[14:15], v[4:5], v[10:11] :: v_dual_subrev_f32 v7, vcc_lo, v3 ; encoding: 
[0x7a,0x60,0x80,0xcf,0x6a,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], exec, v[4:5], v[10:11] :: v_dual_subrev_f32 v7, v15, v3 +// GFX1250: v_dual_fma_f64 v[254:255], exec, v[4:5], v[10:11] :: v_dual_subrev_f32 v7, v15, v3 ; encoding: [0x7e,0x60,0x80,0xcf,0x0f,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], vcc, v[4:5], v[10:11] :: v_dual_subrev_f32 v7, exec_hi, v3 +// GFX1250: v_dual_fma_f64 v[254:255], vcc, v[4:5], v[10:11] :: v_dual_subrev_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x80,0xcf,0x7f,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], src_scc, v[4:5], v[10:11] :: v_dual_subrev_f32 v7, -1, v3 +// GFX1250: v_dual_fma_f64 v[254:255], src_scc, v[4:5], v[10:11] :: v_dual_subrev_f32 v7, -1, v3 ; encoding: [0xfd,0x60,0x80,0xcf,0xc1,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], 0.5, v[8:9], v[10:11] :: v_dual_subrev_f32 v7, 0.5, v2 +// GFX1250: v_dual_fma_f64 v[254:255], 0.5, v[8:9], v[10:11] :: v_dual_subrev_f32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x80,0xcf,0xf0,0x00,0x08,0x0a,0xfe,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], -1, v[6:7], v[10:11] :: v_dual_subrev_f32 v7, src_scc, v5 +// GFX1250: v_dual_fma_f64 v[254:255], -1, v[6:7], v[10:11] :: v_dual_subrev_f32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x80,0xcf,0xfd,0x00,0x06,0x0a,0xfe,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[2:3], v[6:7], v[4:5], v[10:11] :: v_dual_fma_f32 v9, v1, v14, v4 +// GFX1250: v_dual_fma_f64 v[2:3], v[6:7], v[4:5], v[10:11] :: v_dual_fma_f32 v9, v1, v14, v4 ; encoding: 
[0x06,0x31,0x81,0xcf,0x01,0x01,0x04,0x0a,0x02,0x0e,0x04,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[2:3], v[6:7], v[4:5], v[10:11] :: v_dual_sub_nc_u32 v9, v1, v14 +// GFX1250: v_dual_fma_f64 v[2:3], v[6:7], v[4:5], v[10:11] :: v_dual_sub_nc_u32 v9, v1, v14 ; encoding: [0x06,0x41,0x81,0xcf,0x01,0x01,0x04,0x0a,0x02,0x0e,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[2:3], v[6:7], v[4:5], v[10:11] :: v_dual_lshrrev_b32 v9, v1, v14 +// GFX1250: v_dual_fma_f64 v[2:3], v[6:7], v[4:5], v[10:11] :: v_dual_lshrrev_b32 v9, v1, v14 ; encoding: [0x06,0x51,0x81,0xcf,0x01,0x01,0x04,0x0a,0x02,0x0e,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[2:3], v[6:7], v[4:5], v[10:11] :: v_dual_ashrrev_i32 v9, v1, v14 +// GFX1250: v_dual_fma_f64 v[2:3], v[6:7], v[4:5], v[10:11] :: v_dual_ashrrev_i32 v9, v1, v14 ; encoding: [0x06,0x61,0x81,0xcf,0x01,0x01,0x04,0x0a,0x02,0x0e,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[2:3], v[6:7], v[4:5], v[10:11] :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0x91 +// GFX1250: v_dual_fma_f64 v[2:3], v[6:7], v[4:5], v[10:11] :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0x91 ; encoding: [0x06,0x21,0x81,0xcf,0x01,0x01,0x04,0x0a,0x02,0x03,0x91,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[252:253], v[6:7], v[4:5] :: v_dual_add_f32 v8, v1, v3 +// GFX1250: v_dual_add_f64 v[252:253], v[6:7], v[4:5] :: v_dual_add_f32 v8, v1, v3 ; encoding: [0x06,0x41,0x84,0xcf,0x01,0x01,0x04,0x00,0xfc,0x03,0x00,0x08] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[2:3], v[4:5] :: v_dual_add_f32 v7, v253, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[2:3], v[4:5] :: v_dual_add_f32 v7, v253, v3 ; encoding: [0x02,0x41,0x84,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[254:255], v[4:5] :: v_dual_add_f32 v7, v5, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[254:255], v[4:5] :: v_dual_add_f32 v7, v5, v3 ; encoding: [0xfe,0x41,0x84,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_add_f32 v7, v3, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_add_f32 v7, v3, v3 ; encoding: [0x04,0x41,0x84,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[6:7], v[4:5] :: v_dual_add_f32 v7, v4, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[6:7], v[4:5] :: v_dual_add_f32 v7, v4, v3 ; encoding: [0x06,0x41,0x84,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], s[104:105], v[4:5] :: v_dual_add_f32 v7, v1, v3 +// GFX1250: v_dual_add_f64 v[254:255], s[104:105], v[4:5] :: v_dual_add_f32 v7, v1, v3 ; encoding: [0x68,0x40,0x84,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_add_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_add_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_add_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0x40,0x84,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], exec, v[4:5] :: v_dual_add_f32 v7, v15, v3 +// GFX1250: v_dual_add_f64 v[254:255], exec, v[4:5] :: v_dual_add_f32 v7, v15, v3 ; encoding: [0x7e,0x40,0x84,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], vcc, v[4:5] :: v_dual_add_f32 v7, exec_hi, v3 +// GFX1250: v_dual_add_f64 
v[254:255], vcc, v[4:5] :: v_dual_add_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x84,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], src_scc, v[4:5] :: v_dual_add_f32 v7, -1, v3 +// GFX1250: v_dual_add_f64 v[254:255], src_scc, v[4:5] :: v_dual_add_f32 v7, -1, v3 ; encoding: [0xfd,0x40,0x84,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], 0.5, v[8:9] :: v_dual_add_f32 v7, 0.5, v2 +// GFX1250: v_dual_add_f64 v[254:255], 0.5, v[8:9] :: v_dual_add_f32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x84,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], -1, v[6:7] :: v_dual_add_f32 v7, src_scc, v5 +// GFX1250: v_dual_add_f64 v[254:255], -1, v[6:7] :: v_dual_add_f32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x84,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_add_nc_u32 v7, v2, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_add_nc_u32 v7, v2, v3 ; encoding: [0x04,0x01,0x85,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[2:3], v[4:5] :: v_dual_add_nc_u32 v7, v253, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[2:3], v[4:5] :: v_dual_add_nc_u32 v7, v253, v3 ; encoding: [0x02,0x01,0x85,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[254:255], v[4:5] :: v_dual_add_nc_u32 v7, v5, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[254:255], v[4:5] :: v_dual_add_nc_u32 v7, v5, v3 ; encoding: [0xfe,0x01,0x85,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: 
error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_add_nc_u32 v7, v3, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_add_nc_u32 v7, v3, v3 ; encoding: [0x04,0x01,0x85,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[6:7], v[4:5] :: v_dual_add_nc_u32 v7, v4, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[6:7], v[4:5] :: v_dual_add_nc_u32 v7, v4, v3 ; encoding: [0x06,0x01,0x85,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], s[104:105], v[4:5] :: v_dual_add_nc_u32 v7, v1, v3 +// GFX1250: v_dual_add_f64 v[254:255], s[104:105], v[4:5] :: v_dual_add_nc_u32 v7, v1, v3 ; encoding: [0x68,0x00,0x85,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_add_nc_u32 v7, vcc_lo, v3 +// GFX1250: v_dual_add_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_add_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7a,0x00,0x85,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], exec, v[4:5] :: v_dual_add_nc_u32 v7, v15, v3 +// GFX1250: v_dual_add_f64 v[254:255], exec, v[4:5] :: v_dual_add_nc_u32 v7, v15, v3 ; encoding: [0x7e,0x00,0x85,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], vcc, v[4:5] :: v_dual_add_nc_u32 v7, exec_hi, v3 +// GFX1250: v_dual_add_f64 v[254:255], vcc, v[4:5] :: v_dual_add_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x85,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], src_scc, v[4:5] :: v_dual_add_nc_u32 v7, -1, v3 +// 
GFX1250: v_dual_add_f64 v[254:255], src_scc, v[4:5] :: v_dual_add_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x00,0x85,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], 0.5, v[8:9] :: v_dual_add_nc_u32 v7, 0.5, v2 +// GFX1250: v_dual_add_f64 v[254:255], 0.5, v[8:9] :: v_dual_add_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x85,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], -1, v[6:7] :: v_dual_add_nc_u32 v7, src_scc, v5 +// GFX1250: v_dual_add_f64 v[254:255], -1, v[6:7] :: v_dual_add_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x85,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo +// GFX1250: v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo ; encoding: [0x04,0x91,0x84,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[2:3], v[4:5] :: v_dual_cndmask_b32 v7, v253, v3, vcc_lo +// GFX1250: v_dual_add_f64 v[254:255], v[2:3], v[4:5] :: v_dual_cndmask_b32 v7, v253, v3, vcc_lo ; encoding: [0x02,0x91,0x84,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[254:255], v[4:5] :: v_dual_cndmask_b32 v7, v5, v3, vcc_lo +// GFX1250: v_dual_add_f64 v[254:255], v[254:255], v[4:5] :: v_dual_cndmask_b32 v7, v5, v3, vcc_lo ; encoding: [0xfe,0x91,0x84,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo +// GFX1250: v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_cndmask_b32 v7, v3, 
v3, vcc_lo ; encoding: [0x04,0x91,0x84,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[6:7], v[4:5] :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo +// GFX1250: v_dual_add_f64 v[254:255], v[6:7], v[4:5] :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo ; encoding: [0x06,0x91,0x84,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], s[2:3], v[4:5] :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo +// GFX1250: v_dual_add_f64 v[254:255], s[2:3], v[4:5] :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo ; encoding: [0x02,0x90,0x84,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_cndmask_b32 v7, v15, v3, vcc_lo +// GFX1250: v_dual_add_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_cndmask_b32 v7, v15, v3, vcc_lo ; encoding: [0x7a,0x90,0x84,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], exec, v[4:5] :: v_dual_cndmask_b32 v7, v10, v3, vcc_lo +// GFX1250: v_dual_add_f64 v[254:255], exec, v[4:5] :: v_dual_cndmask_b32 v7, v10, v3, vcc_lo ; encoding: [0x7e,0x90,0x84,0xcf,0x0a,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], vcc, v[4:5] :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo +// GFX1250: v_dual_add_f64 v[254:255], vcc, v[4:5] :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo ; encoding: [0x6a,0x90,0x84,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], src_scc, v[4:5] :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo +// GFX1250: v_dual_add_f64 v[254:255], src_scc, v[4:5] :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo ; encoding: 
[0xfd,0x90,0x84,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], 0.5, v[8:9] :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo +// GFX1250: v_dual_add_f64 v[254:255], 0.5, v[8:9] :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo ; encoding: [0xf0,0x90,0x84,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], -1, v[6:7] :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo +// GFX1250: v_dual_add_f64 v[254:255], -1, v[6:7] :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo ; encoding: [0xc1,0x90,0x84,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_fmac_f32 v9, v2, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_fmac_f32 v9, v2, v3 ; encoding: [0x04,0x01,0x84,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[2:3], v[4:5] :: v_dual_fmac_f32 v9, v253, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[2:3], v[4:5] :: v_dual_fmac_f32 v9, v253, v3 ; encoding: [0x02,0x01,0x84,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[254:255], v[4:5] :: v_dual_fmac_f32 v9, v5, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[254:255], v[4:5] :: v_dual_fmac_f32 v9, v5, v3 ; encoding: [0xfe,0x01,0x84,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_fmac_f32 v9, v3, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_fmac_f32 v9, v3, v3 ; encoding: [0x04,0x01,0x84,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_add_f64 v[254:255], v[6:7], v[4:5] :: v_dual_fmac_f32 v9, v4, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[6:7], v[4:5] :: v_dual_fmac_f32 v9, v4, v3 ; encoding: [0x06,0x01,0x84,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], s[104:105], v[4:5] :: v_dual_fmac_f32 v9, v1, v3 +// GFX1250: v_dual_add_f64 v[254:255], s[104:105], v[4:5] :: v_dual_fmac_f32 v9, v1, v3 ; encoding: [0x68,0x00,0x84,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_fmac_f32 v9, vcc_lo, v3 +// GFX1250: v_dual_add_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_fmac_f32 v9, vcc_lo, v3 ; encoding: [0x7a,0x00,0x84,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], exec, v[4:5] :: v_dual_fmac_f32 v9, v15, v3 +// GFX1250: v_dual_add_f64 v[254:255], exec, v[4:5] :: v_dual_fmac_f32 v9, v15, v3 ; encoding: [0x7e,0x00,0x84,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], vcc, v[4:5] :: v_dual_fmac_f32 v9, exec_hi, v3 +// GFX1250: v_dual_add_f64 v[254:255], vcc, v[4:5] :: v_dual_fmac_f32 v9, exec_hi, v3 ; encoding: [0x6a,0x00,0x84,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], src_scc, v[4:5] :: v_dual_fmac_f32 v9, -1, v3 +// GFX1250: v_dual_add_f64 v[254:255], src_scc, v[4:5] :: v_dual_fmac_f32 v9, -1, v3 ; encoding: [0xfd,0x00,0x84,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], 0.5, v[8:9] :: v_dual_fmac_f32 v9, 0.5, v2 +// GFX1250: v_dual_add_f64 v[254:255], 0.5, v[8:9] :: 
v_dual_fmac_f32 v9, 0.5, v2 ; encoding: [0xf0,0x00,0x84,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], -1, v[6:7] :: v_dual_fmac_f32 v9, src_scc, v5 +// GFX1250: v_dual_add_f64 v[254:255], -1, v[6:7] :: v_dual_fmac_f32 v9, src_scc, v5 ; encoding: [0xc1,0x00,0x84,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_lshlrev_b32 v7, v2, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_lshlrev_b32 v7, v2, v3 ; encoding: [0x04,0x11,0x85,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[2:3], v[4:5] :: v_dual_lshlrev_b32 v7, v253, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[2:3], v[4:5] :: v_dual_lshlrev_b32 v7, v253, v3 ; encoding: [0x02,0x11,0x85,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[254:255], v[4:5] :: v_dual_lshlrev_b32 v7, v5, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[254:255], v[4:5] :: v_dual_lshlrev_b32 v7, v5, v3 ; encoding: [0xfe,0x11,0x85,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_lshlrev_b32 v7, v3, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_lshlrev_b32 v7, v3, v3 ; encoding: [0x04,0x11,0x85,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[6:7], v[4:5] :: v_dual_lshlrev_b32 v7, v4, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[6:7], v[4:5] :: v_dual_lshlrev_b32 v7, v4, v3 ; encoding: [0x06,0x11,0x85,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], s[104:105], v[4:5] :: v_dual_lshlrev_b32 v7, v1, v3 +// GFX1250: v_dual_add_f64 v[254:255], s[104:105], v[4:5] :: v_dual_lshlrev_b32 v7, v1, v3 ; encoding: [0x68,0x10,0x85,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_lshlrev_b32 v7, vcc_lo, v3 +// GFX1250: v_dual_add_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_lshlrev_b32 v7, vcc_lo, v3 ; encoding: [0x7a,0x10,0x85,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], exec, v[4:5] :: v_dual_lshlrev_b32 v7, v15, v3 +// GFX1250: v_dual_add_f64 v[254:255], exec, v[4:5] :: v_dual_lshlrev_b32 v7, v15, v3 ; encoding: [0x7e,0x10,0x85,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], vcc, v[4:5] :: v_dual_lshlrev_b32 v7, exec_hi, v3 +// GFX1250: v_dual_add_f64 v[254:255], vcc, v[4:5] :: v_dual_lshlrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x10,0x85,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], src_scc, v[4:5] :: v_dual_lshlrev_b32 v7, -1, v3 +// GFX1250: v_dual_add_f64 v[254:255], src_scc, v[4:5] :: v_dual_lshlrev_b32 v7, -1, v3 ; encoding: [0xfd,0x10,0x85,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], 0.5, v[8:9] :: v_dual_lshlrev_b32 v7, 0.5, v2 +// GFX1250: v_dual_add_f64 v[254:255], 0.5, v[8:9] :: v_dual_lshlrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x10,0x85,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], -1, v[6:7] :: v_dual_lshlrev_b32 v7, src_scc, v5 +// 
GFX1250: v_dual_add_f64 v[254:255], -1, v[6:7] :: v_dual_lshlrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x10,0x85,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_max_num_f32 v7, v2, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_max_num_f32 v7, v2, v3 ; encoding: [0x04,0xa1,0x84,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[2:3], v[4:5] :: v_dual_max_num_f32 v7, v253, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[2:3], v[4:5] :: v_dual_max_num_f32 v7, v253, v3 ; encoding: [0x02,0xa1,0x84,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[254:255], v[4:5] :: v_dual_max_num_f32 v7, v5, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[254:255], v[4:5] :: v_dual_max_num_f32 v7, v5, v3 ; encoding: [0xfe,0xa1,0x84,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_max_num_f32 v7, v3, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_max_num_f32 v7, v3, v3 ; encoding: [0x04,0xa1,0x84,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[6:7], v[4:5] :: v_dual_max_num_f32 v7, v4, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[6:7], v[4:5] :: v_dual_max_num_f32 v7, v4, v3 ; encoding: [0x06,0xa1,0x84,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], s[104:105], v[4:5] :: v_dual_max_num_f32 v7, v1, v3 +// GFX1250: v_dual_add_f64 v[254:255], s[104:105], v[4:5] :: v_dual_max_num_f32 v7, v1, v3 ; encoding: 
[0x68,0xa0,0x84,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_max_num_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_add_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_max_num_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0xa0,0x84,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], exec, v[4:5] :: v_dual_max_num_f32 v7, v15, v3 +// GFX1250: v_dual_add_f64 v[254:255], exec, v[4:5] :: v_dual_max_num_f32 v7, v15, v3 ; encoding: [0x7e,0xa0,0x84,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], vcc, v[4:5] :: v_dual_max_num_f32 v7, exec_hi, v3 +// GFX1250: v_dual_add_f64 v[254:255], vcc, v[4:5] :: v_dual_max_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xa0,0x84,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], src_scc, v[4:5] :: v_dual_max_num_f32 v7, -1, v3 +// GFX1250: v_dual_add_f64 v[254:255], src_scc, v[4:5] :: v_dual_max_num_f32 v7, -1, v3 ; encoding: [0xfd,0xa0,0x84,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], 0.5, v[8:9] :: v_dual_max_num_f32 v7, 0.5, v2 +// GFX1250: v_dual_add_f64 v[254:255], 0.5, v[8:9] :: v_dual_max_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xa0,0x84,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], -1, v[6:7] :: v_dual_max_num_f32 v7, src_scc, v5 +// GFX1250: v_dual_add_f64 v[254:255], -1, v[6:7] :: v_dual_max_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xa0,0x84,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_min_num_f32 v7, v2, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_min_num_f32 v7, v2, v3 ; encoding: [0x04,0xb1,0x84,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[2:3], v[4:5] :: v_dual_min_num_f32 v7, v253, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[2:3], v[4:5] :: v_dual_min_num_f32 v7, v253, v3 ; encoding: [0x02,0xb1,0x84,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[254:255], v[4:5] :: v_dual_min_num_f32 v7, v5, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[254:255], v[4:5] :: v_dual_min_num_f32 v7, v5, v3 ; encoding: [0xfe,0xb1,0x84,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_min_num_f32 v7, v3, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_min_num_f32 v7, v3, v3 ; encoding: [0x04,0xb1,0x84,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[6:7], v[4:5] :: v_dual_min_num_f32 v7, v4, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[6:7], v[4:5] :: v_dual_min_num_f32 v7, v4, v3 ; encoding: [0x06,0xb1,0x84,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], s[104:105], v[4:5] :: v_dual_min_num_f32 v7, v1, v3 +// GFX1250: v_dual_add_f64 v[254:255], s[104:105], v[4:5] :: v_dual_min_num_f32 v7, v1, v3 ; encoding: [0x68,0xb0,0x84,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_min_num_f32 v7, vcc_lo, v3 +// GFX1250: 
v_dual_add_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_min_num_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0xb0,0x84,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], exec, v[4:5] :: v_dual_min_num_f32 v7, v15, v3 +// GFX1250: v_dual_add_f64 v[254:255], exec, v[4:5] :: v_dual_min_num_f32 v7, v15, v3 ; encoding: [0x7e,0xb0,0x84,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], vcc, v[4:5] :: v_dual_min_num_f32 v7, exec_hi, v3 +// GFX1250: v_dual_add_f64 v[254:255], vcc, v[4:5] :: v_dual_min_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xb0,0x84,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], src_scc, v[4:5] :: v_dual_min_num_f32 v7, -1, v3 +// GFX1250: v_dual_add_f64 v[254:255], src_scc, v[4:5] :: v_dual_min_num_f32 v7, -1, v3 ; encoding: [0xfd,0xb0,0x84,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], 0.5, v[8:9] :: v_dual_min_num_f32 v7, 0.5, v2 +// GFX1250: v_dual_add_f64 v[254:255], 0.5, v[8:9] :: v_dual_min_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xb0,0x84,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], -1, v[6:7] :: v_dual_min_num_f32 v7, src_scc, v5 +// GFX1250: v_dual_add_f64 v[254:255], -1, v[6:7] :: v_dual_min_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xb0,0x84,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[4:5], v[254:255] :: v_dual_mov_b32 v7, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[4:5], v[254:255] :: v_dual_mov_b32 v7, v3 ; encoding: 
[0x04,0x81,0x84,0xcf,0x03,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[2:3], v[254:255] :: v_dual_mov_b32 v7, v253 +// GFX1250: v_dual_add_f64 v[254:255], v[2:3], v[254:255] :: v_dual_mov_b32 v7, v253 ; encoding: [0x02,0x81,0x84,0xcf,0xfd,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[254:255], v[254:255] :: v_dual_mov_b32 v7, v4 +// GFX1250: v_dual_add_f64 v[254:255], v[254:255], v[254:255] :: v_dual_mov_b32 v7, v4 ; encoding: [0xfe,0x81,0x84,0xcf,0x04,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[4:5], v[254:255] :: v_dual_mov_b32 v7, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[4:5], v[254:255] :: v_dual_mov_b32 v7, v3 ; encoding: [0x04,0x81,0x84,0xcf,0x03,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[6:7], v[254:255] :: v_dual_mov_b32 v7, v4 +// GFX1250: v_dual_add_f64 v[254:255], v[6:7], v[254:255] :: v_dual_mov_b32 v7, v4 ; encoding: [0x06,0x81,0x84,0xcf,0x04,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], s[104:105], v[254:255] :: v_dual_mov_b32 v7, s1 +// GFX1250: v_dual_add_f64 v[254:255], s[104:105], v[254:255] :: v_dual_mov_b32 v7, s1 ; encoding: [0x68,0x80,0x84,0xcf,0x01,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], s[2:3], v[254:255] :: v_dual_mov_b32 v7, s105 +// GFX1250: v_dual_add_f64 v[254:255], s[2:3], v[254:255] :: v_dual_mov_b32 v7, s105 ; encoding: [0x02,0x80,0x84,0xcf,0x69,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], 
ttmp[14:15], v[254:255] :: v_dual_mov_b32 v7, vcc_lo +// GFX1250: v_dual_add_f64 v[254:255], ttmp[14:15], v[254:255] :: v_dual_mov_b32 v7, vcc_lo ; encoding: [0x7a,0x80,0x84,0xcf,0x6a,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], exec, v[254:255] :: v_dual_mov_b32 v7, ttmp15 +// GFX1250: v_dual_add_f64 v[254:255], exec, v[254:255] :: v_dual_mov_b32 v7, ttmp15 ; encoding: [0x7e,0x80,0x84,0xcf,0x7b,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], vcc, v[254:255] :: v_dual_mov_b32 v7, exec_hi +// GFX1250: v_dual_add_f64 v[254:255], vcc, v[254:255] :: v_dual_mov_b32 v7, exec_hi ; encoding: [0x6a,0x80,0x84,0xcf,0x7f,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], src_scc, v[254:255] :: v_dual_mov_b32 v7, -1 +// GFX1250: v_dual_add_f64 v[254:255], src_scc, v[254:255] :: v_dual_mov_b32 v7, -1 ; encoding: [0xfd,0x80,0x84,0xcf,0xc1,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], 0.5, v[8:9] :: v_dual_mov_b32 v7, 0.5 +// GFX1250: v_dual_add_f64 v[254:255], 0.5, v[8:9] :: v_dual_mov_b32 v7, 0.5 ; encoding: [0xf0,0x80,0x84,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], -1, v[6:7] :: v_dual_mov_b32 v7, src_scc +// GFX1250: v_dual_add_f64 v[254:255], -1, v[6:7] :: v_dual_mov_b32 v7, src_scc ; encoding: [0xc1,0x80,0x84,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v2, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v2, v3 ; encoding: 
[0x04,0x71,0x84,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[2:3], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v253, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[2:3], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v253, v3 ; encoding: [0x02,0x71,0x84,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[254:255], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v5, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[254:255], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v5, v3 ; encoding: [0xfe,0x71,0x84,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v3, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v3, v3 ; encoding: [0x04,0x71,0x84,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[6:7], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v4, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[6:7], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v4, v3 ; encoding: [0x06,0x71,0x84,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], s[104:105], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v1, v3 +// GFX1250: v_dual_add_f64 v[254:255], s[104:105], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v1, v3 ; encoding: [0x68,0x70,0x84,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_add_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 ; encoding: 
[0x7a,0x70,0x84,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], exec, v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v15, v3 +// GFX1250: v_dual_add_f64 v[254:255], exec, v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v15, v3 ; encoding: [0x7e,0x70,0x84,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], vcc, v[4:5] :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 +// GFX1250: v_dual_add_f64 v[254:255], vcc, v[4:5] :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x84,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], src_scc, v[4:5] :: v_dual_mul_dx9_zero_f32 v7, -1, v3 +// GFX1250: v_dual_add_f64 v[254:255], src_scc, v[4:5] :: v_dual_mul_dx9_zero_f32 v7, -1, v3 ; encoding: [0xfd,0x70,0x84,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], 0.5, v[8:9] :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 +// GFX1250: v_dual_add_f64 v[254:255], 0.5, v[8:9] :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x84,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], -1, v[6:7] :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 +// GFX1250: v_dual_add_f64 v[254:255], -1, v[6:7] :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x84,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_mul_f32 v7, v2, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_mul_f32 v7, v2, v3 ; encoding: [0x04,0x31,0x84,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[2:3], v[4:5] :: v_dual_mul_f32 v7, v253, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[2:3], v[4:5] :: v_dual_mul_f32 v7, v253, v3 ; encoding: [0x02,0x31,0x84,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[254:255], v[4:5] :: v_dual_mul_f32 v7, v5, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[254:255], v[4:5] :: v_dual_mul_f32 v7, v5, v3 ; encoding: [0xfe,0x31,0x84,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_mul_f32 v7, v3, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_mul_f32 v7, v3, v3 ; encoding: [0x04,0x31,0x84,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[6:7], v[4:5] :: v_dual_mul_f32 v7, v4, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[6:7], v[4:5] :: v_dual_mul_f32 v7, v4, v3 ; encoding: [0x06,0x31,0x84,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], s[104:105], v[4:5] :: v_dual_mul_f32 v7, v1, v3 +// GFX1250: v_dual_add_f64 v[254:255], s[104:105], v[4:5] :: v_dual_mul_f32 v7, v1, v3 ; encoding: [0x68,0x30,0x84,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_mul_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_add_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_mul_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0x30,0x84,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], exec, v[4:5] :: v_dual_mul_f32 v7, v15, v3 +// GFX1250: v_dual_add_f64 v[254:255], exec, 
v[4:5] :: v_dual_mul_f32 v7, v15, v3 ; encoding: [0x7e,0x30,0x84,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], vcc, v[4:5] :: v_dual_mul_f32 v7, exec_hi, v3 +// GFX1250: v_dual_add_f64 v[254:255], vcc, v[4:5] :: v_dual_mul_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x30,0x84,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], src_scc, v[4:5] :: v_dual_mul_f32 v7, -1, v3 +// GFX1250: v_dual_add_f64 v[254:255], src_scc, v[4:5] :: v_dual_mul_f32 v7, -1, v3 ; encoding: [0xfd,0x30,0x84,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], 0.5, v[8:9] :: v_dual_mul_f32 v7, 0.5, v2 +// GFX1250: v_dual_add_f64 v[254:255], 0.5, v[8:9] :: v_dual_mul_f32 v7, 0.5, v2 ; encoding: [0xf0,0x30,0x84,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], -1, v[6:7] :: v_dual_mul_f32 v7, src_scc, v5 +// GFX1250: v_dual_add_f64 v[254:255], -1, v[6:7] :: v_dual_mul_f32 v7, src_scc, v5 ; encoding: [0xc1,0x30,0x84,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_sub_f32 v7, v2, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_sub_f32 v7, v2, v3 ; encoding: [0x04,0x51,0x84,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[2:3], v[4:5] :: v_dual_sub_f32 v7, v253, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[2:3], v[4:5] :: v_dual_sub_f32 v7, v253, v3 ; encoding: [0x02,0x51,0x84,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_add_f64 v[254:255], v[254:255], v[4:5] :: v_dual_sub_f32 v7, v5, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[254:255], v[4:5] :: v_dual_sub_f32 v7, v5, v3 ; encoding: [0xfe,0x51,0x84,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_sub_f32 v7, v3, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_sub_f32 v7, v3, v3 ; encoding: [0x04,0x51,0x84,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[6:7], v[4:5] :: v_dual_sub_f32 v7, v4, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[6:7], v[4:5] :: v_dual_sub_f32 v7, v4, v3 ; encoding: [0x06,0x51,0x84,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], s[104:105], v[4:5] :: v_dual_sub_f32 v7, v1, v3 +// GFX1250: v_dual_add_f64 v[254:255], s[104:105], v[4:5] :: v_dual_sub_f32 v7, v1, v3 ; encoding: [0x68,0x50,0x84,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_sub_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_add_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_sub_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0x50,0x84,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], exec, v[4:5] :: v_dual_sub_f32 v7, v15, v3 +// GFX1250: v_dual_add_f64 v[254:255], exec, v[4:5] :: v_dual_sub_f32 v7, v15, v3 ; encoding: [0x7e,0x50,0x84,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], vcc, v[4:5] :: v_dual_sub_f32 v7, exec_hi, v3 +// GFX1250: v_dual_add_f64 v[254:255], vcc, v[4:5] :: v_dual_sub_f32 v7, exec_hi, v3 ; 
encoding: [0x6a,0x50,0x84,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], src_scc, v[4:5] :: v_dual_sub_f32 v7, -1, v3 +// GFX1250: v_dual_add_f64 v[254:255], src_scc, v[4:5] :: v_dual_sub_f32 v7, -1, v3 ; encoding: [0xfd,0x50,0x84,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], 0.5, v[8:9] :: v_dual_sub_f32 v7, 0.5, v2 +// GFX1250: v_dual_add_f64 v[254:255], 0.5, v[8:9] :: v_dual_sub_f32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x84,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], -1, v[6:7] :: v_dual_sub_f32 v7, src_scc, v5 +// GFX1250: v_dual_add_f64 v[254:255], -1, v[6:7] :: v_dual_sub_f32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x84,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_subrev_f32 v7, v2, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_subrev_f32 v7, v2, v3 ; encoding: [0x04,0x61,0x84,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[2:3], v[4:5] :: v_dual_subrev_f32 v7, v253, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[2:3], v[4:5] :: v_dual_subrev_f32 v7, v253, v3 ; encoding: [0x02,0x61,0x84,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[254:255], v[4:5] :: v_dual_subrev_f32 v7, v5, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[254:255], v[4:5] :: v_dual_subrev_f32 v7, v5, v3 ; encoding: [0xfe,0x61,0x84,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 
v[254:255], v[4:5], v[4:5] :: v_dual_subrev_f32 v7, v3, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_subrev_f32 v7, v3, v3 ; encoding: [0x04,0x61,0x84,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[6:7], v[4:5] :: v_dual_subrev_f32 v7, v4, v3 +// GFX1250: v_dual_add_f64 v[254:255], v[6:7], v[4:5] :: v_dual_subrev_f32 v7, v4, v3 ; encoding: [0x06,0x61,0x84,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], s[104:105], v[4:5] :: v_dual_subrev_f32 v7, v1, v3 +// GFX1250: v_dual_add_f64 v[254:255], s[104:105], v[4:5] :: v_dual_subrev_f32 v7, v1, v3 ; encoding: [0x68,0x60,0x84,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_subrev_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_add_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_subrev_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0x60,0x84,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], exec, v[4:5] :: v_dual_subrev_f32 v7, v15, v3 +// GFX1250: v_dual_add_f64 v[254:255], exec, v[4:5] :: v_dual_subrev_f32 v7, v15, v3 ; encoding: [0x7e,0x60,0x84,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], vcc, v[4:5] :: v_dual_subrev_f32 v7, exec_hi, v3 +// GFX1250: v_dual_add_f64 v[254:255], vcc, v[4:5] :: v_dual_subrev_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x84,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], src_scc, v[4:5] :: v_dual_subrev_f32 v7, -1, v3 +// GFX1250: v_dual_add_f64 v[254:255], src_scc, v[4:5] :: 
v_dual_subrev_f32 v7, -1, v3 ; encoding: [0xfd,0x60,0x84,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], 0.5, v[8:9] :: v_dual_subrev_f32 v7, 0.5, v2 +// GFX1250: v_dual_add_f64 v[254:255], 0.5, v[8:9] :: v_dual_subrev_f32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x84,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], -1, v[6:7] :: v_dual_subrev_f32 v7, src_scc, v5 +// GFX1250: v_dual_add_f64 v[254:255], -1, v[6:7] :: v_dual_subrev_f32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x84,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[2:3], v[6:7], v[4:5] :: v_dual_fma_f32 v9, v1, v14, v4 +// GFX1250: v_dual_add_f64 v[2:3], v[6:7], v[4:5] :: v_dual_fma_f32 v9, v1, v14, v4 ; encoding: [0x06,0x31,0x85,0xcf,0x01,0x01,0x04,0x00,0x02,0x0e,0x04,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[2:3], v[6:7], v[4:5] :: v_dual_sub_nc_u32 v9, v1, v14 +// GFX1250: v_dual_add_f64 v[2:3], v[6:7], v[4:5] :: v_dual_sub_nc_u32 v9, v1, v14 ; encoding: [0x06,0x41,0x85,0xcf,0x01,0x01,0x04,0x00,0x02,0x0e,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[2:3], v[6:7], v[4:5] :: v_dual_lshrrev_b32 v9, v1, v14 +// GFX1250: v_dual_add_f64 v[2:3], v[6:7], v[4:5] :: v_dual_lshrrev_b32 v9, v1, v14 ; encoding: [0x06,0x51,0x85,0xcf,0x01,0x01,0x04,0x00,0x02,0x0e,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[2:3], v[6:7], v[4:5] :: v_dual_ashrrev_i32 v9, v1, v14 +// GFX1250: v_dual_add_f64 v[2:3], v[6:7], v[4:5] :: v_dual_ashrrev_i32 v9, v1, v14 ; encoding: [0x06,0x61,0x85,0xcf,0x01,0x01,0x04,0x00,0x02,0x0e,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_add_f64 v[2:3], v[6:7], v[4:5] :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0x92 +// GFX1250: v_dual_add_f64 v[2:3], v[6:7], v[4:5] :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0x92 ; encoding: [0x06,0x21,0x85,0xcf,0x01,0x01,0x04,0x00,0x02,0x03,0x92,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[252:253], v[6:7], v[4:5] :: v_dual_add_f32 v8, v1, v3 +// GFX1250: v_dual_mul_f64 v[252:253], v[6:7], v[4:5] :: v_dual_add_f32 v8, v1, v3 ; encoding: [0x06,0x41,0x88,0xcf,0x01,0x01,0x04,0x00,0xfc,0x03,0x00,0x08] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[2:3], v[4:5] :: v_dual_add_f32 v7, v253, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[2:3], v[4:5] :: v_dual_add_f32 v7, v253, v3 ; encoding: [0x02,0x41,0x88,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[254:255], v[4:5] :: v_dual_add_f32 v7, v5, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[254:255], v[4:5] :: v_dual_add_f32 v7, v5, v3 ; encoding: [0xfe,0x41,0x88,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_add_f32 v7, v3, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_add_f32 v7, v3, v3 ; encoding: [0x04,0x41,0x88,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[6:7], v[4:5] :: v_dual_add_f32 v7, v4, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[6:7], v[4:5] :: v_dual_add_f32 v7, v4, v3 ; encoding: [0x06,0x41,0x88,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], s[104:105], v[4:5] :: v_dual_add_f32 v7, v1, v3 +// GFX1250: v_dual_mul_f64 v[254:255], s[104:105], v[4:5] :: v_dual_add_f32 v7, 
v1, v3 ; encoding: [0x68,0x40,0x88,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_add_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_mul_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_add_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0x40,0x88,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], exec, v[4:5] :: v_dual_add_f32 v7, v15, v3 +// GFX1250: v_dual_mul_f64 v[254:255], exec, v[4:5] :: v_dual_add_f32 v7, v15, v3 ; encoding: [0x7e,0x40,0x88,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], vcc, v[4:5] :: v_dual_add_f32 v7, exec_hi, v3 +// GFX1250: v_dual_mul_f64 v[254:255], vcc, v[4:5] :: v_dual_add_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x88,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], src_scc, v[4:5] :: v_dual_add_f32 v7, -1, v3 +// GFX1250: v_dual_mul_f64 v[254:255], src_scc, v[4:5] :: v_dual_add_f32 v7, -1, v3 ; encoding: [0xfd,0x40,0x88,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], 0.5, v[8:9] :: v_dual_add_f32 v7, 0.5, v2 +// GFX1250: v_dual_mul_f64 v[254:255], 0.5, v[8:9] :: v_dual_add_f32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x88,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], -1, v[6:7] :: v_dual_add_f32 v7, src_scc, v5 +// GFX1250: v_dual_mul_f64 v[254:255], -1, v[6:7] :: v_dual_add_f32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x88,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 
v[254:255], v[4:5], v[4:5] :: v_dual_add_nc_u32 v7, v2, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_add_nc_u32 v7, v2, v3 ; encoding: [0x04,0x01,0x89,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[2:3], v[4:5] :: v_dual_add_nc_u32 v7, v253, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[2:3], v[4:5] :: v_dual_add_nc_u32 v7, v253, v3 ; encoding: [0x02,0x01,0x89,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[254:255], v[4:5] :: v_dual_add_nc_u32 v7, v5, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[254:255], v[4:5] :: v_dual_add_nc_u32 v7, v5, v3 ; encoding: [0xfe,0x01,0x89,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_add_nc_u32 v7, v3, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_add_nc_u32 v7, v3, v3 ; encoding: [0x04,0x01,0x89,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[6:7], v[4:5] :: v_dual_add_nc_u32 v7, v4, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[6:7], v[4:5] :: v_dual_add_nc_u32 v7, v4, v3 ; encoding: [0x06,0x01,0x89,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], s[104:105], v[4:5] :: v_dual_add_nc_u32 v7, v1, v3 +// GFX1250: v_dual_mul_f64 v[254:255], s[104:105], v[4:5] :: v_dual_add_nc_u32 v7, v1, v3 ; encoding: [0x68,0x00,0x89,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_add_nc_u32 v7, vcc_lo, v3 +// GFX1250: v_dual_mul_f64 v[254:255], ttmp[14:15], v[4:5] :: 
v_dual_add_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7a,0x00,0x89,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], exec, v[4:5] :: v_dual_add_nc_u32 v7, v15, v3 +// GFX1250: v_dual_mul_f64 v[254:255], exec, v[4:5] :: v_dual_add_nc_u32 v7, v15, v3 ; encoding: [0x7e,0x00,0x89,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], vcc, v[4:5] :: v_dual_add_nc_u32 v7, exec_hi, v3 +// GFX1250: v_dual_mul_f64 v[254:255], vcc, v[4:5] :: v_dual_add_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x89,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], src_scc, v[4:5] :: v_dual_add_nc_u32 v7, -1, v3 +// GFX1250: v_dual_mul_f64 v[254:255], src_scc, v[4:5] :: v_dual_add_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x00,0x89,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], 0.5, v[8:9] :: v_dual_add_nc_u32 v7, 0.5, v2 +// GFX1250: v_dual_mul_f64 v[254:255], 0.5, v[8:9] :: v_dual_add_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x89,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], -1, v[6:7] :: v_dual_add_nc_u32 v7, src_scc, v5 +// GFX1250: v_dual_mul_f64 v[254:255], -1, v[6:7] :: v_dual_add_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x89,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo +// GFX1250: v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo ; encoding: [0x04,0x91,0x88,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: 
error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[2:3], v[4:5] :: v_dual_cndmask_b32 v7, v253, v3, vcc_lo +// GFX1250: v_dual_mul_f64 v[254:255], v[2:3], v[4:5] :: v_dual_cndmask_b32 v7, v253, v3, vcc_lo ; encoding: [0x02,0x91,0x88,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[254:255], v[4:5] :: v_dual_cndmask_b32 v7, v5, v3, vcc_lo +// GFX1250: v_dual_mul_f64 v[254:255], v[254:255], v[4:5] :: v_dual_cndmask_b32 v7, v5, v3, vcc_lo ; encoding: [0xfe,0x91,0x88,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo +// GFX1250: v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo ; encoding: [0x04,0x91,0x88,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[6:7], v[4:5] :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo +// GFX1250: v_dual_mul_f64 v[254:255], v[6:7], v[4:5] :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo ; encoding: [0x06,0x91,0x88,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], s[2:3], v[4:5] :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo +// GFX1250: v_dual_mul_f64 v[254:255], s[2:3], v[4:5] :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo ; encoding: [0x02,0x90,0x88,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_cndmask_b32 v7, v15, v3, vcc_lo +// GFX1250: v_dual_mul_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_cndmask_b32 v7, v15, v3, vcc_lo ; encoding: [0x7a,0x90,0x88,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction 
requires wavesize=32 + +v_dual_mul_f64 v[254:255], exec, v[4:5] :: v_dual_cndmask_b32 v7, v10, v3, vcc_lo +// GFX1250: v_dual_mul_f64 v[254:255], exec, v[4:5] :: v_dual_cndmask_b32 v7, v10, v3, vcc_lo ; encoding: [0x7e,0x90,0x88,0xcf,0x0a,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], vcc, v[4:5] :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo +// GFX1250: v_dual_mul_f64 v[254:255], vcc, v[4:5] :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo ; encoding: [0x6a,0x90,0x88,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], src_scc, v[4:5] :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo +// GFX1250: v_dual_mul_f64 v[254:255], src_scc, v[4:5] :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo ; encoding: [0xfd,0x90,0x88,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], 0.5, v[8:9] :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo +// GFX1250: v_dual_mul_f64 v[254:255], 0.5, v[8:9] :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo ; encoding: [0xf0,0x90,0x88,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], -1, v[6:7] :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo +// GFX1250: v_dual_mul_f64 v[254:255], -1, v[6:7] :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo ; encoding: [0xc1,0x90,0x88,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_fmac_f32 v9, v2, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_fmac_f32 v9, v2, v3 ; encoding: [0x04,0x01,0x88,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[2:3], v[4:5] :: 
v_dual_fmac_f32 v9, v253, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[2:3], v[4:5] :: v_dual_fmac_f32 v9, v253, v3 ; encoding: [0x02,0x01,0x88,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[254:255], v[4:5] :: v_dual_fmac_f32 v9, v5, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[254:255], v[4:5] :: v_dual_fmac_f32 v9, v5, v3 ; encoding: [0xfe,0x01,0x88,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_fmac_f32 v9, v3, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_fmac_f32 v9, v3, v3 ; encoding: [0x04,0x01,0x88,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[6:7], v[4:5] :: v_dual_fmac_f32 v9, v4, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[6:7], v[4:5] :: v_dual_fmac_f32 v9, v4, v3 ; encoding: [0x06,0x01,0x88,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], s[104:105], v[4:5] :: v_dual_fmac_f32 v9, v1, v3 +// GFX1250: v_dual_mul_f64 v[254:255], s[104:105], v[4:5] :: v_dual_fmac_f32 v9, v1, v3 ; encoding: [0x68,0x00,0x88,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_fmac_f32 v9, vcc_lo, v3 +// GFX1250: v_dual_mul_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_fmac_f32 v9, vcc_lo, v3 ; encoding: [0x7a,0x00,0x88,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], exec, v[4:5] :: v_dual_fmac_f32 v9, v15, v3 +// GFX1250: v_dual_mul_f64 v[254:255], exec, v[4:5] :: v_dual_fmac_f32 v9, v15, v3 ; encoding: 
[0x7e,0x00,0x88,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], vcc, v[4:5] :: v_dual_fmac_f32 v9, exec_hi, v3 +// GFX1250: v_dual_mul_f64 v[254:255], vcc, v[4:5] :: v_dual_fmac_f32 v9, exec_hi, v3 ; encoding: [0x6a,0x00,0x88,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], src_scc, v[4:5] :: v_dual_fmac_f32 v9, -1, v3 +// GFX1250: v_dual_mul_f64 v[254:255], src_scc, v[4:5] :: v_dual_fmac_f32 v9, -1, v3 ; encoding: [0xfd,0x00,0x88,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], 0.5, v[8:9] :: v_dual_fmac_f32 v9, 0.5, v2 +// GFX1250: v_dual_mul_f64 v[254:255], 0.5, v[8:9] :: v_dual_fmac_f32 v9, 0.5, v2 ; encoding: [0xf0,0x00,0x88,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], -1, v[6:7] :: v_dual_fmac_f32 v9, src_scc, v5 +// GFX1250: v_dual_mul_f64 v[254:255], -1, v[6:7] :: v_dual_fmac_f32 v9, src_scc, v5 ; encoding: [0xc1,0x00,0x88,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_lshlrev_b32 v7, v2, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_lshlrev_b32 v7, v2, v3 ; encoding: [0x04,0x11,0x89,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[2:3], v[4:5] :: v_dual_lshlrev_b32 v7, v253, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[2:3], v[4:5] :: v_dual_lshlrev_b32 v7, v253, v3 ; encoding: [0x02,0x11,0x89,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], 
v[254:255], v[4:5] :: v_dual_lshlrev_b32 v7, v5, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[254:255], v[4:5] :: v_dual_lshlrev_b32 v7, v5, v3 ; encoding: [0xfe,0x11,0x89,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_lshlrev_b32 v7, v3, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_lshlrev_b32 v7, v3, v3 ; encoding: [0x04,0x11,0x89,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[6:7], v[4:5] :: v_dual_lshlrev_b32 v7, v4, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[6:7], v[4:5] :: v_dual_lshlrev_b32 v7, v4, v3 ; encoding: [0x06,0x11,0x89,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], s[104:105], v[4:5] :: v_dual_lshlrev_b32 v7, v1, v3 +// GFX1250: v_dual_mul_f64 v[254:255], s[104:105], v[4:5] :: v_dual_lshlrev_b32 v7, v1, v3 ; encoding: [0x68,0x10,0x89,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_lshlrev_b32 v7, vcc_lo, v3 +// GFX1250: v_dual_mul_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_lshlrev_b32 v7, vcc_lo, v3 ; encoding: [0x7a,0x10,0x89,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], exec, v[4:5] :: v_dual_lshlrev_b32 v7, v15, v3 +// GFX1250: v_dual_mul_f64 v[254:255], exec, v[4:5] :: v_dual_lshlrev_b32 v7, v15, v3 ; encoding: [0x7e,0x10,0x89,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], vcc, v[4:5] :: v_dual_lshlrev_b32 v7, exec_hi, v3 +// GFX1250: v_dual_mul_f64 v[254:255], vcc, v[4:5] :: 
v_dual_lshlrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x10,0x89,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], src_scc, v[4:5] :: v_dual_lshlrev_b32 v7, -1, v3 +// GFX1250: v_dual_mul_f64 v[254:255], src_scc, v[4:5] :: v_dual_lshlrev_b32 v7, -1, v3 ; encoding: [0xfd,0x10,0x89,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], 0.5, v[8:9] :: v_dual_lshlrev_b32 v7, 0.5, v2 +// GFX1250: v_dual_mul_f64 v[254:255], 0.5, v[8:9] :: v_dual_lshlrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x10,0x89,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], -1, v[6:7] :: v_dual_lshlrev_b32 v7, src_scc, v5 +// GFX1250: v_dual_mul_f64 v[254:255], -1, v[6:7] :: v_dual_lshlrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x10,0x89,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_max_num_f32 v7, v2, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_max_num_f32 v7, v2, v3 ; encoding: [0x04,0xa1,0x88,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[2:3], v[4:5] :: v_dual_max_num_f32 v7, v253, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[2:3], v[4:5] :: v_dual_max_num_f32 v7, v253, v3 ; encoding: [0x02,0xa1,0x88,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[254:255], v[4:5] :: v_dual_max_num_f32 v7, v5, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[254:255], v[4:5] :: v_dual_max_num_f32 v7, v5, v3 ; encoding: [0xfe,0xa1,0x88,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_max_num_f32 v7, v3, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_max_num_f32 v7, v3, v3 ; encoding: [0x04,0xa1,0x88,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[6:7], v[4:5] :: v_dual_max_num_f32 v7, v4, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[6:7], v[4:5] :: v_dual_max_num_f32 v7, v4, v3 ; encoding: [0x06,0xa1,0x88,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], s[104:105], v[4:5] :: v_dual_max_num_f32 v7, v1, v3 +// GFX1250: v_dual_mul_f64 v[254:255], s[104:105], v[4:5] :: v_dual_max_num_f32 v7, v1, v3 ; encoding: [0x68,0xa0,0x88,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_max_num_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_mul_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_max_num_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0xa0,0x88,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], exec, v[4:5] :: v_dual_max_num_f32 v7, v15, v3 +// GFX1250: v_dual_mul_f64 v[254:255], exec, v[4:5] :: v_dual_max_num_f32 v7, v15, v3 ; encoding: [0x7e,0xa0,0x88,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], vcc, v[4:5] :: v_dual_max_num_f32 v7, exec_hi, v3 +// GFX1250: v_dual_mul_f64 v[254:255], vcc, v[4:5] :: v_dual_max_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xa0,0x88,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], src_scc, v[4:5] :: 
v_dual_max_num_f32 v7, -1, v3 +// GFX1250: v_dual_mul_f64 v[254:255], src_scc, v[4:5] :: v_dual_max_num_f32 v7, -1, v3 ; encoding: [0xfd,0xa0,0x88,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], 0.5, v[8:9] :: v_dual_max_num_f32 v7, 0.5, v2 +// GFX1250: v_dual_mul_f64 v[254:255], 0.5, v[8:9] :: v_dual_max_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xa0,0x88,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], -1, v[6:7] :: v_dual_max_num_f32 v7, src_scc, v5 +// GFX1250: v_dual_mul_f64 v[254:255], -1, v[6:7] :: v_dual_max_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xa0,0x88,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_min_num_f32 v7, v2, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_min_num_f32 v7, v2, v3 ; encoding: [0x04,0xb1,0x88,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[2:3], v[4:5] :: v_dual_min_num_f32 v7, v253, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[2:3], v[4:5] :: v_dual_min_num_f32 v7, v253, v3 ; encoding: [0x02,0xb1,0x88,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[254:255], v[4:5] :: v_dual_min_num_f32 v7, v5, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[254:255], v[4:5] :: v_dual_min_num_f32 v7, v5, v3 ; encoding: [0xfe,0xb1,0x88,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_min_num_f32 v7, v3, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_min_num_f32 v7, v3, v3 ; encoding: 
[0x04,0xb1,0x88,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[6:7], v[4:5] :: v_dual_min_num_f32 v7, v4, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[6:7], v[4:5] :: v_dual_min_num_f32 v7, v4, v3 ; encoding: [0x06,0xb1,0x88,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], s[104:105], v[4:5] :: v_dual_min_num_f32 v7, v1, v3 +// GFX1250: v_dual_mul_f64 v[254:255], s[104:105], v[4:5] :: v_dual_min_num_f32 v7, v1, v3 ; encoding: [0x68,0xb0,0x88,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_min_num_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_mul_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_min_num_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0xb0,0x88,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], exec, v[4:5] :: v_dual_min_num_f32 v7, v15, v3 +// GFX1250: v_dual_mul_f64 v[254:255], exec, v[4:5] :: v_dual_min_num_f32 v7, v15, v3 ; encoding: [0x7e,0xb0,0x88,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], vcc, v[4:5] :: v_dual_min_num_f32 v7, exec_hi, v3 +// GFX1250: v_dual_mul_f64 v[254:255], vcc, v[4:5] :: v_dual_min_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xb0,0x88,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], src_scc, v[4:5] :: v_dual_min_num_f32 v7, -1, v3 +// GFX1250: v_dual_mul_f64 v[254:255], src_scc, v[4:5] :: v_dual_min_num_f32 v7, -1, v3 ; encoding: [0xfd,0xb0,0x88,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction 
requires wavesize=32 + +v_dual_mul_f64 v[254:255], 0.5, v[8:9] :: v_dual_min_num_f32 v7, 0.5, v2 +// GFX1250: v_dual_mul_f64 v[254:255], 0.5, v[8:9] :: v_dual_min_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xb0,0x88,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], -1, v[6:7] :: v_dual_min_num_f32 v7, src_scc, v5 +// GFX1250: v_dual_mul_f64 v[254:255], -1, v[6:7] :: v_dual_min_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xb0,0x88,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[4:5], v[254:255] :: v_dual_mov_b32 v7, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[4:5], v[254:255] :: v_dual_mov_b32 v7, v3 ; encoding: [0x04,0x81,0x88,0xcf,0x03,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[2:3], v[254:255] :: v_dual_mov_b32 v7, v253 +// GFX1250: v_dual_mul_f64 v[254:255], v[2:3], v[254:255] :: v_dual_mov_b32 v7, v253 ; encoding: [0x02,0x81,0x88,0xcf,0xfd,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[254:255], v[254:255] :: v_dual_mov_b32 v7, v4 +// GFX1250: v_dual_mul_f64 v[254:255], v[254:255], v[254:255] :: v_dual_mov_b32 v7, v4 ; encoding: [0xfe,0x81,0x88,0xcf,0x04,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[4:5], v[254:255] :: v_dual_mov_b32 v7, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[4:5], v[254:255] :: v_dual_mov_b32 v7, v3 ; encoding: [0x04,0x81,0x88,0xcf,0x03,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[6:7], v[254:255] :: v_dual_mov_b32 v7, v4 +// GFX1250: v_dual_mul_f64 v[254:255], v[6:7], v[254:255] :: 
v_dual_mov_b32 v7, v4 ; encoding: [0x06,0x81,0x88,0xcf,0x04,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], s[104:105], v[254:255] :: v_dual_mov_b32 v7, s1 +// GFX1250: v_dual_mul_f64 v[254:255], s[104:105], v[254:255] :: v_dual_mov_b32 v7, s1 ; encoding: [0x68,0x80,0x88,0xcf,0x01,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], s[2:3], v[254:255] :: v_dual_mov_b32 v7, s105 +// GFX1250: v_dual_mul_f64 v[254:255], s[2:3], v[254:255] :: v_dual_mov_b32 v7, s105 ; encoding: [0x02,0x80,0x88,0xcf,0x69,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], ttmp[14:15], v[254:255] :: v_dual_mov_b32 v7, vcc_lo +// GFX1250: v_dual_mul_f64 v[254:255], ttmp[14:15], v[254:255] :: v_dual_mov_b32 v7, vcc_lo ; encoding: [0x7a,0x80,0x88,0xcf,0x6a,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], exec, v[254:255] :: v_dual_mov_b32 v7, ttmp15 +// GFX1250: v_dual_mul_f64 v[254:255], exec, v[254:255] :: v_dual_mov_b32 v7, ttmp15 ; encoding: [0x7e,0x80,0x88,0xcf,0x7b,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], vcc, v[254:255] :: v_dual_mov_b32 v7, exec_hi +// GFX1250: v_dual_mul_f64 v[254:255], vcc, v[254:255] :: v_dual_mov_b32 v7, exec_hi ; encoding: [0x6a,0x80,0x88,0xcf,0x7f,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], src_scc, v[254:255] :: v_dual_mov_b32 v7, -1 +// GFX1250: v_dual_mul_f64 v[254:255], src_scc, v[254:255] :: v_dual_mov_b32 v7, -1 ; encoding: [0xfd,0x80,0x88,0xcf,0xc1,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_mul_f64 v[254:255], 0.5, v[8:9] :: v_dual_mov_b32 v7, 0.5 +// GFX1250: v_dual_mul_f64 v[254:255], 0.5, v[8:9] :: v_dual_mov_b32 v7, 0.5 ; encoding: [0xf0,0x80,0x88,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], -1, v[6:7] :: v_dual_mov_b32 v7, src_scc +// GFX1250: v_dual_mul_f64 v[254:255], -1, v[6:7] :: v_dual_mov_b32 v7, src_scc ; encoding: [0xc1,0x80,0x88,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v2, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v2, v3 ; encoding: [0x04,0x71,0x88,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[2:3], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v253, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[2:3], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v253, v3 ; encoding: [0x02,0x71,0x88,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[254:255], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v5, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[254:255], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v5, v3 ; encoding: [0xfe,0x71,0x88,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v3, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v3, v3 ; encoding: [0x04,0x71,0x88,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[6:7], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v4, v3 +// GFX1250: v_dual_mul_f64 
v[254:255], v[6:7], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v4, v3 ; encoding: [0x06,0x71,0x88,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], s[104:105], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v1, v3 +// GFX1250: v_dual_mul_f64 v[254:255], s[104:105], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v1, v3 ; encoding: [0x68,0x70,0x88,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_mul_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0x70,0x88,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], exec, v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v15, v3 +// GFX1250: v_dual_mul_f64 v[254:255], exec, v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v15, v3 ; encoding: [0x7e,0x70,0x88,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], vcc, v[4:5] :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 +// GFX1250: v_dual_mul_f64 v[254:255], vcc, v[4:5] :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x88,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], src_scc, v[4:5] :: v_dual_mul_dx9_zero_f32 v7, -1, v3 +// GFX1250: v_dual_mul_f64 v[254:255], src_scc, v[4:5] :: v_dual_mul_dx9_zero_f32 v7, -1, v3 ; encoding: [0xfd,0x70,0x88,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], 0.5, v[8:9] :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 +// GFX1250: v_dual_mul_f64 v[254:255], 0.5, v[8:9] :: v_dual_mul_dx9_zero_f32 v7, 
0.5, v2 ; encoding: [0xf0,0x70,0x88,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], -1, v[6:7] :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 +// GFX1250: v_dual_mul_f64 v[254:255], -1, v[6:7] :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x88,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_mul_f32 v7, v2, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_mul_f32 v7, v2, v3 ; encoding: [0x04,0x31,0x88,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[2:3], v[4:5] :: v_dual_mul_f32 v7, v253, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[2:3], v[4:5] :: v_dual_mul_f32 v7, v253, v3 ; encoding: [0x02,0x31,0x88,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[254:255], v[4:5] :: v_dual_mul_f32 v7, v5, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[254:255], v[4:5] :: v_dual_mul_f32 v7, v5, v3 ; encoding: [0xfe,0x31,0x88,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_mul_f32 v7, v3, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_mul_f32 v7, v3, v3 ; encoding: [0x04,0x31,0x88,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[6:7], v[4:5] :: v_dual_mul_f32 v7, v4, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[6:7], v[4:5] :: v_dual_mul_f32 v7, v4, v3 ; encoding: [0x06,0x31,0x88,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_mul_f64 v[254:255], s[104:105], v[4:5] :: v_dual_mul_f32 v7, v1, v3 +// GFX1250: v_dual_mul_f64 v[254:255], s[104:105], v[4:5] :: v_dual_mul_f32 v7, v1, v3 ; encoding: [0x68,0x30,0x88,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_mul_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_mul_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_mul_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0x30,0x88,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], exec, v[4:5] :: v_dual_mul_f32 v7, v15, v3 +// GFX1250: v_dual_mul_f64 v[254:255], exec, v[4:5] :: v_dual_mul_f32 v7, v15, v3 ; encoding: [0x7e,0x30,0x88,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], vcc, v[4:5] :: v_dual_mul_f32 v7, exec_hi, v3 +// GFX1250: v_dual_mul_f64 v[254:255], vcc, v[4:5] :: v_dual_mul_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x30,0x88,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], src_scc, v[4:5] :: v_dual_mul_f32 v7, -1, v3 +// GFX1250: v_dual_mul_f64 v[254:255], src_scc, v[4:5] :: v_dual_mul_f32 v7, -1, v3 ; encoding: [0xfd,0x30,0x88,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], 0.5, v[8:9] :: v_dual_mul_f32 v7, 0.5, v2 +// GFX1250: v_dual_mul_f64 v[254:255], 0.5, v[8:9] :: v_dual_mul_f32 v7, 0.5, v2 ; encoding: [0xf0,0x30,0x88,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], -1, v[6:7] :: v_dual_mul_f32 v7, src_scc, v5 +// GFX1250: v_dual_mul_f64 v[254:255], -1, v[6:7] :: v_dual_mul_f32 v7, src_scc, v5 ; 
encoding: [0xc1,0x30,0x88,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_sub_f32 v7, v2, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_sub_f32 v7, v2, v3 ; encoding: [0x04,0x51,0x88,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[2:3], v[4:5] :: v_dual_sub_f32 v7, v253, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[2:3], v[4:5] :: v_dual_sub_f32 v7, v253, v3 ; encoding: [0x02,0x51,0x88,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[254:255], v[4:5] :: v_dual_sub_f32 v7, v5, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[254:255], v[4:5] :: v_dual_sub_f32 v7, v5, v3 ; encoding: [0xfe,0x51,0x88,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_sub_f32 v7, v3, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_sub_f32 v7, v3, v3 ; encoding: [0x04,0x51,0x88,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[6:7], v[4:5] :: v_dual_sub_f32 v7, v4, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[6:7], v[4:5] :: v_dual_sub_f32 v7, v4, v3 ; encoding: [0x06,0x51,0x88,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], s[104:105], v[4:5] :: v_dual_sub_f32 v7, v1, v3 +// GFX1250: v_dual_mul_f64 v[254:255], s[104:105], v[4:5] :: v_dual_sub_f32 v7, v1, v3 ; encoding: [0x68,0x50,0x88,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], 
ttmp[14:15], v[4:5] :: v_dual_sub_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_mul_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_sub_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0x50,0x88,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], exec, v[4:5] :: v_dual_sub_f32 v7, v15, v3 +// GFX1250: v_dual_mul_f64 v[254:255], exec, v[4:5] :: v_dual_sub_f32 v7, v15, v3 ; encoding: [0x7e,0x50,0x88,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], vcc, v[4:5] :: v_dual_sub_f32 v7, exec_hi, v3 +// GFX1250: v_dual_mul_f64 v[254:255], vcc, v[4:5] :: v_dual_sub_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x88,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], src_scc, v[4:5] :: v_dual_sub_f32 v7, -1, v3 +// GFX1250: v_dual_mul_f64 v[254:255], src_scc, v[4:5] :: v_dual_sub_f32 v7, -1, v3 ; encoding: [0xfd,0x50,0x88,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], 0.5, v[8:9] :: v_dual_sub_f32 v7, 0.5, v2 +// GFX1250: v_dual_mul_f64 v[254:255], 0.5, v[8:9] :: v_dual_sub_f32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x88,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], -1, v[6:7] :: v_dual_sub_f32 v7, src_scc, v5 +// GFX1250: v_dual_mul_f64 v[254:255], -1, v[6:7] :: v_dual_sub_f32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x88,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_subrev_f32 v7, v2, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_subrev_f32 v7, v2, v3 ; encoding: 
[0x04,0x61,0x88,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[2:3], v[4:5] :: v_dual_subrev_f32 v7, v253, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[2:3], v[4:5] :: v_dual_subrev_f32 v7, v253, v3 ; encoding: [0x02,0x61,0x88,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[254:255], v[4:5] :: v_dual_subrev_f32 v7, v5, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[254:255], v[4:5] :: v_dual_subrev_f32 v7, v5, v3 ; encoding: [0xfe,0x61,0x88,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_subrev_f32 v7, v3, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_subrev_f32 v7, v3, v3 ; encoding: [0x04,0x61,0x88,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[6:7], v[4:5] :: v_dual_subrev_f32 v7, v4, v3 +// GFX1250: v_dual_mul_f64 v[254:255], v[6:7], v[4:5] :: v_dual_subrev_f32 v7, v4, v3 ; encoding: [0x06,0x61,0x88,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], s[104:105], v[4:5] :: v_dual_subrev_f32 v7, v1, v3 +// GFX1250: v_dual_mul_f64 v[254:255], s[104:105], v[4:5] :: v_dual_subrev_f32 v7, v1, v3 ; encoding: [0x68,0x60,0x88,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_subrev_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_mul_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_subrev_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0x60,0x88,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_mul_f64 v[254:255], exec, v[4:5] :: v_dual_subrev_f32 v7, v15, v3 +// GFX1250: v_dual_mul_f64 v[254:255], exec, v[4:5] :: v_dual_subrev_f32 v7, v15, v3 ; encoding: [0x7e,0x60,0x88,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], vcc, v[4:5] :: v_dual_subrev_f32 v7, exec_hi, v3 +// GFX1250: v_dual_mul_f64 v[254:255], vcc, v[4:5] :: v_dual_subrev_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x88,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], src_scc, v[4:5] :: v_dual_subrev_f32 v7, -1, v3 +// GFX1250: v_dual_mul_f64 v[254:255], src_scc, v[4:5] :: v_dual_subrev_f32 v7, -1, v3 ; encoding: [0xfd,0x60,0x88,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], 0.5, v[8:9] :: v_dual_subrev_f32 v7, 0.5, v2 +// GFX1250: v_dual_mul_f64 v[254:255], 0.5, v[8:9] :: v_dual_subrev_f32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x88,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], -1, v[6:7] :: v_dual_subrev_f32 v7, src_scc, v5 +// GFX1250: v_dual_mul_f64 v[254:255], -1, v[6:7] :: v_dual_subrev_f32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x88,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[2:3], v[6:7], v[4:5] :: v_dual_fma_f32 v9, v1, v14, v4 +// GFX1250: v_dual_mul_f64 v[2:3], v[6:7], v[4:5] :: v_dual_fma_f32 v9, v1, v14, v4 ; encoding: [0x06,0x31,0x89,0xcf,0x01,0x01,0x04,0x00,0x02,0x0e,0x04,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[2:3], v[6:7], v[4:5] :: v_dual_sub_nc_u32 v9, v1, v14 +// GFX1250: v_dual_mul_f64 v[2:3], v[6:7], v[4:5] :: v_dual_sub_nc_u32 
v9, v1, v14 ; encoding: [0x06,0x41,0x89,0xcf,0x01,0x01,0x04,0x00,0x02,0x0e,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[2:3], v[6:7], v[4:5] :: v_dual_lshrrev_b32 v9, v1, v14 +// GFX1250: v_dual_mul_f64 v[2:3], v[6:7], v[4:5] :: v_dual_lshrrev_b32 v9, v1, v14 ; encoding: [0x06,0x51,0x89,0xcf,0x01,0x01,0x04,0x00,0x02,0x0e,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[2:3], v[6:7], v[4:5] :: v_dual_ashrrev_i32 v9, v1, v14 +// GFX1250: v_dual_mul_f64 v[2:3], v[6:7], v[4:5] :: v_dual_ashrrev_i32 v9, v1, v14 ; encoding: [0x06,0x61,0x89,0xcf,0x01,0x01,0x04,0x00,0x02,0x0e,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[2:3], v[6:7], v[4:5] :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0x93 +// GFX1250: v_dual_mul_f64 v[2:3], v[6:7], v[4:5] :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0x93 ; encoding: [0x06,0x21,0x89,0xcf,0x01,0x01,0x04,0x00,0x02,0x03,0x93,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[252:253], v[6:7], v[4:5] :: v_dual_add_f32 v8, v1, v3 +// GFX1250: v_dual_max_num_f64 v[252:253], v[6:7], v[4:5] :: v_dual_add_f32 v8, v1, v3 ; encoding: [0x06,0x41,0x8c,0xcf,0x01,0x01,0x04,0x00,0xfc,0x03,0x00,0x08] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_add_f32 v7, v253, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_add_f32 v7, v253, v3 ; encoding: [0x02,0x41,0x8c,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_add_f32 v7, v5, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_add_f32 v7, v5, v3 ; encoding: [0xfe,0x41,0x8c,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_add_f32 v7, v3, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_add_f32 v7, v3, v3 ; encoding: [0x04,0x41,0x8c,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_add_f32 v7, v4, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_add_f32 v7, v4, v3 ; encoding: [0x06,0x41,0x8c,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_add_f32 v7, v1, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_add_f32 v7, v1, v3 ; encoding: [0x68,0x40,0x8c,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_add_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_add_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0x40,0x8c,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], exec, v[4:5] :: v_dual_add_f32 v7, v15, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], exec, v[4:5] :: v_dual_add_f32 v7, v15, v3 ; encoding: [0x7e,0x40,0x8c,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], vcc, v[4:5] :: v_dual_add_f32 v7, exec_hi, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], vcc, v[4:5] :: v_dual_add_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x8c,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_add_f32 v7, -1, v3 +// 
GFX1250: v_dual_max_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_add_f32 v7, -1, v3 ; encoding: [0xfd,0x40,0x8c,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_add_f32 v7, 0.5, v2 +// GFX1250: v_dual_max_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_add_f32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x8c,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], -1, v[6:7] :: v_dual_add_f32 v7, src_scc, v5 +// GFX1250: v_dual_max_num_f64 v[254:255], -1, v[6:7] :: v_dual_add_f32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x8c,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_add_nc_u32 v7, v2, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_add_nc_u32 v7, v2, v3 ; encoding: [0x04,0x01,0x8d,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_add_nc_u32 v7, v253, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_add_nc_u32 v7, v253, v3 ; encoding: [0x02,0x01,0x8d,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_add_nc_u32 v7, v5, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_add_nc_u32 v7, v5, v3 ; encoding: [0xfe,0x01,0x8d,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_add_nc_u32 v7, v3, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_add_nc_u32 v7, v3, v3 ; encoding: 
[0x04,0x01,0x8d,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_add_nc_u32 v7, v4, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_add_nc_u32 v7, v4, v3 ; encoding: [0x06,0x01,0x8d,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_add_nc_u32 v7, v1, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_add_nc_u32 v7, v1, v3 ; encoding: [0x68,0x00,0x8d,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_add_nc_u32 v7, vcc_lo, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_add_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7a,0x00,0x8d,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], exec, v[4:5] :: v_dual_add_nc_u32 v7, v15, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], exec, v[4:5] :: v_dual_add_nc_u32 v7, v15, v3 ; encoding: [0x7e,0x00,0x8d,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], vcc, v[4:5] :: v_dual_add_nc_u32 v7, exec_hi, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], vcc, v[4:5] :: v_dual_add_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x8d,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_add_nc_u32 v7, -1, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_add_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x00,0x8d,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_add_nc_u32 v7, 0.5, v2 +// GFX1250: v_dual_max_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_add_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x8d,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], -1, v[6:7] :: v_dual_add_nc_u32 v7, src_scc, v5 +// GFX1250: v_dual_max_num_f64 v[254:255], -1, v[6:7] :: v_dual_add_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x8d,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo +// GFX1250: v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo ; encoding: [0x04,0x91,0x8c,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_cndmask_b32 v7, v253, v3, vcc_lo +// GFX1250: v_dual_max_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_cndmask_b32 v7, v253, v3, vcc_lo ; encoding: [0x02,0x91,0x8c,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_cndmask_b32 v7, v5, v3, vcc_lo +// GFX1250: v_dual_max_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_cndmask_b32 v7, v5, v3, vcc_lo ; encoding: [0xfe,0x91,0x8c,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo +// GFX1250: v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo ; encoding: [0x04,0x91,0x8c,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo +// GFX1250: v_dual_max_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo ; encoding: [0x06,0x91,0x8c,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], s[2:3], v[4:5] :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo +// GFX1250: v_dual_max_num_f64 v[254:255], s[2:3], v[4:5] :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo ; encoding: [0x02,0x90,0x8c,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_cndmask_b32 v7, v15, v3, vcc_lo +// GFX1250: v_dual_max_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_cndmask_b32 v7, v15, v3, vcc_lo ; encoding: [0x7a,0x90,0x8c,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], exec, v[4:5] :: v_dual_cndmask_b32 v7, v10, v3, vcc_lo +// GFX1250: v_dual_max_num_f64 v[254:255], exec, v[4:5] :: v_dual_cndmask_b32 v7, v10, v3, vcc_lo ; encoding: [0x7e,0x90,0x8c,0xcf,0x0a,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], vcc, v[4:5] :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo +// GFX1250: v_dual_max_num_f64 v[254:255], vcc, v[4:5] :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo ; encoding: [0x6a,0x90,0x8c,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo +// GFX1250: v_dual_max_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo ; encoding: [0xfd,0x90,0x8c,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x6a,0x07] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo +// GFX1250: v_dual_max_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo ; encoding: [0xf0,0x90,0x8c,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], -1, v[6:7] :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo +// GFX1250: v_dual_max_num_f64 v[254:255], -1, v[6:7] :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo ; encoding: [0xc1,0x90,0x8c,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_fmac_f32 v9, v2, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_fmac_f32 v9, v2, v3 ; encoding: [0x04,0x01,0x8c,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_fmac_f32 v9, v253, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_fmac_f32 v9, v253, v3 ; encoding: [0x02,0x01,0x8c,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_fmac_f32 v9, v5, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_fmac_f32 v9, v5, v3 ; encoding: [0xfe,0x01,0x8c,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_fmac_f32 v9, v3, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_fmac_f32 v9, v3, v3 ; encoding: [0x04,0x01,0x8c,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 
v[254:255], v[6:7], v[4:5] :: v_dual_fmac_f32 v9, v4, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_fmac_f32 v9, v4, v3 ; encoding: [0x06,0x01,0x8c,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_fmac_f32 v9, v1, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_fmac_f32 v9, v1, v3 ; encoding: [0x68,0x00,0x8c,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_fmac_f32 v9, vcc_lo, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_fmac_f32 v9, vcc_lo, v3 ; encoding: [0x7a,0x00,0x8c,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], exec, v[4:5] :: v_dual_fmac_f32 v9, v15, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], exec, v[4:5] :: v_dual_fmac_f32 v9, v15, v3 ; encoding: [0x7e,0x00,0x8c,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], vcc, v[4:5] :: v_dual_fmac_f32 v9, exec_hi, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], vcc, v[4:5] :: v_dual_fmac_f32 v9, exec_hi, v3 ; encoding: [0x6a,0x00,0x8c,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_fmac_f32 v9, -1, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_fmac_f32 v9, -1, v3 ; encoding: [0xfd,0x00,0x8c,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_fmac_f32 v9, 0.5, v2 +// GFX1250: v_dual_max_num_f64 v[254:255], 0.5, 
v[8:9] :: v_dual_fmac_f32 v9, 0.5, v2 ; encoding: [0xf0,0x00,0x8c,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], -1, v[6:7] :: v_dual_fmac_f32 v9, src_scc, v5 +// GFX1250: v_dual_max_num_f64 v[254:255], -1, v[6:7] :: v_dual_fmac_f32 v9, src_scc, v5 ; encoding: [0xc1,0x00,0x8c,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_lshlrev_b32 v7, v2, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_lshlrev_b32 v7, v2, v3 ; encoding: [0x04,0x11,0x8d,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_lshlrev_b32 v7, v253, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_lshlrev_b32 v7, v253, v3 ; encoding: [0x02,0x11,0x8d,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_lshlrev_b32 v7, v5, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_lshlrev_b32 v7, v5, v3 ; encoding: [0xfe,0x11,0x8d,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_lshlrev_b32 v7, v3, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_lshlrev_b32 v7, v3, v3 ; encoding: [0x04,0x11,0x8d,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_lshlrev_b32 v7, v4, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_lshlrev_b32 v7, v4, v3 ; encoding: 
[0x06,0x11,0x8d,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_lshlrev_b32 v7, v1, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_lshlrev_b32 v7, v1, v3 ; encoding: [0x68,0x10,0x8d,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_lshlrev_b32 v7, vcc_lo, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_lshlrev_b32 v7, vcc_lo, v3 ; encoding: [0x7a,0x10,0x8d,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], exec, v[4:5] :: v_dual_lshlrev_b32 v7, v15, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], exec, v[4:5] :: v_dual_lshlrev_b32 v7, v15, v3 ; encoding: [0x7e,0x10,0x8d,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], vcc, v[4:5] :: v_dual_lshlrev_b32 v7, exec_hi, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], vcc, v[4:5] :: v_dual_lshlrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x10,0x8d,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_lshlrev_b32 v7, -1, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_lshlrev_b32 v7, -1, v3 ; encoding: [0xfd,0x10,0x8d,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_lshlrev_b32 v7, 0.5, v2 +// GFX1250: v_dual_max_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_lshlrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x10,0x8d,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +// 
W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], -1, v[6:7] :: v_dual_lshlrev_b32 v7, src_scc, v5 +// GFX1250: v_dual_max_num_f64 v[254:255], -1, v[6:7] :: v_dual_lshlrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x10,0x8d,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_max_num_f32 v7, v2, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_max_num_f32 v7, v2, v3 ; encoding: [0x04,0xa1,0x8c,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_max_num_f32 v7, v253, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_max_num_f32 v7, v253, v3 ; encoding: [0x02,0xa1,0x8c,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_max_num_f32 v7, v5, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_max_num_f32 v7, v5, v3 ; encoding: [0xfe,0xa1,0x8c,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_max_num_f32 v7, v3, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_max_num_f32 v7, v3, v3 ; encoding: [0x04,0xa1,0x8c,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_max_num_f32 v7, v4, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_max_num_f32 v7, v4, v3 ; encoding: [0x06,0xa1,0x8c,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_max_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_max_num_f32 v7, v1, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_max_num_f32 v7, v1, v3 ; encoding: [0x68,0xa0,0x8c,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_max_num_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_max_num_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0xa0,0x8c,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], exec, v[4:5] :: v_dual_max_num_f32 v7, v15, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], exec, v[4:5] :: v_dual_max_num_f32 v7, v15, v3 ; encoding: [0x7e,0xa0,0x8c,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], vcc, v[4:5] :: v_dual_max_num_f32 v7, exec_hi, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], vcc, v[4:5] :: v_dual_max_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xa0,0x8c,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_max_num_f32 v7, -1, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_max_num_f32 v7, -1, v3 ; encoding: [0xfd,0xa0,0x8c,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_max_num_f32 v7, 0.5, v2 +// GFX1250: v_dual_max_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_max_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xa0,0x8c,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], -1, v[6:7] :: v_dual_max_num_f32 v7, 
src_scc, v5 +// GFX1250: v_dual_max_num_f64 v[254:255], -1, v[6:7] :: v_dual_max_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xa0,0x8c,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_min_num_f32 v7, v2, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_min_num_f32 v7, v2, v3 ; encoding: [0x04,0xb1,0x8c,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_min_num_f32 v7, v253, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_min_num_f32 v7, v253, v3 ; encoding: [0x02,0xb1,0x8c,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_min_num_f32 v7, v5, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_min_num_f32 v7, v5, v3 ; encoding: [0xfe,0xb1,0x8c,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_min_num_f32 v7, v3, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_min_num_f32 v7, v3, v3 ; encoding: [0x04,0xb1,0x8c,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_min_num_f32 v7, v4, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_min_num_f32 v7, v4, v3 ; encoding: [0x06,0xb1,0x8c,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_min_num_f32 v7, v1, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], s[104:105], v[4:5] :: 
v_dual_min_num_f32 v7, v1, v3 ; encoding: [0x68,0xb0,0x8c,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_min_num_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_min_num_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0xb0,0x8c,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], exec, v[4:5] :: v_dual_min_num_f32 v7, v15, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], exec, v[4:5] :: v_dual_min_num_f32 v7, v15, v3 ; encoding: [0x7e,0xb0,0x8c,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], vcc, v[4:5] :: v_dual_min_num_f32 v7, exec_hi, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], vcc, v[4:5] :: v_dual_min_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xb0,0x8c,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_min_num_f32 v7, -1, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_min_num_f32 v7, -1, v3 ; encoding: [0xfd,0xb0,0x8c,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_min_num_f32 v7, 0.5, v2 +// GFX1250: v_dual_max_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_min_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xb0,0x8c,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], -1, v[6:7] :: v_dual_min_num_f32 v7, src_scc, v5 +// GFX1250: v_dual_max_num_f64 v[254:255], -1, v[6:7] :: v_dual_min_num_f32 v7, src_scc, v5 ; encoding: 
[0xc1,0xb0,0x8c,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[4:5], v[254:255] :: v_dual_mov_b32 v7, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[4:5], v[254:255] :: v_dual_mov_b32 v7, v3 ; encoding: [0x04,0x81,0x8c,0xcf,0x03,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[2:3], v[254:255] :: v_dual_mov_b32 v7, v253 +// GFX1250: v_dual_max_num_f64 v[254:255], v[2:3], v[254:255] :: v_dual_mov_b32 v7, v253 ; encoding: [0x02,0x81,0x8c,0xcf,0xfd,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[254:255], v[254:255] :: v_dual_mov_b32 v7, v4 +// GFX1250: v_dual_max_num_f64 v[254:255], v[254:255], v[254:255] :: v_dual_mov_b32 v7, v4 ; encoding: [0xfe,0x81,0x8c,0xcf,0x04,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[4:5], v[254:255] :: v_dual_mov_b32 v7, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[4:5], v[254:255] :: v_dual_mov_b32 v7, v3 ; encoding: [0x04,0x81,0x8c,0xcf,0x03,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[6:7], v[254:255] :: v_dual_mov_b32 v7, v4 +// GFX1250: v_dual_max_num_f64 v[254:255], v[6:7], v[254:255] :: v_dual_mov_b32 v7, v4 ; encoding: [0x06,0x81,0x8c,0xcf,0x04,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], s[104:105], v[254:255] :: v_dual_mov_b32 v7, s1 +// GFX1250: v_dual_max_num_f64 v[254:255], s[104:105], v[254:255] :: v_dual_mov_b32 v7, s1 ; encoding: [0x68,0x80,0x8c,0xcf,0x01,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_max_num_f64 v[254:255], s[2:3], v[254:255] :: v_dual_mov_b32 v7, s105 +// GFX1250: v_dual_max_num_f64 v[254:255], s[2:3], v[254:255] :: v_dual_mov_b32 v7, s105 ; encoding: [0x02,0x80,0x8c,0xcf,0x69,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], ttmp[14:15], v[254:255] :: v_dual_mov_b32 v7, vcc_lo +// GFX1250: v_dual_max_num_f64 v[254:255], ttmp[14:15], v[254:255] :: v_dual_mov_b32 v7, vcc_lo ; encoding: [0x7a,0x80,0x8c,0xcf,0x6a,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], exec, v[254:255] :: v_dual_mov_b32 v7, ttmp15 +// GFX1250: v_dual_max_num_f64 v[254:255], exec, v[254:255] :: v_dual_mov_b32 v7, ttmp15 ; encoding: [0x7e,0x80,0x8c,0xcf,0x7b,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], vcc, v[254:255] :: v_dual_mov_b32 v7, exec_hi +// GFX1250: v_dual_max_num_f64 v[254:255], vcc, v[254:255] :: v_dual_mov_b32 v7, exec_hi ; encoding: [0x6a,0x80,0x8c,0xcf,0x7f,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], src_scc, v[254:255] :: v_dual_mov_b32 v7, -1 +// GFX1250: v_dual_max_num_f64 v[254:255], src_scc, v[254:255] :: v_dual_mov_b32 v7, -1 ; encoding: [0xfd,0x80,0x8c,0xcf,0xc1,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_mov_b32 v7, 0.5 +// GFX1250: v_dual_max_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_mov_b32 v7, 0.5 ; encoding: [0xf0,0x80,0x8c,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], -1, v[6:7] :: v_dual_mov_b32 v7, src_scc +// GFX1250: v_dual_max_num_f64 
v[254:255], -1, v[6:7] :: v_dual_mov_b32 v7, src_scc ; encoding: [0xc1,0x80,0x8c,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v2, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v2, v3 ; encoding: [0x04,0x71,0x8c,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v253, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v253, v3 ; encoding: [0x02,0x71,0x8c,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v5, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v5, v3 ; encoding: [0xfe,0x71,0x8c,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v3, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v3, v3 ; encoding: [0x04,0x71,0x8c,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v4, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v4, v3 ; encoding: [0x06,0x71,0x8c,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v1, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], s[104:105], v[4:5] :: 
v_dual_mul_dx9_zero_f32 v7, v1, v3 ; encoding: [0x68,0x70,0x8c,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0x70,0x8c,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], exec, v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v15, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], exec, v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v15, v3 ; encoding: [0x7e,0x70,0x8c,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], vcc, v[4:5] :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], vcc, v[4:5] :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x8c,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_mul_dx9_zero_f32 v7, -1, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_mul_dx9_zero_f32 v7, -1, v3 ; encoding: [0xfd,0x70,0x8c,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 +// GFX1250: v_dual_max_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x8c,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], -1, v[6:7] :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 +// GFX1250: v_dual_max_num_f64 v[254:255], -1, v[6:7] :: 
v_dual_mul_dx9_zero_f32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x8c,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_mul_f32 v7, v2, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_mul_f32 v7, v2, v3 ; encoding: [0x04,0x31,0x8c,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_mul_f32 v7, v253, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_mul_f32 v7, v253, v3 ; encoding: [0x02,0x31,0x8c,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_mul_f32 v7, v5, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_mul_f32 v7, v5, v3 ; encoding: [0xfe,0x31,0x8c,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_mul_f32 v7, v3, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_mul_f32 v7, v3, v3 ; encoding: [0x04,0x31,0x8c,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_mul_f32 v7, v4, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_mul_f32 v7, v4, v3 ; encoding: [0x06,0x31,0x8c,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_mul_f32 v7, v1, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_mul_f32 v7, v1, v3 ; encoding: [0x68,0x30,0x8c,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// 
W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_mul_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_mul_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0x30,0x8c,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], exec, v[4:5] :: v_dual_mul_f32 v7, v15, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], exec, v[4:5] :: v_dual_mul_f32 v7, v15, v3 ; encoding: [0x7e,0x30,0x8c,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], vcc, v[4:5] :: v_dual_mul_f32 v7, exec_hi, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], vcc, v[4:5] :: v_dual_mul_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x30,0x8c,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_mul_f32 v7, -1, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_mul_f32 v7, -1, v3 ; encoding: [0xfd,0x30,0x8c,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_mul_f32 v7, 0.5, v2 +// GFX1250: v_dual_max_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_mul_f32 v7, 0.5, v2 ; encoding: [0xf0,0x30,0x8c,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], -1, v[6:7] :: v_dual_mul_f32 v7, src_scc, v5 +// GFX1250: v_dual_max_num_f64 v[254:255], -1, v[6:7] :: v_dual_mul_f32 v7, src_scc, v5 ; encoding: [0xc1,0x30,0x8c,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: 
v_dual_sub_f32 v7, v2, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_sub_f32 v7, v2, v3 ; encoding: [0x04,0x51,0x8c,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_sub_f32 v7, v253, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_sub_f32 v7, v253, v3 ; encoding: [0x02,0x51,0x8c,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_sub_f32 v7, v5, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_sub_f32 v7, v5, v3 ; encoding: [0xfe,0x51,0x8c,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_sub_f32 v7, v3, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_sub_f32 v7, v3, v3 ; encoding: [0x04,0x51,0x8c,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_sub_f32 v7, v4, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_sub_f32 v7, v4, v3 ; encoding: [0x06,0x51,0x8c,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_sub_f32 v7, v1, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_sub_f32 v7, v1, v3 ; encoding: [0x68,0x50,0x8c,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_sub_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_sub_f32 v7, 
vcc_lo, v3 ; encoding: [0x7a,0x50,0x8c,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], exec, v[4:5] :: v_dual_sub_f32 v7, v15, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], exec, v[4:5] :: v_dual_sub_f32 v7, v15, v3 ; encoding: [0x7e,0x50,0x8c,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], vcc, v[4:5] :: v_dual_sub_f32 v7, exec_hi, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], vcc, v[4:5] :: v_dual_sub_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x8c,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_sub_f32 v7, -1, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_sub_f32 v7, -1, v3 ; encoding: [0xfd,0x50,0x8c,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_sub_f32 v7, 0.5, v2 +// GFX1250: v_dual_max_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_sub_f32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x8c,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], -1, v[6:7] :: v_dual_sub_f32 v7, src_scc, v5 +// GFX1250: v_dual_max_num_f64 v[254:255], -1, v[6:7] :: v_dual_sub_f32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x8c,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_subrev_f32 v7, v2, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_subrev_f32 v7, v2, v3 ; encoding: [0x04,0x61,0x8c,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction 
requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_subrev_f32 v7, v253, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_subrev_f32 v7, v253, v3 ; encoding: [0x02,0x61,0x8c,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_subrev_f32 v7, v5, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_subrev_f32 v7, v5, v3 ; encoding: [0xfe,0x61,0x8c,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_subrev_f32 v7, v3, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_subrev_f32 v7, v3, v3 ; encoding: [0x04,0x61,0x8c,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_subrev_f32 v7, v4, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_subrev_f32 v7, v4, v3 ; encoding: [0x06,0x61,0x8c,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_subrev_f32 v7, v1, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_subrev_f32 v7, v1, v3 ; encoding: [0x68,0x60,0x8c,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_subrev_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_subrev_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0x60,0x8c,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], exec, v[4:5] :: 
v_dual_subrev_f32 v7, v15, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], exec, v[4:5] :: v_dual_subrev_f32 v7, v15, v3 ; encoding: [0x7e,0x60,0x8c,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], vcc, v[4:5] :: v_dual_subrev_f32 v7, exec_hi, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], vcc, v[4:5] :: v_dual_subrev_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x8c,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_subrev_f32 v7, -1, v3 +// GFX1250: v_dual_max_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_subrev_f32 v7, -1, v3 ; encoding: [0xfd,0x60,0x8c,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_subrev_f32 v7, 0.5, v2 +// GFX1250: v_dual_max_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_subrev_f32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x8c,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], -1, v[6:7] :: v_dual_subrev_f32 v7, src_scc, v5 +// GFX1250: v_dual_max_num_f64 v[254:255], -1, v[6:7] :: v_dual_subrev_f32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x8c,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[2:3], v[6:7], v[4:5] :: v_dual_fma_f32 v9, v1, v14, v4 +// GFX1250: v_dual_max_num_f64 v[2:3], v[6:7], v[4:5] :: v_dual_fma_f32 v9, v1, v14, v4 ; encoding: [0x06,0x31,0x8d,0xcf,0x01,0x01,0x04,0x00,0x02,0x0e,0x04,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[2:3], v[6:7], v[4:5] :: v_dual_sub_nc_u32 v9, v1, v14 +// GFX1250: v_dual_max_num_f64 v[2:3], v[6:7], v[4:5] :: v_dual_sub_nc_u32 v9, v1, 
v14 ; encoding: [0x06,0x41,0x8d,0xcf,0x01,0x01,0x04,0x00,0x02,0x0e,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[2:3], v[6:7], v[4:5] :: v_dual_lshrrev_b32 v9, v1, v14 +// GFX1250: v_dual_max_num_f64 v[2:3], v[6:7], v[4:5] :: v_dual_lshrrev_b32 v9, v1, v14 ; encoding: [0x06,0x51,0x8d,0xcf,0x01,0x01,0x04,0x00,0x02,0x0e,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[2:3], v[6:7], v[4:5] :: v_dual_ashrrev_i32 v9, v1, v14 +// GFX1250: v_dual_max_num_f64 v[2:3], v[6:7], v[4:5] :: v_dual_ashrrev_i32 v9, v1, v14 ; encoding: [0x06,0x61,0x8d,0xcf,0x01,0x01,0x04,0x00,0x02,0x0e,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[2:3], v[6:7], v[4:5] :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0x94 +// GFX1250: v_dual_max_num_f64 v[2:3], v[6:7], v[4:5] :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0x94 ; encoding: [0x06,0x21,0x8d,0xcf,0x01,0x01,0x04,0x00,0x02,0x03,0x94,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[252:253], v[6:7], v[4:5] :: v_dual_add_f32 v8, v1, v3 +// GFX1250: v_dual_min_num_f64 v[252:253], v[6:7], v[4:5] :: v_dual_add_f32 v8, v1, v3 ; encoding: [0x06,0x41,0x90,0xcf,0x01,0x01,0x04,0x00,0xfc,0x03,0x00,0x08] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_add_f32 v7, v253, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_add_f32 v7, v253, v3 ; encoding: [0x02,0x41,0x90,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_add_f32 v7, v5, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_add_f32 v7, v5, v3 ; encoding: [0xfe,0x41,0x90,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: 
error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_add_f32 v7, v3, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_add_f32 v7, v3, v3 ; encoding: [0x04,0x41,0x90,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_add_f32 v7, v4, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_add_f32 v7, v4, v3 ; encoding: [0x06,0x41,0x90,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_add_f32 v7, v1, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_add_f32 v7, v1, v3 ; encoding: [0x68,0x40,0x90,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_add_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_add_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0x40,0x90,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], exec, v[4:5] :: v_dual_add_f32 v7, v15, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], exec, v[4:5] :: v_dual_add_f32 v7, v15, v3 ; encoding: [0x7e,0x40,0x90,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], vcc, v[4:5] :: v_dual_add_f32 v7, exec_hi, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], vcc, v[4:5] :: v_dual_add_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x90,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_add_f32 v7, -1, 
v3 +// GFX1250: v_dual_min_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_add_f32 v7, -1, v3 ; encoding: [0xfd,0x40,0x90,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_add_f32 v7, 0.5, v2 +// GFX1250: v_dual_min_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_add_f32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x90,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], -1, v[6:7] :: v_dual_add_f32 v7, src_scc, v5 +// GFX1250: v_dual_min_num_f64 v[254:255], -1, v[6:7] :: v_dual_add_f32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x90,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_add_nc_u32 v7, v2, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_add_nc_u32 v7, v2, v3 ; encoding: [0x04,0x01,0x91,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_add_nc_u32 v7, v253, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_add_nc_u32 v7, v253, v3 ; encoding: [0x02,0x01,0x91,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_add_nc_u32 v7, v5, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_add_nc_u32 v7, v5, v3 ; encoding: [0xfe,0x01,0x91,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_add_nc_u32 v7, v3, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_add_nc_u32 v7, v3, v3 ; encoding: 
[0x04,0x01,0x91,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_add_nc_u32 v7, v4, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_add_nc_u32 v7, v4, v3 ; encoding: [0x06,0x01,0x91,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_add_nc_u32 v7, v1, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_add_nc_u32 v7, v1, v3 ; encoding: [0x68,0x00,0x91,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_add_nc_u32 v7, vcc_lo, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_add_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7a,0x00,0x91,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], exec, v[4:5] :: v_dual_add_nc_u32 v7, v15, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], exec, v[4:5] :: v_dual_add_nc_u32 v7, v15, v3 ; encoding: [0x7e,0x00,0x91,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], vcc, v[4:5] :: v_dual_add_nc_u32 v7, exec_hi, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], vcc, v[4:5] :: v_dual_add_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x91,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_add_nc_u32 v7, -1, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_add_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x00,0x91,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_add_nc_u32 v7, 0.5, v2 +// GFX1250: v_dual_min_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_add_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x91,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], -1, v[6:7] :: v_dual_add_nc_u32 v7, src_scc, v5 +// GFX1250: v_dual_min_num_f64 v[254:255], -1, v[6:7] :: v_dual_add_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x91,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo +// GFX1250: v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo ; encoding: [0x04,0x91,0x90,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_cndmask_b32 v7, v253, v3, vcc_lo +// GFX1250: v_dual_min_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_cndmask_b32 v7, v253, v3, vcc_lo ; encoding: [0x02,0x91,0x90,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_cndmask_b32 v7, v5, v3, vcc_lo +// GFX1250: v_dual_min_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_cndmask_b32 v7, v5, v3, vcc_lo ; encoding: [0xfe,0x91,0x90,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo +// GFX1250: v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo ; encoding: [0x04,0x91,0x90,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: 
instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo +// GFX1250: v_dual_min_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo ; encoding: [0x06,0x91,0x90,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], s[2:3], v[4:5] :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo +// GFX1250: v_dual_min_num_f64 v[254:255], s[2:3], v[4:5] :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo ; encoding: [0x02,0x90,0x90,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_cndmask_b32 v7, v15, v3, vcc_lo +// GFX1250: v_dual_min_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_cndmask_b32 v7, v15, v3, vcc_lo ; encoding: [0x7a,0x90,0x90,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], exec, v[4:5] :: v_dual_cndmask_b32 v7, v10, v3, vcc_lo +// GFX1250: v_dual_min_num_f64 v[254:255], exec, v[4:5] :: v_dual_cndmask_b32 v7, v10, v3, vcc_lo ; encoding: [0x7e,0x90,0x90,0xcf,0x0a,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], vcc, v[4:5] :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo +// GFX1250: v_dual_min_num_f64 v[254:255], vcc, v[4:5] :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo ; encoding: [0x6a,0x90,0x90,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo +// GFX1250: v_dual_min_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo ; encoding: [0xfd,0x90,0x90,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x6a,0x07] +// W64-ERR: 
:[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo +// GFX1250: v_dual_min_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo ; encoding: [0xf0,0x90,0x90,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], -1, v[6:7] :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo +// GFX1250: v_dual_min_num_f64 v[254:255], -1, v[6:7] :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo ; encoding: [0xc1,0x90,0x90,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x6a,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_fmac_f32 v9, v2, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_fmac_f32 v9, v2, v3 ; encoding: [0x04,0x01,0x90,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_fmac_f32 v9, v253, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_fmac_f32 v9, v253, v3 ; encoding: [0x02,0x01,0x90,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_fmac_f32 v9, v5, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_fmac_f32 v9, v5, v3 ; encoding: [0xfe,0x01,0x90,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_fmac_f32 v9, v3, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_fmac_f32 v9, v3, v3 ; encoding: [0x04,0x01,0x90,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 
v[254:255], v[6:7], v[4:5] :: v_dual_fmac_f32 v9, v4, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_fmac_f32 v9, v4, v3 ; encoding: [0x06,0x01,0x90,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_fmac_f32 v9, v1, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_fmac_f32 v9, v1, v3 ; encoding: [0x68,0x00,0x90,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_fmac_f32 v9, vcc_lo, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_fmac_f32 v9, vcc_lo, v3 ; encoding: [0x7a,0x00,0x90,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], exec, v[4:5] :: v_dual_fmac_f32 v9, v15, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], exec, v[4:5] :: v_dual_fmac_f32 v9, v15, v3 ; encoding: [0x7e,0x00,0x90,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], vcc, v[4:5] :: v_dual_fmac_f32 v9, exec_hi, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], vcc, v[4:5] :: v_dual_fmac_f32 v9, exec_hi, v3 ; encoding: [0x6a,0x00,0x90,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_fmac_f32 v9, -1, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_fmac_f32 v9, -1, v3 ; encoding: [0xfd,0x00,0x90,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_fmac_f32 v9, 0.5, v2 +// GFX1250: v_dual_min_num_f64 v[254:255], 0.5, 
v[8:9] :: v_dual_fmac_f32 v9, 0.5, v2 ; encoding: [0xf0,0x00,0x90,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], -1, v[6:7] :: v_dual_fmac_f32 v9, src_scc, v5 +// GFX1250: v_dual_min_num_f64 v[254:255], -1, v[6:7] :: v_dual_fmac_f32 v9, src_scc, v5 ; encoding: [0xc1,0x00,0x90,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_lshlrev_b32 v7, v2, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_lshlrev_b32 v7, v2, v3 ; encoding: [0x04,0x11,0x91,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_lshlrev_b32 v7, v253, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_lshlrev_b32 v7, v253, v3 ; encoding: [0x02,0x11,0x91,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_lshlrev_b32 v7, v5, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_lshlrev_b32 v7, v5, v3 ; encoding: [0xfe,0x11,0x91,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_lshlrev_b32 v7, v3, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_lshlrev_b32 v7, v3, v3 ; encoding: [0x04,0x11,0x91,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_lshlrev_b32 v7, v4, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_lshlrev_b32 v7, v4, v3 ; encoding: 
[0x06,0x11,0x91,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_lshlrev_b32 v7, v1, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_lshlrev_b32 v7, v1, v3 ; encoding: [0x68,0x10,0x91,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_lshlrev_b32 v7, vcc_lo, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_lshlrev_b32 v7, vcc_lo, v3 ; encoding: [0x7a,0x10,0x91,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], exec, v[4:5] :: v_dual_lshlrev_b32 v7, v15, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], exec, v[4:5] :: v_dual_lshlrev_b32 v7, v15, v3 ; encoding: [0x7e,0x10,0x91,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], vcc, v[4:5] :: v_dual_lshlrev_b32 v7, exec_hi, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], vcc, v[4:5] :: v_dual_lshlrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x10,0x91,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_lshlrev_b32 v7, -1, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_lshlrev_b32 v7, -1, v3 ; encoding: [0xfd,0x10,0x91,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_lshlrev_b32 v7, 0.5, v2 +// GFX1250: v_dual_min_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_lshlrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x10,0x91,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +// 
W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], -1, v[6:7] :: v_dual_lshlrev_b32 v7, src_scc, v5 +// GFX1250: v_dual_min_num_f64 v[254:255], -1, v[6:7] :: v_dual_lshlrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x10,0x91,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_max_num_f32 v7, v2, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_max_num_f32 v7, v2, v3 ; encoding: [0x04,0xa1,0x90,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_max_num_f32 v7, v253, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_max_num_f32 v7, v253, v3 ; encoding: [0x02,0xa1,0x90,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_max_num_f32 v7, v5, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_max_num_f32 v7, v5, v3 ; encoding: [0xfe,0xa1,0x90,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_max_num_f32 v7, v3, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_max_num_f32 v7, v3, v3 ; encoding: [0x04,0xa1,0x90,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_max_num_f32 v7, v4, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_max_num_f32 v7, v4, v3 ; encoding: [0x06,0xa1,0x90,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_min_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_max_num_f32 v7, v1, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_max_num_f32 v7, v1, v3 ; encoding: [0x68,0xa0,0x90,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_max_num_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_max_num_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0xa0,0x90,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], exec, v[4:5] :: v_dual_max_num_f32 v7, v15, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], exec, v[4:5] :: v_dual_max_num_f32 v7, v15, v3 ; encoding: [0x7e,0xa0,0x90,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], vcc, v[4:5] :: v_dual_max_num_f32 v7, exec_hi, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], vcc, v[4:5] :: v_dual_max_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xa0,0x90,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_max_num_f32 v7, -1, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_max_num_f32 v7, -1, v3 ; encoding: [0xfd,0xa0,0x90,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_max_num_f32 v7, 0.5, v2 +// GFX1250: v_dual_min_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_max_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xa0,0x90,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], -1, v[6:7] :: v_dual_max_num_f32 v7, 
src_scc, v5 +// GFX1250: v_dual_min_num_f64 v[254:255], -1, v[6:7] :: v_dual_max_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xa0,0x90,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_min_num_f32 v7, v2, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_min_num_f32 v7, v2, v3 ; encoding: [0x04,0xb1,0x90,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_min_num_f32 v7, v253, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_min_num_f32 v7, v253, v3 ; encoding: [0x02,0xb1,0x90,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_min_num_f32 v7, v5, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_min_num_f32 v7, v5, v3 ; encoding: [0xfe,0xb1,0x90,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_min_num_f32 v7, v3, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_min_num_f32 v7, v3, v3 ; encoding: [0x04,0xb1,0x90,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_min_num_f32 v7, v4, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_min_num_f32 v7, v4, v3 ; encoding: [0x06,0xb1,0x90,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_min_num_f32 v7, v1, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], s[104:105], v[4:5] :: 
v_dual_min_num_f32 v7, v1, v3 ; encoding: [0x68,0xb0,0x90,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_min_num_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_min_num_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0xb0,0x90,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], exec, v[4:5] :: v_dual_min_num_f32 v7, v15, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], exec, v[4:5] :: v_dual_min_num_f32 v7, v15, v3 ; encoding: [0x7e,0xb0,0x90,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], vcc, v[4:5] :: v_dual_min_num_f32 v7, exec_hi, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], vcc, v[4:5] :: v_dual_min_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xb0,0x90,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_min_num_f32 v7, -1, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_min_num_f32 v7, -1, v3 ; encoding: [0xfd,0xb0,0x90,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_min_num_f32 v7, 0.5, v2 +// GFX1250: v_dual_min_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_min_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xb0,0x90,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], -1, v[6:7] :: v_dual_min_num_f32 v7, src_scc, v5 +// GFX1250: v_dual_min_num_f64 v[254:255], -1, v[6:7] :: v_dual_min_num_f32 v7, src_scc, v5 ; encoding: 
[0xc1,0xb0,0x90,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[4:5], v[254:255] :: v_dual_mov_b32 v7, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[4:5], v[254:255] :: v_dual_mov_b32 v7, v3 ; encoding: [0x04,0x81,0x90,0xcf,0x03,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[2:3], v[254:255] :: v_dual_mov_b32 v7, v253 +// GFX1250: v_dual_min_num_f64 v[254:255], v[2:3], v[254:255] :: v_dual_mov_b32 v7, v253 ; encoding: [0x02,0x81,0x90,0xcf,0xfd,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[254:255], v[254:255] :: v_dual_mov_b32 v7, v4 +// GFX1250: v_dual_min_num_f64 v[254:255], v[254:255], v[254:255] :: v_dual_mov_b32 v7, v4 ; encoding: [0xfe,0x81,0x90,0xcf,0x04,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[4:5], v[254:255] :: v_dual_mov_b32 v7, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[4:5], v[254:255] :: v_dual_mov_b32 v7, v3 ; encoding: [0x04,0x81,0x90,0xcf,0x03,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[6:7], v[254:255] :: v_dual_mov_b32 v7, v4 +// GFX1250: v_dual_min_num_f64 v[254:255], v[6:7], v[254:255] :: v_dual_mov_b32 v7, v4 ; encoding: [0x06,0x81,0x90,0xcf,0x04,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], s[104:105], v[254:255] :: v_dual_mov_b32 v7, s1 +// GFX1250: v_dual_min_num_f64 v[254:255], s[104:105], v[254:255] :: v_dual_mov_b32 v7, s1 ; encoding: [0x68,0x80,0x90,0xcf,0x01,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_min_num_f64 v[254:255], s[2:3], v[254:255] :: v_dual_mov_b32 v7, s105 +// GFX1250: v_dual_min_num_f64 v[254:255], s[2:3], v[254:255] :: v_dual_mov_b32 v7, s105 ; encoding: [0x02,0x80,0x90,0xcf,0x69,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], ttmp[14:15], v[254:255] :: v_dual_mov_b32 v7, vcc_lo +// GFX1250: v_dual_min_num_f64 v[254:255], ttmp[14:15], v[254:255] :: v_dual_mov_b32 v7, vcc_lo ; encoding: [0x7a,0x80,0x90,0xcf,0x6a,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], exec, v[254:255] :: v_dual_mov_b32 v7, ttmp15 +// GFX1250: v_dual_min_num_f64 v[254:255], exec, v[254:255] :: v_dual_mov_b32 v7, ttmp15 ; encoding: [0x7e,0x80,0x90,0xcf,0x7b,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], vcc, v[254:255] :: v_dual_mov_b32 v7, exec_hi +// GFX1250: v_dual_min_num_f64 v[254:255], vcc, v[254:255] :: v_dual_mov_b32 v7, exec_hi ; encoding: [0x6a,0x80,0x90,0xcf,0x7f,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], src_scc, v[254:255] :: v_dual_mov_b32 v7, -1 +// GFX1250: v_dual_min_num_f64 v[254:255], src_scc, v[254:255] :: v_dual_mov_b32 v7, -1 ; encoding: [0xfd,0x80,0x90,0xcf,0xc1,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_mov_b32 v7, 0.5 +// GFX1250: v_dual_min_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_mov_b32 v7, 0.5 ; encoding: [0xf0,0x80,0x90,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], -1, v[6:7] :: v_dual_mov_b32 v7, src_scc +// GFX1250: v_dual_min_num_f64 
v[254:255], -1, v[6:7] :: v_dual_mov_b32 v7, src_scc ; encoding: [0xc1,0x80,0x90,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v2, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v2, v3 ; encoding: [0x04,0x71,0x90,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v253, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v253, v3 ; encoding: [0x02,0x71,0x90,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v5, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v5, v3 ; encoding: [0xfe,0x71,0x90,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v3, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v3, v3 ; encoding: [0x04,0x71,0x90,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v4, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v4, v3 ; encoding: [0x06,0x71,0x90,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v1, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], s[104:105], v[4:5] :: 
v_dual_mul_dx9_zero_f32 v7, v1, v3 ; encoding: [0x68,0x70,0x90,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0x70,0x90,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], exec, v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v15, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], exec, v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v15, v3 ; encoding: [0x7e,0x70,0x90,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], vcc, v[4:5] :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], vcc, v[4:5] :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x90,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_mul_dx9_zero_f32 v7, -1, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_mul_dx9_zero_f32 v7, -1, v3 ; encoding: [0xfd,0x70,0x90,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 +// GFX1250: v_dual_min_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x90,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], -1, v[6:7] :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 +// GFX1250: v_dual_min_num_f64 v[254:255], -1, v[6:7] :: 
v_dual_mul_dx9_zero_f32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x90,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_mul_f32 v7, v2, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_mul_f32 v7, v2, v3 ; encoding: [0x04,0x31,0x90,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_mul_f32 v7, v253, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_mul_f32 v7, v253, v3 ; encoding: [0x02,0x31,0x90,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_mul_f32 v7, v5, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_mul_f32 v7, v5, v3 ; encoding: [0xfe,0x31,0x90,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_mul_f32 v7, v3, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_mul_f32 v7, v3, v3 ; encoding: [0x04,0x31,0x90,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_mul_f32 v7, v4, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_mul_f32 v7, v4, v3 ; encoding: [0x06,0x31,0x90,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_mul_f32 v7, v1, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_mul_f32 v7, v1, v3 ; encoding: [0x68,0x30,0x90,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// 
W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_mul_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_mul_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0x30,0x90,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], exec, v[4:5] :: v_dual_mul_f32 v7, v15, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], exec, v[4:5] :: v_dual_mul_f32 v7, v15, v3 ; encoding: [0x7e,0x30,0x90,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], vcc, v[4:5] :: v_dual_mul_f32 v7, exec_hi, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], vcc, v[4:5] :: v_dual_mul_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x30,0x90,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_mul_f32 v7, -1, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_mul_f32 v7, -1, v3 ; encoding: [0xfd,0x30,0x90,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_mul_f32 v7, 0.5, v2 +// GFX1250: v_dual_min_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_mul_f32 v7, 0.5, v2 ; encoding: [0xf0,0x30,0x90,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], -1, v[6:7] :: v_dual_mul_f32 v7, src_scc, v5 +// GFX1250: v_dual_min_num_f64 v[254:255], -1, v[6:7] :: v_dual_mul_f32 v7, src_scc, v5 ; encoding: [0xc1,0x30,0x90,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: 
v_dual_sub_f32 v7, v2, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_sub_f32 v7, v2, v3 ; encoding: [0x04,0x51,0x90,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_sub_f32 v7, v253, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_sub_f32 v7, v253, v3 ; encoding: [0x02,0x51,0x90,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_sub_f32 v7, v5, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_sub_f32 v7, v5, v3 ; encoding: [0xfe,0x51,0x90,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_sub_f32 v7, v3, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_sub_f32 v7, v3, v3 ; encoding: [0x04,0x51,0x90,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_sub_f32 v7, v4, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_sub_f32 v7, v4, v3 ; encoding: [0x06,0x51,0x90,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_sub_f32 v7, v1, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_sub_f32 v7, v1, v3 ; encoding: [0x68,0x50,0x90,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_sub_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_sub_f32 v7, 
vcc_lo, v3 ; encoding: [0x7a,0x50,0x90,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], exec, v[4:5] :: v_dual_sub_f32 v7, v15, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], exec, v[4:5] :: v_dual_sub_f32 v7, v15, v3 ; encoding: [0x7e,0x50,0x90,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], vcc, v[4:5] :: v_dual_sub_f32 v7, exec_hi, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], vcc, v[4:5] :: v_dual_sub_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x90,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_sub_f32 v7, -1, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_sub_f32 v7, -1, v3 ; encoding: [0xfd,0x50,0x90,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_sub_f32 v7, 0.5, v2 +// GFX1250: v_dual_min_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_sub_f32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x90,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], -1, v[6:7] :: v_dual_sub_f32 v7, src_scc, v5 +// GFX1250: v_dual_min_num_f64 v[254:255], -1, v[6:7] :: v_dual_sub_f32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x90,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_subrev_f32 v7, v2, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_subrev_f32 v7, v2, v3 ; encoding: [0x04,0x61,0x90,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction 
requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_subrev_f32 v7, v253, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_subrev_f32 v7, v253, v3 ; encoding: [0x02,0x61,0x90,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_subrev_f32 v7, v5, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_subrev_f32 v7, v5, v3 ; encoding: [0xfe,0x61,0x90,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_subrev_f32 v7, v3, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_subrev_f32 v7, v3, v3 ; encoding: [0x04,0x61,0x90,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_subrev_f32 v7, v4, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_subrev_f32 v7, v4, v3 ; encoding: [0x06,0x61,0x90,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_subrev_f32 v7, v1, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_subrev_f32 v7, v1, v3 ; encoding: [0x68,0x60,0x90,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_subrev_f32 v7, vcc_lo, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_subrev_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0x60,0x90,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], exec, v[4:5] :: 
v_dual_subrev_f32 v7, v15, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], exec, v[4:5] :: v_dual_subrev_f32 v7, v15, v3 ; encoding: [0x7e,0x60,0x90,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], vcc, v[4:5] :: v_dual_subrev_f32 v7, exec_hi, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], vcc, v[4:5] :: v_dual_subrev_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x90,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_subrev_f32 v7, -1, v3 +// GFX1250: v_dual_min_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_subrev_f32 v7, -1, v3 ; encoding: [0xfd,0x60,0x90,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_subrev_f32 v7, 0.5, v2 +// GFX1250: v_dual_min_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_subrev_f32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x90,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], -1, v[6:7] :: v_dual_subrev_f32 v7, src_scc, v5 +// GFX1250: v_dual_min_num_f64 v[254:255], -1, v[6:7] :: v_dual_subrev_f32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x90,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[2:3], v[6:7], v[4:5] :: v_dual_fma_f32 v9, v1, v14, v4 +// GFX1250: v_dual_min_num_f64 v[2:3], v[6:7], v[4:5] :: v_dual_fma_f32 v9, v1, v14, v4 ; encoding: [0x06,0x31,0x91,0xcf,0x01,0x01,0x04,0x00,0x02,0x0e,0x04,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[2:3], v[6:7], v[4:5] :: v_dual_sub_nc_u32 v9, v1, v14 +// GFX1250: v_dual_min_num_f64 v[2:3], v[6:7], v[4:5] :: v_dual_sub_nc_u32 v9, v1, 
v14 ; encoding: [0x06,0x41,0x91,0xcf,0x01,0x01,0x04,0x00,0x02,0x0e,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[2:3], v[6:7], v[4:5] :: v_dual_lshrrev_b32 v9, v1, v14 +// GFX1250: v_dual_min_num_f64 v[2:3], v[6:7], v[4:5] :: v_dual_lshrrev_b32 v9, v1, v14 ; encoding: [0x06,0x51,0x91,0xcf,0x01,0x01,0x04,0x00,0x02,0x0e,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[2:3], v[6:7], v[4:5] :: v_dual_ashrrev_i32 v9, v1, v14 +// GFX1250: v_dual_min_num_f64 v[2:3], v[6:7], v[4:5] :: v_dual_ashrrev_i32 v9, v1, v14 ; encoding: [0x06,0x61,0x91,0xcf,0x01,0x01,0x04,0x00,0x02,0x0e,0x00,0x09] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[2:3], v[6:7], v[4:5] :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0x95 +// GFX1250: v_dual_min_num_f64 v[2:3], v[6:7], v[4:5] :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0x95 ; encoding: [0x06,0x21,0x91,0xcf,0x01,0x01,0x04,0x00,0x02,0x03,0x95,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +//===----------------------------------------------------------------------===// +// Neg modifiers support. 
+//===----------------------------------------------------------------------===// + +v_dual_fma_f32 v0, -v1, v2, v3 :: v_dual_fma_f32 v5, v6, v7, v8 +// GFX1250: v_dual_fma_f32 v0, -v1, v2, v3 :: v_dual_fma_f32 v5, v6, v7, v8 ; encoding: [0x01,0x31,0x4d,0xcf,0x06,0x03,0x02,0x03,0x00,0x07,0x08,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v0, v1, -v2, v3 :: v_dual_fma_f32 v5, v6, v7, v8 +// GFX1250: v_dual_fma_f32 v0, v1, -v2, v3 :: v_dual_fma_f32 v5, v6, v7, v8 ; encoding: [0x01,0x31,0x4d,0xcf,0x06,0x05,0x02,0x03,0x00,0x07,0x08,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v0, v1, v2, -v3 :: v_dual_fma_f32 v5, v6, v7, v8 +// GFX1250: v_dual_fma_f32 v0, v1, v2, -v3 :: v_dual_fma_f32 v5, v6, v7, v8 ; encoding: [0x01,0x31,0x4d,0xcf,0x06,0x09,0x02,0x03,0x00,0x07,0x08,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v0, v1, v2, v3 :: v_dual_fma_f32 v5, -v6, v7, v8 +// GFX1250: v_dual_fma_f32 v0, v1, v2, v3 :: v_dual_fma_f32 v5, -v6, v7, v8 ; encoding: [0x01,0x31,0x4d,0xcf,0x06,0x11,0x02,0x03,0x00,0x07,0x08,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v0, v1, v2, v3 :: v_dual_fma_f32 v5, v6, -v7, v8 +// GFX1250: v_dual_fma_f32 v0, v1, v2, v3 :: v_dual_fma_f32 v5, v6, -v7, v8 ; encoding: [0x01,0x31,0x4d,0xcf,0x06,0x21,0x02,0x03,0x00,0x07,0x08,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v0, v1, v2, v3 :: v_dual_fma_f32 v5, v6, v7, -v8 +// GFX1250: v_dual_fma_f32 v0, v1, v2, v3 :: v_dual_fma_f32 v5, v6, v7, -v8 ; encoding: [0x01,0x31,0x4d,0xcf,0x06,0x41,0x02,0x03,0x00,0x07,0x08,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f32 v0, -s1, v2, v3 :: v_dual_bitop2_b32 v5, v6, v7 +// GFX1250: v_dual_fma_f32 v0, -s1, v2, v3 :: v_dual_bitop2_b32 v5, v6, v7 ; encoding: 
[0x01,0x20,0x4d,0xcf,0x06,0x03,0x02,0x03,0x00,0x07,0x00,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v0, v1, v2 :: v_dual_add_f32 v5, -s6, v7 +// GFX1250: v_dual_add_f32 v0, v1, v2 :: v_dual_add_f32 v5, -s6, v7 ; encoding: [0x01,0x41,0x10,0xcf,0x06,0x10,0x02,0x00,0x00,0x07,0x00,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v0, -v1, v2 :: v_dual_add_nc_u32 v5, v6, v7 +// GFX1250: v_dual_add_f32 v0, -v1, v2 :: v_dual_add_nc_u32 v5, v6, v7 ; encoding: [0x01,0x01,0x11,0xcf,0x06,0x03,0x02,0x00,0x00,0x07,0x00,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v0, v1, v2, vcc_lo :: v_dual_fmac_f32 v5, -v6, -v7 +// GFX1250: v_dual_cndmask_b32 v0, v1, v2, vcc_lo :: v_dual_fmac_f32 v5, -v6, -v7 ; encoding: [0x01,0x01,0x24,0xcf,0x06,0x31,0x02,0x6a,0x00,0x07,0x00,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v0, -v1, -v2 :: v_dual_ashrrev_i32 v5, v6, v7 +// GFX1250: v_dual_fmac_f32 v0, -v1, -v2 :: v_dual_ashrrev_i32 v5, v6, v7 ; encoding: [0x01,0x61,0x01,0xcf,0x06,0x07,0x02,0x00,0x00,0x07,0x00,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v0, v1, -v2 :: v_dual_fmac_f32 v5, -v6, v7 +// GFX1250: v_dual_fmac_f32 v0, v1, -v2 :: v_dual_fmac_f32 v5, -v6, v7 ; encoding: [0x01,0x01,0x00,0xcf,0x06,0x15,0x02,0x00,0x00,0x07,0x00,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v0, v1, -v2 :: v_dual_cndmask_b32 v5, v6, v7, vcc_lo +// GFX1250: v_dual_max_num_f32 v0, v1, -v2 :: v_dual_cndmask_b32 v5, v6, v7, vcc_lo ; encoding: [0x01,0x91,0x28,0xcf,0x06,0x05,0x02,0x00,0x00,0x07,0x6a,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v0, -v1, v2 :: v_dual_min_num_f32 v5, v6, v7 +// GFX1250: v_dual_max_num_f32 v0, -v1, v2 :: v_dual_min_num_f32 v5, v6, v7 ; encoding: 
[0x01,0xb1,0x28,0xcf,0x06,0x03,0x02,0x00,0x00,0x07,0x00,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v0, v1 :: v_dual_max_num_f32 v5, -s6, -v7 +// GFX1250: v_dual_mov_b32 v0, v1 :: v_dual_max_num_f32 v5, -s6, -v7 ; encoding: [0x01,0xa1,0x20,0xcf,0x06,0x30,0x00,0x00,0x00,0x07,0x00,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v0, -v1, v2 :: v_dual_lshlrev_b32 v5, v6, v7 +// GFX1250: v_dual_mul_dx9_zero_f32 v0, -v1, v2 :: v_dual_lshlrev_b32 v5, v6, v7 ; encoding: [0x01,0x11,0x1d,0xcf,0x06,0x03,0x02,0x00,0x00,0x07,0x00,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v0, v1, -v2 :: v_dual_mul_f32 v5, v6, -v7 +// GFX1250: v_dual_mul_dx9_zero_f32 v0, v1, -v2 :: v_dual_mul_f32 v5, v6, -v7 ; encoding: [0x01,0x31,0x1c,0xcf,0x06,0x25,0x02,0x00,0x00,0x07,0x00,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v0, -v1, -v2 :: v_dual_lshrrev_b32 v5, v6, v7 +// GFX1250: v_dual_mul_f32 v0, -v1, -v2 :: v_dual_lshrrev_b32 v5, v6, v7 ; encoding: [0x01,0x51,0x0d,0xcf,0x06,0x07,0x02,0x00,0x00,0x07,0x00,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v0, v1, v2 :: v_dual_mul_dx9_zero_f32 v5, -v6, -v7 +// GFX1250: v_dual_mul_f32 v0, v1, v2 :: v_dual_mul_dx9_zero_f32 v5, -v6, -v7 ; encoding: [0x01,0x71,0x0c,0xcf,0x06,0x31,0x02,0x00,0x00,0x07,0x00,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v0, v1, -v2 :: v_dual_max_i32 v5, v6, v7 +// GFX1250: v_dual_sub_f32 v0, v1, -v2 :: v_dual_max_i32 v5, v6, v7 ; encoding: [0x01,0x71,0x15,0xcf,0x06,0x05,0x02,0x00,0x00,0x07,0x00,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v0, -v1, v2 :: v_dual_mul_f32 v5, -v6, -v7 +// GFX1250: v_dual_sub_f32 v0, -v1, v2 :: v_dual_mul_f32 v5, -v6, -v7 ; encoding: 
[0x01,0x31,0x14,0xcf,0x06,0x33,0x02,0x00,0x00,0x07,0x00,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v0, v1, -v2 :: v_dual_min_i32 v5, v6, v7 +// GFX1250: v_dual_subrev_f32 v0, v1, -v2 :: v_dual_min_i32 v5, v6, v7 ; encoding: [0x01,0x81,0x19,0xcf,0x06,0x05,0x02,0x00,0x00,0x07,0x00,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v0, v1, -v2 :: v_dual_sub_f32 v5, -v6, v7 +// GFX1250: v_dual_subrev_f32 v0, v1, -v2 :: v_dual_sub_f32 v5, -v6, v7 ; encoding: [0x01,0x51,0x18,0xcf,0x06,0x15,0x02,0x00,0x00,0x07,0x00,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[0:1], -v[8:9], v[4:5] :: v_dual_mov_b32 v5, v6 +// GFX1250: v_dual_add_f64 v[0:1], -v[8:9], v[4:5] :: v_dual_mov_b32 v5, v6 ; encoding: [0x08,0x81,0x84,0xcf,0x06,0x03,0x04,0x00,0x00,0x00,0x00,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[0:1], -s[8:9], -v[4:5] :: v_dual_subrev_f32 v5, v6, -v7 +// GFX1250: v_dual_add_f64 v[0:1], -s[8:9], -v[4:5] :: v_dual_subrev_f32 v5, v6, -v7 ; encoding: [0x08,0x60,0x84,0xcf,0x06,0x27,0x04,0x00,0x00,0x07,0x00,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v0, v1, v2 :: v_dual_fma_f32 v5, -v6, v7, -v8 +// GFX1250: v_dual_add_nc_u32 v0, v1, v2 :: v_dual_fma_f32 v5, -v6, v7, -v8 ; encoding: [0x01,0x31,0x41,0xcf,0x06,0x51,0x02,0x00,0x00,0x07,0x08,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v0, v1, v2, vcc_lo :: v_dual_add_f32 v5, -s6, -v7 +// GFX1250: v_dual_cndmask_b32 v0, v1, v2, vcc_lo :: v_dual_add_f32 v5, -s6, -v7 ; encoding: [0x01,0x41,0x24,0xcf,0x06,0x30,0x02,0x6a,0x00,0x07,0x00,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[0:1], -v[8:9], -v[4:5], -v[10:11] :: v_dual_add_nc_u32 v5, v6, v7 +// GFX1250: v_dual_fma_f64 v[0:1], -v[8:9], 
-v[4:5], -v[10:11] :: v_dual_add_nc_u32 v5, v6, v7 ; encoding: [0x08,0x01,0x81,0xcf,0x06,0x0f,0x04,0x0a,0x00,0x07,0x00,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[0:1], v[8:9], v[4:5], -v[10:11] :: v_dual_fma_f32 v5, v6, v7, -v8 +// GFX1250: v_dual_fma_f64 v[0:1], v[8:9], v[4:5], -v[10:11] :: v_dual_fma_f32 v5, v6, v7, -v8 ; encoding: [0x08,0x31,0x81,0xcf,0x06,0x49,0x04,0x0a,0x00,0x07,0x08,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v0, v1, v2 :: v_dual_min_num_f32 v5, -s6, -v7 +// GFX1250: v_dual_lshlrev_b32 v0, v1, v2 :: v_dual_min_num_f32 v5, -s6, -v7 ; encoding: [0x01,0xb1,0x44,0xcf,0x06,0x30,0x02,0x00,0x00,0x07,0x00,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v0, v1, -v2 :: v_dual_mov_b32 v5, v6 +// GFX1250: v_dual_max_num_f32 v0, v1, -v2 :: v_dual_mov_b32 v5, v6 ; encoding: [0x01,0x81,0x28,0xcf,0x06,0x05,0x02,0x00,0x00,0x00,0x00,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v0, -v1, v2 :: v_dual_mul_dx9_zero_f32 v5, -v6, -v7 +// GFX1250: v_dual_max_num_f32 v0, -v1, v2 :: v_dual_mul_dx9_zero_f32 v5, -v6, -v7 ; encoding: [0x01,0x71,0x28,0xcf,0x06,0x33,0x02,0x00,0x00,0x07,0x00,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[0:1], -v[8:9], -v[4:5] :: v_dual_mul_f32 v5, -v6, v7 +// GFX1250: v_dual_max_num_f64 v[0:1], -v[8:9], -v[4:5] :: v_dual_mul_f32 v5, -v6, v7 ; encoding: [0x08,0x31,0x8c,0xcf,0x06,0x17,0x04,0x00,0x00,0x07,0x00,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[0:1], v[8:9], -v[4:5] :: v_dual_sub_nc_u32 v5, v6, v7 +// GFX1250: v_dual_max_num_f64 v[0:1], v[8:9], -v[4:5] :: v_dual_sub_nc_u32 v5, v6, v7 ; encoding: [0x08,0x41,0x8d,0xcf,0x06,0x05,0x04,0x00,0x00,0x07,0x00,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_min_num_f32 v0, v1, -v2 :: v_dual_add_nc_u32 v5, v6, v7 +// GFX1250: v_dual_min_num_f32 v0, v1, -v2 :: v_dual_add_nc_u32 v5, v6, v7 ; encoding: [0x01,0x01,0x2d,0xcf,0x06,0x05,0x02,0x00,0x00,0x07,0x00,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v0, -v1, v2 :: v_dual_sub_f32 v5, -v6, -v7 +// GFX1250: v_dual_min_num_f32 v0, -v1, v2 :: v_dual_sub_f32 v5, -v6, -v7 ; encoding: [0x01,0x51,0x2c,0xcf,0x06,0x33,0x02,0x00,0x00,0x07,0x00,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[0:1], -s[8:9], v[4:5] :: v_dual_ashrrev_i32 v5, v6, v7 +// GFX1250: v_dual_min_num_f64 v[0:1], -s[8:9], v[4:5] :: v_dual_ashrrev_i32 v5, v6, v7 ; encoding: [0x08,0x60,0x91,0xcf,0x06,0x03,0x04,0x00,0x00,0x07,0x00,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[0:1], -v[8:9], -v[4:5] :: v_dual_subrev_f32 v5, v6, v7 +// GFX1250: v_dual_min_num_f64 v[0:1], -v[8:9], -v[4:5] :: v_dual_subrev_f32 v5, v6, v7 ; encoding: [0x08,0x61,0x90,0xcf,0x06,0x07,0x04,0x00,0x00,0x07,0x00,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v0, v1, -v2 :: v_dual_bitop2_b32 v5, v6, v7 bitop3:1 +// GFX1250: v_dual_mul_dx9_zero_f32 v0, v1, -v2 :: v_dual_bitop2_b32 v5, v6, v7 bitop3:1 ; encoding: [0x01,0x21,0x1d,0xcf,0x06,0x05,0x02,0x00,0x00,0x07,0x01,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v0, -v1, v2 :: v_dual_fma_f32 v5, -s6, -v7, -v8 +// GFX1250: v_dual_mul_dx9_zero_f32 v0, -v1, v2 :: v_dual_fma_f32 v5, -s6, -v7, -v8 ; encoding: [0x01,0x31,0x1d,0xcf,0x06,0x72,0x02,0x00,0x00,0x07,0x08,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v0, -v1, -v2 :: v_dual_bitop2_b32 v5, v6, v7 bitop3:100 +// GFX1250: v_dual_mul_f32 v0, -v1, -v2 :: v_dual_bitop2_b32 v5, v6, v7 bitop3:0x64 ; encoding: 
[0x01,0x21,0x0d,0xcf,0x06,0x07,0x02,0x00,0x00,0x07,0x64,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v0, v1, v2 :: v_dual_fmac_f32 v5, -v6, -v7 +// GFX1250: v_dual_mul_f32 v0, v1, v2 :: v_dual_fmac_f32 v5, -v6, -v7 ; encoding: [0x01,0x01,0x0c,0xcf,0x06,0x31,0x02,0x00,0x00,0x07,0x00,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[0:1], -v[8:9], v[4:5] :: v_dual_add_f32 v5, -v6, v7 +// GFX1250: v_dual_mul_f64 v[0:1], -v[8:9], v[4:5] :: v_dual_add_f32 v5, -v6, v7 ; encoding: [0x08,0x41,0x88,0xcf,0x06,0x13,0x04,0x00,0x00,0x07,0x00,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[0:1], v[8:9], -v[4:5] :: v_dual_lshlrev_b32 v5, v6, v7 +// GFX1250: v_dual_mul_f64 v[0:1], v[8:9], -v[4:5] :: v_dual_lshlrev_b32 v5, v6, v7 ; encoding: [0x08,0x11,0x89,0xcf,0x06,0x05,0x04,0x00,0x00,0x07,0x00,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v0, -v1, -v2 :: v_dual_lshrrev_b32 v5, v6, v7 +// GFX1250: v_dual_sub_f32 v0, -v1, -v2 :: v_dual_lshrrev_b32 v5, v6, v7 ; encoding: [0x01,0x51,0x15,0xcf,0x06,0x07,0x02,0x00,0x00,0x07,0x00,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_f32 v0, v1, v2 :: v_dual_min_num_f32 v5, v6, -v7 +// GFX1250: v_dual_sub_f32 v0, v1, v2 :: v_dual_min_num_f32 v5, v6, -v7 ; encoding: [0x01,0xb1,0x14,0xcf,0x06,0x21,0x02,0x00,0x00,0x07,0x00,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v0, v1, v2 :: v_dual_mul_dx9_zero_f32 v5, v6, v7 +// GFX1250: v_dual_sub_nc_u32 v0, v1, v2 :: v_dual_mul_dx9_zero_f32 v5, v6, v7 ; encoding: [0x01,0x71,0x50,0xcf,0x06,0x01,0x02,0x00,0x00,0x07,0x00,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v0, v1, -v2 :: v_dual_max_i32 v5, v6, v7 +// GFX1250: v_dual_subrev_f32 v0, v1, -v2 :: v_dual_max_i32 v5, v6, v7 ; 
encoding: [0x01,0x71,0x19,0xcf,0x06,0x05,0x02,0x00,0x00,0x07,0x00,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v0, -s1, -v2 :: v_dual_mul_f32 v5, -s6, -v7 +// GFX1250: v_dual_subrev_f32 v0, -s1, -v2 :: v_dual_mul_f32 v5, -s6, -v7 ; encoding: [0x01,0x30,0x18,0xcf,0x06,0x36,0x02,0x00,0x00,0x07,0x00,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, s96 +// GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, s96 ; encoding: [0x04,0x91,0x10,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x60,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_add_f32 v7, v1, v3 +// GFX1250: v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_add_f32 v7, v1, v3 ; encoding: [0x04,0x41,0x24,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_add_nc_u32 v7, v1, v3 +// GFX1250: v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_add_nc_u32 v7, v1, v3 ; encoding: [0x04,0x01,0x25,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2, s97 :: v_dual_cndmask_b32 v7, v1, v3, s96 +// GFX1250: v_dual_cndmask_b32 v255, v4, v2, s97 :: v_dual_cndmask_b32 v7, v1, v3, s96 ; encoding: [0x04,0x91,0x24,0xcf,0x01,0x01,0x02,0x61,0xff,0x03,0x60,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_fmac_f32 v7, v1, v3 +// GFX1250: v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_fmac_f32 v7, v1, v3 ; encoding: [0x04,0x01,0x24,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_lshlrev_b32 v7, v1, v3 +// GFX1250: 
v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_lshlrev_b32 v7, v1, v3 ; encoding: [0x04,0x11,0x25,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_max_num_f32 v7, v1, v3 +// GFX1250: v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_max_num_f32 v7, v1, v3 ; encoding: [0x04,0xa1,0x24,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_min_num_f32 v7, v1, v3 +// GFX1250: v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_min_num_f32 v7, v1, v3 ; encoding: [0x04,0xb1,0x24,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v255, s96 :: v_dual_mov_b32 v7, v1 +// GFX1250: v_dual_cndmask_b32 v255, v4, v255, s96 :: v_dual_mov_b32 v7, v1 ; encoding: [0x04,0x81,0x24,0xcf,0x01,0x01,0xff,0x60,0xff,0x00,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_mul_dx9_zero_f32 v7, v1, v3 +// GFX1250: v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_mul_dx9_zero_f32 v7, v1, v3 ; encoding: [0x04,0x71,0x24,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_mul_f32 v7, v1, v3 +// GFX1250: v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_mul_f32 v7, v1, v3 ; encoding: [0x04,0x31,0x24,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_sub_f32 v7, v1, v3 +// GFX1250: v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_sub_f32 v7, v1, v3 ; encoding: [0x04,0x51,0x24,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + 
+v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_subrev_f32 v7, v1, v3 +// GFX1250: v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_subrev_f32 v7, v1, v3 ; encoding: [0x04,0x61,0x24,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_fma_f32 v7, v1, v3, v4 +// GFX1250: v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_fma_f32 v7, v1, v3, v4 ; encoding: [0x04,0x31,0x25,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x04,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_bitop2_b32 v7, v1, v3 bitop3:1 +// GFX1250: v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_bitop2_b32 v7, v1, v3 bitop3:1 ; encoding: [0x04,0x21,0x25,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x01,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fmac_f32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, s96 +// GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, s96 ; encoding: [0x04,0x91,0x00,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x60,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, s96 +// GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, s96 ; encoding: [0x04,0x91,0x28,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x60,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, s96 +// GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, s96 ; encoding: [0x04,0x91,0x2c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x60,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mov_b32 v255, v4 :: v_dual_cndmask_b32 v7, v1, v255, s96 +// GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_cndmask_b32 v7, v1, v255, s96 ; encoding: 
[0x04,0x91,0x20,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x60,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, s96 +// GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, s96 ; encoding: [0x04,0x91,0x1c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x60,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, s96 +// GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, s96 ; encoding: [0x04,0x91,0x0c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x60,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_subrev_f32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, s96 +// GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, s96 ; encoding: [0x04,0x91,0x18,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x60,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_max_i32 v7, v1, v3 +// GFX1250: v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_max_i32 v7, v1, v3 ; encoding: [0x04,0x71,0x25,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_min_i32 v7, v1, v3 +// GFX1250: v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_min_i32 v7, v1, v3 ; encoding: [0x04,0x81,0x25,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_sub_nc_u32 v7, v1, v3 +// GFX1250: v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_sub_nc_u32 v7, v1, v3 ; encoding: [0x04,0x41,0x25,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_lshrrev_b32 v7, v1, v3 +// GFX1250: 
v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_lshrrev_b32 v7, v1, v3 ; encoding: [0x04,0x51,0x25,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_ashrrev_i32 v7, v1, v3 +// GFX1250: v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_ashrrev_i32 v7, v1, v3 ; encoding: [0x04,0x61,0x25,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x00,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_nc_u32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, s96 +// GFX1250: v_dual_add_nc_u32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, s96 ; encoding: [0x04,0x91,0x40,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x60,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshlrev_b32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, s96 +// GFX1250: v_dual_lshlrev_b32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, s96 ; encoding: [0x04,0x91,0x44,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x60,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_sub_nc_u32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, s96 +// GFX1250: v_dual_sub_nc_u32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, s96 ; encoding: [0x04,0x91,0x50,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x60,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_ashrrev_i32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, s96 +// GFX1250: v_dual_ashrrev_i32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, s96 ; encoding: [0x04,0x91,0x58,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x60,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_lshrrev_b32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, s96 +// GFX1250: v_dual_lshrrev_b32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, s96 ; encoding: [0x04,0x91,0x54,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x60,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires 
wavesize=32 + +v_dual_fma_f32 v255, v4, v2, v10 :: v_dual_cndmask_b32 v7, v1, v3, s96 +// GFX1250: v_dual_fma_f32 v255, v4, v2, v10 :: v_dual_cndmask_b32 v7, v1, v3, s96 ; encoding: [0x04,0x91,0x4c,0xcf,0x01,0x01,0x02,0x0a,0xff,0x03,0x60,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_cndmask_b32 v7, v2, v3, s96 +// GFX1250: v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_cndmask_b32 v7, v2, v3, s96 ; encoding: [0x04,0x91,0x80,0xcf,0x02,0x01,0x04,0x0a,0xfe,0x03,0x60,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_cndmask_b32 v7, v2, v3, s96 +// GFX1250: v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_cndmask_b32 v7, v2, v3, s96 ; encoding: [0x04,0x91,0x84,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x60,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_cndmask_b32 v7, v2, v3, s96 +// GFX1250: v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_cndmask_b32 v7, v2, v3, s96 ; encoding: [0x04,0x91,0x88,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x60,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_cndmask_b32 v7, v2, v3, s96 +// GFX1250: v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_cndmask_b32 v7, v2, v3, s96 ; encoding: [0x04,0x91,0x8c,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x60,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_cndmask_b32 v7, v2, v3, s96 +// GFX1250: v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_cndmask_b32 v7, v2, v3, s96 ; encoding: [0x04,0x91,0x90,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x60,0x07] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v0, v1, v2, s96 :: 
v_dual_fmac_f32 v5, -v6, -v7 +// GFX1250: v_dual_cndmask_b32 v0, v1, v2, s96 :: v_dual_fmac_f32 v5, -v6, -v7 ; encoding: [0x01,0x01,0x24,0xcf,0x06,0x31,0x02,0x60,0x00,0x07,0x00,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_max_num_f32 v0, v1, -v2 :: v_dual_cndmask_b32 v5, v6, v7, s96 +// GFX1250: v_dual_max_num_f32 v0, v1, -v2 :: v_dual_cndmask_b32 v5, v6, v7, s96 ; encoding: [0x01,0x91,0x28,0xcf,0x06,0x05,0x02,0x00,0x00,0x07,0x60,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 + +v_dual_cndmask_b32 v0, v1, v2, s96 :: v_dual_add_f32 v5, -s6, -v7 +// GFX1250: v_dual_cndmask_b32 v0, v1, v2, s96 :: v_dual_add_f32 v5, -s6, -v7 ; encoding: [0x01,0x41,0x24,0xcf,0x06,0x30,0x02,0x60,0x00,0x07,0x00,0x05] +// W64-ERR: :[[@LINE-2]]:1: error: instruction requires wavesize=32 diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vopd_errs.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vopd_errs.s new file mode 100644 index 0000000000000..81b79cb8c28da --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vopd_errs.s @@ -0,0 +1,326 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 %s 2>&1 | FileCheck %s -check-prefix=GFX12 --implicit-check-not=error: --strict-whitespace + +//===----------------------------------------------------------------------===// +// A VOPD instruction can use only one literal. +//===----------------------------------------------------------------------===// + +v_dual_mul_f32 v11, 0x24681357, v2 :: v_dual_mul_f32 v10, 0xbabe, v5 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: only one unique literal operand is allowed +// GFX12-NEXT:{{^}}v_dual_mul_f32 v11, 0x24681357, v2 :: v_dual_mul_f32 v10, 0xbabe, v5 +// GFX12-NEXT:{{^}} ^ + +//===----------------------------------------------------------------------===// +// When 2 different literals are specified, show the location +// of the last literal which is not a KImm, if any. 
+//===----------------------------------------------------------------------===// + +v_dual_fmamk_f32 v122, v74, 0xa0172923, v161 :: v_dual_lshlrev_b32 v247, 0xbabe, v99 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: only one unique literal operand is allowed +// GFX12-NEXT:{{^}}v_dual_fmamk_f32 v122, v74, 0xa0172923, v161 :: v_dual_lshlrev_b32 v247, 0xbabe, v99 +// GFX12-NEXT:{{^}} ^ + +v_dual_add_f32 v5, 0xaf123456, v2 :: v_dual_fmaak_f32 v6, v3, v1, 0xbabe +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: only one unique literal operand is allowed +// GFX12-NEXT:{{^}}v_dual_add_f32 v5, 0xaf123456, v2 :: v_dual_fmaak_f32 v6, v3, v1, 0xbabe +// GFX12-NEXT:{{^}} ^ + +v_dual_add_f32 v5, 0xaf123456, v2 :: v_dual_fmaak_f32 v6, 0xbabe, v1, 0xbabe +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: only one unique literal operand is allowed +// GFX12-NEXT:{{^}}v_dual_add_f32 v5, 0xaf123456, v2 :: v_dual_fmaak_f32 v6, 0xbabe, v1, 0xbabe +// GFX12-NEXT:{{^}} ^ + +v_dual_fmamk_f32 v122, 0xdeadbeef, 0xdeadbeef, v161 :: v_dual_fmamk_f32 v123, 0xdeadbeef, 0x1234, v162 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: only one unique literal operand is allowed +// GFX12-NEXT:{{^}}v_dual_fmamk_f32 v122, 0xdeadbeef, 0xdeadbeef, v161 :: v_dual_fmamk_f32 v123, 0xdeadbeef, 0x1234, v162 +// GFX12-NEXT:{{^}} ^ + +v_dual_fmamk_f32 v122, 0xdeadbeef, 0xdeadbeef, v161 :: v_dual_fmamk_f32 v123, s0, 0x1234, v162 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: only one unique literal operand is allowed +// GFX12-NEXT:{{^}}v_dual_fmamk_f32 v122, 0xdeadbeef, 0xdeadbeef, v161 :: v_dual_fmamk_f32 v123, s0, 0x1234, v162 +// GFX12-NEXT:{{^}} ^ + +//===----------------------------------------------------------------------===// +// Check that assembler detects a different literal regardless of its location. 
+//===----------------------------------------------------------------------===// + +v_dual_fmamk_f32 v122, 0xdeadbeef, 0xdeadbeef, v161 :: v_dual_fmamk_f32 v123, 0xdeadbeef, 0x1234, v162 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: only one unique literal operand is allowed +// GFX12-NEXT:{{^}}v_dual_fmamk_f32 v122, 0xdeadbeef, 0xdeadbeef, v161 :: v_dual_fmamk_f32 v123, 0xdeadbeef, 0x1234, v162 +// GFX12-NEXT:{{^}} ^ + +v_dual_fmamk_f32 v122, 0xdeadbeef, 0xdeadbeef, v161 :: v_dual_fmamk_f32 v123, 0x1234, 0xdeadbeef, v162 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: only one unique literal operand is allowed +// GFX12-NEXT:{{^}}v_dual_fmamk_f32 v122, 0xdeadbeef, 0xdeadbeef, v161 :: v_dual_fmamk_f32 v123, 0x1234, 0xdeadbeef, v162 +// GFX12-NEXT:{{^}} ^ + +v_dual_fmamk_f32 v122, 0xdeadbeef, 0x1234, v161 :: v_dual_fmamk_f32 v123, 0xdeadbeef, 0xdeadbeef, v162 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: only one unique literal operand is allowed +// GFX12-NEXT:{{^}}v_dual_fmamk_f32 v122, 0xdeadbeef, 0x1234, v161 :: v_dual_fmamk_f32 v123, 0xdeadbeef, 0xdeadbeef, v162 +// GFX12-NEXT:{{^}} ^ + +v_dual_fmamk_f32 v122, 0x1234, 0xdeadbeef, v161 :: v_dual_fmamk_f32 v123, 0xdeadbeef, 0xdeadbeef, v162 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: only one unique literal operand is allowed +// GFX12-NEXT:{{^}}v_dual_fmamk_f32 v122, 0x1234, 0xdeadbeef, v161 :: v_dual_fmamk_f32 v123, 0xdeadbeef, 0xdeadbeef, v162 +// GFX12-NEXT:{{^}} ^ + +//===----------------------------------------------------------------------===// +// When 2 different literals are specified and all literals are KImm, +// show the location of the last KImm literal. 
+//===----------------------------------------------------------------------===// + +v_dual_fmamk_f32 v122, s0, 0xdeadbeef, v161 :: v_dual_fmamk_f32 v123, s0, 0x1234, v162 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: only one unique literal operand is allowed +// GFX12-NEXT:{{^}}v_dual_fmamk_f32 v122, s0, 0xdeadbeef, v161 :: v_dual_fmamk_f32 v123, s0, 0x1234, v162 +// GFX12-NEXT:{{^}} ^ + +//===----------------------------------------------------------------------===// +// A VOPD instruction cannot use more than 2 scalar operands +//===----------------------------------------------------------------------===// + +// 2 different SGPRs + LITERAL + +v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_max_i32 v247, s75, v98 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions) +// GFX12-NEXT:{{^}}v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_max_i32 v247, s75, v98 +// GFX12-NEXT:{{^}} ^ + +v_dual_mov_b32 v247, s73 :: v_dual_fmaak_f32 v122, s74, v161, 2.741 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions) +// GFX12-NEXT:{{^}}v_dual_mov_b32 v247, s73 :: v_dual_fmaak_f32 v122, s74, v161, 2.741 +// GFX12-NEXT:{{^}} ^ + +v_dual_fmamk_f32 v122, s0, 0xbabe, v161 :: v_dual_fmamk_f32 v123, s1, 0xbabe, v162 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions) +// GFX12-NEXT:{{^}}v_dual_fmamk_f32 v122, s0, 0xbabe, v161 :: v_dual_fmamk_f32 v123, s1, 0xbabe, v162 +// GFX12-NEXT:{{^}} ^ + +// 2 different SGPRs + VCC + +v_dual_add_f32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s2, v3 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions) +// GFX12-NEXT:{{^}}v_dual_add_f32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s2, v3 +// GFX12-NEXT:{{^}} ^ + +v_dual_cndmask_b32 v6, s1, v3 :: v_dual_add_f32 v255, s2, v2 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions) +// 
GFX12-NEXT:{{^}}v_dual_cndmask_b32 v6, s1, v3 :: v_dual_add_f32 v255, s2, v2 +// GFX12-NEXT:{{^}} ^ + +v_dual_cndmask_b32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s2, v3 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions) +// GFX12-NEXT:{{^}}v_dual_cndmask_b32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s2, v3 +// GFX12-NEXT:{{^}} ^ + +v_dual_cndmask_b32 v1, s2, v3, vcc_lo :: v_dual_cndmask_b32 v2, s3, v4, vcc_lo +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions) +// GFX12-NEXT:{{^}}v_dual_cndmask_b32 v1, s2, v3, vcc_lo :: v_dual_cndmask_b32 v2, s3, v4, vcc_lo +// GFX12-NEXT:{{^}} ^ + +// SGPR + LITERAL + VCC + +v_dual_cndmask_b32 v255, s1, v2 :: v_dual_mov_b32 v254, 0xbabe +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions) +// GFX12-NEXT:{{^}}v_dual_cndmask_b32 v255, s1, v2 :: v_dual_mov_b32 v254, 0xbabe +// GFX12-NEXT:{{^}} ^ + +v_dual_cndmask_b32 v255, 0xbabe, v2 :: v_dual_mov_b32 v254, s1 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions) +// GFX12-NEXT:{{^}}v_dual_cndmask_b32 v255, 0xbabe, v2 :: v_dual_mov_b32 v254, s1 +// GFX12-NEXT:{{^}} ^ + +v_dual_cndmask_b32 v255, s3, v2 :: v_dual_fmamk_f32 v254, v1, 0xbabe, v162 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions) +// GFX12-NEXT:{{^}}v_dual_cndmask_b32 v255, s3, v2 :: v_dual_fmamk_f32 v254, v1, 0xbabe, v162 +// GFX12-NEXT:{{^}} ^ + +v_dual_cndmask_b32 v255, v1, v2 :: v_dual_fmamk_f32 v254, s3, 0xbabe, v162 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions) +// GFX12-NEXT:{{^}}v_dual_cndmask_b32 v255, v1, v2 :: v_dual_fmamk_f32 v254, s3, 0xbabe, v162 +// GFX12-NEXT:{{^}} ^ + +// SGPR + VCC + VCC_LO +// This is a special case because implicit VCC operand has 64 bit size. +// SP3 does not accept this instruction as well. 
+ +v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v6, s1, v3 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions) +// GFX12-NEXT:{{^}}v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v6, s1, v3 +// GFX12-NEXT:{{^}} ^ + +// FIXME: Error should be 'unsupported instruction' +v_dual_add_f32 v255, v4, v2 :: v_dual_and_b32 v6, v1, v3 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12-NEXT:{{^}}v_dual_add_f32 v255, v4, v2 :: v_dual_and_b32 v6, v1, v3 +// GFX12-NEXT:{{^}}^ + +v_dual_cndmask_b32 v255, v4, v2 :: v_dual_fmaak_f32 v7, v101, v3, 0xaf123456 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: one dst register must be even and the other odd +// GFX12-NEXT:{{^}}v_dual_cndmask_b32 v255, v4, v2 :: v_dual_fmaak_f32 v7, v101, v3, 0xaf123456 +// GFX12-NEXT:{{^}} ^ + +v_dual_add_f32 v2, v2, v5 :: v_dual_mul_f32 v4, 130, v6 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: one dst register must be even and the other odd +// GFX12-NEXT:{{^}}v_dual_add_f32 v2, v2, v5 :: v_dual_mul_f32 v4, 130, v6 +// GFX12-NEXT:{{^}} ^ + +// Even though it could be represented as VOPD3, fmac reads its dst and bank constraints still apply to src2. 
+v_dual_fmac_f32 v255, s105, v2 :: v_dual_fmac_f32 v7, s1, v3 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: one dst register must be even and the other odd +// GFX12-NEXT:{{^}}v_dual_fmac_f32 v255, s105, v2 :: v_dual_fmac_f32 v7, s1, v3 +// GFX12-NEXT:{{^}} ^ + +// Destination should be distinct even if not checked for parity in VOPD3 +v_dual_fmac_f32 v7, v4, v2 :: v_dual_fmac_f32 v7, v1, v3 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: dst registers must be distinct +// GFX12-NEXT:{{^}}v_dual_fmac_f32 v7, v4, v2 :: v_dual_fmac_f32 v7, v1, v3 +// GFX12-NEXT:{{^}} ^ + +v_dual_add_f32 v7, v4, v2 :: v_dual_add_f32 v7, v5, v3 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: dst registers must be distinct +// GFX12-NEXT:{{^}}v_dual_add_f32 v7, v4, v2 :: v_dual_add_f32 v7, v5, v3 +// GFX12-NEXT:{{^}} ^ + +//===----------------------------------------------------------------------===// +// A 64-bit operand shall not have bank conflicts with both subregs. +// There is also NO exception that a 64 bit operand can start with the same +// register as 32 bit. 
+//===----------------------------------------------------------------------===// +v_dual_add_f64 v[2:3], v[4:5], v[8:9] :: v_dual_ashrrev_i32 v5, v8, v6 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: src0 operands must use different VGPR banks +// GFX12-NEXT:{{^}}v_dual_add_f64 v[2:3], v[4:5], v[8:9] :: v_dual_ashrrev_i32 v5, v8, v6 +// GFX12-NEXT:{{^}} ^ + +v_dual_add_f64 v[2:3], v[4:5], v[8:9] :: v_dual_ashrrev_i32 v5, v9, v6 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: src0 operands must use different VGPR banks +// GFX12-NEXT:{{^}}v_dual_add_f64 v[2:3], v[4:5], v[8:9] :: v_dual_ashrrev_i32 v5, v9, v6 +// GFX12-NEXT:{{^}} ^ + +v_dual_add_f64 v[2:3], v[4:5], v[8:9] :: v_dual_ashrrev_i32 v5, v4, v6 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: src0 operands must use different VGPR banks +// GFX12-NEXT:{{^}}v_dual_add_f64 v[2:3], v[4:5], v[8:9] :: v_dual_ashrrev_i32 v5, v4, v6 +// GFX12-NEXT:{{^}} ^ + +v_dual_add_f64 v[2:3], 1, v[8:9] :: v_dual_ashrrev_i32 v3, v7, v6 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: dst registers must be distinct +// GFX12-NEXT:{{^}}v_dual_add_f64 v[2:3], 1, v[8:9] :: v_dual_ashrrev_i32 v3, v7, v6 +// GFX12-NEXT:{{^}} ^ + +//===----------------------------------------------------------------------===// +// Literals not supported by VOPD3. Inline literals can only be encoded for +// src0, but not for vsrc1 or vsrc2. 
+//===----------------------------------------------------------------------===// +v_dual_add_f64 v[2:3], 100.0, v[8:9] :: v_dual_ashrrev_i32 v4, v7, v6 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX12-NEXT:{{^}}v_dual_add_f64 v[2:3], 100.0, v[8:9] :: v_dual_ashrrev_i32 v4, v7, v6 +// GFX12-NEXT:{{^}} ^ + +v_dual_fma_f32 v255, s105, v2, v255 :: v_dual_fma_f32 v7, 1, 0, v8 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX12-NEXT:{{^}}v_dual_fma_f32 v255, s105, v2, v255 :: v_dual_fma_f32 v7, 1, 0, v8 +// GFX12-NEXT:{{^}} ^ + +v_dual_fma_f32 v255, s105, v2, v255 :: v_dual_fma_f32 v7, 1, v0, 0 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX12-NEXT:{{^}}v_dual_fma_f32 v255, s105, v2, v255 :: v_dual_fma_f32 v7, 1, v0, 0 +// GFX12-NEXT:{{^}} ^ + +//===----------------------------------------------------------------------===// +// Check that we properly detect bank conflicts if instruction is derived from +// VOP3. 
+//===----------------------------------------------------------------------===// +v_dual_fma_f32 v1, v4, v2, v3 :: v_dual_fma_f32 v3, v8, v7, v6 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: src0 operands must use different VGPR banks +// GFX12-NEXT:{{^}}v_dual_fma_f32 v1, v4, v2, v3 :: v_dual_fma_f32 v3, v8, v7, v6 +// GFX12-NEXT:{{^}} ^ + +v_dual_fma_f32 v1, v4, v2, v3 :: v_dual_fma_f32 v3, v5, v6, v8 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: src1 operands must use different VGPR banks +// GFX12-NEXT:{{^}}v_dual_fma_f32 v1, v4, v2, v3 :: v_dual_fma_f32 v3, v5, v6, v8 +// GFX12-NEXT:{{^}} ^ + +v_dual_fma_f32 v1, v4, v2, v3 :: v_dual_fma_f32 v3, v5, v8, v7 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: src2 operands must use different VGPR banks +// GFX12-NEXT:{{^}}v_dual_fma_f32 v1, v4, v2, v3 :: v_dual_fma_f32 v3, v5, v8, v7 +// GFX12-NEXT:{{^}} ^ + +v_dual_fma_f32 v1, v4, v2, v3 :: v_dual_fmac_f32 v7, v5, v8 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: src2 operands must use different VGPR banks +// GFX12-NEXT:{{^}}v_dual_fma_f32 v1, v4, v2, v3 :: v_dual_fmac_f32 v7, v5, v8 +// GFX12-NEXT:{{^}} ^ + +v_dual_fmac_f32 v7, v5, v8 :: v_dual_fma_f32 v1, v4, v2, v3 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: src2 operands must use different VGPR banks +// GFX12-NEXT:{{^}}v_dual_fmac_f32 v7, v5, v8 :: v_dual_fma_f32 v1, v4, v2, v3 +// GFX12-NEXT:{{^}} ^ + +//===----------------------------------------------------------------------===// +// ABS modifiers are not supported +//===----------------------------------------------------------------------===// +v_dual_fma_f32 v255, |s105|, v0, v1 :: v_dual_add_nc_u32 v7, s1, v0 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: ABS not allowed in VOPD3 instructions +// GFX12-NEXT:{{^}}v_dual_fma_f32 v255, |s105|, v0, v1 :: v_dual_add_nc_u32 v7, s1, v0 +// GFX12-NEXT:{{^}} ^ + +v_dual_fma_f32 v255, s105, abs(v0), v1 :: v_dual_fma_f32 v7, s1, v0, v8 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: ABS not allowed in VOPD3 instructions +// 
GFX12-NEXT:{{^}}v_dual_fma_f32 v255, s105, abs(v0), v1 :: v_dual_fma_f32 v7, s1, v0, v8 +// GFX12-NEXT:{{^}} ^ + +v_dual_fma_f32 v255, s105, v0, |v1| :: v_dual_fma_f32 v7, s1, v0, v8 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: ABS not allowed in VOPD3 instructions +// GFX12-NEXT:{{^}}v_dual_fma_f32 v255, s105, v0, |v1| :: v_dual_fma_f32 v7, s1, v0, v8 +// GFX12-NEXT:{{^}} ^ + +v_dual_add_nc_u32 v255, s105, v0 :: v_dual_fma_f32 v7, |1|, v0, v8 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: ABS not allowed in VOPD3 instructions +// GFX12-NEXT:{{^}}v_dual_add_nc_u32 v255, s105, v0 :: v_dual_fma_f32 v7, |1|, v0, v8 +// GFX12-NEXT:{{^}} ^ + +v_dual_fma_f32 v255, s105, v0, v1 :: v_dual_fma_f32 v7, s1, -|v0|, v8 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: ABS not allowed in VOPD3 instructions +// GFX12-NEXT:{{^}}v_dual_fma_f32 v255, s105, v0, v1 :: v_dual_fma_f32 v7, s1, -|v0|, v8 +// GFX12-NEXT:{{^}} ^ + +v_dual_fma_f32 v255, s105, v0, v1 :: v_dual_fma_f32 v7, s1, v0, -abs(v8) +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: ABS not allowed in VOPD3 instructions +// GFX12-NEXT:{{^}}v_dual_fma_f32 v255, s105, v0, v1 :: v_dual_fma_f32 v7, s1, v0, -abs(v8) +// GFX12-NEXT:{{^}} ^ + +v_dual_mul_f64 v[6:7], -|v[2:3]|, v[4:5] :: v_dual_fma_f32 v255, -s105, v2, v1 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: ABS not allowed in VOPD3 instructions +// GFX12-NEXT:{{^}}v_dual_mul_f64 v[6:7], -|v[2:3]|, v[4:5] :: v_dual_fma_f32 v255, -s105, v2, v1 +// GFX12-NEXT:{{^}} ^ + +//===----------------------------------------------------------------------===// +// No modifiers on non-fp part of an instruction +//===----------------------------------------------------------------------===// +v_dual_fma_f32 v255, -s105, v0, v1 :: v_dual_lshrrev_b32 v7, -s1, v0 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX12-NEXT:{{^}}v_dual_fma_f32 v255, -s105, v0, v1 :: v_dual_lshrrev_b32 v7, -s1, v0 +// GFX12-NEXT:{{^}} ^ + +v_dual_fma_f32 v255, -s105, v0, v1 :: v_dual_max_i32 v7, s1, 
-v0 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX12-NEXT:{{^}}v_dual_fma_f32 v255, -s105, v0, v1 :: v_dual_max_i32 v7, s1, -v0 +// GFX12-NEXT:{{^}} ^ + +v_dual_add_nc_u32 v7, -s1, v0 :: v_dual_fma_f32 v255, -s105, v0, v1 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand +// GFX12-NEXT:{{^}}v_dual_add_nc_u32 v7, -s1, v0 :: v_dual_fma_f32 v255, -s105, v0, v1 +// GFX12-NEXT:{{^}} ^ + +v_dual_sub_nc_u32 v7, s1, -v0 :: v_dual_fma_f32 v255, -s105, v0, v1 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand +// GFX12-NEXT:{{^}}v_dual_sub_nc_u32 v7, s1, -v0 :: v_dual_fma_f32 v255, -s105, v0, v1 +// GFX12-NEXT:{{^}} ^ + +v_dual_cndmask_b32 v28, sext(v15), v15, s46 :: v_dual_cndmask_b32 v29, v13, -v13, s46 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand +// GFX12-NEXT:{{^}}v_dual_cndmask_b32 v28, sext(v15), v15, s46 :: v_dual_cndmask_b32 v29, v13, -v13, s46 +// GFX12-NEXT:{{^}} ^ + + +v_dual_cndmask_b32 v28, -v15, v15, s46 :: v_dual_cndmask_b32 v29, sext(v13), -v13, s46 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand +// GFX12-NEXT:{{^}}v_dual_cndmask_b32 v28, -v15, v15, s46 :: v_dual_cndmask_b32 v29, sext(v13), -v13, s46 +// GFX12-NEXT:{{^}} ^ diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vopd_features.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vopd_features.s new file mode 100644 index 0000000000000..cdd9f301e2506 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vopd_features.s @@ -0,0 +1,109 @@ +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck -check-prefix=GFX12 %s + +//===----------------------------------------------------------------------===// +// A VOPD instruction can use one or more literals, +// provided that they are identical. 
+//===----------------------------------------------------------------------===// + +// LITERAL + +v_dual_mul_f32 v11, v1, v2 :: v_dual_mul_f32 v10, 0x24681357, v5 +// GFX12: encoding: [0x01,0x05,0xc6,0xc8,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24] + +// LITERAL*2 + +v_dual_mul_f32 v11, 0x24681357, v2 :: v_dual_mul_f32 v10, 0x24681357, v5 +// GFX12: encoding: [0xff,0x04,0xc6,0xc8,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24] + +// LITERAL + KIMM + +v_dual_add_f32 v5, 0xaf123456, v2 :: v_dual_fmaak_f32 v6, v3, v1, 0xaf123456 ; +// GFX12: encoding: [0xff,0x04,0x02,0xc9,0x03,0x03,0x06,0x05,0x56,0x34,0x12,0xaf] + +// KIMM + LITERAL + +v_dual_fmamk_f32 v122, v74, 0xa0172923, v161 :: v_dual_lshlrev_b32 v247, 0xa0172923, v99 +// GFX12: encoding: [0x4a,0x43,0xa3,0xc8,0xff,0xc6,0xf6,0x7a,0x23,0x29,0x17,0xa0] + +// KIMM*2 + +v_dual_fmamk_f32 v122, 0xdeadbeef, 0xdeadbeef, v161 :: v_dual_fmamk_f32 v123, 0xdeadbeef, 0xdeadbeef, v162 +// GFX12: encoding: [0xff,0x42,0x85,0xc8,0xff,0x44,0x7b,0x7a,0xef,0xbe,0xad,0xde] + +//===----------------------------------------------------------------------===// +// A VOPD instruction can use 2 scalar operands, +// but implicit VCC must be counted in. 
+//===----------------------------------------------------------------------===// + +// 2 different SGPRs + +v_dual_mul_f32 v0, s1, v2 :: v_dual_mul_f32 v3, s4, v5 +// GFX12: encoding: [0x01,0x04,0xc6,0xc8,0x04,0x0a,0x02,0x00] + +// SGPR + LITERAL + +v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_max_i32 v247, v160, v98 +// GFX12: encoding: [0x4a,0x42,0x6f,0xc8,0xa0,0xc5,0xf6,0x7a,0x8b,0x6c,0x2f,0x40] + +v_dual_mov_b32 v247, v160 :: v_dual_fmaak_f32 v122, s74, v161, 2.741 +// GFX12: encoding: [0xa0,0x01,0x02,0xca,0x4a,0x42,0x7b,0xf7,0x8b,0x6c,0x2f,0x40] + +// SGPR*2 + LITERAL + +v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_max_i32 v247, s74, v98 +// GFX12: encoding: [0x4a,0x42,0x6f,0xc8,0x4a,0xc4,0xf6,0x7a,0x8b,0x6c,0x2f,0x40] + +// SGPR + LITERAL*2 + +v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_fmamk_f32 v3, v6, 2.741, v1 +// GFX12: encoding: [0x4a,0x42,0x45,0xc8,0x06,0x03,0x02,0x7a,0x8b,0x6c,0x2f,0x40] + +// SGPR*2 + LITERAL*2 + +v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_fmamk_f32 v3, s74, 2.741, v1 +// GFX12: encoding: [0x4a,0x42,0x45,0xc8,0x4a,0x02,0x02,0x7a,0x8b,0x6c,0x2f,0x40] + +// LITERAL + VCC + +v_dual_fmaak_f32 v122, v0, v161, 2.741 :: v_dual_cndmask_b32 v1, v2, v3 +// GFX12: encoding: [0x00,0x43,0x53,0xc8,0x02,0x07,0x00,0x7a,0x8b,0x6c,0x2f,0x40] + +// LITERAL*2 + VCC + +v_dual_fmaak_f32 v122, v0, v161, 2.741 :: v_dual_cndmask_b32 v1, 2.741, v3 +// GFX12: encoding: [0x00,0x43,0x53,0xc8,0xff,0x06,0x00,0x7a,0x8b,0x6c,0x2f,0x40] + +// LITERAL*2 + VCC*2 + +v_dual_cndmask_b32 v255, 0xbabe, v2 :: v_dual_cndmask_b32 v6, 0xbabe, v3 +// GFX12: encoding: [0xff,0x04,0x52,0xca,0xff,0x06,0x06,0xff,0xbe,0xba,0x00,0x00] + +// SGPR*2 + VCC + +v_dual_add_f32 v255, s105, v2 :: v_dual_cndmask_b32 v6, s105, v3 +// GFX12: encoding: [0x69,0x04,0x12,0xc9,0x69,0x06,0x06,0xff] + +// SGPR*2 + VCC*2 + +v_dual_cndmask_b32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s1, v3 +// GFX12: encoding: [0x01,0x04,0x52,0xca,0x01,0x06,0x06,0xff] + +// VCC*2 + +v_dual_add_f32 
v255, vcc_lo, v2 :: v_dual_cndmask_b32 v6, v1, v3 +// GFX12: encoding: [0x6a,0x04,0x12,0xc9,0x01,0x07,0x06,0xff] + +//===----------------------------------------------------------------------===// +// A VOPD OpY mov_b32 instruction uses SRC2 source-cache if OpX is also mov_b32 +//===----------------------------------------------------------------------===// + +v_dual_mov_b32 v2, v5 :: v_dual_mov_b32 v3, v1 +// GFX12: encoding: [0x05,0x01,0x10,0xca,0x01,0x01,0x02,0x02] + +//===----------------------------------------------------------------------===// +// SRCX0 and SRCY0 may use the same bank if they are using the same VGPR; same for +// VSRCX1 and VSRCY1. +//===----------------------------------------------------------------------===// + +v_dual_add_f32 v2, v2, v5 :: v_dual_mul_f32 v3, v2, v5 +// GFX12: encoding: [0x02,0x0b,0x06,0xc9,0x02,0x0b,0x02,0x02] diff --git a/llvm/test/MC/AMDGPU/gfx1250_err.s b/llvm/test/MC/AMDGPU/gfx1250_err.s index ddb6d9520ce1d..e04c6aa930150 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_err.s +++ b/llvm/test/MC/AMDGPU/gfx1250_err.s @@ -1,7 +1,42 @@ // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX1250-ERR --implicit-check-not=error: -strict-whitespace %s +// For v_dual_cndmask_b32 use of the explicit src2 forces VOPD3 form even if it is vcc_lo. +// If src2 is omitted then it forces VOPD form. As a result a proper form of the instruction +// has to be used if the other component of the dual instruction cannot be used in that +// encoding. 
+ +v_dual_cndmask_b32 v2, v4, v1 :: v_dual_fma_f32 v7, v1, v2, v3 +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid VOPDY instruction +// GFX1250-ERR: v_dual_cndmask_b32 v2, v4, v1 :: v_dual_fma_f32 v7, v1, v2, v3 +// GFX1250-ERR: ^ + +v_dual_fma_f32 v7, v1, v2, v3 :: v_dual_cndmask_b32 v2, v4, v1 +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: too few operands for instruction +// GFX1250-ERR: v_dual_fma_f32 v7, v1, v2, v3 :: v_dual_cndmask_b32 v2, v4, v1 +// GFX1250-ERR: ^ + +v_dual_cndmask_b32 v7, v1, v2 :: v_dual_cndmask_b32 v2, v4, v1, vcc_lo +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX1250-ERR: v_dual_cndmask_b32 v7, v1, v2 :: v_dual_cndmask_b32 v2, v4, v1, vcc_lo +// GFX1250-ERR: ^ + +v_dual_cndmask_b32 v7, v1, v2, vcc_lo :: v_dual_cndmask_b32 v2, v4, v1 +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: too few operands for instruction +// GFX1250-ERR: v_dual_cndmask_b32 v7, v1, v2, vcc_lo :: v_dual_cndmask_b32 v2, v4, v1 +// GFX1250-ERR: ^ + // Check for unique 64-bit literal +v_mov_b64 v[4:5], v[2:3] quad_perm:[1,1,1,1] +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR: v_mov_b64 v[4:5], v[2:3] quad_perm:[1,1,1,1] +// GFX1250-ERR: ^ + +v_mov_b64 v[4:5], v[2:3] dpp8:[7,6,5,4,3,2,1,0] +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR: v_mov_b64 v[4:5], v[2:3] dpp8:[7,6,5,4,3,2,1,0] +// GFX1250-ERR: ^ + s_andn2_b64 s[2:3], 0x10abcdef12345678, 0xabcdef12345678 // GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: only one unique literal operand is allowed // GFX1250-ERR: s_andn2_b64 s[2:3], 0x10abcdef12345678, 0xabcdef12345678 @@ -61,3 +96,43 @@ v_ceil_f64 v[2:3], lit64(123 // GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: expected closing parentheses // GFX1250-ERR: v_ceil_f64 v[2:3], lit64(123 // GFX1250-ERR: ^ + +v_fmaak_f64 v[4:5], lit(lit64(0x7e8)), v[8:9], lit64(0x7e8) +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. 
+// GFX1250-ERR: v_fmaak_f64 v[4:5], lit(lit64(0x7e8)), v[8:9], lit64(0x7e8) +// GFX1250-ERR: ^ + +v_fmaak_f64 v[4:5], lit64(lit64(0x7e8)), v[8:9], lit64(0x7e8) +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR: v_fmaak_f64 v[4:5], lit64(lit64(0x7e8)), v[8:9], lit64(0x7e8) +// GFX1250-ERR: ^ + +v_fmaak_f64 v[4:5], lit64(lit(0x7e8)), v[8:9], lit64(0x7e8) +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR: v_fmaak_f64 v[4:5], lit64(lit(0x7e8)), v[8:9], lit64(0x7e8) +// GFX1250-ERR: ^ + +v_fmamk_f64 v[4:5], 123.0, 123.1, v[6:7] +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: only one unique literal operand is allowed +// GFX1250-ERR: v_fmamk_f64 v[4:5], 123.0, 123.1, v[6:7] +// GFX1250-ERR: ^ + +v_fmamk_f64 v[4:5], 0x405ec00000000001, 123.0, v[6:7] +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: only one unique literal operand is allowed +// GFX1250-ERR: v_fmamk_f64 v[4:5], 0x405ec00000000001, 123.0, v[6:7] +// GFX1250-ERR: ^ + +v_fmaak_f64 v[4:5], 123.1, v[6:7], 123.0 +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: only one unique literal operand is allowed +// GFX1250-ERR: v_fmaak_f64 v[4:5], 123.1, v[6:7], 123.0 +// GFX1250-ERR: ^ + +v_fmaak_f64 v[4:5], 123.0, v[6:7], 0x405ec00000000001 +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: only one unique literal operand is allowed +// GFX1250-ERR: v_fmaak_f64 v[4:5], 123.0, v[6:7], 0x405ec00000000001 +// GFX1250-ERR: ^ + +v_fmaak_f64 v[4:5], 0x7e8, v[8:9], lit64(0x7e8) +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: only one unique literal operand is allowed +// GFX1250-ERR: v_fmaak_f64 v[4:5], 0x7e8, v[8:9], lit64(0x7e8) +// GFX1250-ERR: ^ diff --git a/llvm/test/MC/AMDGPU/gfx9_asm_vop2_features.s b/llvm/test/MC/AMDGPU/gfx9_asm_vop2_features.s index 4b5efd00a7adf..85978b04779d0 100644 --- a/llvm/test/MC/AMDGPU/gfx9_asm_vop2_features.s +++ b/llvm/test/MC/AMDGPU/gfx9_asm_vop2_features.s @@ -88,4 +88,7 @@ v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD 
dst_unused:UNUSED_PAD src0_sel:DWORD // CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x05] v_pk_fmac_f16_sdwa v5, v1, sext(v2) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x06] + +v_pk_fmac_f16_sdwa v5, v1, -v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD // CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x16] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sop1.txt new file mode 100644 index 0000000000000..89731fcc936e6 --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sop1.txt @@ -0,0 +1,13 @@ +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250 %s + +# GFX1250: s_barrier_signal -3 ; encoding: [0xc3,0x4e,0x80,0xbe] +0xc3,0x4e,0x80,0xbe + +# GFX1250: s_get_barrier_state s3, -3 ; encoding: [0xc3,0x50,0x83,0xbe] +0xc3,0x50,0x83,0xbe + +# GFX1250: s_get_barrier_state s3, -4 ; encoding: [0xc4,0x50,0x83,0xbe] +0xc4,0x50,0x83,0xbe + +# GFX1250: s_get_barrier_state s3, m0 ; encoding: [0x7d,0x50,0x83,0xbe] +0x7d,0x50,0x83,0xbe diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sopp.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sopp.txt index 220f9e5084f0e..e7026df3c0e2b 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sopp.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sopp.txt @@ -1,5 +1,20 @@ # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250 %s +# GFX1250: s_wait_asynccnt 0x1234 ; encoding: [0x34,0x12,0xca,0xbf] +0x34,0x12,0xca,0xbf + +# GFX1250: s_wait_asynccnt 0xc1d1 ; encoding: [0xd1,0xc1,0xca,0xbf] +0xd1,0xc1,0xca,0xbf + +# GFX1250: s_wait_tensorcnt 0x0 ; encoding: [0x00,0x00,0xcb,0xbf] +0x00,0x00,0xcb,0xbf + +# GFX1250: s_wait_tensorcnt 0x1 ; encoding: [0x01,0x00,0xcb,0xbf] +0x01,0x00,0xcb,0xbf + +# GFX1250: s_wait_tensorcnt 0x3 ; 
encoding: [0x03,0x00,0xcb,0xbf] +0x03,0x00,0xcb,0xbf + # GFX1250: s_wait_xcnt 0x0 ; encoding: [0x00,0x00,0xc5,0xbf] 0x00,0x00,0xc5,0xbf diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop2.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop2.txt new file mode 100644 index 0000000000000..f0fcddb06599f --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop2.txt @@ -0,0 +1,110 @@ +# NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250 %s + +0xfe,0xfc,0xfd,0x49,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40 +# GFX1250: v_fmaak_f64 v[254:255], 0x405ec000, v[254:255], 0x405ec000 ; encoding: [0xfe,0xfc,0xfd,0x49,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] + +0xfe,0x04,0xfc,0x49,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40 +# GFX1250: v_fmaak_f64 v[254:255], 0x405ec000, v[2:3], 0x405ec000 ; encoding: [0xfe,0x04,0xfc,0x49,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] + +0xfe,0xfc,0xfd,0x49,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40 +# GFX1250: v_fmaak_f64 v[254:255], lit64(0x405ec00012345678), v[254:255], lit64(0x405ec00012345678) ; encoding: [0xfe,0xfc,0xfd,0x49,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40] + +0xfe,0x0c,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40 +# GFX1250: v_fmaak_f64 v[4:5], lit64(0x405ec66666666666), v[6:7], lit64(0x405ec66666666666) ; encoding: [0xfe,0x0c,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40] + +0xfe,0x10,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40 +# GFX1250: v_fmaak_f64 v[4:5], lit64(0x405ec66666666666), v[8:9], lit64(0x405ec66666666666) ; encoding: [0xfe,0x10,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40] + +0xf2,0x10,0x08,0x48,0x00,0x00,0x00,0x00,0x00,0x00,0xf0,0x3f +# GFX1250: v_fmaak_f64 v[4:5], 1.0, v[8:9], 0x3ff00000 ; encoding: [0xf2,0x10,0x08,0x48,0x00,0x00,0x00,0x00,0x00,0x00,0xf0,0x3f] + +0xfe,0x0c,0x08,0x48,0xe8,0x07,0x00,0x00,0x00,0x00,0x00,0x00 +# 
GFX1250: v_fmaak_f64 v[4:5], lit64(0x7e8), v[6:7], lit64(0x7e8) ; encoding: [0xfe,0x0c,0x08,0x48,0xe8,0x07,0x00,0x00,0x00,0x00,0x00,0x00] + +0xfe,0x10,0x08,0x48,0xe8,0x07,0x00,0x00,0x00,0x00,0x00,0x00 +# GFX1250: v_fmaak_f64 v[4:5], lit64(0x7e8), v[8:9], lit64(0x7e8) ; encoding: [0xfe,0x10,0x08,0x48,0xe8,0x07,0x00,0x00,0x00,0x00,0x00,0x00] + +0x02,0x05,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40 +# GFX1250: v_fmaak_f64 v[4:5], v[2:3], v[2:3], lit64(0x405ec66666666666) ; encoding: [0x02,0x05,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40] + +0xc1,0x10,0x0c,0x48,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40 +# GFX1250: v_fmaak_f64 v[6:7], -1, v[8:9], 0x405ec000 ; encoding: [0xc1,0x10,0x0c,0x48,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] + +0xf0,0x10,0x0c,0x48,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40 +# GFX1250: v_fmaak_f64 v[6:7], 0.5, v[8:9], 0x405ec000 ; encoding: [0xf0,0x10,0x0c,0x48,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] + +0x7e,0x10,0x0c,0x48,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40 +# GFX1250: v_fmaak_f64 v[6:7], exec, v[8:9], 0x405ec000 ; encoding: [0x7e,0x10,0x0c,0x48,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] + +0x7c,0x10,0x0c,0x48,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40 +# GFX1250: v_fmaak_f64 v[6:7], null, v[8:9], 0x405ec000 ; encoding: [0x7c,0x10,0x0c,0x48,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] + +0x02,0x10,0x0c,0x48,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40 +# GFX1250: v_fmaak_f64 v[6:7], s[2:3], v[8:9], lit64(0x405ec00012345678) ; encoding: [0x02,0x10,0x0c,0x48,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40] + +0xfd,0x10,0x0c,0x48,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40 +# GFX1250: v_fmaak_f64 v[6:7], src_scc, v[8:9], 0x405ec000 ; encoding: [0xfd,0x10,0x0c,0x48,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] + +0xfe,0x11,0x0c,0x48,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40 +# GFX1250: v_fmaak_f64 v[6:7], v[254:255], v[8:9], 0x405ec000 ; encoding: [0xfe,0x11,0x0c,0x48,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] + +0x04,0x11,0x0c,0x48,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40 +# 
GFX1250: v_fmaak_f64 v[6:7], v[4:5], v[8:9], 0x405ec000 ; encoding: [0x04,0x11,0x0c,0x48,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] + +0x6a,0x10,0x0c,0x48,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40 +# GFX1250: v_fmaak_f64 v[6:7], vcc, v[8:9], 0x405ec000 ; encoding: [0x6a,0x10,0x0c,0x48,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] + +0xfe,0xfc,0xfd,0x47,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40 +# GFX1250: v_fmamk_f64 v[254:255], 0x405ec000, 0x405ec000, v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x47,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] + +0xfe,0x04,0xfc,0x47,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40 +# GFX1250: v_fmamk_f64 v[254:255], 0x405ec000, 0x405ec000, v[2:3] ; encoding: [0xfe,0x04,0xfc,0x47,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] + +0xfe,0xfc,0xfd,0x47,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40 +# GFX1250: v_fmamk_f64 v[254:255], lit64(0x405ec00012345678), lit64(0x405ec00012345678), v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x47,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40] + +0xfe,0x0c,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40 +# GFX1250: v_fmamk_f64 v[4:5], lit64(0x405ec66666666666), lit64(0x405ec66666666666), v[6:7] ; encoding: [0xfe,0x0c,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40] + +0xfe,0x10,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40 +# GFX1250: v_fmamk_f64 v[4:5], lit64(0x405ec66666666666), lit64(0x405ec66666666666), v[8:9] ; encoding: [0xfe,0x10,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40] + +0xf2,0x0c,0x08,0x46,0x00,0x00,0x00,0x00,0x00,0x00,0xf0,0x3f +# GFX1250: v_fmamk_f64 v[4:5], 1.0, 0x3ff00000, v[6:7] ; encoding: [0xf2,0x0c,0x08,0x46,0x00,0x00,0x00,0x00,0x00,0x00,0xf0,0x3f] + +0xfe,0x0c,0x08,0x46,0xe8,0x07,0x00,0x00,0x00,0x00,0x00,0x00 +# GFX1250: v_fmamk_f64 v[4:5], lit64(0x7e8), lit64(0x7e8), v[6:7] ; encoding: [0xfe,0x0c,0x08,0x46,0xe8,0x07,0x00,0x00,0x00,0x00,0x00,0x00] + +0xfe,0x10,0x08,0x46,0xe8,0x07,0x00,0x00,0x00,0x00,0x00,0x00 +# GFX1250: v_fmamk_f64 v[4:5], lit64(0x7e8), lit64(0x7e8), v[8:9] ; encoding: 
[0xfe,0x10,0x08,0x46,0xe8,0x07,0x00,0x00,0x00,0x00,0x00,0x00] + +0x02,0x0d,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40 +# GFX1250: v_fmamk_f64 v[4:5], v[2:3], lit64(0x405ec66666666666), v[6:7] ; encoding: [0x02,0x0d,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40] + +0xc1,0x04,0x0c,0x46,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40 +# GFX1250: v_fmamk_f64 v[6:7], -1, 0x405ec000, v[2:3] ; encoding: [0xc1,0x04,0x0c,0x46,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] + +0xf0,0x04,0x0c,0x46,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40 +# GFX1250: v_fmamk_f64 v[6:7], 0.5, 0x405ec000, v[2:3] ; encoding: [0xf0,0x04,0x0c,0x46,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] + +0x7e,0x04,0x0c,0x46,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40 +# GFX1250: v_fmamk_f64 v[6:7], exec, 0x405ec000, v[2:3] ; encoding: [0x7e,0x04,0x0c,0x46,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] + +0x7c,0x04,0x0c,0x46,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40 +# GFX1250: v_fmamk_f64 v[6:7], null, 0x405ec000, v[2:3] ; encoding: [0x7c,0x04,0x0c,0x46,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] + +0x02,0x04,0x0c,0x46,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40 +# GFX1250: v_fmamk_f64 v[6:7], s[2:3], lit64(0x405ec00012345678), v[2:3] ; encoding: [0x02,0x04,0x0c,0x46,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40] + +0xfd,0x04,0x0c,0x46,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40 +# GFX1250: v_fmamk_f64 v[6:7], src_scc, 0x405ec000, v[2:3] ; encoding: [0xfd,0x04,0x0c,0x46,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] + +0xfe,0x05,0x0c,0x46,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40 +# GFX1250: v_fmamk_f64 v[6:7], v[254:255], 0x405ec000, v[2:3] ; encoding: [0xfe,0x05,0x0c,0x46,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] + +0x04,0x05,0x0c,0x46,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40 +# GFX1250: v_fmamk_f64 v[6:7], v[4:5], 0x405ec000, v[2:3] ; encoding: [0x04,0x05,0x0c,0x46,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] + +0x6a,0x04,0x0c,0x46,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40 +# GFX1250: v_fmamk_f64 v[6:7], vcc, 0x405ec000, v[2:3] ; encoding: 
[0x6a,0x04,0x0c,0x46,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vopd.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vopd.txt new file mode 100644 index 0000000000000..119f80ab8bd86 --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vopd.txt @@ -0,0 +1,12205 @@ +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX1250 + +# GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_add_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x08,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x08,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_add_nc_u32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x20,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x20,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_ashrrev_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x2c,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x2c,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_cndmask_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x12,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x12,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_fmaak_f32 v6, src_scc, v5, 0xaf123456 ; encoding: [0xc1,0x08,0x02,0xc9,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +0xc1,0x08,0x02,0xc9,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_fmac_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x00,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x00,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_fmamk_f32 v6, src_scc, 0xaf123456, v255 ; encoding: [0xc1,0x08,0x04,0xc9,0xfd,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xc1,0x08,0x04,0xc9,0xfd,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_lshlrev_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x22,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x22,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, -1, v4 :: 
v_dual_lshrrev_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x2a,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x2a,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_max_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x2e,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x2e,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_max_num_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x14,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x14,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_min_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x30,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x30,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_min_num_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x16,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x16,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_mov_b32 v6, src_scc ; encoding: [0xc1,0x08,0x10,0xc9,0xfd,0x00,0x06,0xff] +0xc1,0x08,0x10,0xc9,0xfd,0x00,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x0e,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x0e,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_mul_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x06,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x06,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_sub_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x0a,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x0a,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_sub_nc_u32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x28,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x28,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_subrev_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x0c,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x0c,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_add_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x08,0xc9,0xf0,0x04,0x06,0xff] 
+0xf0,0x06,0x08,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_add_nc_u32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x20,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x20,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x2c,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x2c,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_cndmask_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x12,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x12,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_fmaak_f32 v6, 0.5, v2, 0xaf123456 ; encoding: [0xf0,0x06,0x02,0xc9,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +0xf0,0x06,0x02,0xc9,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_fmac_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x00,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x00,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_fmamk_f32 v6, 0.5, 0xaf123456, v255 ; encoding: [0xf0,0x06,0x04,0xc9,0xf0,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xf0,0x06,0x04,0xc9,0xf0,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x22,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x22,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x2a,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x2a,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_max_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x2e,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x2e,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_max_num_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x14,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x14,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_min_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x30,0xc9,0xf0,0x04,0x06,0xff] 
+0xf0,0x06,0x30,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_min_num_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x16,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x16,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_mov_b32 v6, 0.5 ; encoding: [0xf0,0x06,0x10,0xc9,0xf0,0x00,0x06,0xff] +0xf0,0x06,0x10,0xc9,0xf0,0x00,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x0e,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x0e,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_mul_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x06,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x06,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_sub_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x0a,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x0a,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x28,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x28,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_subrev_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x0c,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x0c,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_add_f32 v6, null, v3 ; encoding: [0xff,0x04,0x08,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x08,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_add_nc_u32 v6, null, v3 ; encoding: [0xff,0x04,0x20,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x20,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_ashrrev_i32 v6, null, v3 ; encoding: [0xff,0x04,0x2c,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x2c,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_cndmask_b32 v6, null, v3 ; encoding: 
[0xff,0x04,0x12,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x12,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_fmaak_f32 v6, null, v3, 0xaf123456 ; encoding: [0xff,0x04,0x02,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x02,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_fmac_f32 v6, null, v3 ; encoding: [0xff,0x04,0x00,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x00,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_lshlrev_b32 v6, null, v3 ; encoding: [0xff,0x04,0x22,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x22,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_lshrrev_b32 v6, null, v3 ; encoding: [0xff,0x04,0x2a,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x2a,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_max_i32 v6, null, v3 ; encoding: [0xff,0x04,0x2e,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x2e,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_max_num_f32 v6, null, v3 ; encoding: [0xff,0x04,0x14,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x14,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_min_i32 v6, null, v3 ; encoding: [0xff,0x04,0x30,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x30,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_min_num_f32 v6, null, v3 ; encoding: [0xff,0x04,0x16,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x16,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_mul_dx9_zero_f32 v6, null, v3 ; encoding: 
[0xff,0x04,0x0e,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x0e,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_mul_f32 v6, null, v3 ; encoding: [0xff,0x04,0x06,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x06,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_sub_f32 v6, null, v3 ; encoding: [0xff,0x04,0x0a,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x0a,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_sub_nc_u32 v6, null, v3 ; encoding: [0xff,0x04,0x28,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x28,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, 0xaf123456, v2 :: v_dual_subrev_f32 v6, null, v3 ; encoding: [0xff,0x04,0x0c,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x0c,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, null, 0xaf123456, v255 ; encoding: [0xff,0xfe,0x05,0xc9,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0xfe,0x05,0xc9,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, 0xaf123456, v255 :: v_dual_mov_b32 v6, null ; encoding: [0xff,0xfe,0x11,0xc9,0x7c,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0xfe,0x11,0xc9,0x7c,0x00,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_add_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x08,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x08,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x20,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x20,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x2c,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x2c,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: 
v_dual_add_f32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x12,0xc9,0x7f,0x06,0x06,0xff] +0x7f,0x04,0x12,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_fmaak_f32 v6, exec_hi, v3, 0xaf123456 ; encoding: [0x7f,0x04,0x02,0xc9,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7f,0x04,0x02,0xc9,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_fmac_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x00,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x00,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x22,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x22,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x2a,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x2a,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_max_i32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x2e,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x2e,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_max_num_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x14,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x14,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_min_i32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x30,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x30,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_min_num_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x16,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x16,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x0e,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x0e,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_mul_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x06,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x06,0xc9,0x6b,0x06,0x06,0xff + 
+# GFX1250: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_sub_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x0a,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x0a,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x28,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x28,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_subrev_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x0c,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x0c,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, exec_hi, v255 :: v_dual_fmamk_f32 v6, exec_hi, 0xaf123456, v255 ; encoding: [0x7f,0xfe,0x05,0xc9,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7f,0xfe,0x05,0xc9,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, exec_hi, v255 :: v_dual_mov_b32 v6, vcc_hi ; encoding: [0x7f,0xfe,0x11,0xc9,0x6b,0x00,0x06,0xff] +0x7f,0xfe,0x11,0xc9,0x6b,0x00,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_add_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x08,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x08,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x20,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x20,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x2c,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x2c,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x12,0xc9,0x7e,0x06,0x06,0xff] +0x7e,0x04,0x12,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_fmaak_f32 v6, exec_lo, v3, 0xaf123456 ; encoding: [0x7e,0x04,0x02,0xc9,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7e,0x04,0x02,0xc9,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_fmac_f32 v6, ttmp15, v3 ; encoding: 
[0x7e,0x04,0x00,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x00,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x22,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x22,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_lshrrev_b32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x2a,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x2a,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_max_i32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x2e,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x2e,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_max_num_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x14,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x14,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_min_i32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x30,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x30,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_min_num_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x16,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x16,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x0e,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x0e,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_mul_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x06,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x06,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_sub_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x0a,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x0a,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_sub_nc_u32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x28,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x28,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_subrev_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x0c,0xc9,0x7b,0x06,0x06,0xff] 
+0x7e,0x04,0x0c,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, exec_lo, v255 :: v_dual_fmamk_f32 v6, exec_lo, 0xaf123456, v255 ; encoding: [0x7e,0xfe,0x05,0xc9,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7e,0xfe,0x05,0xc9,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, exec_lo, v255 :: v_dual_mov_b32 v6, ttmp15 ; encoding: [0x7e,0xfe,0x11,0xc9,0x7b,0x00,0x06,0xff] +0x7e,0xfe,0x11,0xc9,0x7b,0x00,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, m0, v2 :: v_dual_add_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x08,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x08,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, m0, v2 :: v_dual_add_nc_u32 v6, m0, v3 ; encoding: [0x7d,0x04,0x20,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x20,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, m0, v2 :: v_dual_ashrrev_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0x2c,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x2c,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, m0, v2 :: v_dual_cndmask_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0x12,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x12,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, m0, v2 :: v_dual_fmaak_f32 v6, m0, v3, 0xaf123456 ; encoding: [0x7d,0x04,0x02,0xc9,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7d,0x04,0x02,0xc9,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, m0, v2 :: v_dual_fmac_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x00,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x00,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, m0, v2 :: v_dual_lshlrev_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0x22,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x22,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, m0, v2 :: v_dual_lshrrev_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0x2a,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x2a,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, m0, v2 :: v_dual_max_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0x2e,0xc9,0x7d,0x06,0x06,0xff] 
+0x7d,0x04,0x2e,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, m0, v2 :: v_dual_max_num_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x14,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x14,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, m0, v2 :: v_dual_min_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0x30,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x30,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, m0, v2 :: v_dual_min_num_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x16,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x16,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x0e,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x0e,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, m0, v2 :: v_dual_mul_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x06,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x06,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, m0, v2 :: v_dual_sub_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x0a,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x0a,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, m0, v2 :: v_dual_sub_nc_u32 v6, m0, v3 ; encoding: [0x7d,0x04,0x28,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x28,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, m0, v2 :: v_dual_subrev_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x0c,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x0c,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, m0, v255 :: v_dual_fmamk_f32 v6, m0, 0xaf123456, v255 ; encoding: [0x7d,0xfe,0x05,0xc9,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7d,0xfe,0x05,0xc9,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, m0, v255 :: v_dual_mov_b32 v6, m0 ; encoding: [0x7d,0xfe,0x11,0xc9,0x7d,0x00,0x06,0xff] +0x7d,0xfe,0x11,0xc9,0x7d,0x00,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, s1, v2 :: v_dual_add_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x08,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0x08,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, s1, v2 :: 
v_dual_add_nc_u32 v6, s105, v3 ; encoding: [0x01,0x04,0x20,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0x20,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, s1, v2 :: v_dual_ashrrev_i32 v6, s105, v3 ; encoding: [0x01,0x04,0x2c,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0x2c,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s1, v3 ; encoding: [0x01,0x04,0x12,0xc9,0x01,0x06,0x06,0xff] +0x01,0x04,0x12,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, s1, v2 :: v_dual_fmaak_f32 v6, s1, v3, 0xaf123456 ; encoding: [0x01,0x04,0x02,0xc9,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0x04,0x02,0xc9,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, s1, v2 :: v_dual_fmac_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x00,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0x00,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, s1, v2 :: v_dual_lshlrev_b32 v6, s105, v3 ; encoding: [0x01,0x04,0x22,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0x22,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, s1, v2 :: v_dual_lshrrev_b32 v6, s105, v3 ; encoding: [0x01,0x04,0x2a,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0x2a,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, s1, v2 :: v_dual_max_i32 v6, s105, v3 ; encoding: [0x01,0x04,0x2e,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0x2e,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, s1, v2 :: v_dual_max_num_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x14,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0x14,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, s1, v2 :: v_dual_min_i32 v6, s105, v3 ; encoding: [0x01,0x04,0x30,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0x30,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, s1, v2 :: v_dual_min_num_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x16,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0x16,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v6, s105, v3 ; encoding: 
[0x01,0x04,0x0e,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0x0e,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, s1, v2 :: v_dual_mul_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x06,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0x06,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, s1, v2 :: v_dual_sub_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x0a,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0x0a,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, s1, v2 :: v_dual_sub_nc_u32 v6, s105, v3 ; encoding: [0x01,0x04,0x28,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0x28,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, s1, v2 :: v_dual_subrev_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x0c,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0x0c,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, s1, v255 :: v_dual_fmamk_f32 v6, s1, 0xaf123456, v255 ; encoding: [0x01,0xfe,0x05,0xc9,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x01,0xfe,0x05,0xc9,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, s1, v255 :: v_dual_mov_b32 v6, s105 ; encoding: [0x01,0xfe,0x11,0xc9,0x69,0x00,0x06,0xff] +0x01,0xfe,0x11,0xc9,0x69,0x00,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, s105, v2 :: v_dual_add_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x08,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0x08,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, s105, v2 :: v_dual_add_nc_u32 v6, s1, v3 ; encoding: [0x69,0x04,0x20,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0x20,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, s105, v2 :: v_dual_ashrrev_i32 v6, s1, v3 ; encoding: [0x69,0x04,0x2c,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0x2c,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, s105, v2 :: v_dual_cndmask_b32 v6, s105, v3 ; encoding: [0x69,0x04,0x12,0xc9,0x69,0x06,0x06,0xff] +0x69,0x04,0x12,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, s105, v2 :: v_dual_fmaak_f32 v6, s105, v3, 0xaf123456 ; encoding: [0x69,0x04,0x02,0xc9,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] 
+0x69,0x04,0x02,0xc9,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, s105, v2 :: v_dual_fmac_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x00,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0x00,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, s105, v2 :: v_dual_lshlrev_b32 v6, s1, v3 ; encoding: [0x69,0x04,0x22,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0x22,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, s105, v2 :: v_dual_lshrrev_b32 v6, s1, v3 ; encoding: [0x69,0x04,0x2a,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0x2a,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, s105, v2 :: v_dual_max_i32 v6, s1, v3 ; encoding: [0x69,0x04,0x2e,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0x2e,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, s105, v2 :: v_dual_max_num_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x14,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0x14,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, s105, v2 :: v_dual_min_i32 v6, s1, v3 ; encoding: [0x69,0x04,0x30,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0x30,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, s105, v2 :: v_dual_min_num_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x16,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0x16,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x0e,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0x0e,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, s105, v2 :: v_dual_mul_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x06,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0x06,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, s105, v2 :: v_dual_sub_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x0a,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0x0a,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, s105, v2 :: v_dual_sub_nc_u32 v6, s1, v3 ; encoding: [0x69,0x04,0x28,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0x28,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, s105, v2 :: 
v_dual_subrev_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x0c,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0x0c,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, s105, v255 :: v_dual_fmamk_f32 v6, s105, 0xaf123456, v255 ; encoding: [0x69,0xfe,0x05,0xc9,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x69,0xfe,0x05,0xc9,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, s105, v255 :: v_dual_mov_b32 v6, s1 ; encoding: [0x69,0xfe,0x11,0xc9,0x01,0x00,0x06,0xff] +0x69,0xfe,0x11,0xc9,0x01,0x00,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, src_scc, v2 :: v_dual_add_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x08,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x08,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, src_scc, v2 :: v_dual_add_nc_u32 v6, -1, v3 ; encoding: [0xfd,0x04,0x20,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x20,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0x2c,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x2c,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, src_scc, v2 :: v_dual_cndmask_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0x12,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x12,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, src_scc, v2 :: v_dual_fmaak_f32 v6, -1, v3, 0xaf123456 ; encoding: [0xfd,0x04,0x02,0xc9,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xfd,0x04,0x02,0xc9,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, src_scc, v2 :: v_dual_fmac_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x00,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x00,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0x22,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x22,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0x2a,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x2a,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, 
src_scc, v2 :: v_dual_max_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0x2e,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x2e,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, src_scc, v2 :: v_dual_max_num_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x14,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x14,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, src_scc, v2 :: v_dual_min_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0x30,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x30,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, src_scc, v2 :: v_dual_min_num_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x16,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x16,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x0e,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x0e,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, src_scc, v2 :: v_dual_mul_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x06,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x06,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, src_scc, v2 :: v_dual_sub_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x0a,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x0a,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v6, -1, v3 ; encoding: [0xfd,0x04,0x28,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x28,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, src_scc, v2 :: v_dual_subrev_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x0c,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x0c,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, src_scc, v255 :: v_dual_fmamk_f32 v6, -1, 0xaf123456, v255 ; encoding: [0xfd,0xfe,0x05,0xc9,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xfd,0xfe,0x05,0xc9,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, src_scc, v255 :: v_dual_mov_b32 v6, -1 ; encoding: [0xfd,0xfe,0x11,0xc9,0xc1,0x00,0x06,0xff] +0xfd,0xfe,0x11,0xc9,0xc1,0x00,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_add_f32 v6, 
vcc_lo, v3 ; encoding: [0x7b,0x04,0x08,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x08,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x20,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x20,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x2c,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x2c,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x12,0xc9,0x7b,0x06,0x06,0xff] +0x7b,0x04,0x12,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_fmaak_f32 v6, ttmp15, v3, 0xaf123456 ; encoding: [0x7b,0x04,0x02,0xc9,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7b,0x04,0x02,0xc9,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_fmac_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x00,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x00,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x22,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x22,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x2a,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x2a,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_max_i32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x2e,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x2e,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_max_num_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x14,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x14,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_min_i32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x30,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x30,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_min_num_f32 v6, 
vcc_lo, v3 ; encoding: [0x7b,0x04,0x16,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x16,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x0e,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x0e,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_mul_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x06,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x06,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_sub_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x0a,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x0a,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_sub_nc_u32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x28,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x28,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_subrev_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x0c,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x0c,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, ttmp15, v255 :: v_dual_fmamk_f32 v6, ttmp15, 0xaf123456, v255 ; encoding: [0x7b,0xfe,0x05,0xc9,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7b,0xfe,0x05,0xc9,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, ttmp15, v255 :: v_dual_mov_b32 v6, vcc_lo ; encoding: [0x7b,0xfe,0x11,0xc9,0x6a,0x00,0x06,0xff] +0x7b,0xfe,0x11,0xc9,0x6a,0x00,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v1, v2 :: v_dual_add_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x08,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0x08,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v1, v2 :: v_dual_add_nc_u32 v6, v255, v3 ; encoding: [0x01,0x05,0x20,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0x20,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v1, v2 :: v_dual_ashrrev_i32 v6, v255, v3 ; encoding: [0x01,0x05,0x2c,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0x2c,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v1, v2 :: v_dual_cndmask_b32 v6, v255, v3 ; encoding: 
[0x01,0x05,0x12,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0x12,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v1, v2 :: v_dual_fmaak_f32 v6, v255, v3, 0xaf123456 ; encoding: [0x01,0x05,0x02,0xc9,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0x05,0x02,0xc9,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, v1, v2 :: v_dual_fmac_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x00,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0x00,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v1, v2 :: v_dual_lshlrev_b32 v6, v255, v3 ; encoding: [0x01,0x05,0x22,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0x22,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v1, v2 :: v_dual_lshrrev_b32 v6, v255, v3 ; encoding: [0x01,0x05,0x2a,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0x2a,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v1, v2 :: v_dual_max_i32 v6, v255, v3 ; encoding: [0x01,0x05,0x2e,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0x2e,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v1, v2 :: v_dual_max_num_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x14,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0x14,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v1, v2 :: v_dual_min_i32 v6, v255, v3 ; encoding: [0x01,0x05,0x30,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0x30,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v1, v2 :: v_dual_min_num_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x16,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0x16,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x0e,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0x0e,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v1, v2 :: v_dual_mul_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x06,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0x06,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v1, v2 :: v_dual_sub_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x0a,0xc9,0xff,0x07,0x06,0xff] 
+0x01,0x05,0x0a,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v1, v2 :: v_dual_sub_nc_u32 v6, v255, v3 ; encoding: [0x01,0x05,0x28,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0x28,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v1, v2 :: v_dual_subrev_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x0c,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0x0c,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v1, v255 :: v_dual_fmamk_f32 v6, v255, 0xaf123456, v255 ; encoding: [0x01,0xff,0x05,0xc9,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x01,0xff,0x05,0xc9,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, v1, v255 :: v_dual_mov_b32 v6, v255 ; encoding: [0x01,0xff,0x11,0xc9,0xff,0x01,0x06,0xff] +0x01,0xff,0x11,0xc9,0xff,0x01,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v2, v2 :: v_dual_add_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x08,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0x08,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v2, v2 :: v_dual_add_nc_u32 v6, v3, v3 ; encoding: [0x02,0x05,0x20,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0x20,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v2, v2 :: v_dual_ashrrev_i32 v6, v3, v3 ; encoding: [0x02,0x05,0x2c,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0x2c,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v2, v2 :: v_dual_cndmask_b32 v6, v3, v3 ; encoding: [0x02,0x05,0x12,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0x12,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v2, v2 :: v_dual_fmaak_f32 v6, v3, v3, 0xaf123456 ; encoding: [0x02,0x05,0x02,0xc9,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x02,0x05,0x02,0xc9,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, v2, v2 :: v_dual_fmac_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x00,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0x00,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v2, v2 :: v_dual_lshlrev_b32 v6, v3, v3 ; encoding: [0x02,0x05,0x22,0xc9,0x03,0x07,0x06,0xff] 
+0x02,0x05,0x22,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v2, v2 :: v_dual_lshrrev_b32 v6, v3, v3 ; encoding: [0x02,0x05,0x2a,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0x2a,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v2, v2 :: v_dual_max_i32 v6, v3, v3 ; encoding: [0x02,0x05,0x2e,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0x2e,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v2, v2 :: v_dual_max_num_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x14,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0x14,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v2, v2 :: v_dual_min_i32 v6, v3, v3 ; encoding: [0x02,0x05,0x30,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0x30,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v2, v2 :: v_dual_min_num_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x16,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0x16,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x0e,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0x0e,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v2, v2 :: v_dual_mul_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x06,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0x06,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v2, v2 :: v_dual_sub_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x0a,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0x0a,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v2, v2 :: v_dual_sub_nc_u32 v6, v3, v3 ; encoding: [0x02,0x05,0x28,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0x28,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v2, v2 :: v_dual_subrev_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x0c,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0x0c,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v2, v255 :: v_dual_fmamk_f32 v6, v3, 0xaf123456, v255 ; encoding: [0x02,0xff,0x05,0xc9,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x02,0xff,0x05,0xc9,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, v2, v255 :: 
v_dual_mov_b32 v6, v3 ; encoding: [0x02,0xff,0x11,0xc9,0x03,0x01,0x06,0xff] +0x02,0xff,0x11,0xc9,0x03,0x01,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v255, v2 :: v_dual_add_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x08,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0x08,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v255, v2 :: v_dual_add_nc_u32 v6, v2, v3 ; encoding: [0xff,0x05,0x20,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0x20,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v255, v2 :: v_dual_ashrrev_i32 v6, v2, v3 ; encoding: [0xff,0x05,0x2c,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0x2c,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v255, v2 :: v_dual_cndmask_b32 v6, v2, v3 ; encoding: [0xff,0x05,0x12,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0x12,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v255, v2 :: v_dual_fmaak_f32 v6, v2, v3, 0xaf123456 ; encoding: [0xff,0x05,0x02,0xc9,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x05,0x02,0xc9,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, v255, v2 :: v_dual_fmac_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x00,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0x00,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v255, v2 :: v_dual_lshlrev_b32 v6, v2, v3 ; encoding: [0xff,0x05,0x22,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0x22,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v255, v2 :: v_dual_lshrrev_b32 v6, v2, v3 ; encoding: [0xff,0x05,0x2a,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0x2a,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v255, v2 :: v_dual_max_i32 v6, v2, v3 ; encoding: [0xff,0x05,0x2e,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0x2e,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v255, v2 :: v_dual_max_num_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x14,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0x14,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v255, v2 :: v_dual_min_i32 v6, v2, v3 ; encoding: 
[0xff,0x05,0x30,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0x30,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v255, v2 :: v_dual_min_num_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x16,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0x16,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x0e,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0x0e,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v255, v2 :: v_dual_mul_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x06,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0x06,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v255, v2 :: v_dual_sub_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x0a,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0x0a,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v255, v2 :: v_dual_sub_nc_u32 v6, v2, v3 ; encoding: [0xff,0x05,0x28,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0x28,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v255, v2 :: v_dual_subrev_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x0c,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0x0c,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v255, v255 :: v_dual_fmamk_f32 v6, v2, 0xaf123456, v255 ; encoding: [0xff,0xff,0x05,0xc9,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0xff,0x05,0xc9,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, v255, v255 :: v_dual_mov_b32 v6, v2 ; encoding: [0xff,0xff,0x11,0xc9,0x02,0x01,0x06,0xff] +0xff,0xff,0x11,0xc9,0x02,0x01,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v3, v2 :: v_dual_add_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x08,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0x08,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v3, v2 :: v_dual_add_nc_u32 v6, v4, v3 ; encoding: [0x03,0x05,0x20,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0x20,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v3, v2 :: v_dual_ashrrev_i32 v6, v4, v3 ; encoding: [0x03,0x05,0x2c,0xc9,0x04,0x07,0x06,0xff] 
+0x03,0x05,0x2c,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v3, v2 :: v_dual_cndmask_b32 v6, v4, v3 ; encoding: [0x03,0x05,0x12,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0x12,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v3, v2 :: v_dual_fmaak_f32 v6, v4, v3, 0xaf123456 ; encoding: [0x03,0x05,0x02,0xc9,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x03,0x05,0x02,0xc9,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, v3, v2 :: v_dual_fmac_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x00,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0x00,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v3, v2 :: v_dual_lshlrev_b32 v6, v4, v3 ; encoding: [0x03,0x05,0x22,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0x22,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v3, v2 :: v_dual_lshrrev_b32 v6, v4, v3 ; encoding: [0x03,0x05,0x2a,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0x2a,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v3, v2 :: v_dual_max_i32 v6, v4, v3 ; encoding: [0x03,0x05,0x2e,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0x2e,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v3, v2 :: v_dual_max_num_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x14,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0x14,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v3, v2 :: v_dual_min_i32 v6, v4, v3 ; encoding: [0x03,0x05,0x30,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0x30,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v3, v2 :: v_dual_min_num_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x16,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0x16,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x0e,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0x0e,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v3, v2 :: v_dual_mul_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x06,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0x06,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v3, v2 :: 
v_dual_sub_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x0a,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0x0a,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v3, v2 :: v_dual_sub_nc_u32 v6, v4, v3 ; encoding: [0x03,0x05,0x28,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0x28,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v3, v2 :: v_dual_subrev_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x0c,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0x0c,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v3, v255 :: v_dual_fmamk_f32 v6, v4, 0xaf123456, v255 ; encoding: [0x03,0xff,0x05,0xc9,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x03,0xff,0x05,0xc9,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, v3, v255 :: v_dual_mov_b32 v6, v4 ; encoding: [0x03,0xff,0x11,0xc9,0x04,0x01,0x06,0xff] +0x03,0xff,0x11,0xc9,0x04,0x01,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_add_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x08,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0x08,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_add_nc_u32 v6, v1, v3 ; encoding: [0x04,0x05,0x20,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0x20,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_ashrrev_i32 v6, v1, v3 ; encoding: [0x04,0x05,0x2c,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0x2c,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_cndmask_b32 v6, v1, v3 ; encoding: [0x04,0x05,0x12,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0x12,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_fmaak_f32 v6, v1, v3, 0xaf123456 ; encoding: [0x04,0x05,0x02,0xc9,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x04,0x05,0x02,0xc9,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_fmac_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x00,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0x00,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_lshlrev_b32 v6, v1, v3 ; encoding: 
[0x04,0x05,0x22,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0x22,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_lshrrev_b32 v6, v1, v3 ; encoding: [0x04,0x05,0x2a,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0x2a,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_max_i32 v6, v1, v3 ; encoding: [0x04,0x05,0x2e,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0x2e,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_max_num_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x14,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0x14,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_min_i32 v6, v1, v3 ; encoding: [0x04,0x05,0x30,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0x30,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_min_num_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x16,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0x16,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x0e,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0x0e,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_mul_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x06,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0x06,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_sub_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x0a,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0x0a,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_sub_nc_u32 v6, v1, v3 ; encoding: [0x04,0x05,0x28,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0x28,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_subrev_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x0c,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0x0c,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, v4, v255 :: v_dual_fmamk_f32 v6, v1, 0xaf123456, v255 ; encoding: [0x04,0xff,0x05,0xc9,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x04,0xff,0x05,0xc9,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# 
GFX1250: v_dual_add_f32 v255, v4, v255 :: v_dual_mov_b32 v6, v1 ; encoding: [0x04,0xff,0x11,0xc9,0x01,0x01,0x06,0xff] +0x04,0xff,0x11,0xc9,0x01,0x01,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_add_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x08,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x08,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x20,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x20,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x2c,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x2c,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x12,0xc9,0x6b,0x06,0x06,0xff] +0x6b,0x04,0x12,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_fmaak_f32 v6, vcc_hi, v3, 0xaf123456 ; encoding: [0x6b,0x04,0x02,0xc9,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6b,0x04,0x02,0xc9,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x00,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x00,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x22,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x22,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x2a,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x2a,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_max_i32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x2e,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x2e,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x14,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x14,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: 
v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_min_i32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x30,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x30,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x16,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x16,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x0e,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x0e,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_mul_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x06,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x06,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_sub_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x0a,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x0a,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x28,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x28,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x0c,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x0c,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, vcc_hi, v255 :: v_dual_fmamk_f32 v6, vcc_hi, 0xaf123456, v255 ; encoding: [0x6b,0xfe,0x05,0xc9,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6b,0xfe,0x05,0xc9,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, vcc_hi, v255 :: v_dual_mov_b32 v6, exec_lo ; encoding: [0x6b,0xfe,0x11,0xc9,0x7e,0x00,0x06,0xff] +0x6b,0xfe,0x11,0xc9,0x7e,0x00,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_add_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x08,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x08,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x20,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x20,0xc9,0x7f,0x06,0x06,0xff + +# 
GFX1250: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x2c,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x2c,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x12,0xc9,0x6a,0x06,0x06,0xff] +0x6a,0x04,0x12,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_fmaak_f32 v6, vcc_lo, v3, 0xaf123456 ; encoding: [0x6a,0x04,0x02,0xc9,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6a,0x04,0x02,0xc9,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x00,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x00,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x22,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x22,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x2a,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x2a,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_max_i32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x2e,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x2e,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x14,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x14,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_min_i32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x30,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x30,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x16,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x16,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x0e,0xc9,0x7f,0x06,0x06,0xff] 
+0x6a,0x04,0x0e,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_mul_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x06,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x06,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_sub_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x0a,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x0a,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x28,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x28,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x0c,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x0c,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_add_f32 v255, vcc_lo, v255 :: v_dual_fmamk_f32 v6, vcc_lo, 0xaf123456, v255 ; encoding: [0x6a,0xfe,0x05,0xc9,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6a,0xfe,0x05,0xc9,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v255, vcc_lo, v255 :: v_dual_mov_b32 v6, exec_hi ; encoding: [0x6a,0xfe,0x11,0xc9,0x7f,0x00,0x06,0xff] +0x6a,0xfe,0x11,0xc9,0x7f,0x00,0x06,0xff + +# GFX1250: v_dual_add_f32 v6, null, v5 :: v_dual_add_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x08,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x08,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v6, null, v5 :: v_dual_add_nc_u32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x20,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x20,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v6, null, v5 :: v_dual_ashrrev_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x2c,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x2c,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v6, null, v5 :: v_dual_cndmask_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x12,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] 
+0x7c,0x0a,0x12,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v6, null, v5 :: v_dual_fmaak_f32 v255, 0xaf123456, v4, 0xaf123456 ; encoding: [0x7c,0x0a,0x02,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x02,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v6, null, v5 :: v_dual_fmac_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x00,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x00,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v6, null, v5 :: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x04,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x04,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v6, null, v5 :: v_dual_lshlrev_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x22,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x22,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v6, null, v5 :: v_dual_lshrrev_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x2a,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x2a,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v6, null, v5 :: v_dual_max_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x2e,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x2e,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v6, null, v5 :: v_dual_max_num_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x14,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x14,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v6, null, v5 :: v_dual_min_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x30,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x30,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v6, null, v5 :: v_dual_min_num_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x16,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] 
+0x7c,0x0a,0x16,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v6, null, v5 :: v_dual_mov_b32 v255, 0xaf123456 ; encoding: [0x7c,0x0a,0x10,0xc9,0xff,0x00,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x10,0xc9,0xff,0x00,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v6, null, v5 :: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x0e,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x0e,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v6, null, v5 :: v_dual_mul_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x06,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x06,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v6, null, v5 :: v_dual_sub_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x0a,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x0a,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v6, null, v5 :: v_dual_sub_nc_u32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x28,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x28,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_add_f32 v6, null, v5 :: v_dual_subrev_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x0c,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x0c,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v255, -1, v4 :: v_dual_add_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x48,0xca,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x48,0xca,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, -1, v4 :: v_dual_add_nc_u32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x60,0xca,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x60,0xca,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, -1, v4 :: v_dual_ashrrev_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x6c,0xca,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x6c,0xca,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, -1, v4 :: v_dual_cndmask_b32 v6, src_scc, v5 ; encoding: 
[0xc1,0x08,0x52,0xca,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x52,0xca,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, -1, v4 :: v_dual_fmaak_f32 v6, 0.5, v5, 0xaf123456 ; encoding: [0xc1,0x08,0x42,0xca,0xf0,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +0xc1,0x08,0x42,0xca,0xf0,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v255, -1, v4 :: v_dual_fmac_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x40,0xca,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x40,0xca,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, -1, v4 :: v_dual_fmamk_f32 v6, 0.5, 0xaf123456, v255 ; encoding: [0xc1,0x08,0x44,0xca,0xf0,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xc1,0x08,0x44,0xca,0xf0,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v255, -1, v4 :: v_dual_lshlrev_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x62,0xca,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x62,0xca,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, -1, v4 :: v_dual_lshrrev_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x6a,0xca,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x6a,0xca,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, -1, v4 :: v_dual_max_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x6e,0xca,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x6e,0xca,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, -1, v4 :: v_dual_max_num_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x54,0xca,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x54,0xca,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, -1, v4 :: v_dual_min_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x70,0xca,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x70,0xca,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, -1, v4 :: v_dual_min_num_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x56,0xca,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x56,0xca,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, -1, v4 :: v_dual_mov_b32 v6, src_scc ; encoding: [0xc1,0x08,0x50,0xca,0xfd,0x00,0x06,0xff] +0xc1,0x08,0x50,0xca,0xfd,0x00,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, -1, v4 :: 
v_dual_mul_dx9_zero_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x4e,0xca,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x4e,0xca,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, -1, v4 :: v_dual_mul_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x46,0xca,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x46,0xca,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, -1, v4 :: v_dual_sub_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x4a,0xca,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x4a,0xca,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, -1, v4 :: v_dual_sub_nc_u32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x68,0xca,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x68,0xca,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, -1, v4 :: v_dual_subrev_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x4c,0xca,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x4c,0xca,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_add_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x48,0xca,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x48,0xca,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_add_nc_u32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x60,0xca,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x60,0xca,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x6c,0xca,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x6c,0xca,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_cndmask_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x52,0xca,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x52,0xca,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_fmaak_f32 v6, -1, v2, 0xaf123456 ; encoding: [0xf0,0x06,0x42,0xca,0xc1,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +0xf0,0x06,0x42,0xca,0xc1,0x04,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_fmac_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x40,0xca,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x40,0xca,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, 0.5, v3 :: 
v_dual_fmamk_f32 v6, -1, 0xaf123456, v255 ; encoding: [0xf0,0x06,0x44,0xca,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xf0,0x06,0x44,0xca,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x62,0xca,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x62,0xca,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x6a,0xca,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x6a,0xca,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_max_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x6e,0xca,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x6e,0xca,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_max_num_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x54,0xca,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x54,0xca,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_min_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x70,0xca,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x70,0xca,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_min_num_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x56,0xca,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x56,0xca,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_mov_b32 v6, 0.5 ; encoding: [0xf0,0x06,0x50,0xca,0xf0,0x00,0x06,0xff] +0xf0,0x06,0x50,0xca,0xf0,0x00,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x4e,0xca,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x4e,0xca,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_mul_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x46,0xca,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x46,0xca,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_sub_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x4a,0xca,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x4a,0xca,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v6, 0.5, v2 ; 
encoding: [0xf0,0x06,0x68,0xca,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x68,0xca,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, 0.5, v3 :: v_dual_subrev_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x4c,0xca,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x4c,0xca,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_add_f32 v6, null, v3 ; encoding: [0xff,0x04,0x48,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x48,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_add_nc_u32 v6, null, v3 ; encoding: [0xff,0x04,0x60,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x60,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_ashrrev_i32 v6, null, v3 ; encoding: [0xff,0x04,0x6c,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x6c,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_cndmask_b32 v6, null, v3 ; encoding: [0xff,0x04,0x52,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x52,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_fmaak_f32 v6, null, v3, 0xaf123456 ; encoding: [0xff,0x04,0x42,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x42,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_fmac_f32 v6, null, v3 ; encoding: [0xff,0x04,0x40,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x40,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_lshlrev_b32 v6, null, v3 ; encoding: [0xff,0x04,0x62,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x62,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_lshrrev_b32 v6, null, v3 ; encoding: 
[0xff,0x04,0x6a,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x6a,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_max_i32 v6, null, v3 ; encoding: [0xff,0x04,0x6e,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x6e,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_max_num_f32 v6, null, v3 ; encoding: [0xff,0x04,0x54,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x54,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_min_i32 v6, null, v3 ; encoding: [0xff,0x04,0x70,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x70,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_min_num_f32 v6, null, v3 ; encoding: [0xff,0x04,0x56,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x56,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_mul_dx9_zero_f32 v6, null, v3 ; encoding: [0xff,0x04,0x4e,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x4e,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_mul_f32 v6, null, v3 ; encoding: [0xff,0x04,0x46,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x46,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_sub_f32 v6, null, v3 ; encoding: [0xff,0x04,0x4a,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x4a,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_sub_nc_u32 v6, null, v3 ; encoding: [0xff,0x04,0x68,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x68,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v255, 0xaf123456, v2 :: v_dual_subrev_f32 v6, null, v3 ; 
encoding: [0xff,0x04,0x4c,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x4c,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v255, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, null, 0xaf123456, v255 ; encoding: [0xff,0xfe,0x45,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0xfe,0x45,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v255, 0xaf123456, v255 :: v_dual_mov_b32 v6, null ; encoding: [0xff,0xfe,0x51,0xca,0x7c,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0xfe,0x51,0xca,0x7c,0x00,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v255, exec_hi, v2 :: v_dual_add_f32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x48,0xca,0x7f,0x06,0x06,0xff] +0x7f,0x04,0x48,0xca,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x60,0xca,0x7f,0x06,0x06,0xff] +0x7f,0x04,0x60,0xca,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x6c,0xca,0x7f,0x06,0x06,0xff] +0x7f,0x04,0x6c,0xca,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x52,0xca,0x7f,0x06,0x06,0xff] +0x7f,0x04,0x52,0xca,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, exec_hi, v2 :: v_dual_fmac_f32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x40,0xca,0x7f,0x06,0x06,0xff] +0x7f,0x04,0x40,0xca,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x62,0xca,0x7f,0x06,0x06,0xff] +0x7f,0x04,0x62,0xca,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x6a,0xca,0x7f,0x06,0x06,0xff] +0x7f,0x04,0x6a,0xca,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, exec_hi, v2 :: v_dual_max_i32 v6, exec_hi, v3 ; encoding: 
[0x7f,0x04,0x6e,0xca,0x7f,0x06,0x06,0xff] +0x7f,0x04,0x6e,0xca,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, exec_hi, v2 :: v_dual_max_num_f32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x54,0xca,0x7f,0x06,0x06,0xff] +0x7f,0x04,0x54,0xca,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, exec_hi, v2 :: v_dual_min_i32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x70,0xca,0x7f,0x06,0x06,0xff] +0x7f,0x04,0x70,0xca,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, exec_hi, v2 :: v_dual_min_num_f32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x56,0xca,0x7f,0x06,0x06,0xff] +0x7f,0x04,0x56,0xca,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x4e,0xca,0x7f,0x06,0x06,0xff] +0x7f,0x04,0x4e,0xca,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, exec_hi, v2 :: v_dual_mul_f32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x46,0xca,0x7f,0x06,0x06,0xff] +0x7f,0x04,0x46,0xca,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, exec_hi, v2 :: v_dual_sub_f32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x4a,0xca,0x7f,0x06,0x06,0xff] +0x7f,0x04,0x4a,0xca,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x68,0xca,0x7f,0x06,0x06,0xff] +0x7f,0x04,0x68,0xca,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, exec_hi, v2 :: v_dual_subrev_f32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x4c,0xca,0x7f,0x06,0x06,0xff] +0x7f,0x04,0x4c,0xca,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, exec_hi, v255 :: v_dual_mov_b32 v6, exec_hi ; encoding: [0x7f,0xfe,0x51,0xca,0x7f,0x00,0x06,0xff] +0x7f,0xfe,0x51,0xca,0x7f,0x00,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, exec_lo, v2 :: v_dual_add_f32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x48,0xca,0x7e,0x06,0x06,0xff] +0x7e,0x04,0x48,0xca,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v6, exec_lo, v3 ; 
encoding: [0x7e,0x04,0x60,0xca,0x7e,0x06,0x06,0xff] +0x7e,0x04,0x60,0xca,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x6c,0xca,0x7e,0x06,0x06,0xff] +0x7e,0x04,0x6c,0xca,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x52,0xca,0x7e,0x06,0x06,0xff] +0x7e,0x04,0x52,0xca,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, exec_lo, v2 :: v_dual_fmac_f32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x40,0xca,0x7e,0x06,0x06,0xff] +0x7e,0x04,0x40,0xca,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x62,0xca,0x7e,0x06,0x06,0xff] +0x7e,0x04,0x62,0xca,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, exec_lo, v2 :: v_dual_lshrrev_b32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x6a,0xca,0x7e,0x06,0x06,0xff] +0x7e,0x04,0x6a,0xca,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, exec_lo, v2 :: v_dual_max_i32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x6e,0xca,0x7e,0x06,0x06,0xff] +0x7e,0x04,0x6e,0xca,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, exec_lo, v2 :: v_dual_max_num_f32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x54,0xca,0x7e,0x06,0x06,0xff] +0x7e,0x04,0x54,0xca,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, exec_lo, v2 :: v_dual_min_i32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x70,0xca,0x7e,0x06,0x06,0xff] +0x7e,0x04,0x70,0xca,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, exec_lo, v2 :: v_dual_min_num_f32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x56,0xca,0x7e,0x06,0x06,0xff] +0x7e,0x04,0x56,0xca,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x4e,0xca,0x7e,0x06,0x06,0xff] +0x7e,0x04,0x4e,0xca,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, exec_lo, v2 :: v_dual_mul_f32 v6, 
exec_lo, v3 ; encoding: [0x7e,0x04,0x46,0xca,0x7e,0x06,0x06,0xff] +0x7e,0x04,0x46,0xca,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, exec_lo, v2 :: v_dual_sub_f32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x4a,0xca,0x7e,0x06,0x06,0xff] +0x7e,0x04,0x4a,0xca,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, exec_lo, v2 :: v_dual_sub_nc_u32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x68,0xca,0x7e,0x06,0x06,0xff] +0x7e,0x04,0x68,0xca,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, exec_lo, v2 :: v_dual_subrev_f32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x4c,0xca,0x7e,0x06,0x06,0xff] +0x7e,0x04,0x4c,0xca,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, exec_lo, v255 :: v_dual_mov_b32 v6, exec_lo ; encoding: [0x7e,0xfe,0x51,0xca,0x7e,0x00,0x06,0xff] +0x7e,0xfe,0x51,0xca,0x7e,0x00,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, m0, v2 :: v_dual_add_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x48,0xca,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x48,0xca,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, m0, v2 :: v_dual_add_nc_u32 v6, m0, v3 ; encoding: [0x7d,0x04,0x60,0xca,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x60,0xca,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, m0, v2 :: v_dual_ashrrev_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0x6c,0xca,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x6c,0xca,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, m0, v2 :: v_dual_cndmask_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0x52,0xca,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x52,0xca,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, m0, v2 :: v_dual_fmac_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x40,0xca,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x40,0xca,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, m0, v2 :: v_dual_lshlrev_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0x62,0xca,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x62,0xca,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, m0, v2 :: v_dual_lshrrev_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0x6a,0xca,0x7d,0x06,0x06,0xff] 
+0x7d,0x04,0x6a,0xca,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, m0, v2 :: v_dual_max_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0x6e,0xca,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x6e,0xca,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, m0, v2 :: v_dual_max_num_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x54,0xca,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x54,0xca,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, m0, v2 :: v_dual_min_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0x70,0xca,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x70,0xca,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, m0, v2 :: v_dual_min_num_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x56,0xca,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x56,0xca,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x4e,0xca,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x4e,0xca,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, m0, v2 :: v_dual_mul_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x46,0xca,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x46,0xca,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, m0, v2 :: v_dual_sub_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x4a,0xca,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x4a,0xca,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, m0, v2 :: v_dual_sub_nc_u32 v6, m0, v3 ; encoding: [0x7d,0x04,0x68,0xca,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x68,0xca,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, m0, v2 :: v_dual_subrev_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x4c,0xca,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x4c,0xca,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, m0, v255 :: v_dual_mov_b32 v6, m0 ; encoding: [0x7d,0xfe,0x51,0xca,0x7d,0x00,0x06,0xff] +0x7d,0xfe,0x51,0xca,0x7d,0x00,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, s1, v2 :: v_dual_add_f32 v6, s1, v3 ; encoding: [0x01,0x04,0x48,0xca,0x01,0x06,0x06,0xff] +0x01,0x04,0x48,0xca,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, s1, v2 :: v_dual_add_nc_u32 
v6, s1, v3 ; encoding: [0x01,0x04,0x60,0xca,0x01,0x06,0x06,0xff] +0x01,0x04,0x60,0xca,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, s1, v2 :: v_dual_ashrrev_i32 v6, s1, v3 ; encoding: [0x01,0x04,0x6c,0xca,0x01,0x06,0x06,0xff] +0x01,0x04,0x6c,0xca,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s1, v3 ; encoding: [0x01,0x04,0x52,0xca,0x01,0x06,0x06,0xff] +0x01,0x04,0x52,0xca,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, s1, v2 :: v_dual_fmac_f32 v6, s1, v3 ; encoding: [0x01,0x04,0x40,0xca,0x01,0x06,0x06,0xff] +0x01,0x04,0x40,0xca,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, s1, v2 :: v_dual_lshlrev_b32 v6, s1, v3 ; encoding: [0x01,0x04,0x62,0xca,0x01,0x06,0x06,0xff] +0x01,0x04,0x62,0xca,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, s1, v2 :: v_dual_lshrrev_b32 v6, s1, v3 ; encoding: [0x01,0x04,0x6a,0xca,0x01,0x06,0x06,0xff] +0x01,0x04,0x6a,0xca,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, s1, v2 :: v_dual_max_i32 v6, s1, v3 ; encoding: [0x01,0x04,0x6e,0xca,0x01,0x06,0x06,0xff] +0x01,0x04,0x6e,0xca,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, s1, v2 :: v_dual_max_num_f32 v6, s1, v3 ; encoding: [0x01,0x04,0x54,0xca,0x01,0x06,0x06,0xff] +0x01,0x04,0x54,0xca,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, s1, v2 :: v_dual_min_i32 v6, s1, v3 ; encoding: [0x01,0x04,0x70,0xca,0x01,0x06,0x06,0xff] +0x01,0x04,0x70,0xca,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, s1, v2 :: v_dual_min_num_f32 v6, s1, v3 ; encoding: [0x01,0x04,0x56,0xca,0x01,0x06,0x06,0xff] +0x01,0x04,0x56,0xca,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v6, s1, v3 ; encoding: [0x01,0x04,0x4e,0xca,0x01,0x06,0x06,0xff] +0x01,0x04,0x4e,0xca,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, s1, v2 :: v_dual_mul_f32 v6, s1, v3 ; encoding: [0x01,0x04,0x46,0xca,0x01,0x06,0x06,0xff] 
+0x01,0x04,0x46,0xca,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, s1, v2 :: v_dual_sub_f32 v6, s1, v3 ; encoding: [0x01,0x04,0x4a,0xca,0x01,0x06,0x06,0xff] +0x01,0x04,0x4a,0xca,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, s1, v2 :: v_dual_sub_nc_u32 v6, s1, v3 ; encoding: [0x01,0x04,0x68,0xca,0x01,0x06,0x06,0xff] +0x01,0x04,0x68,0xca,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, s1, v2 :: v_dual_subrev_f32 v6, s1, v3 ; encoding: [0x01,0x04,0x4c,0xca,0x01,0x06,0x06,0xff] +0x01,0x04,0x4c,0xca,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, s1, v255 :: v_dual_mov_b32 v6, s1 ; encoding: [0x01,0xfe,0x51,0xca,0x01,0x00,0x06,0xff] +0x01,0xfe,0x51,0xca,0x01,0x00,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, s105, v2 :: v_dual_add_f32 v6, s105, v3 ; encoding: [0x69,0x04,0x48,0xca,0x69,0x06,0x06,0xff] +0x69,0x04,0x48,0xca,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, s105, v2 :: v_dual_add_nc_u32 v6, s105, v3 ; encoding: [0x69,0x04,0x60,0xca,0x69,0x06,0x06,0xff] +0x69,0x04,0x60,0xca,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, s105, v2 :: v_dual_ashrrev_i32 v6, s105, v3 ; encoding: [0x69,0x04,0x6c,0xca,0x69,0x06,0x06,0xff] +0x69,0x04,0x6c,0xca,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, s105, v2 :: v_dual_cndmask_b32 v6, s105, v3 ; encoding: [0x69,0x04,0x52,0xca,0x69,0x06,0x06,0xff] +0x69,0x04,0x52,0xca,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, s105, v2 :: v_dual_fmac_f32 v6, s105, v3 ; encoding: [0x69,0x04,0x40,0xca,0x69,0x06,0x06,0xff] +0x69,0x04,0x40,0xca,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, s105, v2 :: v_dual_lshlrev_b32 v6, s105, v3 ; encoding: [0x69,0x04,0x62,0xca,0x69,0x06,0x06,0xff] +0x69,0x04,0x62,0xca,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, s105, v2 :: v_dual_lshrrev_b32 v6, s105, v3 ; encoding: [0x69,0x04,0x6a,0xca,0x69,0x06,0x06,0xff] +0x69,0x04,0x6a,0xca,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 
v255, s105, v2 :: v_dual_max_i32 v6, s105, v3 ; encoding: [0x69,0x04,0x6e,0xca,0x69,0x06,0x06,0xff] +0x69,0x04,0x6e,0xca,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, s105, v2 :: v_dual_max_num_f32 v6, s105, v3 ; encoding: [0x69,0x04,0x54,0xca,0x69,0x06,0x06,0xff] +0x69,0x04,0x54,0xca,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, s105, v2 :: v_dual_min_i32 v6, s105, v3 ; encoding: [0x69,0x04,0x70,0xca,0x69,0x06,0x06,0xff] +0x69,0x04,0x70,0xca,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, s105, v2 :: v_dual_min_num_f32 v6, s105, v3 ; encoding: [0x69,0x04,0x56,0xca,0x69,0x06,0x06,0xff] +0x69,0x04,0x56,0xca,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v6, s105, v3 ; encoding: [0x69,0x04,0x4e,0xca,0x69,0x06,0x06,0xff] +0x69,0x04,0x4e,0xca,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, s105, v2 :: v_dual_mul_f32 v6, s105, v3 ; encoding: [0x69,0x04,0x46,0xca,0x69,0x06,0x06,0xff] +0x69,0x04,0x46,0xca,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, s105, v2 :: v_dual_sub_f32 v6, s105, v3 ; encoding: [0x69,0x04,0x4a,0xca,0x69,0x06,0x06,0xff] +0x69,0x04,0x4a,0xca,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, s105, v2 :: v_dual_sub_nc_u32 v6, s105, v3 ; encoding: [0x69,0x04,0x68,0xca,0x69,0x06,0x06,0xff] +0x69,0x04,0x68,0xca,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, s105, v2 :: v_dual_subrev_f32 v6, s105, v3 ; encoding: [0x69,0x04,0x4c,0xca,0x69,0x06,0x06,0xff] +0x69,0x04,0x4c,0xca,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, s105, v255 :: v_dual_mov_b32 v6, s105 ; encoding: [0x69,0xfe,0x51,0xca,0x69,0x00,0x06,0xff] +0x69,0xfe,0x51,0xca,0x69,0x00,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, src_scc, v2 :: v_dual_add_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x48,0xca,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x48,0xca,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, src_scc, v2 :: v_dual_add_nc_u32 v6, -1, v3 ; encoding: 
[0xfd,0x04,0x60,0xca,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x60,0xca,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0x6c,0xca,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x6c,0xca,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, src_scc, v2 :: v_dual_cndmask_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0x52,0xca,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x52,0xca,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, src_scc, v2 :: v_dual_fmac_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x40,0xca,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x40,0xca,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0x62,0xca,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x62,0xca,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0x6a,0xca,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x6a,0xca,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, src_scc, v2 :: v_dual_max_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0x6e,0xca,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x6e,0xca,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, src_scc, v2 :: v_dual_max_num_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x54,0xca,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x54,0xca,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, src_scc, v2 :: v_dual_min_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0x70,0xca,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x70,0xca,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, src_scc, v2 :: v_dual_min_num_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x56,0xca,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x56,0xca,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x4e,0xca,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x4e,0xca,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, src_scc, v2 :: v_dual_mul_f32 v6, -1, v3 ; encoding: 
[0xfd,0x04,0x46,0xca,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x46,0xca,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, src_scc, v2 :: v_dual_sub_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x4a,0xca,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x4a,0xca,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v6, -1, v3 ; encoding: [0xfd,0x04,0x68,0xca,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x68,0xca,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, src_scc, v2 :: v_dual_subrev_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x4c,0xca,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x4c,0xca,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, src_scc, v255 :: v_dual_mov_b32 v6, -1 ; encoding: [0xfd,0xfe,0x51,0xca,0xc1,0x00,0x06,0xff] +0xfd,0xfe,0x51,0xca,0xc1,0x00,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, ttmp15, v2 :: v_dual_add_f32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x48,0xca,0x7b,0x06,0x06,0xff] +0x7b,0x04,0x48,0xca,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x60,0xca,0x7b,0x06,0x06,0xff] +0x7b,0x04,0x60,0xca,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x6c,0xca,0x7b,0x06,0x06,0xff] +0x7b,0x04,0x6c,0xca,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x52,0xca,0x7b,0x06,0x06,0xff] +0x7b,0x04,0x52,0xca,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, ttmp15, v2 :: v_dual_fmac_f32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x40,0xca,0x7b,0x06,0x06,0xff] +0x7b,0x04,0x40,0xca,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x62,0xca,0x7b,0x06,0x06,0xff] +0x7b,0x04,0x62,0xca,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v6, ttmp15, v3 ; encoding: 
[0x7b,0x04,0x6a,0xca,0x7b,0x06,0x06,0xff] +0x7b,0x04,0x6a,0xca,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, ttmp15, v2 :: v_dual_max_i32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x6e,0xca,0x7b,0x06,0x06,0xff] +0x7b,0x04,0x6e,0xca,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, ttmp15, v2 :: v_dual_max_num_f32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x54,0xca,0x7b,0x06,0x06,0xff] +0x7b,0x04,0x54,0xca,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, ttmp15, v2 :: v_dual_min_i32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x70,0xca,0x7b,0x06,0x06,0xff] +0x7b,0x04,0x70,0xca,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, ttmp15, v2 :: v_dual_min_num_f32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x56,0xca,0x7b,0x06,0x06,0xff] +0x7b,0x04,0x56,0xca,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x4e,0xca,0x7b,0x06,0x06,0xff] +0x7b,0x04,0x4e,0xca,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, ttmp15, v2 :: v_dual_mul_f32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x46,0xca,0x7b,0x06,0x06,0xff] +0x7b,0x04,0x46,0xca,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, ttmp15, v2 :: v_dual_sub_f32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x4a,0xca,0x7b,0x06,0x06,0xff] +0x7b,0x04,0x4a,0xca,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, ttmp15, v2 :: v_dual_sub_nc_u32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x68,0xca,0x7b,0x06,0x06,0xff] +0x7b,0x04,0x68,0xca,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, ttmp15, v2 :: v_dual_subrev_f32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x4c,0xca,0x7b,0x06,0x06,0xff] +0x7b,0x04,0x4c,0xca,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, ttmp15, v255 :: v_dual_mov_b32 v6, ttmp15 ; encoding: [0x7b,0xfe,0x51,0xca,0x7b,0x00,0x06,0xff] +0x7b,0xfe,0x51,0xca,0x7b,0x00,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v1, v2 :: v_dual_add_f32 v6, v255, v3 ; encoding: 
[0x01,0x05,0x48,0xca,0xff,0x07,0x06,0xff] +0x01,0x05,0x48,0xca,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v1, v2 :: v_dual_add_nc_u32 v6, v255, v3 ; encoding: [0x01,0x05,0x60,0xca,0xff,0x07,0x06,0xff] +0x01,0x05,0x60,0xca,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v1, v2 :: v_dual_ashrrev_i32 v6, v255, v3 ; encoding: [0x01,0x05,0x6c,0xca,0xff,0x07,0x06,0xff] +0x01,0x05,0x6c,0xca,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v1, v2 :: v_dual_cndmask_b32 v6, v255, v3 ; encoding: [0x01,0x05,0x52,0xca,0xff,0x07,0x06,0xff] +0x01,0x05,0x52,0xca,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v1, v2 :: v_dual_fmaak_f32 v6, v255, v3, 0xaf123456 ; encoding: [0x01,0x05,0x42,0xca,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0x05,0x42,0xca,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v255, v1, v2 :: v_dual_fmac_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x40,0xca,0xff,0x07,0x06,0xff] +0x01,0x05,0x40,0xca,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v1, v2 :: v_dual_lshlrev_b32 v6, v255, v3 ; encoding: [0x01,0x05,0x62,0xca,0xff,0x07,0x06,0xff] +0x01,0x05,0x62,0xca,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v1, v2 :: v_dual_lshrrev_b32 v6, v255, v3 ; encoding: [0x01,0x05,0x6a,0xca,0xff,0x07,0x06,0xff] +0x01,0x05,0x6a,0xca,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v1, v2 :: v_dual_max_i32 v6, v255, v3 ; encoding: [0x01,0x05,0x6e,0xca,0xff,0x07,0x06,0xff] +0x01,0x05,0x6e,0xca,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v1, v2 :: v_dual_max_num_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x54,0xca,0xff,0x07,0x06,0xff] +0x01,0x05,0x54,0xca,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v1, v2 :: v_dual_min_i32 v6, v255, v3 ; encoding: [0x01,0x05,0x70,0xca,0xff,0x07,0x06,0xff] +0x01,0x05,0x70,0xca,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v1, v2 :: v_dual_min_num_f32 v6, v255, v3 ; encoding: 
[0x01,0x05,0x56,0xca,0xff,0x07,0x06,0xff] +0x01,0x05,0x56,0xca,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x4e,0xca,0xff,0x07,0x06,0xff] +0x01,0x05,0x4e,0xca,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v1, v2 :: v_dual_mul_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x46,0xca,0xff,0x07,0x06,0xff] +0x01,0x05,0x46,0xca,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v1, v2 :: v_dual_sub_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x4a,0xca,0xff,0x07,0x06,0xff] +0x01,0x05,0x4a,0xca,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v1, v2 :: v_dual_sub_nc_u32 v6, v255, v3 ; encoding: [0x01,0x05,0x68,0xca,0xff,0x07,0x06,0xff] +0x01,0x05,0x68,0xca,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v1, v2 :: v_dual_subrev_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x4c,0xca,0xff,0x07,0x06,0xff] +0x01,0x05,0x4c,0xca,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v1, v255 :: v_dual_fmamk_f32 v6, v255, 0xaf123456, v255 ; encoding: [0x01,0xff,0x45,0xca,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x01,0xff,0x45,0xca,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v255, v1, v255 :: v_dual_mov_b32 v6, v255 ; encoding: [0x01,0xff,0x51,0xca,0xff,0x01,0x06,0xff] +0x01,0xff,0x51,0xca,0xff,0x01,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v2, v2 :: v_dual_add_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x48,0xca,0x03,0x07,0x06,0xff] +0x02,0x05,0x48,0xca,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v2, v2 :: v_dual_add_nc_u32 v6, v3, v3 ; encoding: [0x02,0x05,0x60,0xca,0x03,0x07,0x06,0xff] +0x02,0x05,0x60,0xca,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v2, v2 :: v_dual_ashrrev_i32 v6, v3, v3 ; encoding: [0x02,0x05,0x6c,0xca,0x03,0x07,0x06,0xff] +0x02,0x05,0x6c,0xca,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v2, v2 :: v_dual_cndmask_b32 v6, v3, v3 ; encoding: 
[0x02,0x05,0x52,0xca,0x03,0x07,0x06,0xff] +0x02,0x05,0x52,0xca,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v2, v2 :: v_dual_fmaak_f32 v6, v3, v3, 0xaf123456 ; encoding: [0x02,0x05,0x42,0xca,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x02,0x05,0x42,0xca,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v255, v2, v2 :: v_dual_fmac_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x40,0xca,0x03,0x07,0x06,0xff] +0x02,0x05,0x40,0xca,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v2, v2 :: v_dual_lshlrev_b32 v6, v3, v3 ; encoding: [0x02,0x05,0x62,0xca,0x03,0x07,0x06,0xff] +0x02,0x05,0x62,0xca,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v2, v2 :: v_dual_lshrrev_b32 v6, v3, v3 ; encoding: [0x02,0x05,0x6a,0xca,0x03,0x07,0x06,0xff] +0x02,0x05,0x6a,0xca,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v2, v2 :: v_dual_max_i32 v6, v3, v3 ; encoding: [0x02,0x05,0x6e,0xca,0x03,0x07,0x06,0xff] +0x02,0x05,0x6e,0xca,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v2, v2 :: v_dual_max_num_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x54,0xca,0x03,0x07,0x06,0xff] +0x02,0x05,0x54,0xca,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v2, v2 :: v_dual_min_i32 v6, v3, v3 ; encoding: [0x02,0x05,0x70,0xca,0x03,0x07,0x06,0xff] +0x02,0x05,0x70,0xca,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v2, v2 :: v_dual_min_num_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x56,0xca,0x03,0x07,0x06,0xff] +0x02,0x05,0x56,0xca,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x4e,0xca,0x03,0x07,0x06,0xff] +0x02,0x05,0x4e,0xca,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v2, v2 :: v_dual_mul_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x46,0xca,0x03,0x07,0x06,0xff] +0x02,0x05,0x46,0xca,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v2, v2 :: v_dual_sub_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x4a,0xca,0x03,0x07,0x06,0xff] 
+0x02,0x05,0x4a,0xca,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v2, v2 :: v_dual_sub_nc_u32 v6, v3, v3 ; encoding: [0x02,0x05,0x68,0xca,0x03,0x07,0x06,0xff] +0x02,0x05,0x68,0xca,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v2, v2 :: v_dual_subrev_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x4c,0xca,0x03,0x07,0x06,0xff] +0x02,0x05,0x4c,0xca,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v2, v255 :: v_dual_fmamk_f32 v6, v3, 0xaf123456, v255 ; encoding: [0x02,0xff,0x45,0xca,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x02,0xff,0x45,0xca,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v255, v2, v255 :: v_dual_mov_b32 v6, v3 ; encoding: [0x02,0xff,0x51,0xca,0x03,0x01,0x06,0xff] +0x02,0xff,0x51,0xca,0x03,0x01,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v255, v2 :: v_dual_add_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x48,0xca,0x02,0x07,0x06,0xff] +0xff,0x05,0x48,0xca,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v255, v2 :: v_dual_add_nc_u32 v6, v2, v3 ; encoding: [0xff,0x05,0x60,0xca,0x02,0x07,0x06,0xff] +0xff,0x05,0x60,0xca,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v255, v2 :: v_dual_ashrrev_i32 v6, v2, v3 ; encoding: [0xff,0x05,0x6c,0xca,0x02,0x07,0x06,0xff] +0xff,0x05,0x6c,0xca,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v255, v2 :: v_dual_cndmask_b32 v6, v2, v3 ; encoding: [0xff,0x05,0x52,0xca,0x02,0x07,0x06,0xff] +0xff,0x05,0x52,0xca,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v255, v2 :: v_dual_fmaak_f32 v6, v2, v3, 0xaf123456 ; encoding: [0xff,0x05,0x42,0xca,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x05,0x42,0xca,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v255, v255, v2 :: v_dual_fmac_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x40,0xca,0x02,0x07,0x06,0xff] +0xff,0x05,0x40,0xca,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v255, v2 :: v_dual_lshlrev_b32 v6, v2, v3 ; encoding: 
[0xff,0x05,0x62,0xca,0x02,0x07,0x06,0xff] +0xff,0x05,0x62,0xca,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v255, v2 :: v_dual_lshrrev_b32 v6, v2, v3 ; encoding: [0xff,0x05,0x6a,0xca,0x02,0x07,0x06,0xff] +0xff,0x05,0x6a,0xca,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v255, v2 :: v_dual_max_i32 v6, v2, v3 ; encoding: [0xff,0x05,0x6e,0xca,0x02,0x07,0x06,0xff] +0xff,0x05,0x6e,0xca,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v255, v2 :: v_dual_max_num_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x54,0xca,0x02,0x07,0x06,0xff] +0xff,0x05,0x54,0xca,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v255, v2 :: v_dual_min_i32 v6, v2, v3 ; encoding: [0xff,0x05,0x70,0xca,0x02,0x07,0x06,0xff] +0xff,0x05,0x70,0xca,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v255, v2 :: v_dual_min_num_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x56,0xca,0x02,0x07,0x06,0xff] +0xff,0x05,0x56,0xca,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x4e,0xca,0x02,0x07,0x06,0xff] +0xff,0x05,0x4e,0xca,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v255, v2 :: v_dual_mul_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x46,0xca,0x02,0x07,0x06,0xff] +0xff,0x05,0x46,0xca,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v255, v2 :: v_dual_sub_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x4a,0xca,0x02,0x07,0x06,0xff] +0xff,0x05,0x4a,0xca,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v255, v2 :: v_dual_sub_nc_u32 v6, v2, v3 ; encoding: [0xff,0x05,0x68,0xca,0x02,0x07,0x06,0xff] +0xff,0x05,0x68,0xca,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v255, v2 :: v_dual_subrev_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x4c,0xca,0x02,0x07,0x06,0xff] +0xff,0x05,0x4c,0xca,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v255, v255 :: v_dual_fmamk_f32 v6, v2, 0xaf123456, v255 ; encoding: [0xff,0xff,0x45,0xca,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] 
+0xff,0xff,0x45,0xca,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v255, v255, v255 :: v_dual_mov_b32 v6, v2 ; encoding: [0xff,0xff,0x51,0xca,0x02,0x01,0x06,0xff] +0xff,0xff,0x51,0xca,0x02,0x01,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v3, v2 :: v_dual_add_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x48,0xca,0x04,0x07,0x06,0xff] +0x03,0x05,0x48,0xca,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v3, v2 :: v_dual_add_nc_u32 v6, v4, v3 ; encoding: [0x03,0x05,0x60,0xca,0x04,0x07,0x06,0xff] +0x03,0x05,0x60,0xca,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v3, v2 :: v_dual_ashrrev_i32 v6, v4, v3 ; encoding: [0x03,0x05,0x6c,0xca,0x04,0x07,0x06,0xff] +0x03,0x05,0x6c,0xca,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v3, v2 :: v_dual_cndmask_b32 v6, v4, v3 ; encoding: [0x03,0x05,0x52,0xca,0x04,0x07,0x06,0xff] +0x03,0x05,0x52,0xca,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v3, v2 :: v_dual_fmaak_f32 v6, v4, v3, 0xaf123456 ; encoding: [0x03,0x05,0x42,0xca,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x03,0x05,0x42,0xca,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v255, v3, v2 :: v_dual_fmac_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x40,0xca,0x04,0x07,0x06,0xff] +0x03,0x05,0x40,0xca,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v3, v2 :: v_dual_lshlrev_b32 v6, v4, v3 ; encoding: [0x03,0x05,0x62,0xca,0x04,0x07,0x06,0xff] +0x03,0x05,0x62,0xca,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v3, v2 :: v_dual_lshrrev_b32 v6, v4, v3 ; encoding: [0x03,0x05,0x6a,0xca,0x04,0x07,0x06,0xff] +0x03,0x05,0x6a,0xca,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v3, v2 :: v_dual_max_i32 v6, v4, v3 ; encoding: [0x03,0x05,0x6e,0xca,0x04,0x07,0x06,0xff] +0x03,0x05,0x6e,0xca,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v3, v2 :: v_dual_max_num_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x54,0xca,0x04,0x07,0x06,0xff] 
+0x03,0x05,0x54,0xca,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v3, v2 :: v_dual_min_i32 v6, v4, v3 ; encoding: [0x03,0x05,0x70,0xca,0x04,0x07,0x06,0xff] +0x03,0x05,0x70,0xca,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v3, v2 :: v_dual_min_num_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x56,0xca,0x04,0x07,0x06,0xff] +0x03,0x05,0x56,0xca,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x4e,0xca,0x04,0x07,0x06,0xff] +0x03,0x05,0x4e,0xca,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v3, v2 :: v_dual_mul_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x46,0xca,0x04,0x07,0x06,0xff] +0x03,0x05,0x46,0xca,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v3, v2 :: v_dual_sub_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x4a,0xca,0x04,0x07,0x06,0xff] +0x03,0x05,0x4a,0xca,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v3, v2 :: v_dual_sub_nc_u32 v6, v4, v3 ; encoding: [0x03,0x05,0x68,0xca,0x04,0x07,0x06,0xff] +0x03,0x05,0x68,0xca,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v3, v2 :: v_dual_subrev_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x4c,0xca,0x04,0x07,0x06,0xff] +0x03,0x05,0x4c,0xca,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v3, v255 :: v_dual_fmamk_f32 v6, v4, 0xaf123456, v255 ; encoding: [0x03,0xff,0x45,0xca,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x03,0xff,0x45,0xca,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v255, v3, v255 :: v_dual_mov_b32 v6, v4 ; encoding: [0x03,0xff,0x51,0xca,0x04,0x01,0x06,0xff] +0x03,0xff,0x51,0xca,0x04,0x01,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2 :: v_dual_add_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x48,0xca,0x01,0x07,0x06,0xff] +0x04,0x05,0x48,0xca,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2 :: v_dual_add_nc_u32 v6, v1, v3 ; encoding: [0x04,0x05,0x60,0xca,0x01,0x07,0x06,0xff] +0x04,0x05,0x60,0xca,0x01,0x07,0x06,0xff + +# 
GFX1250: v_dual_cndmask_b32 v255, v4, v2 :: v_dual_ashrrev_i32 v6, v1, v3 ; encoding: [0x04,0x05,0x6c,0xca,0x01,0x07,0x06,0xff] +0x04,0x05,0x6c,0xca,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2 :: v_dual_cndmask_b32 v6, v1, v3 ; encoding: [0x04,0x05,0x52,0xca,0x01,0x07,0x06,0xff] +0x04,0x05,0x52,0xca,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2 :: v_dual_fmaak_f32 v6, v1, v3, 0xaf123456 ; encoding: [0x04,0x05,0x42,0xca,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x04,0x05,0x42,0xca,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2 :: v_dual_fmac_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x40,0xca,0x01,0x07,0x06,0xff] +0x04,0x05,0x40,0xca,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2 :: v_dual_lshlrev_b32 v6, v1, v3 ; encoding: [0x04,0x05,0x62,0xca,0x01,0x07,0x06,0xff] +0x04,0x05,0x62,0xca,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2 :: v_dual_lshrrev_b32 v6, v1, v3 ; encoding: [0x04,0x05,0x6a,0xca,0x01,0x07,0x06,0xff] +0x04,0x05,0x6a,0xca,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2 :: v_dual_max_i32 v6, v1, v3 ; encoding: [0x04,0x05,0x6e,0xca,0x01,0x07,0x06,0xff] +0x04,0x05,0x6e,0xca,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2 :: v_dual_max_num_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x54,0xca,0x01,0x07,0x06,0xff] +0x04,0x05,0x54,0xca,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2 :: v_dual_min_i32 v6, v1, v3 ; encoding: [0x04,0x05,0x70,0xca,0x01,0x07,0x06,0xff] +0x04,0x05,0x70,0xca,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2 :: v_dual_min_num_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x56,0xca,0x01,0x07,0x06,0xff] +0x04,0x05,0x56,0xca,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x4e,0xca,0x01,0x07,0x06,0xff] +0x04,0x05,0x4e,0xca,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2 
:: v_dual_mul_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x46,0xca,0x01,0x07,0x06,0xff] +0x04,0x05,0x46,0xca,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2 :: v_dual_sub_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x4a,0xca,0x01,0x07,0x06,0xff] +0x04,0x05,0x4a,0xca,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2 :: v_dual_sub_nc_u32 v6, v1, v3 ; encoding: [0x04,0x05,0x68,0xca,0x01,0x07,0x06,0xff] +0x04,0x05,0x68,0xca,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2 :: v_dual_subrev_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x4c,0xca,0x01,0x07,0x06,0xff] +0x04,0x05,0x4c,0xca,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, v4, v255 :: v_dual_fmamk_f32 v6, v1, 0xaf123456, v255 ; encoding: [0x04,0xff,0x45,0xca,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x04,0xff,0x45,0xca,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v255, v4, v255 :: v_dual_mov_b32 v6, v1 ; encoding: [0x04,0xff,0x51,0xca,0x01,0x01,0x06,0xff] +0x04,0xff,0x51,0xca,0x01,0x01,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v2 :: v_dual_add_f32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x48,0xca,0x6b,0x06,0x06,0xff] +0x6b,0x04,0x48,0xca,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x60,0xca,0x6b,0x06,0x06,0xff] +0x6b,0x04,0x60,0xca,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x6c,0xca,0x6b,0x06,0x06,0xff] +0x6b,0x04,0x6c,0xca,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x52,0xca,0x6b,0x06,0x06,0xff] +0x6b,0x04,0x52,0xca,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x40,0xca,0x6b,0x06,0x06,0xff] +0x6b,0x04,0x40,0xca,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v2 :: 
v_dual_lshlrev_b32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x62,0xca,0x6b,0x06,0x06,0xff] +0x6b,0x04,0x62,0xca,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x6a,0xca,0x6b,0x06,0x06,0xff] +0x6b,0x04,0x6a,0xca,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v2 :: v_dual_max_i32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x6e,0xca,0x6b,0x06,0x06,0xff] +0x6b,0x04,0x6e,0xca,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x54,0xca,0x6b,0x06,0x06,0xff] +0x6b,0x04,0x54,0xca,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v2 :: v_dual_min_i32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x70,0xca,0x6b,0x06,0x06,0xff] +0x6b,0x04,0x70,0xca,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x56,0xca,0x6b,0x06,0x06,0xff] +0x6b,0x04,0x56,0xca,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x4e,0xca,0x6b,0x06,0x06,0xff] +0x6b,0x04,0x4e,0xca,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v2 :: v_dual_mul_f32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x46,0xca,0x6b,0x06,0x06,0xff] +0x6b,0x04,0x46,0xca,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v2 :: v_dual_sub_f32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x4a,0xca,0x6b,0x06,0x06,0xff] +0x6b,0x04,0x4a,0xca,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x68,0xca,0x6b,0x06,0x06,0xff] +0x6b,0x04,0x68,0xca,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x4c,0xca,0x6b,0x06,0x06,0xff] +0x6b,0x04,0x4c,0xca,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v255 :: 
v_dual_mov_b32 v6, vcc_hi ; encoding: [0x6b,0xfe,0x51,0xca,0x6b,0x00,0x06,0xff] +0x6b,0xfe,0x51,0xca,0x6b,0x00,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, vcc_lo, v2 :: v_dual_add_f32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x48,0xca,0x6a,0x06,0x06,0xff] +0x6a,0x04,0x48,0xca,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x60,0xca,0x6a,0x06,0x06,0xff] +0x6a,0x04,0x60,0xca,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x6c,0xca,0x6a,0x06,0x06,0xff] +0x6a,0x04,0x6c,0xca,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x52,0xca,0x6a,0x06,0x06,0xff] +0x6a,0x04,0x52,0xca,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x40,0xca,0x6a,0x06,0x06,0xff] +0x6a,0x04,0x40,0xca,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x62,0xca,0x6a,0x06,0x06,0xff] +0x6a,0x04,0x62,0xca,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x6a,0xca,0x6a,0x06,0x06,0xff] +0x6a,0x04,0x6a,0xca,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, vcc_lo, v2 :: v_dual_max_i32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x6e,0xca,0x6a,0x06,0x06,0xff] +0x6a,0x04,0x6e,0xca,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x54,0xca,0x6a,0x06,0x06,0xff] +0x6a,0x04,0x54,0xca,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, vcc_lo, v2 :: v_dual_min_i32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x70,0xca,0x6a,0x06,0x06,0xff] +0x6a,0x04,0x70,0xca,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, vcc_lo, v2 :: v_dual_min_num_f32 
v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x56,0xca,0x6a,0x06,0x06,0xff] +0x6a,0x04,0x56,0xca,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x4e,0xca,0x6a,0x06,0x06,0xff] +0x6a,0x04,0x4e,0xca,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, vcc_lo, v2 :: v_dual_mul_f32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x46,0xca,0x6a,0x06,0x06,0xff] +0x6a,0x04,0x46,0xca,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, vcc_lo, v2 :: v_dual_sub_f32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x4a,0xca,0x6a,0x06,0x06,0xff] +0x6a,0x04,0x4a,0xca,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x68,0xca,0x6a,0x06,0x06,0xff] +0x6a,0x04,0x68,0xca,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x4c,0xca,0x6a,0x06,0x06,0xff] +0x6a,0x04,0x4c,0xca,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v255, vcc_lo, v255 :: v_dual_mov_b32 v6, vcc_lo ; encoding: [0x6a,0xfe,0x51,0xca,0x6a,0x00,0x06,0xff] +0x6a,0xfe,0x51,0xca,0x6a,0x00,0x06,0xff + +# GFX1250: v_dual_cndmask_b32 v6, null, v5 :: v_dual_add_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x48,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x48,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v6, null, v5 :: v_dual_add_nc_u32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x60,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x60,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v6, null, v5 :: v_dual_ashrrev_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x6c,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x6c,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v6, null, v5 :: v_dual_cndmask_b32 v255, 0xaf123456, v4 ; encoding: 
[0x7c,0x0a,0x52,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x52,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v6, null, v5 :: v_dual_fmaak_f32 v255, 0xaf123456, v4, 0xaf123456 ; encoding: [0x7c,0x0a,0x42,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x42,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v6, null, v5 :: v_dual_fmac_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x40,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x40,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v6, null, v5 :: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x44,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x44,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v6, null, v5 :: v_dual_lshlrev_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x62,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x62,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v6, null, v5 :: v_dual_lshrrev_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x6a,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x6a,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v6, null, v5 :: v_dual_max_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x6e,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x6e,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v6, null, v5 :: v_dual_max_num_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x54,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x54,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v6, null, v5 :: v_dual_min_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x70,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x70,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v6, null, v5 :: v_dual_min_num_f32 v255, 
0xaf123456, v4 ; encoding: [0x7c,0x0a,0x56,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x56,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v6, null, v5 :: v_dual_mov_b32 v255, 0xaf123456 ; encoding: [0x7c,0x0a,0x50,0xca,0xff,0x00,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x50,0xca,0xff,0x00,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v6, null, v5 :: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x4e,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x4e,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v6, null, v5 :: v_dual_mul_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x46,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x46,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v6, null, v5 :: v_dual_sub_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x4a,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x4a,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v6, null, v5 :: v_dual_sub_nc_u32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x68,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x68,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_cndmask_b32 v6, null, v5 :: v_dual_subrev_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x4c,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x4c,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_add_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x48,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +0xc1,0x08,0x48,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_add_nc_u32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x60,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +0xc1,0x08,0x60,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: 
v_dual_ashrrev_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x6c,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +0xc1,0x08,0x6c,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_cndmask_b32 v6, 0.5, v5 ; encoding: [0xc1,0x08,0x52,0xc8,0xf0,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +0xc1,0x08,0x52,0xc8,0xf0,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_fmaak_f32 v6, src_scc, v5, 0xaf123456 ; encoding: [0xc1,0x08,0x42,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +0xc1,0x08,0x42,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_fmac_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x40,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +0xc1,0x08,0x40,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_fmamk_f32 v6, src_scc, 0xaf123456, v255 ; encoding: [0xc1,0x08,0x44,0xc8,0xfd,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xc1,0x08,0x44,0xc8,0xfd,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_lshlrev_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x62,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +0xc1,0x08,0x62,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_lshrrev_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x6a,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +0xc1,0x08,0x6a,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_max_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x6e,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +0xc1,0x08,0x6e,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_max_num_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x54,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] 
+0xc1,0x08,0x54,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_min_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x70,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +0xc1,0x08,0x70,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_min_num_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x56,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +0xc1,0x08,0x56,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_mov_b32 v6, src_scc ; encoding: [0xc1,0x08,0x50,0xc8,0xfd,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +0xc1,0x08,0x50,0xc8,0xfd,0x00,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x4e,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +0xc1,0x08,0x4e,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_mul_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x46,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +0xc1,0x08,0x46,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_sub_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x4a,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +0xc1,0x08,0x4a,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_sub_nc_u32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x68,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +0xc1,0x08,0x68,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, -1, v4, 0xaf123456 :: v_dual_subrev_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x4c,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +0xc1,0x08,0x4c,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_add_f32 v6, 0.5, v2 ; encoding: 
[0xf0,0x06,0x48,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +0xf0,0x06,0x48,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_add_nc_u32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x60,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +0xf0,0x06,0x60,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_ashrrev_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x6c,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +0xf0,0x06,0x6c,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_cndmask_b32 v6, -1, v2 ; encoding: [0xf0,0x06,0x52,0xc8,0xc1,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +0xf0,0x06,0x52,0xc8,0xc1,0x04,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_fmaak_f32 v6, 0.5, v2, 0xaf123456 ; encoding: [0xf0,0x06,0x42,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +0xf0,0x06,0x42,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_fmac_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x40,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +0xf0,0x06,0x40,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_fmamk_f32 v6, 0.5, 0xaf123456, v255 ; encoding: [0xf0,0x06,0x44,0xc8,0xf0,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xf0,0x06,0x44,0xc8,0xf0,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_lshlrev_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x62,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +0xf0,0x06,0x62,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_lshrrev_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x6a,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +0xf0,0x06,0x6a,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, 0.5, v3, 
0xaf123456 :: v_dual_max_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x6e,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +0xf0,0x06,0x6e,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_max_num_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x54,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +0xf0,0x06,0x54,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_min_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x70,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +0xf0,0x06,0x70,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_min_num_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x56,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +0xf0,0x06,0x56,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_mov_b32 v6, 0.5 ; encoding: [0xf0,0x06,0x50,0xc8,0xf0,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +0xf0,0x06,0x50,0xc8,0xf0,0x00,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x4e,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +0xf0,0x06,0x4e,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_mul_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x46,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +0xf0,0x06,0x46,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_sub_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x4a,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +0xf0,0x06,0x4a,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, 0.5, v3, 0xaf123456 :: v_dual_sub_nc_u32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x68,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +0xf0,0x06,0x68,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 
v255, 0.5, v3, 0xaf123456 :: v_dual_subrev_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x4c,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +0xf0,0x06,0x4c,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_add_f32 v6, null, v3 ; encoding: [0xff,0x04,0x48,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x48,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_add_nc_u32 v6, null, v3 ; encoding: [0xff,0x04,0x60,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x60,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_ashrrev_i32 v6, null, v3 ; encoding: [0xff,0x04,0x6c,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x6c,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_cndmask_b32 v6, null, v3 ; encoding: [0xff,0x04,0x52,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x52,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_fmaak_f32 v6, null, v3, 0xaf123456 ; encoding: [0xff,0x04,0x42,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x42,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_fmac_f32 v6, null, v3 ; encoding: [0xff,0x04,0x40,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x40,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_lshlrev_b32 v6, null, v3 ; encoding: [0xff,0x04,0x62,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x62,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_lshrrev_b32 v6, null, v3 ; encoding: 
[0xff,0x04,0x6a,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x6a,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_max_i32 v6, null, v3 ; encoding: [0xff,0x04,0x6e,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x6e,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_max_num_f32 v6, null, v3 ; encoding: [0xff,0x04,0x54,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x54,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_min_i32 v6, null, v3 ; encoding: [0xff,0x04,0x70,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x70,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_min_num_f32 v6, null, v3 ; encoding: [0xff,0x04,0x56,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x56,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, null, v3 ; encoding: [0xff,0x04,0x4e,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x4e,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_mul_f32 v6, null, v3 ; encoding: [0xff,0x04,0x46,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x46,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_sub_f32 v6, null, v3 ; encoding: [0xff,0x04,0x4a,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x4a,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_sub_nc_u32 v6, null, v3 ; encoding: [0xff,0x04,0x68,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x68,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: 
v_dual_fmaak_f32 v255, 0xaf123456, v2, 0xaf123456 :: v_dual_subrev_f32 v6, null, v3 ; encoding: [0xff,0x04,0x4c,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x4c,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, 0xaf123456, v255, 0xaf123456 :: v_dual_fmamk_f32 v6, null, 0xaf123456, v255 ; encoding: [0xff,0xfe,0x45,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0xfe,0x45,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, 0xaf123456, v255, 0xaf123456 :: v_dual_mov_b32 v6, null ; encoding: [0xff,0xfe,0x51,0xc8,0x7c,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0xfe,0x51,0xc8,0x7c,0x00,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, exec_hi, v2, 0xaf123456 :: v_dual_add_f32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x48,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7f,0x04,0x48,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, exec_hi, v2, 0xaf123456 :: v_dual_add_nc_u32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x60,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7f,0x04,0x60,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, exec_hi, v2, 0xaf123456 :: v_dual_ashrrev_i32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x6c,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7f,0x04,0x6c,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, exec_hi, v2, 0xaf123456 :: v_dual_fmaak_f32 v6, exec_hi, v3, 0xaf123456 ; encoding: [0x7f,0x04,0x42,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7f,0x04,0x42,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, exec_hi, v2, 0xaf123456 :: v_dual_fmac_f32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x40,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7f,0x04,0x40,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, exec_hi, v2, 0xaf123456 :: v_dual_lshlrev_b32 v6, exec_hi, v3 ; encoding: 
[0x7f,0x04,0x62,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7f,0x04,0x62,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, exec_hi, v2, 0xaf123456 :: v_dual_lshrrev_b32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x6a,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7f,0x04,0x6a,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, exec_hi, v2, 0xaf123456 :: v_dual_max_i32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x6e,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7f,0x04,0x6e,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, exec_hi, v2, 0xaf123456 :: v_dual_max_num_f32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x54,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7f,0x04,0x54,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, exec_hi, v2, 0xaf123456 :: v_dual_min_i32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x70,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7f,0x04,0x70,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, exec_hi, v2, 0xaf123456 :: v_dual_min_num_f32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x56,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7f,0x04,0x56,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, exec_hi, v2, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x4e,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7f,0x04,0x4e,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, exec_hi, v2, 0xaf123456 :: v_dual_mul_f32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x46,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7f,0x04,0x46,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, exec_hi, v2, 0xaf123456 :: v_dual_sub_f32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x4a,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7f,0x04,0x4a,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: 
v_dual_fmaak_f32 v255, exec_hi, v2, 0xaf123456 :: v_dual_sub_nc_u32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x68,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7f,0x04,0x68,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, exec_hi, v2, 0xaf123456 :: v_dual_subrev_f32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x4c,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7f,0x04,0x4c,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, exec_hi, v255, 0xaf123456 :: v_dual_fmamk_f32 v6, exec_hi, 0xaf123456, v255 ; encoding: [0x7f,0xfe,0x45,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7f,0xfe,0x45,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, exec_hi, v255, 0xaf123456 :: v_dual_mov_b32 v6, exec_hi ; encoding: [0x7f,0xfe,0x51,0xc8,0x7f,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7f,0xfe,0x51,0xc8,0x7f,0x00,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, exec_lo, v2, 0xaf123456 :: v_dual_add_f32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x48,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7e,0x04,0x48,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, exec_lo, v2, 0xaf123456 :: v_dual_add_nc_u32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x60,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7e,0x04,0x60,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, exec_lo, v2, 0xaf123456 :: v_dual_ashrrev_i32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x6c,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7e,0x04,0x6c,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, exec_lo, v2, 0xaf123456 :: v_dual_fmaak_f32 v6, exec_lo, v3, 0xaf123456 ; encoding: [0x7e,0x04,0x42,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7e,0x04,0x42,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, exec_lo, v2, 0xaf123456 :: v_dual_fmac_f32 v6, exec_lo, v3 ; encoding: 
[0x7e,0x04,0x40,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7e,0x04,0x40,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, exec_lo, v2, 0xaf123456 :: v_dual_lshlrev_b32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x62,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7e,0x04,0x62,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, exec_lo, v2, 0xaf123456 :: v_dual_lshrrev_b32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x6a,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7e,0x04,0x6a,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, exec_lo, v2, 0xaf123456 :: v_dual_max_i32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x6e,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7e,0x04,0x6e,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, exec_lo, v2, 0xaf123456 :: v_dual_max_num_f32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x54,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7e,0x04,0x54,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, exec_lo, v2, 0xaf123456 :: v_dual_min_i32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x70,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7e,0x04,0x70,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, exec_lo, v2, 0xaf123456 :: v_dual_min_num_f32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x56,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7e,0x04,0x56,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, exec_lo, v2, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x4e,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7e,0x04,0x4e,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, exec_lo, v2, 0xaf123456 :: v_dual_mul_f32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x46,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7e,0x04,0x46,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# 
GFX1250: v_dual_fmaak_f32 v255, exec_lo, v2, 0xaf123456 :: v_dual_sub_f32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x4a,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7e,0x04,0x4a,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, exec_lo, v2, 0xaf123456 :: v_dual_sub_nc_u32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x68,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7e,0x04,0x68,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, exec_lo, v2, 0xaf123456 :: v_dual_subrev_f32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x4c,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7e,0x04,0x4c,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, exec_lo, v255, 0xaf123456 :: v_dual_fmamk_f32 v6, exec_lo, 0xaf123456, v255 ; encoding: [0x7e,0xfe,0x45,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7e,0xfe,0x45,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, exec_lo, v255, 0xaf123456 :: v_dual_mov_b32 v6, exec_lo ; encoding: [0x7e,0xfe,0x51,0xc8,0x7e,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7e,0xfe,0x51,0xc8,0x7e,0x00,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, m0, v2, 0xaf123456 :: v_dual_add_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x48,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7d,0x04,0x48,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, m0, v2, 0xaf123456 :: v_dual_add_nc_u32 v6, m0, v3 ; encoding: [0x7d,0x04,0x60,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7d,0x04,0x60,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, m0, v2, 0xaf123456 :: v_dual_ashrrev_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0x6c,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7d,0x04,0x6c,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, m0, v2, 0xaf123456 :: v_dual_fmaak_f32 v6, m0, v3, 0xaf123456 ; encoding: 
[0x7d,0x04,0x42,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7d,0x04,0x42,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, m0, v2, 0xaf123456 :: v_dual_fmac_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x40,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7d,0x04,0x40,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, m0, v2, 0xaf123456 :: v_dual_lshlrev_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0x62,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7d,0x04,0x62,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, m0, v2, 0xaf123456 :: v_dual_lshrrev_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0x6a,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7d,0x04,0x6a,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, m0, v2, 0xaf123456 :: v_dual_max_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0x6e,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7d,0x04,0x6e,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, m0, v2, 0xaf123456 :: v_dual_max_num_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x54,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7d,0x04,0x54,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, m0, v2, 0xaf123456 :: v_dual_min_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0x70,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7d,0x04,0x70,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, m0, v2, 0xaf123456 :: v_dual_min_num_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x56,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7d,0x04,0x56,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, m0, v2, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x4e,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7d,0x04,0x4e,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, m0, v2, 0xaf123456 :: v_dual_mul_f32 v6, m0, v3 ; 
encoding: [0x7d,0x04,0x46,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7d,0x04,0x46,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, m0, v2, 0xaf123456 :: v_dual_sub_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x4a,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7d,0x04,0x4a,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, m0, v2, 0xaf123456 :: v_dual_sub_nc_u32 v6, m0, v3 ; encoding: [0x7d,0x04,0x68,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7d,0x04,0x68,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, m0, v2, 0xaf123456 :: v_dual_subrev_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x4c,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7d,0x04,0x4c,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, m0, v255, 0xaf123456 :: v_dual_fmamk_f32 v6, m0, 0xaf123456, v255 ; encoding: [0x7d,0xfe,0x45,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7d,0xfe,0x45,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, m0, v255, 0xaf123456 :: v_dual_mov_b32 v6, m0 ; encoding: [0x7d,0xfe,0x51,0xc8,0x7d,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7d,0xfe,0x51,0xc8,0x7d,0x00,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, s1, v2, 0xaf123456 :: v_dual_add_f32 v6, s1, v3 ; encoding: [0x01,0x04,0x48,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0x04,0x48,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, s1, v2, 0xaf123456 :: v_dual_add_nc_u32 v6, s1, v3 ; encoding: [0x01,0x04,0x60,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0x04,0x60,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, s1, v2, 0xaf123456 :: v_dual_ashrrev_i32 v6, s1, v3 ; encoding: [0x01,0x04,0x6c,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0x04,0x6c,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, s1, v2, 0xaf123456 :: v_dual_fmaak_f32 
v6, s1, v3, 0xaf123456 ; encoding: [0x01,0x04,0x42,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0x04,0x42,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, s1, v2, 0xaf123456 :: v_dual_fmac_f32 v6, s1, v3 ; encoding: [0x01,0x04,0x40,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0x04,0x40,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, s1, v2, 0xaf123456 :: v_dual_lshlrev_b32 v6, s1, v3 ; encoding: [0x01,0x04,0x62,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0x04,0x62,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, s1, v2, 0xaf123456 :: v_dual_lshrrev_b32 v6, s1, v3 ; encoding: [0x01,0x04,0x6a,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0x04,0x6a,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, s1, v2, 0xaf123456 :: v_dual_max_i32 v6, s1, v3 ; encoding: [0x01,0x04,0x6e,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0x04,0x6e,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, s1, v2, 0xaf123456 :: v_dual_max_num_f32 v6, s1, v3 ; encoding: [0x01,0x04,0x54,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0x04,0x54,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, s1, v2, 0xaf123456 :: v_dual_min_i32 v6, s1, v3 ; encoding: [0x01,0x04,0x70,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0x04,0x70,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, s1, v2, 0xaf123456 :: v_dual_min_num_f32 v6, s1, v3 ; encoding: [0x01,0x04,0x56,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0x04,0x56,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, s1, v2, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, s1, v3 ; encoding: [0x01,0x04,0x4e,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0x04,0x4e,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, s1, v2, 0xaf123456 
:: v_dual_mul_f32 v6, s1, v3 ; encoding: [0x01,0x04,0x46,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0x04,0x46,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, s1, v2, 0xaf123456 :: v_dual_sub_f32 v6, s1, v3 ; encoding: [0x01,0x04,0x4a,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0x04,0x4a,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, s1, v2, 0xaf123456 :: v_dual_sub_nc_u32 v6, s1, v3 ; encoding: [0x01,0x04,0x68,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0x04,0x68,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, s1, v2, 0xaf123456 :: v_dual_subrev_f32 v6, s1, v3 ; encoding: [0x01,0x04,0x4c,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0x04,0x4c,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, s1, v255, 0xaf123456 :: v_dual_fmamk_f32 v6, s1, 0xaf123456, v255 ; encoding: [0x01,0xfe,0x45,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x01,0xfe,0x45,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, s1, v255, 0xaf123456 :: v_dual_mov_b32 v6, s1 ; encoding: [0x01,0xfe,0x51,0xc8,0x01,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0xfe,0x51,0xc8,0x01,0x00,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, s105, v2, 0xaf123456 :: v_dual_add_f32 v6, s105, v3 ; encoding: [0x69,0x04,0x48,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x69,0x04,0x48,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, s105, v2, 0xaf123456 :: v_dual_add_nc_u32 v6, s105, v3 ; encoding: [0x69,0x04,0x60,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x69,0x04,0x60,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, s105, v2, 0xaf123456 :: v_dual_ashrrev_i32 v6, s105, v3 ; encoding: [0x69,0x04,0x6c,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x69,0x04,0x6c,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 
v255, s105, v2, 0xaf123456 :: v_dual_fmaak_f32 v6, s105, v3, 0xaf123456 ; encoding: [0x69,0x04,0x42,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x69,0x04,0x42,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, s105, v2, 0xaf123456 :: v_dual_fmac_f32 v6, s105, v3 ; encoding: [0x69,0x04,0x40,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x69,0x04,0x40,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, s105, v2, 0xaf123456 :: v_dual_lshlrev_b32 v6, s105, v3 ; encoding: [0x69,0x04,0x62,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x69,0x04,0x62,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, s105, v2, 0xaf123456 :: v_dual_lshrrev_b32 v6, s105, v3 ; encoding: [0x69,0x04,0x6a,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x69,0x04,0x6a,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, s105, v2, 0xaf123456 :: v_dual_max_i32 v6, s105, v3 ; encoding: [0x69,0x04,0x6e,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x69,0x04,0x6e,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, s105, v2, 0xaf123456 :: v_dual_max_num_f32 v6, s105, v3 ; encoding: [0x69,0x04,0x54,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x69,0x04,0x54,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, s105, v2, 0xaf123456 :: v_dual_min_i32 v6, s105, v3 ; encoding: [0x69,0x04,0x70,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x69,0x04,0x70,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, s105, v2, 0xaf123456 :: v_dual_min_num_f32 v6, s105, v3 ; encoding: [0x69,0x04,0x56,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x69,0x04,0x56,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, s105, v2, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, s105, v3 ; encoding: [0x69,0x04,0x4e,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] 
+0x69,0x04,0x4e,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, s105, v2, 0xaf123456 :: v_dual_mul_f32 v6, s105, v3 ; encoding: [0x69,0x04,0x46,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x69,0x04,0x46,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, s105, v2, 0xaf123456 :: v_dual_sub_f32 v6, s105, v3 ; encoding: [0x69,0x04,0x4a,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x69,0x04,0x4a,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, s105, v2, 0xaf123456 :: v_dual_sub_nc_u32 v6, s105, v3 ; encoding: [0x69,0x04,0x68,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x69,0x04,0x68,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, s105, v2, 0xaf123456 :: v_dual_subrev_f32 v6, s105, v3 ; encoding: [0x69,0x04,0x4c,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x69,0x04,0x4c,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, s105, v255, 0xaf123456 :: v_dual_fmamk_f32 v6, s105, 0xaf123456, v255 ; encoding: [0x69,0xfe,0x45,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x69,0xfe,0x45,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, s105, v255, 0xaf123456 :: v_dual_mov_b32 v6, s105 ; encoding: [0x69,0xfe,0x51,0xc8,0x69,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +0x69,0xfe,0x51,0xc8,0x69,0x00,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, src_scc, v2, 0xaf123456 :: v_dual_add_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x48,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xfd,0x04,0x48,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, src_scc, v2, 0xaf123456 :: v_dual_add_nc_u32 v6, -1, v3 ; encoding: [0xfd,0x04,0x60,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xfd,0x04,0x60,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, src_scc, v2, 0xaf123456 :: v_dual_ashrrev_i32 v6, -1, v3 ; encoding: 
[0xfd,0x04,0x6c,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xfd,0x04,0x6c,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, src_scc, v2, 0xaf123456 :: v_dual_fmaak_f32 v6, -1, v3, 0xaf123456 ; encoding: [0xfd,0x04,0x42,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xfd,0x04,0x42,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, src_scc, v2, 0xaf123456 :: v_dual_fmac_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x40,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xfd,0x04,0x40,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, src_scc, v2, 0xaf123456 :: v_dual_lshlrev_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0x62,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xfd,0x04,0x62,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, src_scc, v2, 0xaf123456 :: v_dual_lshrrev_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0x6a,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xfd,0x04,0x6a,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, src_scc, v2, 0xaf123456 :: v_dual_max_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0x6e,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xfd,0x04,0x6e,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, src_scc, v2, 0xaf123456 :: v_dual_max_num_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x54,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xfd,0x04,0x54,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, src_scc, v2, 0xaf123456 :: v_dual_min_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0x70,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xfd,0x04,0x70,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, src_scc, v2, 0xaf123456 :: v_dual_min_num_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x56,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xfd,0x04,0x56,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, src_scc, 
v2, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x4e,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xfd,0x04,0x4e,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, src_scc, v2, 0xaf123456 :: v_dual_mul_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x46,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xfd,0x04,0x46,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, src_scc, v2, 0xaf123456 :: v_dual_sub_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x4a,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xfd,0x04,0x4a,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, src_scc, v2, 0xaf123456 :: v_dual_sub_nc_u32 v6, -1, v3 ; encoding: [0xfd,0x04,0x68,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xfd,0x04,0x68,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, src_scc, v2, 0xaf123456 :: v_dual_subrev_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x4c,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xfd,0x04,0x4c,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, src_scc, v255, 0xaf123456 :: v_dual_fmamk_f32 v6, -1, 0xaf123456, v255 ; encoding: [0xfd,0xfe,0x45,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xfd,0xfe,0x45,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, src_scc, v255, 0xaf123456 :: v_dual_mov_b32 v6, -1 ; encoding: [0xfd,0xfe,0x51,0xc8,0xc1,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +0xfd,0xfe,0x51,0xc8,0xc1,0x00,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, ttmp15, v2, 0xaf123456 :: v_dual_add_f32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x48,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7b,0x04,0x48,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, ttmp15, v2, 0xaf123456 :: v_dual_add_nc_u32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x60,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] 
+0x7b,0x04,0x60,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, ttmp15, v2, 0xaf123456 :: v_dual_ashrrev_i32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x6c,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7b,0x04,0x6c,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, ttmp15, v2, 0xaf123456 :: v_dual_fmaak_f32 v6, ttmp15, v3, 0xaf123456 ; encoding: [0x7b,0x04,0x42,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7b,0x04,0x42,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, ttmp15, v2, 0xaf123456 :: v_dual_fmac_f32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x40,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7b,0x04,0x40,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, ttmp15, v2, 0xaf123456 :: v_dual_lshlrev_b32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x62,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7b,0x04,0x62,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, ttmp15, v2, 0xaf123456 :: v_dual_lshrrev_b32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x6a,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7b,0x04,0x6a,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, ttmp15, v2, 0xaf123456 :: v_dual_max_i32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x6e,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7b,0x04,0x6e,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, ttmp15, v2, 0xaf123456 :: v_dual_max_num_f32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x54,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7b,0x04,0x54,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, ttmp15, v2, 0xaf123456 :: v_dual_min_i32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x70,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7b,0x04,0x70,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, ttmp15, v2, 0xaf123456 :: v_dual_min_num_f32 
v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x56,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7b,0x04,0x56,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, ttmp15, v2, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x4e,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7b,0x04,0x4e,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, ttmp15, v2, 0xaf123456 :: v_dual_mul_f32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x46,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7b,0x04,0x46,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, ttmp15, v2, 0xaf123456 :: v_dual_sub_f32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x4a,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7b,0x04,0x4a,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, ttmp15, v2, 0xaf123456 :: v_dual_sub_nc_u32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x68,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7b,0x04,0x68,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, ttmp15, v2, 0xaf123456 :: v_dual_subrev_f32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x4c,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7b,0x04,0x4c,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, ttmp15, v255, 0xaf123456 :: v_dual_fmamk_f32 v6, ttmp15, 0xaf123456, v255 ; encoding: [0x7b,0xfe,0x45,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7b,0xfe,0x45,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, ttmp15, v255, 0xaf123456 :: v_dual_mov_b32 v6, ttmp15 ; encoding: [0x7b,0xfe,0x51,0xc8,0x7b,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7b,0xfe,0x51,0xc8,0x7b,0x00,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: v_dual_add_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x48,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0x05,0x48,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf 
+ +# GFX1250: v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: v_dual_add_nc_u32 v6, v255, v3 ; encoding: [0x01,0x05,0x60,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0x05,0x60,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: v_dual_ashrrev_i32 v6, v255, v3 ; encoding: [0x01,0x05,0x6c,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0x05,0x6c,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: v_dual_cndmask_b32 v6, v255, v3 ; encoding: [0x01,0x05,0x52,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0x05,0x52,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: v_dual_fmaak_f32 v6, v255, v3, 0xaf123456 ; encoding: [0x01,0x05,0x42,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0x05,0x42,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: v_dual_fmac_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x40,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0x05,0x40,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: v_dual_lshlrev_b32 v6, v255, v3 ; encoding: [0x01,0x05,0x62,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0x05,0x62,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: v_dual_lshrrev_b32 v6, v255, v3 ; encoding: [0x01,0x05,0x6a,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0x05,0x6a,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: v_dual_max_i32 v6, v255, v3 ; encoding: [0x01,0x05,0x6e,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0x05,0x6e,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: v_dual_max_num_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x54,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] 
+0x01,0x05,0x54,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: v_dual_min_i32 v6, v255, v3 ; encoding: [0x01,0x05,0x70,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0x05,0x70,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: v_dual_min_num_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x56,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0x05,0x56,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x4e,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0x05,0x4e,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: v_dual_mul_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x46,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0x05,0x46,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: v_dual_sub_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x4a,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0x05,0x4a,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: v_dual_sub_nc_u32 v6, v255, v3 ; encoding: [0x01,0x05,0x68,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0x05,0x68,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v1, v2, 0xaf123456 :: v_dual_subrev_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x4c,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0x05,0x4c,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v1, v255, 0xaf123456 :: v_dual_fmamk_f32 v6, v255, 0xaf123456, v255 ; encoding: [0x01,0xff,0x45,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x01,0xff,0x45,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v1, v255, 0xaf123456 :: v_dual_mov_b32 v6, v255 ; encoding: 
[0x01,0xff,0x51,0xc8,0xff,0x01,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0xff,0x51,0xc8,0xff,0x01,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_add_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x48,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x02,0x05,0x48,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_add_nc_u32 v6, v3, v3 ; encoding: [0x02,0x05,0x60,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x02,0x05,0x60,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_ashrrev_i32 v6, v3, v3 ; encoding: [0x02,0x05,0x6c,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x02,0x05,0x6c,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_cndmask_b32 v6, v3, v3 ; encoding: [0x02,0x05,0x52,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x02,0x05,0x52,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_fmaak_f32 v6, v3, v3, 0xaf123456 ; encoding: [0x02,0x05,0x42,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x02,0x05,0x42,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_fmac_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x40,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x02,0x05,0x40,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_lshlrev_b32 v6, v3, v3 ; encoding: [0x02,0x05,0x62,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x02,0x05,0x62,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_lshrrev_b32 v6, v3, v3 ; encoding: [0x02,0x05,0x6a,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x02,0x05,0x6a,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_max_i32 v6, v3, 
v3 ; encoding: [0x02,0x05,0x6e,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x02,0x05,0x6e,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_max_num_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x54,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x02,0x05,0x54,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_min_i32 v6, v3, v3 ; encoding: [0x02,0x05,0x70,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x02,0x05,0x70,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_min_num_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x56,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x02,0x05,0x56,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x4e,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x02,0x05,0x4e,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_mul_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x46,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x02,0x05,0x46,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_sub_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x4a,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x02,0x05,0x4a,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_sub_nc_u32 v6, v3, v3 ; encoding: [0x02,0x05,0x68,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x02,0x05,0x68,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v2, v2, 0xaf123456 :: v_dual_subrev_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x4c,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x02,0x05,0x4c,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v2, v255, 0xaf123456 :: v_dual_fmamk_f32 
v6, v3, 0xaf123456, v255 ; encoding: [0x02,0xff,0x45,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x02,0xff,0x45,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v2, v255, 0xaf123456 :: v_dual_mov_b32 v6, v3 ; encoding: [0x02,0xff,0x51,0xc8,0x03,0x01,0x06,0xff,0x56,0x34,0x12,0xaf] +0x02,0xff,0x51,0xc8,0x03,0x01,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_add_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x48,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x05,0x48,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_add_nc_u32 v6, v2, v3 ; encoding: [0xff,0x05,0x60,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x05,0x60,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_ashrrev_i32 v6, v2, v3 ; encoding: [0xff,0x05,0x6c,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x05,0x6c,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_cndmask_b32 v6, v2, v3 ; encoding: [0xff,0x05,0x52,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x05,0x52,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_fmaak_f32 v6, v2, v3, 0xaf123456 ; encoding: [0xff,0x05,0x42,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x05,0x42,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_fmac_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x40,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x05,0x40,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_lshlrev_b32 v6, v2, v3 ; encoding: [0xff,0x05,0x62,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x05,0x62,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, 
v255, v2, 0xaf123456 :: v_dual_lshrrev_b32 v6, v2, v3 ; encoding: [0xff,0x05,0x6a,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x05,0x6a,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_max_i32 v6, v2, v3 ; encoding: [0xff,0x05,0x6e,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x05,0x6e,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_max_num_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x54,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x05,0x54,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_min_i32 v6, v2, v3 ; encoding: [0xff,0x05,0x70,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x05,0x70,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_min_num_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x56,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x05,0x56,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x4e,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x05,0x4e,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_mul_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x46,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x05,0x46,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_sub_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x4a,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x05,0x4a,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_sub_nc_u32 v6, v2, v3 ; encoding: [0xff,0x05,0x68,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x05,0x68,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: 
v_dual_fmaak_f32 v255, v255, v2, 0xaf123456 :: v_dual_subrev_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x4c,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x05,0x4c,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v255, v255, 0xaf123456 :: v_dual_fmamk_f32 v6, v2, 0xaf123456, v255 ; encoding: [0xff,0xff,0x45,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0xff,0x45,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v255, v255, 0xaf123456 :: v_dual_mov_b32 v6, v2 ; encoding: [0xff,0xff,0x51,0xc8,0x02,0x01,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0xff,0x51,0xc8,0x02,0x01,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_add_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x48,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x03,0x05,0x48,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_add_nc_u32 v6, v4, v3 ; encoding: [0x03,0x05,0x60,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x03,0x05,0x60,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_ashrrev_i32 v6, v4, v3 ; encoding: [0x03,0x05,0x6c,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x03,0x05,0x6c,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_cndmask_b32 v6, v4, v3 ; encoding: [0x03,0x05,0x52,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x03,0x05,0x52,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_fmaak_f32 v6, v4, v3, 0xaf123456 ; encoding: [0x03,0x05,0x42,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x03,0x05,0x42,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_fmac_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x40,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] 
+0x03,0x05,0x40,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_lshlrev_b32 v6, v4, v3 ; encoding: [0x03,0x05,0x62,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x03,0x05,0x62,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_lshrrev_b32 v6, v4, v3 ; encoding: [0x03,0x05,0x6a,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x03,0x05,0x6a,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_max_i32 v6, v4, v3 ; encoding: [0x03,0x05,0x6e,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x03,0x05,0x6e,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_max_num_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x54,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x03,0x05,0x54,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_min_i32 v6, v4, v3 ; encoding: [0x03,0x05,0x70,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x03,0x05,0x70,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_min_num_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x56,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x03,0x05,0x56,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x4e,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x03,0x05,0x4e,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_mul_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x46,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x03,0x05,0x46,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_sub_f32 v6, v4, v3 ; encoding: 
[0x03,0x05,0x4a,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x03,0x05,0x4a,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_sub_nc_u32 v6, v4, v3 ; encoding: [0x03,0x05,0x68,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x03,0x05,0x68,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v3, v2, 0xaf123456 :: v_dual_subrev_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x4c,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x03,0x05,0x4c,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v3, v255, 0xaf123456 :: v_dual_fmamk_f32 v6, v4, 0xaf123456, v255 ; encoding: [0x03,0xff,0x45,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x03,0xff,0x45,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v3, v255, 0xaf123456 :: v_dual_mov_b32 v6, v4 ; encoding: [0x03,0xff,0x51,0xc8,0x04,0x01,0x06,0xff,0x56,0x34,0x12,0xaf] +0x03,0xff,0x51,0xc8,0x04,0x01,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_add_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x48,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x04,0x05,0x48,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_add_nc_u32 v6, v1, v3 ; encoding: [0x04,0x05,0x60,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x04,0x05,0x60,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_ashrrev_i32 v6, v1, v3 ; encoding: [0x04,0x05,0x6c,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x04,0x05,0x6c,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_cndmask_b32 v6, v1, v3 ; encoding: [0x04,0x05,0x52,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x04,0x05,0x52,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_fmaak_f32 v6, v1, 
v3, 0xaf123456 ; encoding: [0x04,0x05,0x42,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x04,0x05,0x42,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_fmac_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x40,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x04,0x05,0x40,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_lshlrev_b32 v6, v1, v3 ; encoding: [0x04,0x05,0x62,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x04,0x05,0x62,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_lshrrev_b32 v6, v1, v3 ; encoding: [0x04,0x05,0x6a,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x04,0x05,0x6a,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_max_i32 v6, v1, v3 ; encoding: [0x04,0x05,0x6e,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x04,0x05,0x6e,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_max_num_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x54,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x04,0x05,0x54,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_min_i32 v6, v1, v3 ; encoding: [0x04,0x05,0x70,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x04,0x05,0x70,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_min_num_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x56,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x04,0x05,0x56,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x4e,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x04,0x05,0x4e,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: 
v_dual_mul_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x46,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x04,0x05,0x46,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_sub_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x4a,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x04,0x05,0x4a,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_sub_nc_u32 v6, v1, v3 ; encoding: [0x04,0x05,0x68,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x04,0x05,0x68,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v4, v2, 0xaf123456 :: v_dual_subrev_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x4c,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x04,0x05,0x4c,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v4, v255, 0xaf123456 :: v_dual_fmamk_f32 v6, v1, 0xaf123456, v255 ; encoding: [0x04,0xff,0x45,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x04,0xff,0x45,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, v4, v255, 0xaf123456 :: v_dual_mov_b32 v6, v1 ; encoding: [0x04,0xff,0x51,0xc8,0x01,0x01,0x06,0xff,0x56,0x34,0x12,0xaf] +0x04,0xff,0x51,0xc8,0x01,0x01,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, vcc_hi, v2, 0xaf123456 :: v_dual_add_f32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x48,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6b,0x04,0x48,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, vcc_hi, v2, 0xaf123456 :: v_dual_add_nc_u32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x60,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6b,0x04,0x60,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, vcc_hi, v2, 0xaf123456 :: v_dual_ashrrev_i32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x6c,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6b,0x04,0x6c,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: 
v_dual_fmaak_f32 v255, vcc_hi, v2, 0xaf123456 :: v_dual_fmaak_f32 v6, vcc_hi, v3, 0xaf123456 ; encoding: [0x6b,0x04,0x42,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6b,0x04,0x42,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, vcc_hi, v2, 0xaf123456 :: v_dual_fmac_f32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x40,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6b,0x04,0x40,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, vcc_hi, v2, 0xaf123456 :: v_dual_lshlrev_b32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x62,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6b,0x04,0x62,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, vcc_hi, v2, 0xaf123456 :: v_dual_lshrrev_b32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x6a,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6b,0x04,0x6a,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, vcc_hi, v2, 0xaf123456 :: v_dual_max_i32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x6e,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6b,0x04,0x6e,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, vcc_hi, v2, 0xaf123456 :: v_dual_max_num_f32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x54,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6b,0x04,0x54,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, vcc_hi, v2, 0xaf123456 :: v_dual_min_i32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x70,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6b,0x04,0x70,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, vcc_hi, v2, 0xaf123456 :: v_dual_min_num_f32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x56,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6b,0x04,0x56,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, vcc_hi, v2, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, vcc_hi, v3 ; encoding: 
[0x6b,0x04,0x4e,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6b,0x04,0x4e,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, vcc_hi, v2, 0xaf123456 :: v_dual_mul_f32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x46,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6b,0x04,0x46,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, vcc_hi, v2, 0xaf123456 :: v_dual_sub_f32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x4a,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6b,0x04,0x4a,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, vcc_hi, v2, 0xaf123456 :: v_dual_sub_nc_u32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x68,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6b,0x04,0x68,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, vcc_hi, v2, 0xaf123456 :: v_dual_subrev_f32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x4c,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6b,0x04,0x4c,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, vcc_hi, v255, 0xaf123456 :: v_dual_fmamk_f32 v6, vcc_hi, 0xaf123456, v255 ; encoding: [0x6b,0xfe,0x45,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6b,0xfe,0x45,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, vcc_hi, v255, 0xaf123456 :: v_dual_mov_b32 v6, vcc_hi ; encoding: [0x6b,0xfe,0x51,0xc8,0x6b,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6b,0xfe,0x51,0xc8,0x6b,0x00,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, vcc_lo, v2, 0xaf123456 :: v_dual_add_f32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x48,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6a,0x04,0x48,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, vcc_lo, v2, 0xaf123456 :: v_dual_add_nc_u32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x60,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6a,0x04,0x60,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: 
v_dual_fmaak_f32 v255, vcc_lo, v2, 0xaf123456 :: v_dual_ashrrev_i32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x6c,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6a,0x04,0x6c,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, vcc_lo, v2, 0xaf123456 :: v_dual_fmaak_f32 v6, vcc_lo, v3, 0xaf123456 ; encoding: [0x6a,0x04,0x42,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6a,0x04,0x42,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, vcc_lo, v2, 0xaf123456 :: v_dual_fmac_f32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x40,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6a,0x04,0x40,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, vcc_lo, v2, 0xaf123456 :: v_dual_lshlrev_b32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x62,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6a,0x04,0x62,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, vcc_lo, v2, 0xaf123456 :: v_dual_lshrrev_b32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x6a,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6a,0x04,0x6a,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, vcc_lo, v2, 0xaf123456 :: v_dual_max_i32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x6e,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6a,0x04,0x6e,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, vcc_lo, v2, 0xaf123456 :: v_dual_max_num_f32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x54,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6a,0x04,0x54,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, vcc_lo, v2, 0xaf123456 :: v_dual_min_i32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x70,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6a,0x04,0x70,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, vcc_lo, v2, 0xaf123456 :: v_dual_min_num_f32 v6, vcc_lo, v3 ; encoding: 
[0x6a,0x04,0x56,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6a,0x04,0x56,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, vcc_lo, v2, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x4e,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6a,0x04,0x4e,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, vcc_lo, v2, 0xaf123456 :: v_dual_mul_f32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x46,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6a,0x04,0x46,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, vcc_lo, v2, 0xaf123456 :: v_dual_sub_f32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x4a,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6a,0x04,0x4a,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, vcc_lo, v2, 0xaf123456 :: v_dual_sub_nc_u32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x68,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6a,0x04,0x68,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, vcc_lo, v2, 0xaf123456 :: v_dual_subrev_f32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x4c,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6a,0x04,0x4c,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, vcc_lo, v255, 0xaf123456 :: v_dual_fmamk_f32 v6, vcc_lo, 0xaf123456, v255 ; encoding: [0x6a,0xfe,0x45,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6a,0xfe,0x45,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v255, vcc_lo, v255, 0xaf123456 :: v_dual_mov_b32 v6, vcc_lo ; encoding: [0x6a,0xfe,0x51,0xc8,0x6a,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6a,0xfe,0x51,0xc8,0x6a,0x00,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_add_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x48,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x48,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: 
v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_add_nc_u32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x60,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x60,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_ashrrev_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x6c,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x6c,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_cndmask_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x52,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x52,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_fmaak_f32 v255, 0xaf123456, v4, 0xaf123456 ; encoding: [0x7c,0x0a,0x42,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x42,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_fmac_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x40,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x40,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x44,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x44,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_lshlrev_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x62,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x62,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_lshrrev_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x6a,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x6a,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_max_i32 v255, 0xaf123456, v4 ; encoding: 
[0x7c,0x0a,0x6e,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x6e,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_max_num_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x54,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x54,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_min_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x70,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x70,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_min_num_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x56,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x56,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_mov_b32 v255, 0xaf123456 ; encoding: [0x7c,0x0a,0x50,0xc8,0xff,0x00,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x50,0xc8,0xff,0x00,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x4e,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x4e,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_mul_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x46,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x46,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_sub_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x4a,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x4a,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_sub_nc_u32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x68,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x68,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: 
v_dual_fmaak_f32 v6, null, v5, 0xaf123456 :: v_dual_subrev_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x4c,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x4c,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_add_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x08,0xc8,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x08,0xc8,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_add_nc_u32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x20,0xc8,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x20,0xc8,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_ashrrev_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x2c,0xc8,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x2c,0xc8,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_cndmask_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x12,0xc8,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x12,0xc8,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_fmaak_f32 v6, src_scc, v5, 0xaf123456 ; encoding: [0xc1,0x08,0x02,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +0xc1,0x08,0x02,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_fmac_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x00,0xc8,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x00,0xc8,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_fmamk_f32 v6, src_scc, 0xaf123456, v4 ; encoding: [0xc1,0x08,0x04,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +0xc1,0x08,0x04,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_lshlrev_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x22,0xc8,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x22,0xc8,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_lshrrev_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x2a,0xc8,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x2a,0xc8,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_max_i32 v6, src_scc, v5 ; encoding: 
[0xc1,0x08,0x2e,0xc8,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x2e,0xc8,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_max_num_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x14,0xc8,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x14,0xc8,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_min_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x30,0xc8,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x30,0xc8,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_min_num_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x16,0xc8,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x16,0xc8,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_mov_b32 v6, src_scc ; encoding: [0xc1,0x08,0x10,0xc8,0xfd,0x00,0x06,0xff] +0xc1,0x08,0x10,0xc8,0xfd,0x00,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x0e,0xc8,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x0e,0xc8,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_mul_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x06,0xc8,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x06,0xc8,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_sub_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x0a,0xc8,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x0a,0xc8,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_sub_nc_u32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x28,0xc8,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x28,0xc8,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_subrev_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x0c,0xc8,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x0c,0xc8,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_add_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x08,0xc8,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x08,0xc8,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_add_nc_u32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x20,0xc8,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x20,0xc8,0xf0,0x04,0x06,0xff + +# 
GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x2c,0xc8,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x2c,0xc8,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_cndmask_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x12,0xc8,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x12,0xc8,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_fmaak_f32 v6, 0.5, v2, 0xaf123456 ; encoding: [0xf0,0x06,0x02,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +0xf0,0x06,0x02,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_fmac_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x00,0xc8,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x00,0xc8,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_fmamk_f32 v6, 0.5, 0xaf123456, v4 ; encoding: [0xf0,0x06,0x04,0xc8,0xf0,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +0xf0,0x06,0x04,0xc8,0xf0,0x08,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x22,0xc8,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x22,0xc8,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x2a,0xc8,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x2a,0xc8,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_max_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x2e,0xc8,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x2e,0xc8,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_max_num_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x14,0xc8,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x14,0xc8,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_min_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x30,0xc8,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x30,0xc8,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_min_num_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x16,0xc8,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x16,0xc8,0xf0,0x04,0x06,0xff + +# 
GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_mov_b32 v6, 0.5 ; encoding: [0xf0,0x06,0x10,0xc8,0xf0,0x00,0x06,0xff] +0xf0,0x06,0x10,0xc8,0xf0,0x00,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x0e,0xc8,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x0e,0xc8,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_mul_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x06,0xc8,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x06,0xc8,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_sub_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x0a,0xc8,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x0a,0xc8,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x28,0xc8,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x28,0xc8,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_subrev_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x0c,0xc8,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x0c,0xc8,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_add_f32 v6, null, v3 ; encoding: [0xff,0x04,0x08,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x08,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_add_nc_u32 v6, null, v3 ; encoding: [0xff,0x04,0x20,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x20,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_ashrrev_i32 v6, null, v3 ; encoding: [0xff,0x04,0x2c,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x2c,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_cndmask_b32 v6, null, v3 ; encoding: [0xff,0x04,0x12,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x12,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_fmaak_f32 v6, null, v3, 0xaf123456 ; 
encoding: [0xff,0x04,0x02,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x02,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_fmac_f32 v6, null, v3 ; encoding: [0xff,0x04,0x00,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x00,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_lshlrev_b32 v6, null, v3 ; encoding: [0xff,0x04,0x22,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x22,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_lshrrev_b32 v6, null, v3 ; encoding: [0xff,0x04,0x2a,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x2a,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_max_i32 v6, null, v3 ; encoding: [0xff,0x04,0x2e,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x2e,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_max_num_f32 v6, null, v3 ; encoding: [0xff,0x04,0x14,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x14,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_min_i32 v6, null, v3 ; encoding: [0xff,0x04,0x30,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x30,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_min_num_f32 v6, null, v3 ; encoding: [0xff,0x04,0x16,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x16,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_mul_dx9_zero_f32 v6, null, v3 ; encoding: [0xff,0x04,0x0e,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x0e,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_mul_f32 v6, null, v3 ; encoding: 
[0xff,0x04,0x06,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x06,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_sub_f32 v6, null, v3 ; encoding: [0xff,0x04,0x0a,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x0a,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_sub_nc_u32 v6, null, v3 ; encoding: [0xff,0x04,0x28,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x28,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, 0xaf123456, v2 :: v_dual_subrev_f32 v6, null, v3 ; encoding: [0xff,0x04,0x0c,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x0c,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, null, 0xaf123456, v4 ; encoding: [0xff,0xfe,0x05,0xc8,0x7c,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0xfe,0x05,0xc8,0x7c,0x08,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, 0xaf123456, v255 :: v_dual_mov_b32 v6, null ; encoding: [0xff,0xfe,0x11,0xc8,0x7c,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0xfe,0x11,0xc8,0x7c,0x00,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_add_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x08,0xc8,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x08,0xc8,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x20,0xc8,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x20,0xc8,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x2c,0xc8,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x2c,0xc8,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x12,0xc8,0x7f,0x06,0x06,0xff] +0x7f,0x04,0x12,0xc8,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, exec_hi, v2 :: 
v_dual_fmaak_f32 v6, exec_hi, v3, 0xaf123456 ; encoding: [0x7f,0x04,0x02,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7f,0x04,0x02,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_fmac_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x00,0xc8,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x00,0xc8,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x22,0xc8,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x22,0xc8,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x2a,0xc8,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x2a,0xc8,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_max_i32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x2e,0xc8,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x2e,0xc8,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_max_num_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x14,0xc8,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x14,0xc8,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_min_i32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x30,0xc8,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x30,0xc8,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_min_num_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x16,0xc8,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x16,0xc8,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x0e,0xc8,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x0e,0xc8,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_mul_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x06,0xc8,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x06,0xc8,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_sub_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x0a,0xc8,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x0a,0xc8,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 
v255, exec_hi, v2 :: v_dual_sub_nc_u32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x28,0xc8,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x28,0xc8,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_subrev_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x0c,0xc8,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x0c,0xc8,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, exec_hi, v255 :: v_dual_fmamk_f32 v6, exec_hi, 0xaf123456, v4 ; encoding: [0x7f,0xfe,0x05,0xc8,0x7f,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7f,0xfe,0x05,0xc8,0x7f,0x08,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, exec_hi, v255 :: v_dual_mov_b32 v6, vcc_hi ; encoding: [0x7f,0xfe,0x11,0xc8,0x6b,0x00,0x06,0xff] +0x7f,0xfe,0x11,0xc8,0x6b,0x00,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_add_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x08,0xc8,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x08,0xc8,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x20,0xc8,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x20,0xc8,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x2c,0xc8,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x2c,0xc8,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x12,0xc8,0x7e,0x06,0x06,0xff] +0x7e,0x04,0x12,0xc8,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_fmaak_f32 v6, exec_lo, v3, 0xaf123456 ; encoding: [0x7e,0x04,0x02,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7e,0x04,0x02,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_fmac_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x00,0xc8,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x00,0xc8,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x22,0xc8,0x7b,0x06,0x06,0xff] 
+0x7e,0x04,0x22,0xc8,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_lshrrev_b32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x2a,0xc8,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x2a,0xc8,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_max_i32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x2e,0xc8,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x2e,0xc8,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_max_num_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x14,0xc8,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x14,0xc8,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_min_i32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x30,0xc8,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x30,0xc8,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_min_num_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x16,0xc8,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x16,0xc8,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x0e,0xc8,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x0e,0xc8,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_mul_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x06,0xc8,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x06,0xc8,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_sub_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x0a,0xc8,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x0a,0xc8,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_sub_nc_u32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x28,0xc8,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x28,0xc8,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_subrev_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x0c,0xc8,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x0c,0xc8,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, exec_lo, v255 :: v_dual_fmamk_f32 v6, exec_lo, 0xaf123456, v4 ; encoding: [0x7e,0xfe,0x05,0xc8,0x7e,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] 
+0x7e,0xfe,0x05,0xc8,0x7e,0x08,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, exec_lo, v255 :: v_dual_mov_b32 v6, ttmp15 ; encoding: [0x7e,0xfe,0x11,0xc8,0x7b,0x00,0x06,0xff] +0x7e,0xfe,0x11,0xc8,0x7b,0x00,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, m0, v2 :: v_dual_add_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x08,0xc8,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x08,0xc8,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, m0, v2 :: v_dual_add_nc_u32 v6, m0, v3 ; encoding: [0x7d,0x04,0x20,0xc8,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x20,0xc8,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, m0, v2 :: v_dual_ashrrev_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0x2c,0xc8,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x2c,0xc8,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, m0, v2 :: v_dual_cndmask_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0x12,0xc8,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x12,0xc8,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, m0, v2 :: v_dual_fmaak_f32 v6, m0, v3, 0xaf123456 ; encoding: [0x7d,0x04,0x02,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7d,0x04,0x02,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, m0, v2 :: v_dual_fmac_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x00,0xc8,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x00,0xc8,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, m0, v2 :: v_dual_lshlrev_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0x22,0xc8,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x22,0xc8,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, m0, v2 :: v_dual_lshrrev_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0x2a,0xc8,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x2a,0xc8,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, m0, v2 :: v_dual_max_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0x2e,0xc8,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x2e,0xc8,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, m0, v2 :: v_dual_max_num_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x14,0xc8,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x14,0xc8,0x7d,0x06,0x06,0xff + +# GFX1250: 
v_dual_fmac_f32 v255, m0, v2 :: v_dual_min_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0x30,0xc8,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x30,0xc8,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, m0, v2 :: v_dual_min_num_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x16,0xc8,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x16,0xc8,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x0e,0xc8,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x0e,0xc8,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, m0, v2 :: v_dual_mul_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x06,0xc8,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x06,0xc8,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, m0, v2 :: v_dual_sub_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x0a,0xc8,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x0a,0xc8,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, m0, v2 :: v_dual_sub_nc_u32 v6, m0, v3 ; encoding: [0x7d,0x04,0x28,0xc8,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x28,0xc8,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, m0, v2 :: v_dual_subrev_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x0c,0xc8,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x0c,0xc8,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, m0, v255 :: v_dual_fmamk_f32 v6, m0, 0xaf123456, v4 ; encoding: [0x7d,0xfe,0x05,0xc8,0x7d,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7d,0xfe,0x05,0xc8,0x7d,0x08,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, m0, v255 :: v_dual_mov_b32 v6, m0 ; encoding: [0x7d,0xfe,0x11,0xc8,0x7d,0x00,0x06,0xff] +0x7d,0xfe,0x11,0xc8,0x7d,0x00,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, s1, v2 :: v_dual_add_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x08,0xc8,0x69,0x06,0x06,0xff] +0x01,0x04,0x08,0xc8,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, s1, v2 :: v_dual_add_nc_u32 v6, s105, v3 ; encoding: [0x01,0x04,0x20,0xc8,0x69,0x06,0x06,0xff] +0x01,0x04,0x20,0xc8,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, s1, v2 :: v_dual_ashrrev_i32 v6, s105, v3 ; encoding: 
[0x01,0x04,0x2c,0xc8,0x69,0x06,0x06,0xff] +0x01,0x04,0x2c,0xc8,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s1, v3 ; encoding: [0x01,0x04,0x12,0xc8,0x01,0x06,0x06,0xff] +0x01,0x04,0x12,0xc8,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, s1, v2 :: v_dual_fmaak_f32 v6, s1, v3, 0xaf123456 ; encoding: [0x01,0x04,0x02,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0x04,0x02,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, s1, v2 :: v_dual_fmac_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x00,0xc8,0x69,0x06,0x06,0xff] +0x01,0x04,0x00,0xc8,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, s1, v2 :: v_dual_lshlrev_b32 v6, s105, v3 ; encoding: [0x01,0x04,0x22,0xc8,0x69,0x06,0x06,0xff] +0x01,0x04,0x22,0xc8,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, s1, v2 :: v_dual_lshrrev_b32 v6, s105, v3 ; encoding: [0x01,0x04,0x2a,0xc8,0x69,0x06,0x06,0xff] +0x01,0x04,0x2a,0xc8,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, s1, v2 :: v_dual_max_i32 v6, s105, v3 ; encoding: [0x01,0x04,0x2e,0xc8,0x69,0x06,0x06,0xff] +0x01,0x04,0x2e,0xc8,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, s1, v2 :: v_dual_max_num_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x14,0xc8,0x69,0x06,0x06,0xff] +0x01,0x04,0x14,0xc8,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, s1, v2 :: v_dual_min_i32 v6, s105, v3 ; encoding: [0x01,0x04,0x30,0xc8,0x69,0x06,0x06,0xff] +0x01,0x04,0x30,0xc8,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, s1, v2 :: v_dual_min_num_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x16,0xc8,0x69,0x06,0x06,0xff] +0x01,0x04,0x16,0xc8,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x0e,0xc8,0x69,0x06,0x06,0xff] +0x01,0x04,0x0e,0xc8,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, s1, v2 :: v_dual_mul_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x06,0xc8,0x69,0x06,0x06,0xff] 
+0x01,0x04,0x06,0xc8,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, s1, v2 :: v_dual_sub_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x0a,0xc8,0x69,0x06,0x06,0xff] +0x01,0x04,0x0a,0xc8,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, s1, v2 :: v_dual_sub_nc_u32 v6, s105, v3 ; encoding: [0x01,0x04,0x28,0xc8,0x69,0x06,0x06,0xff] +0x01,0x04,0x28,0xc8,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, s1, v2 :: v_dual_subrev_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x0c,0xc8,0x69,0x06,0x06,0xff] +0x01,0x04,0x0c,0xc8,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, s1, v255 :: v_dual_fmamk_f32 v6, s1, 0xaf123456, v4 ; encoding: [0x01,0xfe,0x05,0xc8,0x01,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0xfe,0x05,0xc8,0x01,0x08,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, s1, v255 :: v_dual_mov_b32 v6, s105 ; encoding: [0x01,0xfe,0x11,0xc8,0x69,0x00,0x06,0xff] +0x01,0xfe,0x11,0xc8,0x69,0x00,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, s105, v2 :: v_dual_add_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x08,0xc8,0x01,0x06,0x06,0xff] +0x69,0x04,0x08,0xc8,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, s105, v2 :: v_dual_add_nc_u32 v6, s1, v3 ; encoding: [0x69,0x04,0x20,0xc8,0x01,0x06,0x06,0xff] +0x69,0x04,0x20,0xc8,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, s105, v2 :: v_dual_ashrrev_i32 v6, s1, v3 ; encoding: [0x69,0x04,0x2c,0xc8,0x01,0x06,0x06,0xff] +0x69,0x04,0x2c,0xc8,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, s105, v2 :: v_dual_cndmask_b32 v6, s105, v3 ; encoding: [0x69,0x04,0x12,0xc8,0x69,0x06,0x06,0xff] +0x69,0x04,0x12,0xc8,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, s105, v2 :: v_dual_fmaak_f32 v6, s105, v3, 0xaf123456 ; encoding: [0x69,0x04,0x02,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x69,0x04,0x02,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, s105, v2 :: v_dual_fmac_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x00,0xc8,0x01,0x06,0x06,0xff] 
+0x69,0x04,0x00,0xc8,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, s105, v2 :: v_dual_lshlrev_b32 v6, s1, v3 ; encoding: [0x69,0x04,0x22,0xc8,0x01,0x06,0x06,0xff] +0x69,0x04,0x22,0xc8,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, s105, v2 :: v_dual_lshrrev_b32 v6, s1, v3 ; encoding: [0x69,0x04,0x2a,0xc8,0x01,0x06,0x06,0xff] +0x69,0x04,0x2a,0xc8,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, s105, v2 :: v_dual_max_i32 v6, s1, v3 ; encoding: [0x69,0x04,0x2e,0xc8,0x01,0x06,0x06,0xff] +0x69,0x04,0x2e,0xc8,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, s105, v2 :: v_dual_max_num_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x14,0xc8,0x01,0x06,0x06,0xff] +0x69,0x04,0x14,0xc8,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, s105, v2 :: v_dual_min_i32 v6, s1, v3 ; encoding: [0x69,0x04,0x30,0xc8,0x01,0x06,0x06,0xff] +0x69,0x04,0x30,0xc8,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, s105, v2 :: v_dual_min_num_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x16,0xc8,0x01,0x06,0x06,0xff] +0x69,0x04,0x16,0xc8,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x0e,0xc8,0x01,0x06,0x06,0xff] +0x69,0x04,0x0e,0xc8,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, s105, v2 :: v_dual_mul_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x06,0xc8,0x01,0x06,0x06,0xff] +0x69,0x04,0x06,0xc8,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, s105, v2 :: v_dual_sub_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x0a,0xc8,0x01,0x06,0x06,0xff] +0x69,0x04,0x0a,0xc8,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, s105, v2 :: v_dual_sub_nc_u32 v6, s1, v3 ; encoding: [0x69,0x04,0x28,0xc8,0x01,0x06,0x06,0xff] +0x69,0x04,0x28,0xc8,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, s105, v2 :: v_dual_subrev_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x0c,0xc8,0x01,0x06,0x06,0xff] +0x69,0x04,0x0c,0xc8,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, s105, v255 :: v_dual_fmamk_f32 
v6, s105, 0xaf123456, v4 ; encoding: [0x69,0xfe,0x05,0xc8,0x69,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +0x69,0xfe,0x05,0xc8,0x69,0x08,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, s105, v255 :: v_dual_mov_b32 v6, s1 ; encoding: [0x69,0xfe,0x11,0xc8,0x01,0x00,0x06,0xff] +0x69,0xfe,0x11,0xc8,0x01,0x00,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_add_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x08,0xc8,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x08,0xc8,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_add_nc_u32 v6, -1, v3 ; encoding: [0xfd,0x04,0x20,0xc8,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x20,0xc8,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0x2c,0xc8,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x2c,0xc8,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_cndmask_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0x12,0xc8,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x12,0xc8,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_fmaak_f32 v6, -1, v3, 0xaf123456 ; encoding: [0xfd,0x04,0x02,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xfd,0x04,0x02,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_fmac_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x00,0xc8,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x00,0xc8,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0x22,0xc8,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x22,0xc8,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0x2a,0xc8,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x2a,0xc8,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_max_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0x2e,0xc8,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x2e,0xc8,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, src_scc, v2 :: 
v_dual_max_num_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x14,0xc8,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x14,0xc8,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_min_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0x30,0xc8,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x30,0xc8,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_min_num_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x16,0xc8,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x16,0xc8,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x0e,0xc8,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x0e,0xc8,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_mul_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x06,0xc8,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x06,0xc8,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_sub_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x0a,0xc8,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x0a,0xc8,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v6, -1, v3 ; encoding: [0xfd,0x04,0x28,0xc8,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x28,0xc8,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_subrev_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x0c,0xc8,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x0c,0xc8,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, src_scc, v255 :: v_dual_fmamk_f32 v6, -1, 0xaf123456, v4 ; encoding: [0xfd,0xfe,0x05,0xc8,0xc1,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +0xfd,0xfe,0x05,0xc8,0xc1,0x08,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, src_scc, v255 :: v_dual_mov_b32 v6, -1 ; encoding: [0xfd,0xfe,0x11,0xc8,0xc1,0x00,0x06,0xff] +0xfd,0xfe,0x11,0xc8,0xc1,0x00,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_add_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x08,0xc8,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x08,0xc8,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v6, 
vcc_lo, v3 ; encoding: [0x7b,0x04,0x20,0xc8,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x20,0xc8,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x2c,0xc8,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x2c,0xc8,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x12,0xc8,0x7b,0x06,0x06,0xff] +0x7b,0x04,0x12,0xc8,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_fmaak_f32 v6, ttmp15, v3, 0xaf123456 ; encoding: [0x7b,0x04,0x02,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7b,0x04,0x02,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_fmac_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x00,0xc8,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x00,0xc8,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x22,0xc8,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x22,0xc8,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x2a,0xc8,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x2a,0xc8,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_max_i32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x2e,0xc8,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x2e,0xc8,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_max_num_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x14,0xc8,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x14,0xc8,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_min_i32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x30,0xc8,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x30,0xc8,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_min_num_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x16,0xc8,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x16,0xc8,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, ttmp15, v2 :: 
v_dual_mul_dx9_zero_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x0e,0xc8,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x0e,0xc8,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_mul_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x06,0xc8,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x06,0xc8,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_sub_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x0a,0xc8,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x0a,0xc8,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_sub_nc_u32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x28,0xc8,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x28,0xc8,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_subrev_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x0c,0xc8,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x0c,0xc8,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, ttmp15, v255 :: v_dual_fmamk_f32 v6, ttmp15, 0xaf123456, v4 ; encoding: [0x7b,0xfe,0x05,0xc8,0x7b,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7b,0xfe,0x05,0xc8,0x7b,0x08,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, ttmp15, v255 :: v_dual_mov_b32 v6, vcc_lo ; encoding: [0x7b,0xfe,0x11,0xc8,0x6a,0x00,0x06,0xff] +0x7b,0xfe,0x11,0xc8,0x6a,0x00,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v1, v2 :: v_dual_add_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x08,0xc8,0xff,0x07,0x06,0xff] +0x01,0x05,0x08,0xc8,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v1, v2 :: v_dual_add_nc_u32 v6, v255, v3 ; encoding: [0x01,0x05,0x20,0xc8,0xff,0x07,0x06,0xff] +0x01,0x05,0x20,0xc8,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v1, v2 :: v_dual_ashrrev_i32 v6, v255, v3 ; encoding: [0x01,0x05,0x2c,0xc8,0xff,0x07,0x06,0xff] +0x01,0x05,0x2c,0xc8,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v1, v2 :: v_dual_cndmask_b32 v6, v255, v3 ; encoding: [0x01,0x05,0x12,0xc8,0xff,0x07,0x06,0xff] +0x01,0x05,0x12,0xc8,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v1, v2 :: v_dual_fmaak_f32 v6, 
v255, v3, 0xaf123456 ; encoding: [0x01,0x05,0x02,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0x05,0x02,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, v1, v2 :: v_dual_fmac_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x00,0xc8,0xff,0x07,0x06,0xff] +0x01,0x05,0x00,0xc8,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v1, v2 :: v_dual_lshlrev_b32 v6, v255, v3 ; encoding: [0x01,0x05,0x22,0xc8,0xff,0x07,0x06,0xff] +0x01,0x05,0x22,0xc8,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v1, v2 :: v_dual_lshrrev_b32 v6, v255, v3 ; encoding: [0x01,0x05,0x2a,0xc8,0xff,0x07,0x06,0xff] +0x01,0x05,0x2a,0xc8,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v1, v2 :: v_dual_max_i32 v6, v255, v3 ; encoding: [0x01,0x05,0x2e,0xc8,0xff,0x07,0x06,0xff] +0x01,0x05,0x2e,0xc8,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v1, v2 :: v_dual_max_num_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x14,0xc8,0xff,0x07,0x06,0xff] +0x01,0x05,0x14,0xc8,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v1, v2 :: v_dual_min_i32 v6, v255, v3 ; encoding: [0x01,0x05,0x30,0xc8,0xff,0x07,0x06,0xff] +0x01,0x05,0x30,0xc8,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v1, v2 :: v_dual_min_num_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x16,0xc8,0xff,0x07,0x06,0xff] +0x01,0x05,0x16,0xc8,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x0e,0xc8,0xff,0x07,0x06,0xff] +0x01,0x05,0x0e,0xc8,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v1, v2 :: v_dual_mul_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x06,0xc8,0xff,0x07,0x06,0xff] +0x01,0x05,0x06,0xc8,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v1, v2 :: v_dual_sub_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x0a,0xc8,0xff,0x07,0x06,0xff] +0x01,0x05,0x0a,0xc8,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v1, v2 :: v_dual_sub_nc_u32 v6, v255, v3 ; encoding: 
[0x01,0x05,0x28,0xc8,0xff,0x07,0x06,0xff] +0x01,0x05,0x28,0xc8,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v1, v2 :: v_dual_subrev_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x0c,0xc8,0xff,0x07,0x06,0xff] +0x01,0x05,0x0c,0xc8,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v1, v255 :: v_dual_fmamk_f32 v6, v255, 0xaf123456, v4 ; encoding: [0x01,0xff,0x05,0xc8,0xff,0x09,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0xff,0x05,0xc8,0xff,0x09,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, v1, v255 :: v_dual_mov_b32 v6, v255 ; encoding: [0x01,0xff,0x11,0xc8,0xff,0x01,0x06,0xff] +0x01,0xff,0x11,0xc8,0xff,0x01,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v2, v2 :: v_dual_add_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x08,0xc8,0x03,0x07,0x06,0xff] +0x02,0x05,0x08,0xc8,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v2, v2 :: v_dual_add_nc_u32 v6, v3, v3 ; encoding: [0x02,0x05,0x20,0xc8,0x03,0x07,0x06,0xff] +0x02,0x05,0x20,0xc8,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v2, v2 :: v_dual_ashrrev_i32 v6, v3, v3 ; encoding: [0x02,0x05,0x2c,0xc8,0x03,0x07,0x06,0xff] +0x02,0x05,0x2c,0xc8,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v2, v2 :: v_dual_cndmask_b32 v6, v3, v3 ; encoding: [0x02,0x05,0x12,0xc8,0x03,0x07,0x06,0xff] +0x02,0x05,0x12,0xc8,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v2, v2 :: v_dual_fmaak_f32 v6, v3, v3, 0xaf123456 ; encoding: [0x02,0x05,0x02,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x02,0x05,0x02,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, v2, v2 :: v_dual_fmac_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x00,0xc8,0x03,0x07,0x06,0xff] +0x02,0x05,0x00,0xc8,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v2, v2 :: v_dual_lshlrev_b32 v6, v3, v3 ; encoding: [0x02,0x05,0x22,0xc8,0x03,0x07,0x06,0xff] +0x02,0x05,0x22,0xc8,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v2, v2 :: v_dual_lshrrev_b32 v6, v3, v3 ; encoding: 
[0x02,0x05,0x2a,0xc8,0x03,0x07,0x06,0xff] +0x02,0x05,0x2a,0xc8,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v2, v2 :: v_dual_max_i32 v6, v3, v3 ; encoding: [0x02,0x05,0x2e,0xc8,0x03,0x07,0x06,0xff] +0x02,0x05,0x2e,0xc8,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v2, v2 :: v_dual_max_num_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x14,0xc8,0x03,0x07,0x06,0xff] +0x02,0x05,0x14,0xc8,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v2, v2 :: v_dual_min_i32 v6, v3, v3 ; encoding: [0x02,0x05,0x30,0xc8,0x03,0x07,0x06,0xff] +0x02,0x05,0x30,0xc8,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v2, v2 :: v_dual_min_num_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x16,0xc8,0x03,0x07,0x06,0xff] +0x02,0x05,0x16,0xc8,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x0e,0xc8,0x03,0x07,0x06,0xff] +0x02,0x05,0x0e,0xc8,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v2, v2 :: v_dual_mul_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x06,0xc8,0x03,0x07,0x06,0xff] +0x02,0x05,0x06,0xc8,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v2, v2 :: v_dual_sub_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x0a,0xc8,0x03,0x07,0x06,0xff] +0x02,0x05,0x0a,0xc8,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v2, v2 :: v_dual_sub_nc_u32 v6, v3, v3 ; encoding: [0x02,0x05,0x28,0xc8,0x03,0x07,0x06,0xff] +0x02,0x05,0x28,0xc8,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v2, v2 :: v_dual_subrev_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x0c,0xc8,0x03,0x07,0x06,0xff] +0x02,0x05,0x0c,0xc8,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v2, v255 :: v_dual_fmamk_f32 v6, v3, 0xaf123456, v4 ; encoding: [0x02,0xff,0x05,0xc8,0x03,0x09,0x06,0xff,0x56,0x34,0x12,0xaf] +0x02,0xff,0x05,0xc8,0x03,0x09,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, v2, v255 :: v_dual_mov_b32 v6, v3 ; encoding: [0x02,0xff,0x11,0xc8,0x03,0x01,0x06,0xff] +0x02,0xff,0x11,0xc8,0x03,0x01,0x06,0xff + 
+# GFX1250: v_dual_fmac_f32 v255, v255, v2 :: v_dual_add_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x08,0xc8,0x02,0x07,0x06,0xff] +0xff,0x05,0x08,0xc8,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v255, v2 :: v_dual_add_nc_u32 v6, v2, v3 ; encoding: [0xff,0x05,0x20,0xc8,0x02,0x07,0x06,0xff] +0xff,0x05,0x20,0xc8,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v255, v2 :: v_dual_ashrrev_i32 v6, v2, v3 ; encoding: [0xff,0x05,0x2c,0xc8,0x02,0x07,0x06,0xff] +0xff,0x05,0x2c,0xc8,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v255, v2 :: v_dual_cndmask_b32 v6, v2, v3 ; encoding: [0xff,0x05,0x12,0xc8,0x02,0x07,0x06,0xff] +0xff,0x05,0x12,0xc8,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v255, v2 :: v_dual_fmaak_f32 v6, v2, v3, 0xaf123456 ; encoding: [0xff,0x05,0x02,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x05,0x02,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, v255, v2 :: v_dual_fmac_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x00,0xc8,0x02,0x07,0x06,0xff] +0xff,0x05,0x00,0xc8,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v255, v2 :: v_dual_lshlrev_b32 v6, v2, v3 ; encoding: [0xff,0x05,0x22,0xc8,0x02,0x07,0x06,0xff] +0xff,0x05,0x22,0xc8,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v255, v2 :: v_dual_lshrrev_b32 v6, v2, v3 ; encoding: [0xff,0x05,0x2a,0xc8,0x02,0x07,0x06,0xff] +0xff,0x05,0x2a,0xc8,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v255, v2 :: v_dual_max_i32 v6, v2, v3 ; encoding: [0xff,0x05,0x2e,0xc8,0x02,0x07,0x06,0xff] +0xff,0x05,0x2e,0xc8,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v255, v2 :: v_dual_max_num_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x14,0xc8,0x02,0x07,0x06,0xff] +0xff,0x05,0x14,0xc8,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v255, v2 :: v_dual_min_i32 v6, v2, v3 ; encoding: [0xff,0x05,0x30,0xc8,0x02,0x07,0x06,0xff] +0xff,0x05,0x30,0xc8,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v255, v2 :: 
v_dual_min_num_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x16,0xc8,0x02,0x07,0x06,0xff] +0xff,0x05,0x16,0xc8,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x0e,0xc8,0x02,0x07,0x06,0xff] +0xff,0x05,0x0e,0xc8,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v255, v2 :: v_dual_mul_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x06,0xc8,0x02,0x07,0x06,0xff] +0xff,0x05,0x06,0xc8,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v255, v2 :: v_dual_sub_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x0a,0xc8,0x02,0x07,0x06,0xff] +0xff,0x05,0x0a,0xc8,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v255, v2 :: v_dual_sub_nc_u32 v6, v2, v3 ; encoding: [0xff,0x05,0x28,0xc8,0x02,0x07,0x06,0xff] +0xff,0x05,0x28,0xc8,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v255, v2 :: v_dual_subrev_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x0c,0xc8,0x02,0x07,0x06,0xff] +0xff,0x05,0x0c,0xc8,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v255, v255 :: v_dual_fmamk_f32 v6, v2, 0xaf123456, v4 ; encoding: [0xff,0xff,0x05,0xc8,0x02,0x09,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0xff,0x05,0xc8,0x02,0x09,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, v255, v255 :: v_dual_mov_b32 v6, v2 ; encoding: [0xff,0xff,0x11,0xc8,0x02,0x01,0x06,0xff] +0xff,0xff,0x11,0xc8,0x02,0x01,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v3, v2 :: v_dual_add_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x08,0xc8,0x04,0x07,0x06,0xff] +0x03,0x05,0x08,0xc8,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v3, v2 :: v_dual_add_nc_u32 v6, v4, v3 ; encoding: [0x03,0x05,0x20,0xc8,0x04,0x07,0x06,0xff] +0x03,0x05,0x20,0xc8,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v3, v2 :: v_dual_ashrrev_i32 v6, v4, v3 ; encoding: [0x03,0x05,0x2c,0xc8,0x04,0x07,0x06,0xff] +0x03,0x05,0x2c,0xc8,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v3, v2 :: v_dual_cndmask_b32 v6, v4, v3 ; encoding: 
[0x03,0x05,0x12,0xc8,0x04,0x07,0x06,0xff] +0x03,0x05,0x12,0xc8,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v3, v2 :: v_dual_fmaak_f32 v6, v4, v3, 0xaf123456 ; encoding: [0x03,0x05,0x02,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x03,0x05,0x02,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, v3, v2 :: v_dual_fmac_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x00,0xc8,0x04,0x07,0x06,0xff] +0x03,0x05,0x00,0xc8,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v3, v2 :: v_dual_lshlrev_b32 v6, v4, v3 ; encoding: [0x03,0x05,0x22,0xc8,0x04,0x07,0x06,0xff] +0x03,0x05,0x22,0xc8,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v3, v2 :: v_dual_lshrrev_b32 v6, v4, v3 ; encoding: [0x03,0x05,0x2a,0xc8,0x04,0x07,0x06,0xff] +0x03,0x05,0x2a,0xc8,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v3, v2 :: v_dual_max_i32 v6, v4, v3 ; encoding: [0x03,0x05,0x2e,0xc8,0x04,0x07,0x06,0xff] +0x03,0x05,0x2e,0xc8,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v3, v2 :: v_dual_max_num_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x14,0xc8,0x04,0x07,0x06,0xff] +0x03,0x05,0x14,0xc8,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v3, v2 :: v_dual_min_i32 v6, v4, v3 ; encoding: [0x03,0x05,0x30,0xc8,0x04,0x07,0x06,0xff] +0x03,0x05,0x30,0xc8,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v3, v2 :: v_dual_min_num_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x16,0xc8,0x04,0x07,0x06,0xff] +0x03,0x05,0x16,0xc8,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x0e,0xc8,0x04,0x07,0x06,0xff] +0x03,0x05,0x0e,0xc8,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v3, v2 :: v_dual_mul_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x06,0xc8,0x04,0x07,0x06,0xff] +0x03,0x05,0x06,0xc8,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v3, v2 :: v_dual_sub_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x0a,0xc8,0x04,0x07,0x06,0xff] +0x03,0x05,0x0a,0xc8,0x04,0x07,0x06,0xff 
+ +# GFX1250: v_dual_fmac_f32 v255, v3, v2 :: v_dual_sub_nc_u32 v6, v4, v3 ; encoding: [0x03,0x05,0x28,0xc8,0x04,0x07,0x06,0xff] +0x03,0x05,0x28,0xc8,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v3, v2 :: v_dual_subrev_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x0c,0xc8,0x04,0x07,0x06,0xff] +0x03,0x05,0x0c,0xc8,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v3, v255 :: v_dual_fmamk_f32 v6, v4, 0xaf123456, v4 ; encoding: [0x03,0xff,0x05,0xc8,0x04,0x09,0x06,0xff,0x56,0x34,0x12,0xaf] +0x03,0xff,0x05,0xc8,0x04,0x09,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, v3, v255 :: v_dual_mov_b32 v6, v4 ; encoding: [0x03,0xff,0x11,0xc8,0x04,0x01,0x06,0xff] +0x03,0xff,0x11,0xc8,0x04,0x01,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_add_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x08,0xc8,0x01,0x07,0x06,0xff] +0x04,0x05,0x08,0xc8,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_add_nc_u32 v6, v1, v3 ; encoding: [0x04,0x05,0x20,0xc8,0x01,0x07,0x06,0xff] +0x04,0x05,0x20,0xc8,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_ashrrev_i32 v6, v1, v3 ; encoding: [0x04,0x05,0x2c,0xc8,0x01,0x07,0x06,0xff] +0x04,0x05,0x2c,0xc8,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_cndmask_b32 v6, v1, v3 ; encoding: [0x04,0x05,0x12,0xc8,0x01,0x07,0x06,0xff] +0x04,0x05,0x12,0xc8,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_fmaak_f32 v6, v1, v3, 0xaf123456 ; encoding: [0x04,0x05,0x02,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x04,0x05,0x02,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_fmac_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x00,0xc8,0x01,0x07,0x06,0xff] +0x04,0x05,0x00,0xc8,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_lshlrev_b32 v6, v1, v3 ; encoding: [0x04,0x05,0x22,0xc8,0x01,0x07,0x06,0xff] +0x04,0x05,0x22,0xc8,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 
v255, v4, v2 :: v_dual_lshrrev_b32 v6, v1, v3 ; encoding: [0x04,0x05,0x2a,0xc8,0x01,0x07,0x06,0xff] +0x04,0x05,0x2a,0xc8,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_max_i32 v6, v1, v3 ; encoding: [0x04,0x05,0x2e,0xc8,0x01,0x07,0x06,0xff] +0x04,0x05,0x2e,0xc8,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_max_num_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x14,0xc8,0x01,0x07,0x06,0xff] +0x04,0x05,0x14,0xc8,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_min_i32 v6, v1, v3 ; encoding: [0x04,0x05,0x30,0xc8,0x01,0x07,0x06,0xff] +0x04,0x05,0x30,0xc8,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_min_num_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x16,0xc8,0x01,0x07,0x06,0xff] +0x04,0x05,0x16,0xc8,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x0e,0xc8,0x01,0x07,0x06,0xff] +0x04,0x05,0x0e,0xc8,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_mul_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x06,0xc8,0x01,0x07,0x06,0xff] +0x04,0x05,0x06,0xc8,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_sub_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x0a,0xc8,0x01,0x07,0x06,0xff] +0x04,0x05,0x0a,0xc8,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_sub_nc_u32 v6, v1, v3 ; encoding: [0x04,0x05,0x28,0xc8,0x01,0x07,0x06,0xff] +0x04,0x05,0x28,0xc8,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_subrev_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x0c,0xc8,0x01,0x07,0x06,0xff] +0x04,0x05,0x0c,0xc8,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, v4, v255 :: v_dual_fmamk_f32 v6, v1, 0xaf123456, v4 ; encoding: [0x04,0xff,0x05,0xc8,0x01,0x09,0x06,0xff,0x56,0x34,0x12,0xaf] +0x04,0xff,0x05,0xc8,0x01,0x09,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, v4, v255 :: v_dual_mov_b32 v6, v1 ; encoding: 
[0x04,0xff,0x11,0xc8,0x01,0x01,0x06,0xff] +0x04,0xff,0x11,0xc8,0x01,0x01,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_add_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x08,0xc8,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x08,0xc8,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x20,0xc8,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x20,0xc8,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x2c,0xc8,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x2c,0xc8,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x12,0xc8,0x6b,0x06,0x06,0xff] +0x6b,0x04,0x12,0xc8,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_fmaak_f32 v6, vcc_hi, v3, 0xaf123456 ; encoding: [0x6b,0x04,0x02,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6b,0x04,0x02,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x00,0xc8,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x00,0xc8,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x22,0xc8,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x22,0xc8,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x2a,0xc8,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x2a,0xc8,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_max_i32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x2e,0xc8,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x2e,0xc8,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x14,0xc8,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x14,0xc8,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_min_i32 v6, exec_lo, v3 
; encoding: [0x6b,0x04,0x30,0xc8,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x30,0xc8,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x16,0xc8,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x16,0xc8,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x0e,0xc8,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x0e,0xc8,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_mul_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x06,0xc8,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x06,0xc8,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_sub_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x0a,0xc8,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x0a,0xc8,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x28,0xc8,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x28,0xc8,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x0c,0xc8,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x0c,0xc8,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, vcc_hi, v255 :: v_dual_fmamk_f32 v6, vcc_hi, 0xaf123456, v4 ; encoding: [0x6b,0xfe,0x05,0xc8,0x6b,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6b,0xfe,0x05,0xc8,0x6b,0x08,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, vcc_hi, v255 :: v_dual_mov_b32 v6, exec_lo ; encoding: [0x6b,0xfe,0x11,0xc8,0x7e,0x00,0x06,0xff] +0x6b,0xfe,0x11,0xc8,0x7e,0x00,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_add_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x08,0xc8,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x08,0xc8,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x20,0xc8,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x20,0xc8,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 
v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x2c,0xc8,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x2c,0xc8,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x12,0xc8,0x6a,0x06,0x06,0xff] +0x6a,0x04,0x12,0xc8,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_fmaak_f32 v6, vcc_lo, v3, 0xaf123456 ; encoding: [0x6a,0x04,0x02,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6a,0x04,0x02,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x00,0xc8,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x00,0xc8,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x22,0xc8,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x22,0xc8,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x2a,0xc8,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x2a,0xc8,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_max_i32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x2e,0xc8,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x2e,0xc8,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x14,0xc8,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x14,0xc8,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_min_i32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x30,0xc8,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x30,0xc8,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x16,0xc8,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x16,0xc8,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x0e,0xc8,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x0e,0xc8,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, vcc_lo, v2 
:: v_dual_mul_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x06,0xc8,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x06,0xc8,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_sub_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x0a,0xc8,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x0a,0xc8,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x28,0xc8,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x28,0xc8,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x0c,0xc8,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x0c,0xc8,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v255, vcc_lo, v255 :: v_dual_fmamk_f32 v6, vcc_lo, 0xaf123456, v4 ; encoding: [0x6a,0xfe,0x05,0xc8,0x6a,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6a,0xfe,0x05,0xc8,0x6a,0x08,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v255, vcc_lo, v255 :: v_dual_mov_b32 v6, exec_hi ; encoding: [0x6a,0xfe,0x11,0xc8,0x7f,0x00,0x06,0xff] +0x6a,0xfe,0x11,0xc8,0x7f,0x00,0x06,0xff + +# GFX1250: v_dual_fmac_f32 v6, null, v5 :: v_dual_add_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x08,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x08,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v6, null, v5 :: v_dual_add_nc_u32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x20,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x20,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v6, null, v5 :: v_dual_ashrrev_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x2c,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x2c,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v6, null, v5 :: v_dual_cndmask_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x12,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x12,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v6, null, v5 :: 
v_dual_fmaak_f32 v255, 0xaf123456, v4, 0xaf123456 ; encoding: [0x7c,0x0a,0x02,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x02,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v6, null, v5 :: v_dual_fmac_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x00,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x00,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v6, null, v5 :: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 ; encoding: [0x7c,0x0a,0x04,0xc8,0xff,0xfe,0xff,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x04,0xc8,0xff,0xfe,0xff,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v6, null, v5 :: v_dual_lshlrev_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x22,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x22,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v6, null, v5 :: v_dual_lshrrev_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x2a,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x2a,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v6, null, v5 :: v_dual_max_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x2e,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x2e,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v6, null, v5 :: v_dual_max_num_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x14,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x14,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v6, null, v5 :: v_dual_min_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x30,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x30,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v6, null, v5 :: v_dual_min_num_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x16,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x16,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v6, null, v5 :: 
v_dual_mov_b32 v255, 0xaf123456 ; encoding: [0x7c,0x0a,0x10,0xc8,0xff,0x00,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x10,0xc8,0xff,0x00,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v6, null, v5 :: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x0e,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x0e,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v6, null, v5 :: v_dual_mul_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x06,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x06,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v6, null, v5 :: v_dual_sub_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x0a,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x0a,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v6, null, v5 :: v_dual_sub_nc_u32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x28,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x28,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmac_f32 v6, null, v5 :: v_dual_subrev_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x0c,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x0c,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_add_f32 v6, src_scc, v4 ; encoding: [0xc1,0xfe,0x89,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +0xc1,0xfe,0x89,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, src_scc, v4 ; encoding: [0xc1,0xfe,0xa1,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +0xc1,0xfe,0xa1,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, src_scc, v4 ; encoding: [0xc1,0xfe,0xad,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +0xc1,0xfe,0xad,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, -1, 
0xaf123456, v255 :: v_dual_cndmask_b32 v6, 0.5, v4 ; encoding: [0xc1,0xfe,0x93,0xc8,0xf0,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +0xc1,0xfe,0x93,0xc8,0xf0,0x08,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_fmaak_f32 v6, src_scc, v4, 0xaf123456 ; encoding: [0xc1,0xfe,0x83,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +0xc1,0xfe,0x83,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_fmac_f32 v6, src_scc, v4 ; encoding: [0xc1,0xfe,0x81,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +0xc1,0xfe,0x81,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, src_scc, 0xaf123456, v4 ; encoding: [0xc1,0xfe,0x85,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +0xc1,0xfe,0x85,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, src_scc, v4 ; encoding: [0xc1,0xfe,0xa3,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +0xc1,0xfe,0xa3,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_lshrrev_b32 v6, src_scc, v4 ; encoding: [0xc1,0xfe,0xab,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +0xc1,0xfe,0xab,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_max_i32 v6, src_scc, v4 ; encoding: [0xc1,0xfe,0xaf,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +0xc1,0xfe,0xaf,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_max_num_f32 v6, src_scc, v4 ; encoding: [0xc1,0xfe,0x95,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +0xc1,0xfe,0x95,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_min_i32 v6, src_scc, v4 ; encoding: [0xc1,0xfe,0xb1,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] 
+0xc1,0xfe,0xb1,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_min_num_f32 v6, src_scc, v4 ; encoding: [0xc1,0xfe,0x97,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +0xc1,0xfe,0x97,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_mov_b32 v6, src_scc ; encoding: [0xc1,0xfe,0x91,0xc8,0xfd,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +0xc1,0xfe,0x91,0xc8,0xfd,0x00,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 v6, src_scc, v4 ; encoding: [0xc1,0xfe,0x8f,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +0xc1,0xfe,0x8f,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_mul_f32 v6, src_scc, v4 ; encoding: [0xc1,0xfe,0x87,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +0xc1,0xfe,0x87,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_sub_f32 v6, src_scc, v4 ; encoding: [0xc1,0xfe,0x8b,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +0xc1,0xfe,0x8b,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, src_scc, v4 ; encoding: [0xc1,0xfe,0xa9,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +0xc1,0xfe,0xa9,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, -1, 0xaf123456, v255 :: v_dual_subrev_f32 v6, src_scc, v4 ; encoding: [0xc1,0xfe,0x8d,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +0xc1,0xfe,0x8d,0xc8,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_add_f32 v6, 0.5, v3 ; encoding: [0xf0,0xfe,0x89,0xc8,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xf0,0xfe,0x89,0xc8,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, 0.5, v3 ; encoding: 
[0xf0,0xfe,0xa1,0xc8,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xf0,0xfe,0xa1,0xc8,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, 0.5, v3 ; encoding: [0xf0,0xfe,0xad,0xc8,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xf0,0xfe,0xad,0xc8,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_cndmask_b32 v6, -1, v3 ; encoding: [0xf0,0xfe,0x93,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xf0,0xfe,0x93,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_fmaak_f32 v6, 0.5, v3, 0xaf123456 ; encoding: [0xf0,0xfe,0x83,0xc8,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xf0,0xfe,0x83,0xc8,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_fmac_f32 v6, 0.5, v3 ; encoding: [0xf0,0xfe,0x81,0xc8,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xf0,0xfe,0x81,0xc8,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, 0.5, 0xaf123456, v4 ; encoding: [0xf0,0xfe,0x85,0xc8,0xf0,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +0xf0,0xfe,0x85,0xc8,0xf0,0x08,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, 0.5, v3 ; encoding: [0xf0,0xfe,0xa3,0xc8,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xf0,0xfe,0xa3,0xc8,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_lshrrev_b32 v6, 0.5, v3 ; encoding: [0xf0,0xfe,0xab,0xc8,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xf0,0xfe,0xab,0xc8,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_max_i32 v6, 0.5, v3 ; encoding: [0xf0,0xfe,0xaf,0xc8,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xf0,0xfe,0xaf,0xc8,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, 
0.5, 0xaf123456, v255 :: v_dual_max_num_f32 v6, 0.5, v3 ; encoding: [0xf0,0xfe,0x95,0xc8,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xf0,0xfe,0x95,0xc8,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_min_i32 v6, 0.5, v3 ; encoding: [0xf0,0xfe,0xb1,0xc8,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xf0,0xfe,0xb1,0xc8,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_min_num_f32 v6, 0.5, v3 ; encoding: [0xf0,0xfe,0x97,0xc8,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xf0,0xfe,0x97,0xc8,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_mov_b32 v6, 0.5 ; encoding: [0xf0,0xfe,0x91,0xc8,0xf0,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +0xf0,0xfe,0x91,0xc8,0xf0,0x00,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 v6, 0.5, v3 ; encoding: [0xf0,0xfe,0x8f,0xc8,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xf0,0xfe,0x8f,0xc8,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_mul_f32 v6, 0.5, v3 ; encoding: [0xf0,0xfe,0x87,0xc8,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xf0,0xfe,0x87,0xc8,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_sub_f32 v6, 0.5, v3 ; encoding: [0xf0,0xfe,0x8b,0xc8,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xf0,0xfe,0x8b,0xc8,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, 0.5, v3 ; encoding: [0xf0,0xfe,0xa9,0xc8,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xf0,0xfe,0xa9,0xc8,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, 0.5, 0xaf123456, v255 :: v_dual_subrev_f32 v6, 0.5, v3 ; encoding: [0xf0,0xfe,0x8d,0xc8,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xf0,0xfe,0x8d,0xc8,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + 
+# GFX1250: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_add_f32 v6, null, v255 ; encoding: [0xff,0xfe,0x89,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0xfe,0x89,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, null, v255 ; encoding: [0xff,0xfe,0xa1,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0xfe,0xa1,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, null, v255 ; encoding: [0xff,0xfe,0xad,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0xfe,0xad,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_cndmask_b32 v6, null, v255 ; encoding: [0xff,0xfe,0x93,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0xfe,0x93,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_fmaak_f32 v6, null, v255, 0xaf123456 ; encoding: [0xff,0xfe,0x83,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0xfe,0x83,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_fmac_f32 v6, null, v255 ; encoding: [0xff,0xfe,0x81,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0xfe,0x81,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, null, 0xaf123456, v4 ; encoding: [0xff,0xfe,0x85,0xc8,0x7c,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0xfe,0x85,0xc8,0x7c,0x08,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, null, v255 ; encoding: [0xff,0xfe,0xa3,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0xfe,0xa3,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_lshrrev_b32 
v6, null, v255 ; encoding: [0xff,0xfe,0xab,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0xfe,0xab,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_max_i32 v6, null, v255 ; encoding: [0xff,0xfe,0xaf,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0xfe,0xaf,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_max_num_f32 v6, null, v255 ; encoding: [0xff,0xfe,0x95,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0xfe,0x95,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_min_i32 v6, null, v255 ; encoding: [0xff,0xfe,0xb1,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0xfe,0xb1,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_min_num_f32 v6, null, v255 ; encoding: [0xff,0xfe,0x97,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0xfe,0x97,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_mov_b32 v6, null ; encoding: [0xff,0xfe,0x91,0xc8,0x7c,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0xfe,0x91,0xc8,0x7c,0x00,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 v6, null, v255 ; encoding: [0xff,0xfe,0x8f,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0xfe,0x8f,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_mul_f32 v6, null, v255 ; encoding: [0xff,0xfe,0x87,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0xfe,0x87,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_sub_f32 v6, null, v255 ; encoding: [0xff,0xfe,0x8b,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] 
+0xff,0xfe,0x8b,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, null, v255 ; encoding: [0xff,0xfe,0xa9,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0xfe,0xa9,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 :: v_dual_subrev_f32 v6, null, v255 ; encoding: [0xff,0xfe,0x8d,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0xfe,0x8d,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_add_f32 v6, exec_hi, v255 ; encoding: [0x7f,0xfe,0x89,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7f,0xfe,0x89,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, exec_hi, v255 ; encoding: [0x7f,0xfe,0xa1,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7f,0xfe,0xa1,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, exec_hi, v255 ; encoding: [0x7f,0xfe,0xad,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7f,0xfe,0xad,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_fmaak_f32 v6, exec_hi, v255, 0xaf123456 ; encoding: [0x7f,0xfe,0x83,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7f,0xfe,0x83,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_fmac_f32 v6, exec_hi, v255 ; encoding: [0x7f,0xfe,0x81,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7f,0xfe,0x81,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, exec_hi, 0xaf123456, v4 ; encoding: [0x7f,0xfe,0x85,0xc8,0x7f,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7f,0xfe,0x85,0xc8,0x7f,0x08,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: 
v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, exec_hi, v255 ; encoding: [0x7f,0xfe,0xa3,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7f,0xfe,0xa3,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_lshrrev_b32 v6, exec_hi, v255 ; encoding: [0x7f,0xfe,0xab,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7f,0xfe,0xab,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_max_i32 v6, exec_hi, v255 ; encoding: [0x7f,0xfe,0xaf,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7f,0xfe,0xaf,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_max_num_f32 v6, exec_hi, v255 ; encoding: [0x7f,0xfe,0x95,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7f,0xfe,0x95,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_min_i32 v6, exec_hi, v255 ; encoding: [0x7f,0xfe,0xb1,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7f,0xfe,0xb1,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_min_num_f32 v6, exec_hi, v255 ; encoding: [0x7f,0xfe,0x97,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7f,0xfe,0x97,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_mov_b32 v6, exec_hi ; encoding: [0x7f,0xfe,0x91,0xc8,0x7f,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7f,0xfe,0x91,0xc8,0x7f,0x00,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 v6, exec_hi, v255 ; encoding: [0x7f,0xfe,0x8f,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7f,0xfe,0x8f,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_mul_f32 v6, exec_hi, v255 ; encoding: 
[0x7f,0xfe,0x87,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7f,0xfe,0x87,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_sub_f32 v6, exec_hi, v255 ; encoding: [0x7f,0xfe,0x8b,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7f,0xfe,0x8b,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, exec_hi, v255 ; encoding: [0x7f,0xfe,0xa9,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7f,0xfe,0xa9,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, exec_hi, 0xaf123456, v255 :: v_dual_subrev_f32 v6, exec_hi, v255 ; encoding: [0x7f,0xfe,0x8d,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7f,0xfe,0x8d,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_add_f32 v6, exec_lo, v255 ; encoding: [0x7e,0xfe,0x89,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7e,0xfe,0x89,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, exec_lo, v255 ; encoding: [0x7e,0xfe,0xa1,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7e,0xfe,0xa1,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, exec_lo, v255 ; encoding: [0x7e,0xfe,0xad,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7e,0xfe,0xad,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_fmaak_f32 v6, exec_lo, v255, 0xaf123456 ; encoding: [0x7e,0xfe,0x83,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7e,0xfe,0x83,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_fmac_f32 v6, exec_lo, v255 ; encoding: [0x7e,0xfe,0x81,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] 
+0x7e,0xfe,0x81,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, exec_lo, 0xaf123456, v4 ; encoding: [0x7e,0xfe,0x85,0xc8,0x7e,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7e,0xfe,0x85,0xc8,0x7e,0x08,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, exec_lo, v255 ; encoding: [0x7e,0xfe,0xa3,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7e,0xfe,0xa3,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_lshrrev_b32 v6, exec_lo, v255 ; encoding: [0x7e,0xfe,0xab,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7e,0xfe,0xab,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_max_i32 v6, exec_lo, v255 ; encoding: [0x7e,0xfe,0xaf,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7e,0xfe,0xaf,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_max_num_f32 v6, exec_lo, v255 ; encoding: [0x7e,0xfe,0x95,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7e,0xfe,0x95,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_min_i32 v6, exec_lo, v255 ; encoding: [0x7e,0xfe,0xb1,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7e,0xfe,0xb1,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_min_num_f32 v6, exec_lo, v255 ; encoding: [0x7e,0xfe,0x97,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7e,0xfe,0x97,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_mov_b32 v6, exec_lo ; encoding: [0x7e,0xfe,0x91,0xc8,0x7e,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7e,0xfe,0x91,0xc8,0x7e,0x00,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, 
exec_lo, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 v6, exec_lo, v255 ; encoding: [0x7e,0xfe,0x8f,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7e,0xfe,0x8f,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_mul_f32 v6, exec_lo, v255 ; encoding: [0x7e,0xfe,0x87,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7e,0xfe,0x87,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_sub_f32 v6, exec_lo, v255 ; encoding: [0x7e,0xfe,0x8b,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7e,0xfe,0x8b,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, exec_lo, v255 ; encoding: [0x7e,0xfe,0xa9,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7e,0xfe,0xa9,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, exec_lo, 0xaf123456, v255 :: v_dual_subrev_f32 v6, exec_lo, v255 ; encoding: [0x7e,0xfe,0x8d,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7e,0xfe,0x8d,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_add_f32 v6, m0, v255 ; encoding: [0x7d,0xfe,0x89,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7d,0xfe,0x89,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, m0, v255 ; encoding: [0x7d,0xfe,0xa1,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7d,0xfe,0xa1,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, m0, v255 ; encoding: [0x7d,0xfe,0xad,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7d,0xfe,0xad,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_fmaak_f32 v6, m0, v255, 0xaf123456 ; encoding: 
[0x7d,0xfe,0x83,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7d,0xfe,0x83,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_fmac_f32 v6, m0, v255 ; encoding: [0x7d,0xfe,0x81,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7d,0xfe,0x81,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, m0, 0xaf123456, v4 ; encoding: [0x7d,0xfe,0x85,0xc8,0x7d,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7d,0xfe,0x85,0xc8,0x7d,0x08,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, m0, v255 ; encoding: [0x7d,0xfe,0xa3,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7d,0xfe,0xa3,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_lshrrev_b32 v6, m0, v255 ; encoding: [0x7d,0xfe,0xab,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7d,0xfe,0xab,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_max_i32 v6, m0, v255 ; encoding: [0x7d,0xfe,0xaf,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7d,0xfe,0xaf,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_max_num_f32 v6, m0, v255 ; encoding: [0x7d,0xfe,0x95,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7d,0xfe,0x95,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_min_i32 v6, m0, v255 ; encoding: [0x7d,0xfe,0xb1,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7d,0xfe,0xb1,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_min_num_f32 v6, m0, v255 ; encoding: [0x7d,0xfe,0x97,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7d,0xfe,0x97,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, m0, 0xaf123456, 
v255 :: v_dual_mov_b32 v6, m0 ; encoding: [0x7d,0xfe,0x91,0xc8,0x7d,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7d,0xfe,0x91,0xc8,0x7d,0x00,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 v6, m0, v255 ; encoding: [0x7d,0xfe,0x8f,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7d,0xfe,0x8f,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_mul_f32 v6, m0, v255 ; encoding: [0x7d,0xfe,0x87,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7d,0xfe,0x87,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_sub_f32 v6, m0, v255 ; encoding: [0x7d,0xfe,0x8b,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7d,0xfe,0x8b,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, m0, v255 ; encoding: [0x7d,0xfe,0xa9,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7d,0xfe,0xa9,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, m0, 0xaf123456, v255 :: v_dual_subrev_f32 v6, m0, v255 ; encoding: [0x7d,0xfe,0x8d,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7d,0xfe,0x8d,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_add_f32 v6, s1, v255 ; encoding: [0x01,0xfe,0x89,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x01,0xfe,0x89,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, s1, v255 ; encoding: [0x01,0xfe,0xa1,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x01,0xfe,0xa1,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, s1, v255 ; encoding: [0x01,0xfe,0xad,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x01,0xfe,0xad,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: 
v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_fmaak_f32 v6, s1, v255, 0xaf123456 ; encoding: [0x01,0xfe,0x83,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x01,0xfe,0x83,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_fmac_f32 v6, s1, v255 ; encoding: [0x01,0xfe,0x81,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x01,0xfe,0x81,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, s1, 0xaf123456, v4 ; encoding: [0x01,0xfe,0x85,0xc8,0x01,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0xfe,0x85,0xc8,0x01,0x08,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, s1, v255 ; encoding: [0x01,0xfe,0xa3,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x01,0xfe,0xa3,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_lshrrev_b32 v6, s1, v255 ; encoding: [0x01,0xfe,0xab,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x01,0xfe,0xab,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_max_i32 v6, s1, v255 ; encoding: [0x01,0xfe,0xaf,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x01,0xfe,0xaf,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_max_num_f32 v6, s1, v255 ; encoding: [0x01,0xfe,0x95,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x01,0xfe,0x95,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_min_i32 v6, s1, v255 ; encoding: [0x01,0xfe,0xb1,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x01,0xfe,0xb1,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_min_num_f32 v6, s1, v255 ; encoding: [0x01,0xfe,0x97,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] 
+0x01,0xfe,0x97,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_mov_b32 v6, s1 ; encoding: [0x01,0xfe,0x91,0xc8,0x01,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0xfe,0x91,0xc8,0x01,0x00,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 v6, s1, v255 ; encoding: [0x01,0xfe,0x8f,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x01,0xfe,0x8f,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_mul_f32 v6, s1, v255 ; encoding: [0x01,0xfe,0x87,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x01,0xfe,0x87,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_sub_f32 v6, s1, v255 ; encoding: [0x01,0xfe,0x8b,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x01,0xfe,0x8b,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, s1, v255 ; encoding: [0x01,0xfe,0xa9,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x01,0xfe,0xa9,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, s1, 0xaf123456, v255 :: v_dual_subrev_f32 v6, s1, v255 ; encoding: [0x01,0xfe,0x8d,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x01,0xfe,0x8d,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_add_f32 v6, s105, v255 ; encoding: [0x69,0xfe,0x89,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x69,0xfe,0x89,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, s105, v255 ; encoding: [0x69,0xfe,0xa1,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x69,0xfe,0xa1,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, s105, v255 ; encoding: 
[0x69,0xfe,0xad,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x69,0xfe,0xad,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_fmaak_f32 v6, s105, v255, 0xaf123456 ; encoding: [0x69,0xfe,0x83,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x69,0xfe,0x83,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_fmac_f32 v6, s105, v255 ; encoding: [0x69,0xfe,0x81,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x69,0xfe,0x81,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, s105, 0xaf123456, v4 ; encoding: [0x69,0xfe,0x85,0xc8,0x69,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +0x69,0xfe,0x85,0xc8,0x69,0x08,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, s105, v255 ; encoding: [0x69,0xfe,0xa3,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x69,0xfe,0xa3,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_lshrrev_b32 v6, s105, v255 ; encoding: [0x69,0xfe,0xab,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x69,0xfe,0xab,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_max_i32 v6, s105, v255 ; encoding: [0x69,0xfe,0xaf,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x69,0xfe,0xaf,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_max_num_f32 v6, s105, v255 ; encoding: [0x69,0xfe,0x95,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x69,0xfe,0x95,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_min_i32 v6, s105, v255 ; encoding: [0x69,0xfe,0xb1,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x69,0xfe,0xb1,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: 
v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_min_num_f32 v6, s105, v255 ; encoding: [0x69,0xfe,0x97,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x69,0xfe,0x97,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_mov_b32 v6, s105 ; encoding: [0x69,0xfe,0x91,0xc8,0x69,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +0x69,0xfe,0x91,0xc8,0x69,0x00,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 v6, s105, v255 ; encoding: [0x69,0xfe,0x8f,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x69,0xfe,0x8f,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_mul_f32 v6, s105, v255 ; encoding: [0x69,0xfe,0x87,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x69,0xfe,0x87,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_sub_f32 v6, s105, v255 ; encoding: [0x69,0xfe,0x8b,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x69,0xfe,0x8b,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, s105, v255 ; encoding: [0x69,0xfe,0xa9,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x69,0xfe,0xa9,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, s105, 0xaf123456, v255 :: v_dual_subrev_f32 v6, s105, v255 ; encoding: [0x69,0xfe,0x8d,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x69,0xfe,0x8d,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_add_f32 v6, -1, v255 ; encoding: [0xfd,0xfe,0x89,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xfd,0xfe,0x89,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, -1, v255 ; encoding: [0xfd,0xfe,0xa1,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] 
+0xfd,0xfe,0xa1,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, -1, v255 ; encoding: [0xfd,0xfe,0xad,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xfd,0xfe,0xad,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_fmaak_f32 v6, -1, v255, 0xaf123456 ; encoding: [0xfd,0xfe,0x83,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xfd,0xfe,0x83,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_fmac_f32 v6, -1, v255 ; encoding: [0xfd,0xfe,0x81,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xfd,0xfe,0x81,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, -1, 0xaf123456, v4 ; encoding: [0xfd,0xfe,0x85,0xc8,0xc1,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +0xfd,0xfe,0x85,0xc8,0xc1,0x08,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, -1, v255 ; encoding: [0xfd,0xfe,0xa3,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xfd,0xfe,0xa3,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_lshrrev_b32 v6, -1, v255 ; encoding: [0xfd,0xfe,0xab,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xfd,0xfe,0xab,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_max_i32 v6, -1, v255 ; encoding: [0xfd,0xfe,0xaf,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xfd,0xfe,0xaf,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_max_num_f32 v6, -1, v255 ; encoding: [0xfd,0xfe,0x95,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xfd,0xfe,0x95,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 
:: v_dual_min_i32 v6, -1, v255 ; encoding: [0xfd,0xfe,0xb1,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xfd,0xfe,0xb1,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_min_num_f32 v6, -1, v255 ; encoding: [0xfd,0xfe,0x97,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xfd,0xfe,0x97,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_mov_b32 v6, -1 ; encoding: [0xfd,0xfe,0x91,0xc8,0xc1,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +0xfd,0xfe,0x91,0xc8,0xc1,0x00,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 v6, -1, v255 ; encoding: [0xfd,0xfe,0x8f,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xfd,0xfe,0x8f,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_mul_f32 v6, -1, v255 ; encoding: [0xfd,0xfe,0x87,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xfd,0xfe,0x87,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_sub_f32 v6, -1, v255 ; encoding: [0xfd,0xfe,0x8b,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xfd,0xfe,0x8b,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, -1, v255 ; encoding: [0xfd,0xfe,0xa9,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xfd,0xfe,0xa9,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, src_scc, 0xaf123456, v255 :: v_dual_subrev_f32 v6, -1, v255 ; encoding: [0xfd,0xfe,0x8d,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xfd,0xfe,0x8d,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_add_f32 v6, ttmp15, v255 ; encoding: [0x7b,0xfe,0x89,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] 
+0x7b,0xfe,0x89,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, ttmp15, v255 ; encoding: [0x7b,0xfe,0xa1,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7b,0xfe,0xa1,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, ttmp15, v255 ; encoding: [0x7b,0xfe,0xad,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7b,0xfe,0xad,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_fmaak_f32 v6, ttmp15, v255, 0xaf123456 ; encoding: [0x7b,0xfe,0x83,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7b,0xfe,0x83,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_fmac_f32 v6, ttmp15, v255 ; encoding: [0x7b,0xfe,0x81,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7b,0xfe,0x81,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, ttmp15, 0xaf123456, v4 ; encoding: [0x7b,0xfe,0x85,0xc8,0x7b,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7b,0xfe,0x85,0xc8,0x7b,0x08,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, ttmp15, v255 ; encoding: [0x7b,0xfe,0xa3,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7b,0xfe,0xa3,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_lshrrev_b32 v6, ttmp15, v255 ; encoding: [0x7b,0xfe,0xab,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7b,0xfe,0xab,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_max_i32 v6, ttmp15, v255 ; encoding: [0x7b,0xfe,0xaf,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7b,0xfe,0xaf,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, 
ttmp15, 0xaf123456, v255 :: v_dual_max_num_f32 v6, ttmp15, v255 ; encoding: [0x7b,0xfe,0x95,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7b,0xfe,0x95,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_min_i32 v6, ttmp15, v255 ; encoding: [0x7b,0xfe,0xb1,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7b,0xfe,0xb1,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_min_num_f32 v6, ttmp15, v255 ; encoding: [0x7b,0xfe,0x97,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7b,0xfe,0x97,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_mov_b32 v6, ttmp15 ; encoding: [0x7b,0xfe,0x91,0xc8,0x7b,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7b,0xfe,0x91,0xc8,0x7b,0x00,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 v6, ttmp15, v255 ; encoding: [0x7b,0xfe,0x8f,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7b,0xfe,0x8f,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_mul_f32 v6, ttmp15, v255 ; encoding: [0x7b,0xfe,0x87,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7b,0xfe,0x87,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_sub_f32 v6, ttmp15, v255 ; encoding: [0x7b,0xfe,0x8b,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7b,0xfe,0x8b,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, ttmp15, v255 ; encoding: [0x7b,0xfe,0xa9,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7b,0xfe,0xa9,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, ttmp15, 0xaf123456, v255 :: v_dual_subrev_f32 v6, ttmp15, v255 ; encoding: 
[0x7b,0xfe,0x8d,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7b,0xfe,0x8d,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_add_f32 v6, v255, v255 ; encoding: [0x01,0xff,0x89,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x01,0xff,0x89,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, v255, v255 ; encoding: [0x01,0xff,0xa1,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x01,0xff,0xa1,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, v255, v255 ; encoding: [0x01,0xff,0xad,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x01,0xff,0xad,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_cndmask_b32 v6, v255, v255 ; encoding: [0x01,0xff,0x93,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x01,0xff,0x93,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_fmaak_f32 v6, v255, v255, 0xaf123456 ; encoding: [0x01,0xff,0x83,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x01,0xff,0x83,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_fmac_f32 v6, v255, v255 ; encoding: [0x01,0xff,0x81,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x01,0xff,0x81,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, v255, 0xaf123456, v4 ; encoding: [0x01,0xff,0x85,0xc8,0xff,0x09,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0xff,0x85,0xc8,0xff,0x09,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, v255, v255 ; encoding: [0x01,0xff,0xa3,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x01,0xff,0xa3,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: 
v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_lshrrev_b32 v6, v255, v255 ; encoding: [0x01,0xff,0xab,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x01,0xff,0xab,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_max_i32 v6, v255, v255 ; encoding: [0x01,0xff,0xaf,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x01,0xff,0xaf,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_max_num_f32 v6, v255, v255 ; encoding: [0x01,0xff,0x95,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x01,0xff,0x95,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_min_i32 v6, v255, v255 ; encoding: [0x01,0xff,0xb1,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x01,0xff,0xb1,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_min_num_f32 v6, v255, v255 ; encoding: [0x01,0xff,0x97,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x01,0xff,0x97,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_mov_b32 v6, v255 ; encoding: [0x01,0xff,0x91,0xc8,0xff,0x01,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0xff,0x91,0xc8,0xff,0x01,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 v6, v255, v255 ; encoding: [0x01,0xff,0x8f,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x01,0xff,0x8f,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_mul_f32 v6, v255, v255 ; encoding: [0x01,0xff,0x87,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x01,0xff,0x87,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_sub_f32 v6, v255, v255 ; encoding: [0x01,0xff,0x8b,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] 
+0x01,0xff,0x8b,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, v255, v255 ; encoding: [0x01,0xff,0xa9,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x01,0xff,0xa9,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v1, 0xaf123456, v255 :: v_dual_subrev_f32 v6, v255, v255 ; encoding: [0x01,0xff,0x8d,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x01,0xff,0x8d,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_add_f32 v6, v3, v255 ; encoding: [0x02,0xff,0x89,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x02,0xff,0x89,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, v3, v255 ; encoding: [0x02,0xff,0xa1,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x02,0xff,0xa1,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, v3, v255 ; encoding: [0x02,0xff,0xad,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x02,0xff,0xad,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_cndmask_b32 v6, v3, v255 ; encoding: [0x02,0xff,0x93,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x02,0xff,0x93,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_fmaak_f32 v6, v3, v255, 0xaf123456 ; encoding: [0x02,0xff,0x83,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x02,0xff,0x83,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_fmac_f32 v6, v3, v255 ; encoding: [0x02,0xff,0x81,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x02,0xff,0x81,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, v3, 0xaf123456, v4 ; 
encoding: [0x02,0xff,0x85,0xc8,0x03,0x09,0x06,0xff,0x56,0x34,0x12,0xaf] +0x02,0xff,0x85,0xc8,0x03,0x09,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, v3, v255 ; encoding: [0x02,0xff,0xa3,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x02,0xff,0xa3,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_lshrrev_b32 v6, v3, v255 ; encoding: [0x02,0xff,0xab,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x02,0xff,0xab,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_max_i32 v6, v3, v255 ; encoding: [0x02,0xff,0xaf,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x02,0xff,0xaf,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_max_num_f32 v6, v3, v255 ; encoding: [0x02,0xff,0x95,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x02,0xff,0x95,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_min_i32 v6, v3, v255 ; encoding: [0x02,0xff,0xb1,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x02,0xff,0xb1,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_min_num_f32 v6, v3, v255 ; encoding: [0x02,0xff,0x97,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x02,0xff,0x97,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_mov_b32 v6, v3 ; encoding: [0x02,0xff,0x91,0xc8,0x03,0x01,0x06,0xff,0x56,0x34,0x12,0xaf] +0x02,0xff,0x91,0xc8,0x03,0x01,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 v6, v3, v255 ; encoding: [0x02,0xff,0x8f,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x02,0xff,0x8f,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v2, 0xaf123456, 
v255 :: v_dual_mul_f32 v6, v3, v255 ; encoding: [0x02,0xff,0x87,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x02,0xff,0x87,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_sub_f32 v6, v3, v255 ; encoding: [0x02,0xff,0x8b,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x02,0xff,0x8b,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, v3, v255 ; encoding: [0x02,0xff,0xa9,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x02,0xff,0xa9,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v2, 0xaf123456, v255 :: v_dual_subrev_f32 v6, v3, v255 ; encoding: [0x02,0xff,0x8d,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x02,0xff,0x8d,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_add_f32 v6, v2, v255 ; encoding: [0xff,0xff,0x89,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0xff,0x89,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, v2, v255 ; encoding: [0xff,0xff,0xa1,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0xff,0xa1,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, v2, v255 ; encoding: [0xff,0xff,0xad,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0xff,0xad,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_cndmask_b32 v6, v2, v255 ; encoding: [0xff,0xff,0x93,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0xff,0x93,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_fmaak_f32 v6, v2, v255, 0xaf123456 ; encoding: [0xff,0xff,0x83,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] 
+0xff,0xff,0x83,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_fmac_f32 v6, v2, v255 ; encoding: [0xff,0xff,0x81,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0xff,0x81,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, v2, 0xaf123456, v4 ; encoding: [0xff,0xff,0x85,0xc8,0x02,0x09,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0xff,0x85,0xc8,0x02,0x09,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, v2, v255 ; encoding: [0xff,0xff,0xa3,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0xff,0xa3,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_lshrrev_b32 v6, v2, v255 ; encoding: [0xff,0xff,0xab,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0xff,0xab,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_max_i32 v6, v2, v255 ; encoding: [0xff,0xff,0xaf,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0xff,0xaf,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_max_num_f32 v6, v2, v255 ; encoding: [0xff,0xff,0x95,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0xff,0x95,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_min_i32 v6, v2, v255 ; encoding: [0xff,0xff,0xb1,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0xff,0xb1,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_min_num_f32 v6, v2, v255 ; encoding: [0xff,0xff,0x97,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0xff,0x97,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_mov_b32 v6, v2 ; encoding: 
[0xff,0xff,0x91,0xc8,0x02,0x01,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0xff,0x91,0xc8,0x02,0x01,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 v6, v2, v255 ; encoding: [0xff,0xff,0x8f,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0xff,0x8f,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_mul_f32 v6, v2, v255 ; encoding: [0xff,0xff,0x87,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0xff,0x87,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_sub_f32 v6, v2, v255 ; encoding: [0xff,0xff,0x8b,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0xff,0x8b,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, v2, v255 ; encoding: [0xff,0xff,0xa9,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0xff,0xa9,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v255, 0xaf123456, v255 :: v_dual_subrev_f32 v6, v2, v255 ; encoding: [0xff,0xff,0x8d,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0xff,0x8d,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_add_f32 v6, v4, v255 ; encoding: [0x03,0xff,0x89,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x03,0xff,0x89,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, v4, v255 ; encoding: [0x03,0xff,0xa1,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x03,0xff,0xa1,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, v4, v255 ; encoding: [0x03,0xff,0xad,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x03,0xff,0xad,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v3, 0xaf123456, 
v255 :: v_dual_cndmask_b32 v6, v4, v255 ; encoding: [0x03,0xff,0x93,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x03,0xff,0x93,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_fmaak_f32 v6, v4, v255, 0xaf123456 ; encoding: [0x03,0xff,0x83,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x03,0xff,0x83,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_fmac_f32 v6, v4, v255 ; encoding: [0x03,0xff,0x81,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x03,0xff,0x81,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, v4, 0xaf123456, v4 ; encoding: [0x03,0xff,0x85,0xc8,0x04,0x09,0x06,0xff,0x56,0x34,0x12,0xaf] +0x03,0xff,0x85,0xc8,0x04,0x09,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, v4, v255 ; encoding: [0x03,0xff,0xa3,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x03,0xff,0xa3,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_lshrrev_b32 v6, v4, v255 ; encoding: [0x03,0xff,0xab,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x03,0xff,0xab,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_max_i32 v6, v4, v255 ; encoding: [0x03,0xff,0xaf,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x03,0xff,0xaf,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_max_num_f32 v6, v4, v255 ; encoding: [0x03,0xff,0x95,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x03,0xff,0x95,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_min_i32 v6, v4, v255 ; encoding: [0x03,0xff,0xb1,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] 
+0x03,0xff,0xb1,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_min_num_f32 v6, v4, v255 ; encoding: [0x03,0xff,0x97,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x03,0xff,0x97,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_mov_b32 v6, v4 ; encoding: [0x03,0xff,0x91,0xc8,0x04,0x01,0x06,0xff,0x56,0x34,0x12,0xaf] +0x03,0xff,0x91,0xc8,0x04,0x01,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 v6, v4, v255 ; encoding: [0x03,0xff,0x8f,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x03,0xff,0x8f,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_mul_f32 v6, v4, v255 ; encoding: [0x03,0xff,0x87,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x03,0xff,0x87,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_sub_f32 v6, v4, v255 ; encoding: [0x03,0xff,0x8b,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x03,0xff,0x8b,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, v4, v255 ; encoding: [0x03,0xff,0xa9,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x03,0xff,0xa9,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v3, 0xaf123456, v255 :: v_dual_subrev_f32 v6, v4, v255 ; encoding: [0x03,0xff,0x8d,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x03,0xff,0x8d,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_add_f32 v6, v1, v255 ; encoding: [0x04,0xff,0x89,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x04,0xff,0x89,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, v1, v255 ; encoding: 
[0x04,0xff,0xa1,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x04,0xff,0xa1,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, v1, v255 ; encoding: [0x04,0xff,0xad,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x04,0xff,0xad,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_cndmask_b32 v6, v1, v255 ; encoding: [0x04,0xff,0x93,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x04,0xff,0x93,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_fmaak_f32 v6, v1, v255, 0xaf123456 ; encoding: [0x04,0xff,0x83,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x04,0xff,0x83,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_fmac_f32 v6, v1, v255 ; encoding: [0x04,0xff,0x81,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x04,0xff,0x81,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, v1, 0xaf123456, v4 ; encoding: [0x04,0xff,0x85,0xc8,0x01,0x09,0x06,0xff,0x56,0x34,0x12,0xaf] +0x04,0xff,0x85,0xc8,0x01,0x09,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, v1, v255 ; encoding: [0x04,0xff,0xa3,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x04,0xff,0xa3,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_lshrrev_b32 v6, v1, v255 ; encoding: [0x04,0xff,0xab,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x04,0xff,0xab,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_max_i32 v6, v1, v255 ; encoding: [0x04,0xff,0xaf,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x04,0xff,0xaf,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v4, 
0xaf123456, v255 :: v_dual_max_num_f32 v6, v1, v255 ; encoding: [0x04,0xff,0x95,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x04,0xff,0x95,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_min_i32 v6, v1, v255 ; encoding: [0x04,0xff,0xb1,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x04,0xff,0xb1,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_min_num_f32 v6, v1, v255 ; encoding: [0x04,0xff,0x97,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x04,0xff,0x97,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_mov_b32 v6, v1 ; encoding: [0x04,0xff,0x91,0xc8,0x01,0x01,0x06,0xff,0x56,0x34,0x12,0xaf] +0x04,0xff,0x91,0xc8,0x01,0x01,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 v6, v1, v255 ; encoding: [0x04,0xff,0x8f,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x04,0xff,0x8f,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_mul_f32 v6, v1, v255 ; encoding: [0x04,0xff,0x87,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x04,0xff,0x87,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_sub_f32 v6, v1, v255 ; encoding: [0x04,0xff,0x8b,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x04,0xff,0x8b,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, v1, v255 ; encoding: [0x04,0xff,0xa9,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x04,0xff,0xa9,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, v4, 0xaf123456, v255 :: v_dual_subrev_f32 v6, v1, v255 ; encoding: [0x04,0xff,0x8d,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x04,0xff,0x8d,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# 
GFX1250: v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_add_f32 v6, vcc_hi, v255 ; encoding: [0x6b,0xfe,0x89,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6b,0xfe,0x89,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, vcc_hi, v255 ; encoding: [0x6b,0xfe,0xa1,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6b,0xfe,0xa1,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, vcc_hi, v255 ; encoding: [0x6b,0xfe,0xad,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6b,0xfe,0xad,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_fmaak_f32 v6, vcc_hi, v255, 0xaf123456 ; encoding: [0x6b,0xfe,0x83,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6b,0xfe,0x83,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_fmac_f32 v6, vcc_hi, v255 ; encoding: [0x6b,0xfe,0x81,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6b,0xfe,0x81,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, vcc_hi, 0xaf123456, v4 ; encoding: [0x6b,0xfe,0x85,0xc8,0x6b,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6b,0xfe,0x85,0xc8,0x6b,0x08,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, vcc_hi, v255 ; encoding: [0x6b,0xfe,0xa3,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6b,0xfe,0xa3,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_lshrrev_b32 v6, vcc_hi, v255 ; encoding: [0x6b,0xfe,0xab,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6b,0xfe,0xab,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_max_i32 v6, vcc_hi, v255 ; 
encoding: [0x6b,0xfe,0xaf,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6b,0xfe,0xaf,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_max_num_f32 v6, vcc_hi, v255 ; encoding: [0x6b,0xfe,0x95,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6b,0xfe,0x95,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_min_i32 v6, vcc_hi, v255 ; encoding: [0x6b,0xfe,0xb1,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6b,0xfe,0xb1,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_min_num_f32 v6, vcc_hi, v255 ; encoding: [0x6b,0xfe,0x97,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6b,0xfe,0x97,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_mov_b32 v6, vcc_hi ; encoding: [0x6b,0xfe,0x91,0xc8,0x6b,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6b,0xfe,0x91,0xc8,0x6b,0x00,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 v6, vcc_hi, v255 ; encoding: [0x6b,0xfe,0x8f,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6b,0xfe,0x8f,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_mul_f32 v6, vcc_hi, v255 ; encoding: [0x6b,0xfe,0x87,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6b,0xfe,0x87,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_sub_f32 v6, vcc_hi, v255 ; encoding: [0x6b,0xfe,0x8b,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6b,0xfe,0x8b,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, vcc_hi, v255 ; encoding: [0x6b,0xfe,0xa9,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] 
+0x6b,0xfe,0xa9,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, vcc_hi, 0xaf123456, v255 :: v_dual_subrev_f32 v6, vcc_hi, v255 ; encoding: [0x6b,0xfe,0x8d,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6b,0xfe,0x8d,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_add_f32 v6, vcc_lo, v255 ; encoding: [0x6a,0xfe,0x89,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6a,0xfe,0x89,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_add_nc_u32 v6, vcc_lo, v255 ; encoding: [0x6a,0xfe,0xa1,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6a,0xfe,0xa1,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_ashrrev_i32 v6, vcc_lo, v255 ; encoding: [0x6a,0xfe,0xad,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6a,0xfe,0xad,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_fmaak_f32 v6, vcc_lo, v255, 0xaf123456 ; encoding: [0x6a,0xfe,0x83,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6a,0xfe,0x83,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_fmac_f32 v6, vcc_lo, v255 ; encoding: [0x6a,0xfe,0x81,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6a,0xfe,0x81,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, vcc_lo, 0xaf123456, v4 ; encoding: [0x6a,0xfe,0x85,0xc8,0x6a,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6a,0xfe,0x85,0xc8,0x6a,0x08,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_lshlrev_b32 v6, vcc_lo, v255 ; encoding: [0x6a,0xfe,0xa3,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6a,0xfe,0xa3,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, 
vcc_lo, 0xaf123456, v255 :: v_dual_lshrrev_b32 v6, vcc_lo, v255 ; encoding: [0x6a,0xfe,0xab,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6a,0xfe,0xab,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_max_i32 v6, vcc_lo, v255 ; encoding: [0x6a,0xfe,0xaf,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6a,0xfe,0xaf,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_max_num_f32 v6, vcc_lo, v255 ; encoding: [0x6a,0xfe,0x95,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6a,0xfe,0x95,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_min_i32 v6, vcc_lo, v255 ; encoding: [0x6a,0xfe,0xb1,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6a,0xfe,0xb1,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_min_num_f32 v6, vcc_lo, v255 ; encoding: [0x6a,0xfe,0x97,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6a,0xfe,0x97,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_mov_b32 v6, vcc_lo ; encoding: [0x6a,0xfe,0x91,0xc8,0x6a,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6a,0xfe,0x91,0xc8,0x6a,0x00,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_mul_dx9_zero_f32 v6, vcc_lo, v255 ; encoding: [0x6a,0xfe,0x8f,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6a,0xfe,0x8f,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_mul_f32 v6, vcc_lo, v255 ; encoding: [0x6a,0xfe,0x87,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6a,0xfe,0x87,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_sub_f32 v6, vcc_lo, v255 ; encoding: 
[0x6a,0xfe,0x8b,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6a,0xfe,0x8b,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_sub_nc_u32 v6, vcc_lo, v255 ; encoding: [0x6a,0xfe,0xa9,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6a,0xfe,0xa9,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v255, vcc_lo, 0xaf123456, v255 :: v_dual_subrev_f32 v6, vcc_lo, v255 ; encoding: [0x6a,0xfe,0x8d,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6a,0xfe,0x8d,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_add_f32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x08,0x88,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x08,0x88,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_add_nc_u32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x08,0xa0,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x08,0xa0,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_ashrrev_i32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x08,0xac,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x08,0xac,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_cndmask_b32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x08,0x92,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x08,0x92,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_fmaak_f32 v255, 0xaf123456, v5, 0xaf123456 ; encoding: [0x7c,0x08,0x82,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x08,0x82,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_fmac_f32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x08,0x80,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x08,0x80,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf 
+ +# GFX1250: v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 ; encoding: [0x7c,0x08,0x84,0xc8,0xff,0xfe,0xff,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x08,0x84,0xc8,0xff,0xfe,0xff,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_lshlrev_b32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x08,0xa2,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x08,0xa2,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_lshrrev_b32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x08,0xaa,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x08,0xaa,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_max_i32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x08,0xae,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x08,0xae,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_max_num_f32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x08,0x94,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x08,0x94,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_min_i32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x08,0xb0,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x08,0xb0,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_min_num_f32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x08,0x96,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x08,0x96,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_mov_b32 v255, 0xaf123456 ; encoding: [0x7c,0x08,0x90,0xc8,0xff,0x00,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x08,0x90,0xc8,0xff,0x00,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v5 ; encoding: 
[0x7c,0x08,0x8e,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x08,0x8e,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_mul_f32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x08,0x86,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x08,0x86,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_sub_f32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x08,0x8a,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x08,0x8a,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_sub_nc_u32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x08,0xa8,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x08,0xa8,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_fmamk_f32 v6, null, 0xaf123456, v4 :: v_dual_subrev_f32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x08,0x8c,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x08,0x8c,0xc8,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_add_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x88,0xca,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x88,0xca,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_add_nc_u32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xa0,0xca,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xa0,0xca,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_ashrrev_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xac,0xca,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xac,0xca,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_cndmask_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x92,0xca,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x92,0xca,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_fmaak_f32 v6, src_scc, v5, 0xaf123456 ; encoding: [0xc1,0x08,0x82,0xca,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +0xc1,0x08,0x82,0xca,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf + +# 
GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_fmac_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x80,0xca,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x80,0xca,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_fmamk_f32 v6, src_scc, 0xaf123456, v255 ; encoding: [0xc1,0x08,0x84,0xca,0xfd,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xc1,0x08,0x84,0xca,0xfd,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_lshlrev_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xa2,0xca,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xa2,0xca,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_lshrrev_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xaa,0xca,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xaa,0xca,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_max_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xae,0xca,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xae,0xca,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_max_num_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x94,0xca,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x94,0xca,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_min_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xb0,0xca,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xb0,0xca,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_min_num_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x96,0xca,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x96,0xca,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_mov_b32 v6, src_scc ; encoding: [0xc1,0x08,0x90,0xca,0xfd,0x00,0x06,0xff] +0xc1,0x08,0x90,0xca,0xfd,0x00,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x8e,0xca,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x8e,0xca,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_mul_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x86,0xca,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x86,0xca,0xfd,0x0a,0x06,0xff + 
+# GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_sub_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x8a,0xca,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x8a,0xca,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_sub_nc_u32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xa8,0xca,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xa8,0xca,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_subrev_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x8c,0xca,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x8c,0xca,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_add_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x88,0xca,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x88,0xca,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_add_nc_u32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xa0,0xca,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xa0,0xca,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xac,0xca,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xac,0xca,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_cndmask_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x92,0xca,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x92,0xca,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_fmaak_f32 v6, 0.5, v2, 0xaf123456 ; encoding: [0xf0,0x06,0x82,0xca,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +0xf0,0x06,0x82,0xca,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_fmac_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x80,0xca,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x80,0xca,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_fmamk_f32 v6, 0.5, 0xaf123456, v255 ; encoding: [0xf0,0x06,0x84,0xca,0xf0,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xf0,0x06,0x84,0xca,0xf0,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xa2,0xca,0xf0,0x04,0x06,0xff] 
+0xf0,0x06,0xa2,0xca,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xaa,0xca,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xaa,0xca,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_max_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xae,0xca,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xae,0xca,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_max_num_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x94,0xca,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x94,0xca,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_min_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xb0,0xca,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xb0,0xca,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_min_num_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x96,0xca,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x96,0xca,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_mov_b32 v6, 0.5 ; encoding: [0xf0,0x06,0x90,0xca,0xf0,0x00,0x06,0xff] +0xf0,0x06,0x90,0xca,0xf0,0x00,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x8e,0xca,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x8e,0xca,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_mul_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x86,0xca,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x86,0xca,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_sub_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x8a,0xca,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x8a,0xca,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xa8,0xca,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xa8,0xca,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_subrev_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x8c,0xca,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x8c,0xca,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, 
0xaf123456, v2 :: v_dual_add_f32 v6, null, v3 ; encoding: [0xff,0x04,0x88,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x88,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v255, 0xaf123456, v2 :: v_dual_add_nc_u32 v6, null, v3 ; encoding: [0xff,0x04,0xa0,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xa0,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v255, 0xaf123456, v2 :: v_dual_ashrrev_i32 v6, null, v3 ; encoding: [0xff,0x04,0xac,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xac,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v255, 0xaf123456, v2 :: v_dual_cndmask_b32 v6, null, v3 ; encoding: [0xff,0x04,0x92,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x92,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v255, 0xaf123456, v2 :: v_dual_fmaak_f32 v6, null, v3, 0xaf123456 ; encoding: [0xff,0x04,0x82,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x82,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v255, 0xaf123456, v2 :: v_dual_fmac_f32 v6, null, v3 ; encoding: [0xff,0x04,0x80,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x80,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v255, 0xaf123456, v2 :: v_dual_lshlrev_b32 v6, null, v3 ; encoding: [0xff,0x04,0xa2,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xa2,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v255, 0xaf123456, v2 :: v_dual_lshrrev_b32 v6, null, v3 ; encoding: [0xff,0x04,0xaa,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xaa,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v255, 0xaf123456, v2 :: v_dual_max_i32 v6, null, v3 ; encoding: [0xff,0x04,0xae,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xae,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: 
v_dual_max_num_f32 v255, 0xaf123456, v2 :: v_dual_max_num_f32 v6, null, v3 ; encoding: [0xff,0x04,0x94,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x94,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v255, 0xaf123456, v2 :: v_dual_min_i32 v6, null, v3 ; encoding: [0xff,0x04,0xb0,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xb0,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v255, 0xaf123456, v2 :: v_dual_min_num_f32 v6, null, v3 ; encoding: [0xff,0x04,0x96,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x96,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v255, 0xaf123456, v2 :: v_dual_mul_dx9_zero_f32 v6, null, v3 ; encoding: [0xff,0x04,0x8e,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x8e,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v255, 0xaf123456, v2 :: v_dual_mul_f32 v6, null, v3 ; encoding: [0xff,0x04,0x86,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x86,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v255, 0xaf123456, v2 :: v_dual_sub_f32 v6, null, v3 ; encoding: [0xff,0x04,0x8a,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x8a,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v255, 0xaf123456, v2 :: v_dual_sub_nc_u32 v6, null, v3 ; encoding: [0xff,0x04,0xa8,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xa8,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v255, 0xaf123456, v2 :: v_dual_subrev_f32 v6, null, v3 ; encoding: [0xff,0x04,0x8c,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x8c,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v255, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, null, 0xaf123456, v255 ; encoding: [0xff,0xfe,0x85,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] 
+0xff,0xfe,0x85,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v255, 0xaf123456, v255 :: v_dual_mov_b32 v6, null ; encoding: [0xff,0xfe,0x91,0xca,0x7c,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0xfe,0x91,0xca,0x7c,0x00,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_add_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x88,0xca,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x88,0xca,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xa0,0xca,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xa0,0xca,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xac,0xca,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xac,0xca,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x92,0xca,0x7f,0x06,0x06,0xff] +0x7f,0x04,0x92,0xca,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_fmaak_f32 v6, exec_hi, v3, 0xaf123456 ; encoding: [0x7f,0x04,0x82,0xca,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7f,0x04,0x82,0xca,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_fmac_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x80,0xca,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x80,0xca,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xa2,0xca,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xa2,0xca,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xaa,0xca,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xaa,0xca,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_max_i32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xae,0xca,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xae,0xca,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 
v255, exec_hi, v2 :: v_dual_max_num_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x94,0xca,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x94,0xca,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_min_i32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xb0,0xca,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xb0,0xca,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_min_num_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x96,0xca,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x96,0xca,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x8e,0xca,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x8e,0xca,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_mul_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x86,0xca,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x86,0xca,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_sub_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x8a,0xca,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x8a,0xca,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xa8,0xca,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xa8,0xca,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_subrev_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x8c,0xca,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x8c,0xca,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, exec_hi, v255 :: v_dual_fmamk_f32 v6, exec_hi, 0xaf123456, v255 ; encoding: [0x7f,0xfe,0x85,0xca,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7f,0xfe,0x85,0xca,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v255, exec_hi, v255 :: v_dual_mov_b32 v6, vcc_hi ; encoding: [0x7f,0xfe,0x91,0xca,0x6b,0x00,0x06,0xff] +0x7f,0xfe,0x91,0xca,0x6b,0x00,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_add_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x88,0xca,0x7b,0x06,0x06,0xff] 
+0x7e,0x04,0x88,0xca,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xa0,0xca,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xa0,0xca,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xac,0xca,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xac,0xca,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x92,0xca,0x7e,0x06,0x06,0xff] +0x7e,0x04,0x92,0xca,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_fmaak_f32 v6, exec_lo, v3, 0xaf123456 ; encoding: [0x7e,0x04,0x82,0xca,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7e,0x04,0x82,0xca,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_fmac_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x80,0xca,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x80,0xca,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xa2,0xca,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xa2,0xca,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_lshrrev_b32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xaa,0xca,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xaa,0xca,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_max_i32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xae,0xca,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xae,0xca,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_max_num_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x94,0xca,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x94,0xca,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_min_i32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xb0,0xca,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xb0,0xca,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_min_num_f32 v6, ttmp15, v3 
; encoding: [0x7e,0x04,0x96,0xca,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x96,0xca,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x8e,0xca,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x8e,0xca,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_mul_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x86,0xca,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x86,0xca,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_sub_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x8a,0xca,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x8a,0xca,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_sub_nc_u32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xa8,0xca,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xa8,0xca,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_subrev_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x8c,0xca,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x8c,0xca,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, exec_lo, v255 :: v_dual_fmamk_f32 v6, exec_lo, 0xaf123456, v255 ; encoding: [0x7e,0xfe,0x85,0xca,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7e,0xfe,0x85,0xca,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v255, exec_lo, v255 :: v_dual_mov_b32 v6, ttmp15 ; encoding: [0x7e,0xfe,0x91,0xca,0x7b,0x00,0x06,0xff] +0x7e,0xfe,0x91,0xca,0x7b,0x00,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, m0, v2 :: v_dual_add_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x88,0xca,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x88,0xca,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, m0, v2 :: v_dual_add_nc_u32 v6, m0, v3 ; encoding: [0x7d,0x04,0xa0,0xca,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xa0,0xca,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, m0, v2 :: v_dual_ashrrev_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0xac,0xca,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xac,0xca,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, m0, v2 :: 
v_dual_cndmask_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0x92,0xca,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x92,0xca,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, m0, v2 :: v_dual_fmaak_f32 v6, m0, v3, 0xaf123456 ; encoding: [0x7d,0x04,0x82,0xca,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7d,0x04,0x82,0xca,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v255, m0, v2 :: v_dual_fmac_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x80,0xca,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x80,0xca,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, m0, v2 :: v_dual_lshlrev_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0xa2,0xca,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xa2,0xca,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, m0, v2 :: v_dual_lshrrev_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0xaa,0xca,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xaa,0xca,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, m0, v2 :: v_dual_max_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0xae,0xca,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xae,0xca,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, m0, v2 :: v_dual_max_num_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x94,0xca,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x94,0xca,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, m0, v2 :: v_dual_min_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0xb0,0xca,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xb0,0xca,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, m0, v2 :: v_dual_min_num_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x96,0xca,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x96,0xca,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x8e,0xca,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x8e,0xca,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, m0, v2 :: v_dual_mul_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x86,0xca,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x86,0xca,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, m0, v2 :: v_dual_sub_f32 v6, m0, v3 ; encoding: 
[0x7d,0x04,0x8a,0xca,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x8a,0xca,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, m0, v2 :: v_dual_sub_nc_u32 v6, m0, v3 ; encoding: [0x7d,0x04,0xa8,0xca,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xa8,0xca,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, m0, v2 :: v_dual_subrev_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x8c,0xca,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x8c,0xca,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, m0, v255 :: v_dual_fmamk_f32 v6, m0, 0xaf123456, v255 ; encoding: [0x7d,0xfe,0x85,0xca,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7d,0xfe,0x85,0xca,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v255, m0, v255 :: v_dual_mov_b32 v6, m0 ; encoding: [0x7d,0xfe,0x91,0xca,0x7d,0x00,0x06,0xff] +0x7d,0xfe,0x91,0xca,0x7d,0x00,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, s1, v2 :: v_dual_add_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x88,0xca,0x69,0x06,0x06,0xff] +0x01,0x04,0x88,0xca,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, s1, v2 :: v_dual_add_nc_u32 v6, s105, v3 ; encoding: [0x01,0x04,0xa0,0xca,0x69,0x06,0x06,0xff] +0x01,0x04,0xa0,0xca,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, s1, v2 :: v_dual_ashrrev_i32 v6, s105, v3 ; encoding: [0x01,0x04,0xac,0xca,0x69,0x06,0x06,0xff] +0x01,0x04,0xac,0xca,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s1, v3 ; encoding: [0x01,0x04,0x92,0xca,0x01,0x06,0x06,0xff] +0x01,0x04,0x92,0xca,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, s1, v2 :: v_dual_fmaak_f32 v6, s1, v3, 0xaf123456 ; encoding: [0x01,0x04,0x82,0xca,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0x04,0x82,0xca,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v255, s1, v2 :: v_dual_fmac_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x80,0xca,0x69,0x06,0x06,0xff] +0x01,0x04,0x80,0xca,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, s1, v2 :: v_dual_lshlrev_b32 v6, s105, v3 
; encoding: [0x01,0x04,0xa2,0xca,0x69,0x06,0x06,0xff] +0x01,0x04,0xa2,0xca,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, s1, v2 :: v_dual_lshrrev_b32 v6, s105, v3 ; encoding: [0x01,0x04,0xaa,0xca,0x69,0x06,0x06,0xff] +0x01,0x04,0xaa,0xca,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, s1, v2 :: v_dual_max_i32 v6, s105, v3 ; encoding: [0x01,0x04,0xae,0xca,0x69,0x06,0x06,0xff] +0x01,0x04,0xae,0xca,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, s1, v2 :: v_dual_max_num_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x94,0xca,0x69,0x06,0x06,0xff] +0x01,0x04,0x94,0xca,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, s1, v2 :: v_dual_min_i32 v6, s105, v3 ; encoding: [0x01,0x04,0xb0,0xca,0x69,0x06,0x06,0xff] +0x01,0x04,0xb0,0xca,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, s1, v2 :: v_dual_min_num_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x96,0xca,0x69,0x06,0x06,0xff] +0x01,0x04,0x96,0xca,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x8e,0xca,0x69,0x06,0x06,0xff] +0x01,0x04,0x8e,0xca,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, s1, v2 :: v_dual_mul_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x86,0xca,0x69,0x06,0x06,0xff] +0x01,0x04,0x86,0xca,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, s1, v2 :: v_dual_sub_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x8a,0xca,0x69,0x06,0x06,0xff] +0x01,0x04,0x8a,0xca,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, s1, v2 :: v_dual_sub_nc_u32 v6, s105, v3 ; encoding: [0x01,0x04,0xa8,0xca,0x69,0x06,0x06,0xff] +0x01,0x04,0xa8,0xca,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, s1, v2 :: v_dual_subrev_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x8c,0xca,0x69,0x06,0x06,0xff] +0x01,0x04,0x8c,0xca,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, s1, v255 :: v_dual_fmamk_f32 v6, s1, 0xaf123456, v255 ; encoding: 
[0x01,0xfe,0x85,0xca,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x01,0xfe,0x85,0xca,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v255, s1, v255 :: v_dual_mov_b32 v6, s105 ; encoding: [0x01,0xfe,0x91,0xca,0x69,0x00,0x06,0xff] +0x01,0xfe,0x91,0xca,0x69,0x00,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, s105, v2 :: v_dual_add_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x88,0xca,0x01,0x06,0x06,0xff] +0x69,0x04,0x88,0xca,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, s105, v2 :: v_dual_add_nc_u32 v6, s1, v3 ; encoding: [0x69,0x04,0xa0,0xca,0x01,0x06,0x06,0xff] +0x69,0x04,0xa0,0xca,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, s105, v2 :: v_dual_ashrrev_i32 v6, s1, v3 ; encoding: [0x69,0x04,0xac,0xca,0x01,0x06,0x06,0xff] +0x69,0x04,0xac,0xca,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, s105, v2 :: v_dual_cndmask_b32 v6, s105, v3 ; encoding: [0x69,0x04,0x92,0xca,0x69,0x06,0x06,0xff] +0x69,0x04,0x92,0xca,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, s105, v2 :: v_dual_fmaak_f32 v6, s105, v3, 0xaf123456 ; encoding: [0x69,0x04,0x82,0xca,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x69,0x04,0x82,0xca,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v255, s105, v2 :: v_dual_fmac_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x80,0xca,0x01,0x06,0x06,0xff] +0x69,0x04,0x80,0xca,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, s105, v2 :: v_dual_lshlrev_b32 v6, s1, v3 ; encoding: [0x69,0x04,0xa2,0xca,0x01,0x06,0x06,0xff] +0x69,0x04,0xa2,0xca,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, s105, v2 :: v_dual_lshrrev_b32 v6, s1, v3 ; encoding: [0x69,0x04,0xaa,0xca,0x01,0x06,0x06,0xff] +0x69,0x04,0xaa,0xca,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, s105, v2 :: v_dual_max_i32 v6, s1, v3 ; encoding: [0x69,0x04,0xae,0xca,0x01,0x06,0x06,0xff] +0x69,0x04,0xae,0xca,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, s105, v2 :: v_dual_max_num_f32 v6, s1, v3 
; encoding: [0x69,0x04,0x94,0xca,0x01,0x06,0x06,0xff] +0x69,0x04,0x94,0xca,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, s105, v2 :: v_dual_min_i32 v6, s1, v3 ; encoding: [0x69,0x04,0xb0,0xca,0x01,0x06,0x06,0xff] +0x69,0x04,0xb0,0xca,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, s105, v2 :: v_dual_min_num_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x96,0xca,0x01,0x06,0x06,0xff] +0x69,0x04,0x96,0xca,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x8e,0xca,0x01,0x06,0x06,0xff] +0x69,0x04,0x8e,0xca,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, s105, v2 :: v_dual_mul_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x86,0xca,0x01,0x06,0x06,0xff] +0x69,0x04,0x86,0xca,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, s105, v2 :: v_dual_sub_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x8a,0xca,0x01,0x06,0x06,0xff] +0x69,0x04,0x8a,0xca,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, s105, v2 :: v_dual_sub_nc_u32 v6, s1, v3 ; encoding: [0x69,0x04,0xa8,0xca,0x01,0x06,0x06,0xff] +0x69,0x04,0xa8,0xca,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, s105, v2 :: v_dual_subrev_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x8c,0xca,0x01,0x06,0x06,0xff] +0x69,0x04,0x8c,0xca,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, s105, v255 :: v_dual_fmamk_f32 v6, s105, 0xaf123456, v255 ; encoding: [0x69,0xfe,0x85,0xca,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x69,0xfe,0x85,0xca,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v255, s105, v255 :: v_dual_mov_b32 v6, s1 ; encoding: [0x69,0xfe,0x91,0xca,0x01,0x00,0x06,0xff] +0x69,0xfe,0x91,0xca,0x01,0x00,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_add_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x88,0xca,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x88,0xca,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_add_nc_u32 v6, -1, v3 ; encoding: 
[0xfd,0x04,0xa0,0xca,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xa0,0xca,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0xac,0xca,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xac,0xca,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_cndmask_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0x92,0xca,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x92,0xca,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_fmaak_f32 v6, -1, v3, 0xaf123456 ; encoding: [0xfd,0x04,0x82,0xca,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xfd,0x04,0x82,0xca,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_fmac_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x80,0xca,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x80,0xca,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0xa2,0xca,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xa2,0xca,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0xaa,0xca,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xaa,0xca,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_max_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0xae,0xca,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xae,0xca,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_max_num_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x94,0xca,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x94,0xca,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_min_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0xb0,0xca,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xb0,0xca,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_min_num_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x96,0xca,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x96,0xca,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v6, -1, 
v3 ; encoding: [0xfd,0x04,0x8e,0xca,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x8e,0xca,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_mul_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x86,0xca,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x86,0xca,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_sub_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x8a,0xca,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x8a,0xca,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v6, -1, v3 ; encoding: [0xfd,0x04,0xa8,0xca,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xa8,0xca,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_subrev_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x8c,0xca,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x8c,0xca,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, src_scc, v255 :: v_dual_fmamk_f32 v6, -1, 0xaf123456, v255 ; encoding: [0xfd,0xfe,0x85,0xca,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xfd,0xfe,0x85,0xca,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v255, src_scc, v255 :: v_dual_mov_b32 v6, -1 ; encoding: [0xfd,0xfe,0x91,0xca,0xc1,0x00,0x06,0xff] +0xfd,0xfe,0x91,0xca,0xc1,0x00,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_add_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x88,0xca,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x88,0xca,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xa0,0xca,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xa0,0xca,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xac,0xca,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xac,0xca,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x92,0xca,0x7b,0x06,0x06,0xff] +0x7b,0x04,0x92,0xca,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, ttmp15, v2 :: 
v_dual_fmaak_f32 v6, ttmp15, v3, 0xaf123456 ; encoding: [0x7b,0x04,0x82,0xca,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7b,0x04,0x82,0xca,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_fmac_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x80,0xca,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x80,0xca,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xa2,0xca,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xa2,0xca,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xaa,0xca,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xaa,0xca,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_max_i32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xae,0xca,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xae,0xca,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_max_num_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x94,0xca,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x94,0xca,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_min_i32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xb0,0xca,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xb0,0xca,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_min_num_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x96,0xca,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x96,0xca,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x8e,0xca,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x8e,0xca,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_mul_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x86,0xca,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x86,0xca,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_sub_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x8a,0xca,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x8a,0xca,0x6a,0x06,0x06,0xff + +# GFX1250: 
v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_sub_nc_u32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xa8,0xca,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xa8,0xca,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_subrev_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x8c,0xca,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x8c,0xca,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, ttmp15, v255 :: v_dual_fmamk_f32 v6, ttmp15, 0xaf123456, v255 ; encoding: [0x7b,0xfe,0x85,0xca,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7b,0xfe,0x85,0xca,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v255, ttmp15, v255 :: v_dual_mov_b32 v6, vcc_lo ; encoding: [0x7b,0xfe,0x91,0xca,0x6a,0x00,0x06,0xff] +0x7b,0xfe,0x91,0xca,0x6a,0x00,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v1, v2 :: v_dual_add_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x88,0xca,0xff,0x07,0x06,0xff] +0x01,0x05,0x88,0xca,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v1, v2 :: v_dual_add_nc_u32 v6, v255, v3 ; encoding: [0x01,0x05,0xa0,0xca,0xff,0x07,0x06,0xff] +0x01,0x05,0xa0,0xca,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v1, v2 :: v_dual_ashrrev_i32 v6, v255, v3 ; encoding: [0x01,0x05,0xac,0xca,0xff,0x07,0x06,0xff] +0x01,0x05,0xac,0xca,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v1, v2 :: v_dual_cndmask_b32 v6, v255, v3 ; encoding: [0x01,0x05,0x92,0xca,0xff,0x07,0x06,0xff] +0x01,0x05,0x92,0xca,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v1, v2 :: v_dual_fmaak_f32 v6, v255, v3, 0xaf123456 ; encoding: [0x01,0x05,0x82,0xca,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0x05,0x82,0xca,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v255, v1, v2 :: v_dual_fmac_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x80,0xca,0xff,0x07,0x06,0xff] +0x01,0x05,0x80,0xca,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v1, v2 :: v_dual_lshlrev_b32 v6, v255, v3 ; encoding: [0x01,0x05,0xa2,0xca,0xff,0x07,0x06,0xff] 
+0x01,0x05,0xa2,0xca,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v1, v2 :: v_dual_lshrrev_b32 v6, v255, v3 ; encoding: [0x01,0x05,0xaa,0xca,0xff,0x07,0x06,0xff] +0x01,0x05,0xaa,0xca,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v1, v2 :: v_dual_max_i32 v6, v255, v3 ; encoding: [0x01,0x05,0xae,0xca,0xff,0x07,0x06,0xff] +0x01,0x05,0xae,0xca,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v1, v2 :: v_dual_max_num_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x94,0xca,0xff,0x07,0x06,0xff] +0x01,0x05,0x94,0xca,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v1, v2 :: v_dual_min_i32 v6, v255, v3 ; encoding: [0x01,0x05,0xb0,0xca,0xff,0x07,0x06,0xff] +0x01,0x05,0xb0,0xca,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v1, v2 :: v_dual_min_num_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x96,0xca,0xff,0x07,0x06,0xff] +0x01,0x05,0x96,0xca,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x8e,0xca,0xff,0x07,0x06,0xff] +0x01,0x05,0x8e,0xca,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v1, v2 :: v_dual_mul_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x86,0xca,0xff,0x07,0x06,0xff] +0x01,0x05,0x86,0xca,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v1, v2 :: v_dual_sub_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x8a,0xca,0xff,0x07,0x06,0xff] +0x01,0x05,0x8a,0xca,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v1, v2 :: v_dual_sub_nc_u32 v6, v255, v3 ; encoding: [0x01,0x05,0xa8,0xca,0xff,0x07,0x06,0xff] +0x01,0x05,0xa8,0xca,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v1, v2 :: v_dual_subrev_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x8c,0xca,0xff,0x07,0x06,0xff] +0x01,0x05,0x8c,0xca,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v1, v255 :: v_dual_fmamk_f32 v6, v255, 0xaf123456, v255 ; encoding: [0x01,0xff,0x85,0xca,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] 
+0x01,0xff,0x85,0xca,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v255, v1, v255 :: v_dual_mov_b32 v6, v255 ; encoding: [0x01,0xff,0x91,0xca,0xff,0x01,0x06,0xff] +0x01,0xff,0x91,0xca,0xff,0x01,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v2, v2 :: v_dual_add_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x88,0xca,0x03,0x07,0x06,0xff] +0x02,0x05,0x88,0xca,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v2, v2 :: v_dual_add_nc_u32 v6, v3, v3 ; encoding: [0x02,0x05,0xa0,0xca,0x03,0x07,0x06,0xff] +0x02,0x05,0xa0,0xca,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v2, v2 :: v_dual_ashrrev_i32 v6, v3, v3 ; encoding: [0x02,0x05,0xac,0xca,0x03,0x07,0x06,0xff] +0x02,0x05,0xac,0xca,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v2, v2 :: v_dual_cndmask_b32 v6, v3, v3 ; encoding: [0x02,0x05,0x92,0xca,0x03,0x07,0x06,0xff] +0x02,0x05,0x92,0xca,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v2, v2 :: v_dual_fmaak_f32 v6, v3, v3, 0xaf123456 ; encoding: [0x02,0x05,0x82,0xca,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x02,0x05,0x82,0xca,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v255, v2, v2 :: v_dual_fmac_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x80,0xca,0x03,0x07,0x06,0xff] +0x02,0x05,0x80,0xca,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v2, v2 :: v_dual_lshlrev_b32 v6, v3, v3 ; encoding: [0x02,0x05,0xa2,0xca,0x03,0x07,0x06,0xff] +0x02,0x05,0xa2,0xca,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v2, v2 :: v_dual_lshrrev_b32 v6, v3, v3 ; encoding: [0x02,0x05,0xaa,0xca,0x03,0x07,0x06,0xff] +0x02,0x05,0xaa,0xca,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v2, v2 :: v_dual_max_i32 v6, v3, v3 ; encoding: [0x02,0x05,0xae,0xca,0x03,0x07,0x06,0xff] +0x02,0x05,0xae,0xca,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v2, v2 :: v_dual_max_num_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x94,0xca,0x03,0x07,0x06,0xff] 
+0x02,0x05,0x94,0xca,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v2, v2 :: v_dual_min_i32 v6, v3, v3 ; encoding: [0x02,0x05,0xb0,0xca,0x03,0x07,0x06,0xff] +0x02,0x05,0xb0,0xca,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v2, v2 :: v_dual_min_num_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x96,0xca,0x03,0x07,0x06,0xff] +0x02,0x05,0x96,0xca,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x8e,0xca,0x03,0x07,0x06,0xff] +0x02,0x05,0x8e,0xca,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v2, v2 :: v_dual_mul_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x86,0xca,0x03,0x07,0x06,0xff] +0x02,0x05,0x86,0xca,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v2, v2 :: v_dual_sub_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x8a,0xca,0x03,0x07,0x06,0xff] +0x02,0x05,0x8a,0xca,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v2, v2 :: v_dual_sub_nc_u32 v6, v3, v3 ; encoding: [0x02,0x05,0xa8,0xca,0x03,0x07,0x06,0xff] +0x02,0x05,0xa8,0xca,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v2, v2 :: v_dual_subrev_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x8c,0xca,0x03,0x07,0x06,0xff] +0x02,0x05,0x8c,0xca,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v2, v255 :: v_dual_fmamk_f32 v6, v3, 0xaf123456, v255 ; encoding: [0x02,0xff,0x85,0xca,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x02,0xff,0x85,0xca,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v255, v2, v255 :: v_dual_mov_b32 v6, v3 ; encoding: [0x02,0xff,0x91,0xca,0x03,0x01,0x06,0xff] +0x02,0xff,0x91,0xca,0x03,0x01,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v255, v2 :: v_dual_add_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x88,0xca,0x02,0x07,0x06,0xff] +0xff,0x05,0x88,0xca,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v255, v2 :: v_dual_add_nc_u32 v6, v2, v3 ; encoding: [0xff,0x05,0xa0,0xca,0x02,0x07,0x06,0xff] +0xff,0x05,0xa0,0xca,0x02,0x07,0x06,0xff + +# 
GFX1250: v_dual_max_num_f32 v255, v255, v2 :: v_dual_ashrrev_i32 v6, v2, v3 ; encoding: [0xff,0x05,0xac,0xca,0x02,0x07,0x06,0xff] +0xff,0x05,0xac,0xca,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v255, v2 :: v_dual_cndmask_b32 v6, v2, v3 ; encoding: [0xff,0x05,0x92,0xca,0x02,0x07,0x06,0xff] +0xff,0x05,0x92,0xca,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v255, v2 :: v_dual_fmaak_f32 v6, v2, v3, 0xaf123456 ; encoding: [0xff,0x05,0x82,0xca,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x05,0x82,0xca,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v255, v255, v2 :: v_dual_fmac_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x80,0xca,0x02,0x07,0x06,0xff] +0xff,0x05,0x80,0xca,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v255, v2 :: v_dual_lshlrev_b32 v6, v2, v3 ; encoding: [0xff,0x05,0xa2,0xca,0x02,0x07,0x06,0xff] +0xff,0x05,0xa2,0xca,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v255, v2 :: v_dual_lshrrev_b32 v6, v2, v3 ; encoding: [0xff,0x05,0xaa,0xca,0x02,0x07,0x06,0xff] +0xff,0x05,0xaa,0xca,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v255, v2 :: v_dual_max_i32 v6, v2, v3 ; encoding: [0xff,0x05,0xae,0xca,0x02,0x07,0x06,0xff] +0xff,0x05,0xae,0xca,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v255, v2 :: v_dual_max_num_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x94,0xca,0x02,0x07,0x06,0xff] +0xff,0x05,0x94,0xca,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v255, v2 :: v_dual_min_i32 v6, v2, v3 ; encoding: [0xff,0x05,0xb0,0xca,0x02,0x07,0x06,0xff] +0xff,0x05,0xb0,0xca,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v255, v2 :: v_dual_min_num_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x96,0xca,0x02,0x07,0x06,0xff] +0xff,0x05,0x96,0xca,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x8e,0xca,0x02,0x07,0x06,0xff] +0xff,0x05,0x8e,0xca,0x02,0x07,0x06,0xff + +# GFX1250: 
v_dual_max_num_f32 v255, v255, v2 :: v_dual_mul_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x86,0xca,0x02,0x07,0x06,0xff] +0xff,0x05,0x86,0xca,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v255, v2 :: v_dual_sub_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x8a,0xca,0x02,0x07,0x06,0xff] +0xff,0x05,0x8a,0xca,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v255, v2 :: v_dual_sub_nc_u32 v6, v2, v3 ; encoding: [0xff,0x05,0xa8,0xca,0x02,0x07,0x06,0xff] +0xff,0x05,0xa8,0xca,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v255, v2 :: v_dual_subrev_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x8c,0xca,0x02,0x07,0x06,0xff] +0xff,0x05,0x8c,0xca,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v255, v255 :: v_dual_fmamk_f32 v6, v2, 0xaf123456, v255 ; encoding: [0xff,0xff,0x85,0xca,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0xff,0x85,0xca,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v255, v255, v255 :: v_dual_mov_b32 v6, v2 ; encoding: [0xff,0xff,0x91,0xca,0x02,0x01,0x06,0xff] +0xff,0xff,0x91,0xca,0x02,0x01,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v3, v2 :: v_dual_add_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x88,0xca,0x04,0x07,0x06,0xff] +0x03,0x05,0x88,0xca,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v3, v2 :: v_dual_add_nc_u32 v6, v4, v3 ; encoding: [0x03,0x05,0xa0,0xca,0x04,0x07,0x06,0xff] +0x03,0x05,0xa0,0xca,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v3, v2 :: v_dual_ashrrev_i32 v6, v4, v3 ; encoding: [0x03,0x05,0xac,0xca,0x04,0x07,0x06,0xff] +0x03,0x05,0xac,0xca,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v3, v2 :: v_dual_cndmask_b32 v6, v4, v3 ; encoding: [0x03,0x05,0x92,0xca,0x04,0x07,0x06,0xff] +0x03,0x05,0x92,0xca,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v3, v2 :: v_dual_fmaak_f32 v6, v4, v3, 0xaf123456 ; encoding: [0x03,0x05,0x82,0xca,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x03,0x05,0x82,0xca,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# 
GFX1250: v_dual_max_num_f32 v255, v3, v2 :: v_dual_fmac_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x80,0xca,0x04,0x07,0x06,0xff] +0x03,0x05,0x80,0xca,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v3, v2 :: v_dual_lshlrev_b32 v6, v4, v3 ; encoding: [0x03,0x05,0xa2,0xca,0x04,0x07,0x06,0xff] +0x03,0x05,0xa2,0xca,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v3, v2 :: v_dual_lshrrev_b32 v6, v4, v3 ; encoding: [0x03,0x05,0xaa,0xca,0x04,0x07,0x06,0xff] +0x03,0x05,0xaa,0xca,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v3, v2 :: v_dual_max_i32 v6, v4, v3 ; encoding: [0x03,0x05,0xae,0xca,0x04,0x07,0x06,0xff] +0x03,0x05,0xae,0xca,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v3, v2 :: v_dual_max_num_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x94,0xca,0x04,0x07,0x06,0xff] +0x03,0x05,0x94,0xca,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v3, v2 :: v_dual_min_i32 v6, v4, v3 ; encoding: [0x03,0x05,0xb0,0xca,0x04,0x07,0x06,0xff] +0x03,0x05,0xb0,0xca,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v3, v2 :: v_dual_min_num_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x96,0xca,0x04,0x07,0x06,0xff] +0x03,0x05,0x96,0xca,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x8e,0xca,0x04,0x07,0x06,0xff] +0x03,0x05,0x8e,0xca,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v3, v2 :: v_dual_mul_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x86,0xca,0x04,0x07,0x06,0xff] +0x03,0x05,0x86,0xca,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v3, v2 :: v_dual_sub_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x8a,0xca,0x04,0x07,0x06,0xff] +0x03,0x05,0x8a,0xca,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v3, v2 :: v_dual_sub_nc_u32 v6, v4, v3 ; encoding: [0x03,0x05,0xa8,0xca,0x04,0x07,0x06,0xff] +0x03,0x05,0xa8,0xca,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v3, v2 :: v_dual_subrev_f32 v6, v4, v3 ; encoding: 
[0x03,0x05,0x8c,0xca,0x04,0x07,0x06,0xff] +0x03,0x05,0x8c,0xca,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v3, v255 :: v_dual_fmamk_f32 v6, v4, 0xaf123456, v255 ; encoding: [0x03,0xff,0x85,0xca,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x03,0xff,0x85,0xca,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v255, v3, v255 :: v_dual_mov_b32 v6, v4 ; encoding: [0x03,0xff,0x91,0xca,0x04,0x01,0x06,0xff] +0x03,0xff,0x91,0xca,0x04,0x01,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_add_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x88,0xca,0x01,0x07,0x06,0xff] +0x04,0x05,0x88,0xca,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_add_nc_u32 v6, v1, v3 ; encoding: [0x04,0x05,0xa0,0xca,0x01,0x07,0x06,0xff] +0x04,0x05,0xa0,0xca,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_ashrrev_i32 v6, v1, v3 ; encoding: [0x04,0x05,0xac,0xca,0x01,0x07,0x06,0xff] +0x04,0x05,0xac,0xca,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_cndmask_b32 v6, v1, v3 ; encoding: [0x04,0x05,0x92,0xca,0x01,0x07,0x06,0xff] +0x04,0x05,0x92,0xca,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_fmaak_f32 v6, v1, v3, 0xaf123456 ; encoding: [0x04,0x05,0x82,0xca,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x04,0x05,0x82,0xca,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_fmac_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x80,0xca,0x01,0x07,0x06,0xff] +0x04,0x05,0x80,0xca,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_lshlrev_b32 v6, v1, v3 ; encoding: [0x04,0x05,0xa2,0xca,0x01,0x07,0x06,0xff] +0x04,0x05,0xa2,0xca,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_lshrrev_b32 v6, v1, v3 ; encoding: [0x04,0x05,0xaa,0xca,0x01,0x07,0x06,0xff] +0x04,0x05,0xaa,0xca,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_max_i32 v6, v1, v3 ; encoding: 
[0x04,0x05,0xae,0xca,0x01,0x07,0x06,0xff] +0x04,0x05,0xae,0xca,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_max_num_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x94,0xca,0x01,0x07,0x06,0xff] +0x04,0x05,0x94,0xca,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_min_i32 v6, v1, v3 ; encoding: [0x04,0x05,0xb0,0xca,0x01,0x07,0x06,0xff] +0x04,0x05,0xb0,0xca,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_min_num_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x96,0xca,0x01,0x07,0x06,0xff] +0x04,0x05,0x96,0xca,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x8e,0xca,0x01,0x07,0x06,0xff] +0x04,0x05,0x8e,0xca,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_mul_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x86,0xca,0x01,0x07,0x06,0xff] +0x04,0x05,0x86,0xca,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_sub_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x8a,0xca,0x01,0x07,0x06,0xff] +0x04,0x05,0x8a,0xca,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_sub_nc_u32 v6, v1, v3 ; encoding: [0x04,0x05,0xa8,0xca,0x01,0x07,0x06,0xff] +0x04,0x05,0xa8,0xca,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_subrev_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x8c,0xca,0x01,0x07,0x06,0xff] +0x04,0x05,0x8c,0xca,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, v4, v255 :: v_dual_fmamk_f32 v6, v1, 0xaf123456, v255 ; encoding: [0x04,0xff,0x85,0xca,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x04,0xff,0x85,0xca,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v255, v4, v255 :: v_dual_mov_b32 v6, v1 ; encoding: [0x04,0xff,0x91,0xca,0x01,0x01,0x06,0xff] +0x04,0xff,0x91,0xca,0x01,0x01,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_add_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x88,0xca,0x7e,0x06,0x06,0xff] 
+0x6b,0x04,0x88,0xca,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xa0,0xca,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xa0,0xca,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xac,0xca,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xac,0xca,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x92,0xca,0x6b,0x06,0x06,0xff] +0x6b,0x04,0x92,0xca,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_fmaak_f32 v6, vcc_hi, v3, 0xaf123456 ; encoding: [0x6b,0x04,0x82,0xca,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6b,0x04,0x82,0xca,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x80,0xca,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x80,0xca,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xa2,0xca,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xa2,0xca,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xaa,0xca,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xaa,0xca,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_max_i32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xae,0xca,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xae,0xca,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x94,0xca,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x94,0xca,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_min_i32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xb0,0xca,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xb0,0xca,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v6, exec_lo, v3 ; 
encoding: [0x6b,0x04,0x96,0xca,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x96,0xca,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x8e,0xca,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x8e,0xca,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_mul_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x86,0xca,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x86,0xca,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_sub_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x8a,0xca,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x8a,0xca,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xa8,0xca,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xa8,0xca,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x8c,0xca,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x8c,0xca,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, vcc_hi, v255 :: v_dual_fmamk_f32 v6, vcc_hi, 0xaf123456, v255 ; encoding: [0x6b,0xfe,0x85,0xca,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6b,0xfe,0x85,0xca,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v255, vcc_hi, v255 :: v_dual_mov_b32 v6, exec_lo ; encoding: [0x6b,0xfe,0x91,0xca,0x7e,0x00,0x06,0xff] +0x6b,0xfe,0x91,0xca,0x7e,0x00,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_add_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x88,0xca,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x88,0xca,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xa0,0xca,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xa0,0xca,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xac,0xca,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xac,0xca,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, 
vcc_lo, v2 :: v_dual_cndmask_b32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x92,0xca,0x6a,0x06,0x06,0xff] +0x6a,0x04,0x92,0xca,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_fmaak_f32 v6, vcc_lo, v3, 0xaf123456 ; encoding: [0x6a,0x04,0x82,0xca,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6a,0x04,0x82,0xca,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x80,0xca,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x80,0xca,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xa2,0xca,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xa2,0xca,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xaa,0xca,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xaa,0xca,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_max_i32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xae,0xca,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xae,0xca,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x94,0xca,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x94,0xca,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_min_i32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xb0,0xca,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xb0,0xca,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x96,0xca,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x96,0xca,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x8e,0xca,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x8e,0xca,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_mul_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x86,0xca,0x7f,0x06,0x06,0xff] 
+0x6a,0x04,0x86,0xca,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_sub_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x8a,0xca,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x8a,0xca,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xa8,0xca,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xa8,0xca,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x8c,0xca,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x8c,0xca,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v255, vcc_lo, v255 :: v_dual_fmamk_f32 v6, vcc_lo, 0xaf123456, v255 ; encoding: [0x6a,0xfe,0x85,0xca,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6a,0xfe,0x85,0xca,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v255, vcc_lo, v255 :: v_dual_mov_b32 v6, exec_hi ; encoding: [0x6a,0xfe,0x91,0xca,0x7f,0x00,0x06,0xff] +0x6a,0xfe,0x91,0xca,0x7f,0x00,0x06,0xff + +# GFX1250: v_dual_max_num_f32 v6, null, v5 :: v_dual_add_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x88,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x88,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v6, null, v5 :: v_dual_add_nc_u32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xa0,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xa0,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v6, null, v5 :: v_dual_ashrrev_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xac,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xac,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v6, null, v5 :: v_dual_cndmask_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x92,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x92,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v6, null, v5 :: v_dual_fmaak_f32 v255, 0xaf123456, v4, 0xaf123456 ; encoding: 
[0x7c,0x0a,0x82,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x82,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v6, null, v5 :: v_dual_fmac_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x80,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x80,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v6, null, v5 :: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x84,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x84,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v6, null, v5 :: v_dual_lshlrev_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xa2,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xa2,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v6, null, v5 :: v_dual_lshrrev_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xaa,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xaa,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v6, null, v5 :: v_dual_max_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xae,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xae,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v6, null, v5 :: v_dual_max_num_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x94,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x94,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v6, null, v5 :: v_dual_min_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xb0,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xb0,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v6, null, v5 :: v_dual_min_num_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x96,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x96,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v6, null, v5 :: v_dual_mov_b32 v255, 0xaf123456 ; 
encoding: [0x7c,0x0a,0x90,0xca,0xff,0x00,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x90,0xca,0xff,0x00,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v6, null, v5 :: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x8e,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x8e,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v6, null, v5 :: v_dual_mul_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x86,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x86,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v6, null, v5 :: v_dual_sub_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x8a,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x8a,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v6, null, v5 :: v_dual_sub_nc_u32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xa8,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xa8,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_max_num_f32 v6, null, v5 :: v_dual_subrev_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x8c,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x8c,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_add_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xc8,0xca,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xc8,0xca,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_add_nc_u32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xe0,0xca,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xe0,0xca,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_ashrrev_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xec,0xca,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xec,0xca,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_cndmask_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xd2,0xca,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xd2,0xca,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, 
-1, v4 :: v_dual_fmaak_f32 v6, src_scc, v5, 0xaf123456 ; encoding: [0xc1,0x08,0xc2,0xca,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +0xc1,0x08,0xc2,0xca,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_fmac_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xc0,0xca,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xc0,0xca,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_fmamk_f32 v6, src_scc, 0xaf123456, v255 ; encoding: [0xc1,0x08,0xc4,0xca,0xfd,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xc1,0x08,0xc4,0xca,0xfd,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_lshlrev_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xe2,0xca,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xe2,0xca,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_lshrrev_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xea,0xca,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xea,0xca,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_max_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xee,0xca,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xee,0xca,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_max_num_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xd4,0xca,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xd4,0xca,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_min_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xf0,0xca,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xf0,0xca,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_min_num_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xd6,0xca,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xd6,0xca,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_mov_b32 v6, src_scc ; encoding: [0xc1,0x08,0xd0,0xca,0xfd,0x00,0x06,0xff] +0xc1,0x08,0xd0,0xca,0xfd,0x00,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xce,0xca,0xfd,0x0a,0x06,0xff] 
+0xc1,0x08,0xce,0xca,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_mul_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xc6,0xca,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xc6,0xca,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_sub_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xca,0xca,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xca,0xca,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_sub_nc_u32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xe8,0xca,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xe8,0xca,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_subrev_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xcc,0xca,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xcc,0xca,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_add_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xc8,0xca,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xc8,0xca,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_add_nc_u32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xe0,0xca,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xe0,0xca,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xec,0xca,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xec,0xca,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_cndmask_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xd2,0xca,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xd2,0xca,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_fmaak_f32 v6, 0.5, v2, 0xaf123456 ; encoding: [0xf0,0x06,0xc2,0xca,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +0xf0,0x06,0xc2,0xca,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_fmac_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xc0,0xca,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xc0,0xca,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_fmamk_f32 v6, 0.5, 0xaf123456, v255 ; encoding: 
[0xf0,0x06,0xc4,0xca,0xf0,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xf0,0x06,0xc4,0xca,0xf0,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xe2,0xca,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xe2,0xca,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xea,0xca,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xea,0xca,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_max_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xee,0xca,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xee,0xca,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_max_num_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xd4,0xca,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xd4,0xca,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_min_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xf0,0xca,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xf0,0xca,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_min_num_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xd6,0xca,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xd6,0xca,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_mov_b32 v6, 0.5 ; encoding: [0xf0,0x06,0xd0,0xca,0xf0,0x00,0x06,0xff] +0xf0,0x06,0xd0,0xca,0xf0,0x00,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xce,0xca,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xce,0xca,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_mul_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xc6,0xca,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xc6,0xca,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_sub_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xca,0xca,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xca,0xca,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xe8,0xca,0xf0,0x04,0x06,0xff] 
+0xf0,0x06,0xe8,0xca,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_subrev_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xcc,0xca,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xcc,0xca,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_add_f32 v6, null, v3 ; encoding: [0xff,0x04,0xc8,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xc8,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_add_nc_u32 v6, null, v3 ; encoding: [0xff,0x04,0xe0,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xe0,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_ashrrev_i32 v6, null, v3 ; encoding: [0xff,0x04,0xec,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xec,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_cndmask_b32 v6, null, v3 ; encoding: [0xff,0x04,0xd2,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xd2,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_fmaak_f32 v6, null, v3, 0xaf123456 ; encoding: [0xff,0x04,0xc2,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xc2,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_fmac_f32 v6, null, v3 ; encoding: [0xff,0x04,0xc0,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xc0,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_lshlrev_b32 v6, null, v3 ; encoding: [0xff,0x04,0xe2,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xe2,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_lshrrev_b32 v6, null, v3 ; encoding: [0xff,0x04,0xea,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] 
+0xff,0x04,0xea,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_max_i32 v6, null, v3 ; encoding: [0xff,0x04,0xee,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xee,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_max_num_f32 v6, null, v3 ; encoding: [0xff,0x04,0xd4,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xd4,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_min_i32 v6, null, v3 ; encoding: [0xff,0x04,0xf0,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xf0,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_min_num_f32 v6, null, v3 ; encoding: [0xff,0x04,0xd6,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xd6,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_mul_dx9_zero_f32 v6, null, v3 ; encoding: [0xff,0x04,0xce,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xce,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_mul_f32 v6, null, v3 ; encoding: [0xff,0x04,0xc6,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xc6,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_sub_f32 v6, null, v3 ; encoding: [0xff,0x04,0xca,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xca,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_sub_nc_u32 v6, null, v3 ; encoding: [0xff,0x04,0xe8,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xe8,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, 0xaf123456, v2 :: v_dual_subrev_f32 v6, null, v3 ; encoding: 
[0xff,0x04,0xcc,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xcc,0xca,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, null, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xc5,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0xfe,0xc5,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, 0xaf123456, v255 :: v_dual_mov_b32 v6, null ; encoding: [0xff,0xfe,0xd1,0xca,0x7c,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0xfe,0xd1,0xca,0x7c,0x00,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_add_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xc8,0xca,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xc8,0xca,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xe0,0xca,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xe0,0xca,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xec,0xca,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xec,0xca,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0xd2,0xca,0x7f,0x06,0x06,0xff] +0x7f,0x04,0xd2,0xca,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_fmaak_f32 v6, exec_hi, v3, 0xaf123456 ; encoding: [0x7f,0x04,0xc2,0xca,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7f,0x04,0xc2,0xca,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_fmac_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xc0,0xca,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xc0,0xca,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xe2,0xca,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xe2,0xca,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v6, vcc_hi, v3 ; 
encoding: [0x7f,0x04,0xea,0xca,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xea,0xca,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_max_i32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xee,0xca,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xee,0xca,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_max_num_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xd4,0xca,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xd4,0xca,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_min_i32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xf0,0xca,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xf0,0xca,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_min_num_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xd6,0xca,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xd6,0xca,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xce,0xca,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xce,0xca,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_mul_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xc6,0xca,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xc6,0xca,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_sub_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xca,0xca,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xca,0xca,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xe8,0xca,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xe8,0xca,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_subrev_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xcc,0xca,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xcc,0xca,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, exec_hi, v255 :: v_dual_fmamk_f32 v6, exec_hi, 0xaf123456, v255 ; encoding: [0x7f,0xfe,0xc5,0xca,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7f,0xfe,0xc5,0xca,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 
v255, exec_hi, v255 :: v_dual_mov_b32 v6, vcc_hi ; encoding: [0x7f,0xfe,0xd1,0xca,0x6b,0x00,0x06,0xff] +0x7f,0xfe,0xd1,0xca,0x6b,0x00,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_add_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xc8,0xca,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xc8,0xca,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xe0,0xca,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xe0,0xca,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xec,0xca,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xec,0xca,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0xd2,0xca,0x7e,0x06,0x06,0xff] +0x7e,0x04,0xd2,0xca,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_fmaak_f32 v6, exec_lo, v3, 0xaf123456 ; encoding: [0x7e,0x04,0xc2,0xca,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7e,0x04,0xc2,0xca,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_fmac_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xc0,0xca,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xc0,0xca,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xe2,0xca,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xe2,0xca,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_lshrrev_b32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xea,0xca,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xea,0xca,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_max_i32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xee,0xca,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xee,0xca,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_max_num_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xd4,0xca,0x7b,0x06,0x06,0xff] 
+0x7e,0x04,0xd4,0xca,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_min_i32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xf0,0xca,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xf0,0xca,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_min_num_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xd6,0xca,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xd6,0xca,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xce,0xca,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xce,0xca,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_mul_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xc6,0xca,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xc6,0xca,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_sub_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xca,0xca,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xca,0xca,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_sub_nc_u32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xe8,0xca,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xe8,0xca,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_subrev_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xcc,0xca,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xcc,0xca,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, exec_lo, v255 :: v_dual_fmamk_f32 v6, exec_lo, 0xaf123456, v255 ; encoding: [0x7e,0xfe,0xc5,0xca,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7e,0xfe,0xc5,0xca,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, exec_lo, v255 :: v_dual_mov_b32 v6, ttmp15 ; encoding: [0x7e,0xfe,0xd1,0xca,0x7b,0x00,0x06,0xff] +0x7e,0xfe,0xd1,0xca,0x7b,0x00,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, m0, v2 :: v_dual_add_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0xc8,0xca,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xc8,0xca,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, m0, v2 :: v_dual_add_nc_u32 v6, m0, v3 ; encoding: 
[0x7d,0x04,0xe0,0xca,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xe0,0xca,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, m0, v2 :: v_dual_ashrrev_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0xec,0xca,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xec,0xca,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, m0, v2 :: v_dual_cndmask_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0xd2,0xca,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xd2,0xca,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, m0, v2 :: v_dual_fmaak_f32 v6, m0, v3, 0xaf123456 ; encoding: [0x7d,0x04,0xc2,0xca,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7d,0x04,0xc2,0xca,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, m0, v2 :: v_dual_fmac_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0xc0,0xca,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xc0,0xca,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, m0, v2 :: v_dual_lshlrev_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0xe2,0xca,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xe2,0xca,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, m0, v2 :: v_dual_lshrrev_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0xea,0xca,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xea,0xca,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, m0, v2 :: v_dual_max_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0xee,0xca,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xee,0xca,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, m0, v2 :: v_dual_max_num_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0xd4,0xca,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xd4,0xca,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, m0, v2 :: v_dual_min_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0xf0,0xca,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xf0,0xca,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, m0, v2 :: v_dual_min_num_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0xd6,0xca,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xd6,0xca,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0xce,0xca,0x7d,0x06,0x06,0xff] 
+0x7d,0x04,0xce,0xca,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, m0, v2 :: v_dual_mul_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0xc6,0xca,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xc6,0xca,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, m0, v2 :: v_dual_sub_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0xca,0xca,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xca,0xca,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, m0, v2 :: v_dual_sub_nc_u32 v6, m0, v3 ; encoding: [0x7d,0x04,0xe8,0xca,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xe8,0xca,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, m0, v2 :: v_dual_subrev_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0xcc,0xca,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xcc,0xca,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, m0, v255 :: v_dual_fmamk_f32 v6, m0, 0xaf123456, v255 ; encoding: [0x7d,0xfe,0xc5,0xca,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7d,0xfe,0xc5,0xca,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, m0, v255 :: v_dual_mov_b32 v6, m0 ; encoding: [0x7d,0xfe,0xd1,0xca,0x7d,0x00,0x06,0xff] +0x7d,0xfe,0xd1,0xca,0x7d,0x00,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, s1, v2 :: v_dual_add_f32 v6, s105, v3 ; encoding: [0x01,0x04,0xc8,0xca,0x69,0x06,0x06,0xff] +0x01,0x04,0xc8,0xca,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, s1, v2 :: v_dual_add_nc_u32 v6, s105, v3 ; encoding: [0x01,0x04,0xe0,0xca,0x69,0x06,0x06,0xff] +0x01,0x04,0xe0,0xca,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, s1, v2 :: v_dual_ashrrev_i32 v6, s105, v3 ; encoding: [0x01,0x04,0xec,0xca,0x69,0x06,0x06,0xff] +0x01,0x04,0xec,0xca,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s1, v3 ; encoding: [0x01,0x04,0xd2,0xca,0x01,0x06,0x06,0xff] +0x01,0x04,0xd2,0xca,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, s1, v2 :: v_dual_fmaak_f32 v6, s1, v3, 0xaf123456 ; encoding: [0x01,0x04,0xc2,0xca,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] 
+0x01,0x04,0xc2,0xca,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, s1, v2 :: v_dual_fmac_f32 v6, s105, v3 ; encoding: [0x01,0x04,0xc0,0xca,0x69,0x06,0x06,0xff] +0x01,0x04,0xc0,0xca,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, s1, v2 :: v_dual_lshlrev_b32 v6, s105, v3 ; encoding: [0x01,0x04,0xe2,0xca,0x69,0x06,0x06,0xff] +0x01,0x04,0xe2,0xca,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, s1, v2 :: v_dual_lshrrev_b32 v6, s105, v3 ; encoding: [0x01,0x04,0xea,0xca,0x69,0x06,0x06,0xff] +0x01,0x04,0xea,0xca,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, s1, v2 :: v_dual_max_i32 v6, s105, v3 ; encoding: [0x01,0x04,0xee,0xca,0x69,0x06,0x06,0xff] +0x01,0x04,0xee,0xca,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, s1, v2 :: v_dual_max_num_f32 v6, s105, v3 ; encoding: [0x01,0x04,0xd4,0xca,0x69,0x06,0x06,0xff] +0x01,0x04,0xd4,0xca,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, s1, v2 :: v_dual_min_i32 v6, s105, v3 ; encoding: [0x01,0x04,0xf0,0xca,0x69,0x06,0x06,0xff] +0x01,0x04,0xf0,0xca,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, s1, v2 :: v_dual_min_num_f32 v6, s105, v3 ; encoding: [0x01,0x04,0xd6,0xca,0x69,0x06,0x06,0xff] +0x01,0x04,0xd6,0xca,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v6, s105, v3 ; encoding: [0x01,0x04,0xce,0xca,0x69,0x06,0x06,0xff] +0x01,0x04,0xce,0xca,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, s1, v2 :: v_dual_mul_f32 v6, s105, v3 ; encoding: [0x01,0x04,0xc6,0xca,0x69,0x06,0x06,0xff] +0x01,0x04,0xc6,0xca,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, s1, v2 :: v_dual_sub_f32 v6, s105, v3 ; encoding: [0x01,0x04,0xca,0xca,0x69,0x06,0x06,0xff] +0x01,0x04,0xca,0xca,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, s1, v2 :: v_dual_sub_nc_u32 v6, s105, v3 ; encoding: [0x01,0x04,0xe8,0xca,0x69,0x06,0x06,0xff] +0x01,0x04,0xe8,0xca,0x69,0x06,0x06,0xff + +# GFX1250: 
v_dual_min_num_f32 v255, s1, v2 :: v_dual_subrev_f32 v6, s105, v3 ; encoding: [0x01,0x04,0xcc,0xca,0x69,0x06,0x06,0xff] +0x01,0x04,0xcc,0xca,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, s1, v255 :: v_dual_fmamk_f32 v6, s1, 0xaf123456, v255 ; encoding: [0x01,0xfe,0xc5,0xca,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x01,0xfe,0xc5,0xca,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, s1, v255 :: v_dual_mov_b32 v6, s105 ; encoding: [0x01,0xfe,0xd1,0xca,0x69,0x00,0x06,0xff] +0x01,0xfe,0xd1,0xca,0x69,0x00,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, s105, v2 :: v_dual_add_f32 v6, s1, v3 ; encoding: [0x69,0x04,0xc8,0xca,0x01,0x06,0x06,0xff] +0x69,0x04,0xc8,0xca,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, s105, v2 :: v_dual_add_nc_u32 v6, s1, v3 ; encoding: [0x69,0x04,0xe0,0xca,0x01,0x06,0x06,0xff] +0x69,0x04,0xe0,0xca,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, s105, v2 :: v_dual_ashrrev_i32 v6, s1, v3 ; encoding: [0x69,0x04,0xec,0xca,0x01,0x06,0x06,0xff] +0x69,0x04,0xec,0xca,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, s105, v2 :: v_dual_cndmask_b32 v6, s105, v3 ; encoding: [0x69,0x04,0xd2,0xca,0x69,0x06,0x06,0xff] +0x69,0x04,0xd2,0xca,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, s105, v2 :: v_dual_fmaak_f32 v6, s105, v3, 0xaf123456 ; encoding: [0x69,0x04,0xc2,0xca,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x69,0x04,0xc2,0xca,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, s105, v2 :: v_dual_fmac_f32 v6, s1, v3 ; encoding: [0x69,0x04,0xc0,0xca,0x01,0x06,0x06,0xff] +0x69,0x04,0xc0,0xca,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, s105, v2 :: v_dual_lshlrev_b32 v6, s1, v3 ; encoding: [0x69,0x04,0xe2,0xca,0x01,0x06,0x06,0xff] +0x69,0x04,0xe2,0xca,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, s105, v2 :: v_dual_lshrrev_b32 v6, s1, v3 ; encoding: [0x69,0x04,0xea,0xca,0x01,0x06,0x06,0xff] 
+0x69,0x04,0xea,0xca,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, s105, v2 :: v_dual_max_i32 v6, s1, v3 ; encoding: [0x69,0x04,0xee,0xca,0x01,0x06,0x06,0xff] +0x69,0x04,0xee,0xca,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, s105, v2 :: v_dual_max_num_f32 v6, s1, v3 ; encoding: [0x69,0x04,0xd4,0xca,0x01,0x06,0x06,0xff] +0x69,0x04,0xd4,0xca,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, s105, v2 :: v_dual_min_i32 v6, s1, v3 ; encoding: [0x69,0x04,0xf0,0xca,0x01,0x06,0x06,0xff] +0x69,0x04,0xf0,0xca,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, s105, v2 :: v_dual_min_num_f32 v6, s1, v3 ; encoding: [0x69,0x04,0xd6,0xca,0x01,0x06,0x06,0xff] +0x69,0x04,0xd6,0xca,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v6, s1, v3 ; encoding: [0x69,0x04,0xce,0xca,0x01,0x06,0x06,0xff] +0x69,0x04,0xce,0xca,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, s105, v2 :: v_dual_mul_f32 v6, s1, v3 ; encoding: [0x69,0x04,0xc6,0xca,0x01,0x06,0x06,0xff] +0x69,0x04,0xc6,0xca,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, s105, v2 :: v_dual_sub_f32 v6, s1, v3 ; encoding: [0x69,0x04,0xca,0xca,0x01,0x06,0x06,0xff] +0x69,0x04,0xca,0xca,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, s105, v2 :: v_dual_sub_nc_u32 v6, s1, v3 ; encoding: [0x69,0x04,0xe8,0xca,0x01,0x06,0x06,0xff] +0x69,0x04,0xe8,0xca,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, s105, v2 :: v_dual_subrev_f32 v6, s1, v3 ; encoding: [0x69,0x04,0xcc,0xca,0x01,0x06,0x06,0xff] +0x69,0x04,0xcc,0xca,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, s105, v255 :: v_dual_fmamk_f32 v6, s105, 0xaf123456, v255 ; encoding: [0x69,0xfe,0xc5,0xca,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x69,0xfe,0xc5,0xca,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, s105, v255 :: v_dual_mov_b32 v6, s1 ; encoding: [0x69,0xfe,0xd1,0xca,0x01,0x00,0x06,0xff] 
+0x69,0xfe,0xd1,0xca,0x01,0x00,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_add_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0xc8,0xca,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xc8,0xca,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_add_nc_u32 v6, -1, v3 ; encoding: [0xfd,0x04,0xe0,0xca,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xe0,0xca,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0xec,0xca,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xec,0xca,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_cndmask_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0xd2,0xca,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xd2,0xca,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_fmaak_f32 v6, -1, v3, 0xaf123456 ; encoding: [0xfd,0x04,0xc2,0xca,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xfd,0x04,0xc2,0xca,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_fmac_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0xc0,0xca,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xc0,0xca,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0xe2,0xca,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xe2,0xca,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0xea,0xca,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xea,0xca,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_max_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0xee,0xca,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xee,0xca,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_max_num_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0xd4,0xca,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xd4,0xca,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_min_i32 v6, -1, v3 ; encoding: 
[0xfd,0x04,0xf0,0xca,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xf0,0xca,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_min_num_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0xd6,0xca,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xd6,0xca,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0xce,0xca,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xce,0xca,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_mul_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0xc6,0xca,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xc6,0xca,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_sub_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0xca,0xca,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xca,0xca,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v6, -1, v3 ; encoding: [0xfd,0x04,0xe8,0xca,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xe8,0xca,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_subrev_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0xcc,0xca,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xcc,0xca,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, src_scc, v255 :: v_dual_fmamk_f32 v6, -1, 0xaf123456, v255 ; encoding: [0xfd,0xfe,0xc5,0xca,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xfd,0xfe,0xc5,0xca,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, src_scc, v255 :: v_dual_mov_b32 v6, -1 ; encoding: [0xfd,0xfe,0xd1,0xca,0xc1,0x00,0x06,0xff] +0xfd,0xfe,0xd1,0xca,0xc1,0x00,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_add_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xc8,0xca,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xc8,0xca,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xe0,0xca,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xe0,0xca,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v6, 
vcc_lo, v3 ; encoding: [0x7b,0x04,0xec,0xca,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xec,0xca,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0xd2,0xca,0x7b,0x06,0x06,0xff] +0x7b,0x04,0xd2,0xca,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_fmaak_f32 v6, ttmp15, v3, 0xaf123456 ; encoding: [0x7b,0x04,0xc2,0xca,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7b,0x04,0xc2,0xca,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_fmac_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xc0,0xca,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xc0,0xca,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xe2,0xca,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xe2,0xca,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xea,0xca,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xea,0xca,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_max_i32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xee,0xca,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xee,0xca,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_max_num_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xd4,0xca,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xd4,0xca,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_min_i32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xf0,0xca,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xf0,0xca,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_min_num_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xd6,0xca,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xd6,0xca,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xce,0xca,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xce,0xca,0x6a,0x06,0x06,0xff + +# GFX1250: 
v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_mul_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xc6,0xca,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xc6,0xca,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_sub_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xca,0xca,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xca,0xca,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_sub_nc_u32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xe8,0xca,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xe8,0xca,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_subrev_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xcc,0xca,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xcc,0xca,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, ttmp15, v255 :: v_dual_fmamk_f32 v6, ttmp15, 0xaf123456, v255 ; encoding: [0x7b,0xfe,0xc5,0xca,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7b,0xfe,0xc5,0xca,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, ttmp15, v255 :: v_dual_mov_b32 v6, vcc_lo ; encoding: [0x7b,0xfe,0xd1,0xca,0x6a,0x00,0x06,0xff] +0x7b,0xfe,0xd1,0xca,0x6a,0x00,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v1, v2 :: v_dual_add_f32 v6, v255, v3 ; encoding: [0x01,0x05,0xc8,0xca,0xff,0x07,0x06,0xff] +0x01,0x05,0xc8,0xca,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v1, v2 :: v_dual_add_nc_u32 v6, v255, v3 ; encoding: [0x01,0x05,0xe0,0xca,0xff,0x07,0x06,0xff] +0x01,0x05,0xe0,0xca,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v1, v2 :: v_dual_ashrrev_i32 v6, v255, v3 ; encoding: [0x01,0x05,0xec,0xca,0xff,0x07,0x06,0xff] +0x01,0x05,0xec,0xca,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v1, v2 :: v_dual_cndmask_b32 v6, v255, v3 ; encoding: [0x01,0x05,0xd2,0xca,0xff,0x07,0x06,0xff] +0x01,0x05,0xd2,0xca,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v1, v2 :: v_dual_fmaak_f32 v6, v255, v3, 0xaf123456 ; encoding: [0x01,0x05,0xc2,0xca,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] 
+0x01,0x05,0xc2,0xca,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, v1, v2 :: v_dual_fmac_f32 v6, v255, v3 ; encoding: [0x01,0x05,0xc0,0xca,0xff,0x07,0x06,0xff] +0x01,0x05,0xc0,0xca,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v1, v2 :: v_dual_lshlrev_b32 v6, v255, v3 ; encoding: [0x01,0x05,0xe2,0xca,0xff,0x07,0x06,0xff] +0x01,0x05,0xe2,0xca,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v1, v2 :: v_dual_lshrrev_b32 v6, v255, v3 ; encoding: [0x01,0x05,0xea,0xca,0xff,0x07,0x06,0xff] +0x01,0x05,0xea,0xca,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v1, v2 :: v_dual_max_i32 v6, v255, v3 ; encoding: [0x01,0x05,0xee,0xca,0xff,0x07,0x06,0xff] +0x01,0x05,0xee,0xca,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v1, v2 :: v_dual_max_num_f32 v6, v255, v3 ; encoding: [0x01,0x05,0xd4,0xca,0xff,0x07,0x06,0xff] +0x01,0x05,0xd4,0xca,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v1, v2 :: v_dual_min_i32 v6, v255, v3 ; encoding: [0x01,0x05,0xf0,0xca,0xff,0x07,0x06,0xff] +0x01,0x05,0xf0,0xca,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v1, v2 :: v_dual_min_num_f32 v6, v255, v3 ; encoding: [0x01,0x05,0xd6,0xca,0xff,0x07,0x06,0xff] +0x01,0x05,0xd6,0xca,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v6, v255, v3 ; encoding: [0x01,0x05,0xce,0xca,0xff,0x07,0x06,0xff] +0x01,0x05,0xce,0xca,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v1, v2 :: v_dual_mul_f32 v6, v255, v3 ; encoding: [0x01,0x05,0xc6,0xca,0xff,0x07,0x06,0xff] +0x01,0x05,0xc6,0xca,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v1, v2 :: v_dual_sub_f32 v6, v255, v3 ; encoding: [0x01,0x05,0xca,0xca,0xff,0x07,0x06,0xff] +0x01,0x05,0xca,0xca,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v1, v2 :: v_dual_sub_nc_u32 v6, v255, v3 ; encoding: [0x01,0x05,0xe8,0xca,0xff,0x07,0x06,0xff] +0x01,0x05,0xe8,0xca,0xff,0x07,0x06,0xff + +# GFX1250: 
v_dual_min_num_f32 v255, v1, v2 :: v_dual_subrev_f32 v6, v255, v3 ; encoding: [0x01,0x05,0xcc,0xca,0xff,0x07,0x06,0xff] +0x01,0x05,0xcc,0xca,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v1, v255 :: v_dual_fmamk_f32 v6, v255, 0xaf123456, v255 ; encoding: [0x01,0xff,0xc5,0xca,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x01,0xff,0xc5,0xca,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, v1, v255 :: v_dual_mov_b32 v6, v255 ; encoding: [0x01,0xff,0xd1,0xca,0xff,0x01,0x06,0xff] +0x01,0xff,0xd1,0xca,0xff,0x01,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v2, v2 :: v_dual_add_f32 v6, v3, v3 ; encoding: [0x02,0x05,0xc8,0xca,0x03,0x07,0x06,0xff] +0x02,0x05,0xc8,0xca,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v2, v2 :: v_dual_add_nc_u32 v6, v3, v3 ; encoding: [0x02,0x05,0xe0,0xca,0x03,0x07,0x06,0xff] +0x02,0x05,0xe0,0xca,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v2, v2 :: v_dual_ashrrev_i32 v6, v3, v3 ; encoding: [0x02,0x05,0xec,0xca,0x03,0x07,0x06,0xff] +0x02,0x05,0xec,0xca,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v2, v2 :: v_dual_cndmask_b32 v6, v3, v3 ; encoding: [0x02,0x05,0xd2,0xca,0x03,0x07,0x06,0xff] +0x02,0x05,0xd2,0xca,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v2, v2 :: v_dual_fmaak_f32 v6, v3, v3, 0xaf123456 ; encoding: [0x02,0x05,0xc2,0xca,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x02,0x05,0xc2,0xca,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, v2, v2 :: v_dual_fmac_f32 v6, v3, v3 ; encoding: [0x02,0x05,0xc0,0xca,0x03,0x07,0x06,0xff] +0x02,0x05,0xc0,0xca,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v2, v2 :: v_dual_lshlrev_b32 v6, v3, v3 ; encoding: [0x02,0x05,0xe2,0xca,0x03,0x07,0x06,0xff] +0x02,0x05,0xe2,0xca,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v2, v2 :: v_dual_lshrrev_b32 v6, v3, v3 ; encoding: [0x02,0x05,0xea,0xca,0x03,0x07,0x06,0xff] +0x02,0x05,0xea,0xca,0x03,0x07,0x06,0xff + +# 
GFX1250: v_dual_min_num_f32 v255, v2, v2 :: v_dual_max_i32 v6, v3, v3 ; encoding: [0x02,0x05,0xee,0xca,0x03,0x07,0x06,0xff] +0x02,0x05,0xee,0xca,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v2, v2 :: v_dual_max_num_f32 v6, v3, v3 ; encoding: [0x02,0x05,0xd4,0xca,0x03,0x07,0x06,0xff] +0x02,0x05,0xd4,0xca,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v2, v2 :: v_dual_min_i32 v6, v3, v3 ; encoding: [0x02,0x05,0xf0,0xca,0x03,0x07,0x06,0xff] +0x02,0x05,0xf0,0xca,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v2, v2 :: v_dual_min_num_f32 v6, v3, v3 ; encoding: [0x02,0x05,0xd6,0xca,0x03,0x07,0x06,0xff] +0x02,0x05,0xd6,0xca,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v6, v3, v3 ; encoding: [0x02,0x05,0xce,0xca,0x03,0x07,0x06,0xff] +0x02,0x05,0xce,0xca,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v2, v2 :: v_dual_mul_f32 v6, v3, v3 ; encoding: [0x02,0x05,0xc6,0xca,0x03,0x07,0x06,0xff] +0x02,0x05,0xc6,0xca,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v2, v2 :: v_dual_sub_f32 v6, v3, v3 ; encoding: [0x02,0x05,0xca,0xca,0x03,0x07,0x06,0xff] +0x02,0x05,0xca,0xca,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v2, v2 :: v_dual_sub_nc_u32 v6, v3, v3 ; encoding: [0x02,0x05,0xe8,0xca,0x03,0x07,0x06,0xff] +0x02,0x05,0xe8,0xca,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v2, v2 :: v_dual_subrev_f32 v6, v3, v3 ; encoding: [0x02,0x05,0xcc,0xca,0x03,0x07,0x06,0xff] +0x02,0x05,0xcc,0xca,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v2, v255 :: v_dual_fmamk_f32 v6, v3, 0xaf123456, v255 ; encoding: [0x02,0xff,0xc5,0xca,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x02,0xff,0xc5,0xca,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, v2, v255 :: v_dual_mov_b32 v6, v3 ; encoding: [0x02,0xff,0xd1,0xca,0x03,0x01,0x06,0xff] +0x02,0xff,0xd1,0xca,0x03,0x01,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v255, v2 :: 
v_dual_add_f32 v6, v2, v3 ; encoding: [0xff,0x05,0xc8,0xca,0x02,0x07,0x06,0xff] +0xff,0x05,0xc8,0xca,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v255, v2 :: v_dual_add_nc_u32 v6, v2, v3 ; encoding: [0xff,0x05,0xe0,0xca,0x02,0x07,0x06,0xff] +0xff,0x05,0xe0,0xca,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v255, v2 :: v_dual_ashrrev_i32 v6, v2, v3 ; encoding: [0xff,0x05,0xec,0xca,0x02,0x07,0x06,0xff] +0xff,0x05,0xec,0xca,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v255, v2 :: v_dual_cndmask_b32 v6, v2, v3 ; encoding: [0xff,0x05,0xd2,0xca,0x02,0x07,0x06,0xff] +0xff,0x05,0xd2,0xca,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v255, v2 :: v_dual_fmaak_f32 v6, v2, v3, 0xaf123456 ; encoding: [0xff,0x05,0xc2,0xca,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x05,0xc2,0xca,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, v255, v2 :: v_dual_fmac_f32 v6, v2, v3 ; encoding: [0xff,0x05,0xc0,0xca,0x02,0x07,0x06,0xff] +0xff,0x05,0xc0,0xca,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v255, v2 :: v_dual_lshlrev_b32 v6, v2, v3 ; encoding: [0xff,0x05,0xe2,0xca,0x02,0x07,0x06,0xff] +0xff,0x05,0xe2,0xca,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v255, v2 :: v_dual_lshrrev_b32 v6, v2, v3 ; encoding: [0xff,0x05,0xea,0xca,0x02,0x07,0x06,0xff] +0xff,0x05,0xea,0xca,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v255, v2 :: v_dual_max_i32 v6, v2, v3 ; encoding: [0xff,0x05,0xee,0xca,0x02,0x07,0x06,0xff] +0xff,0x05,0xee,0xca,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v255, v2 :: v_dual_max_num_f32 v6, v2, v3 ; encoding: [0xff,0x05,0xd4,0xca,0x02,0x07,0x06,0xff] +0xff,0x05,0xd4,0xca,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v255, v2 :: v_dual_min_i32 v6, v2, v3 ; encoding: [0xff,0x05,0xf0,0xca,0x02,0x07,0x06,0xff] +0xff,0x05,0xf0,0xca,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v255, v2 :: v_dual_min_num_f32 v6, v2, v3 
; encoding: [0xff,0x05,0xd6,0xca,0x02,0x07,0x06,0xff] +0xff,0x05,0xd6,0xca,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v6, v2, v3 ; encoding: [0xff,0x05,0xce,0xca,0x02,0x07,0x06,0xff] +0xff,0x05,0xce,0xca,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v255, v2 :: v_dual_mul_f32 v6, v2, v3 ; encoding: [0xff,0x05,0xc6,0xca,0x02,0x07,0x06,0xff] +0xff,0x05,0xc6,0xca,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v255, v2 :: v_dual_sub_f32 v6, v2, v3 ; encoding: [0xff,0x05,0xca,0xca,0x02,0x07,0x06,0xff] +0xff,0x05,0xca,0xca,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v255, v2 :: v_dual_sub_nc_u32 v6, v2, v3 ; encoding: [0xff,0x05,0xe8,0xca,0x02,0x07,0x06,0xff] +0xff,0x05,0xe8,0xca,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v255, v2 :: v_dual_subrev_f32 v6, v2, v3 ; encoding: [0xff,0x05,0xcc,0xca,0x02,0x07,0x06,0xff] +0xff,0x05,0xcc,0xca,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v255, v255 :: v_dual_fmamk_f32 v6, v2, 0xaf123456, v255 ; encoding: [0xff,0xff,0xc5,0xca,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0xff,0xc5,0xca,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, v255, v255 :: v_dual_mov_b32 v6, v2 ; encoding: [0xff,0xff,0xd1,0xca,0x02,0x01,0x06,0xff] +0xff,0xff,0xd1,0xca,0x02,0x01,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v3, v2 :: v_dual_add_f32 v6, v4, v3 ; encoding: [0x03,0x05,0xc8,0xca,0x04,0x07,0x06,0xff] +0x03,0x05,0xc8,0xca,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v3, v2 :: v_dual_add_nc_u32 v6, v4, v3 ; encoding: [0x03,0x05,0xe0,0xca,0x04,0x07,0x06,0xff] +0x03,0x05,0xe0,0xca,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v3, v2 :: v_dual_ashrrev_i32 v6, v4, v3 ; encoding: [0x03,0x05,0xec,0xca,0x04,0x07,0x06,0xff] +0x03,0x05,0xec,0xca,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v3, v2 :: v_dual_cndmask_b32 v6, v4, v3 ; encoding: 
[0x03,0x05,0xd2,0xca,0x04,0x07,0x06,0xff] +0x03,0x05,0xd2,0xca,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v3, v2 :: v_dual_fmaak_f32 v6, v4, v3, 0xaf123456 ; encoding: [0x03,0x05,0xc2,0xca,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x03,0x05,0xc2,0xca,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, v3, v2 :: v_dual_fmac_f32 v6, v4, v3 ; encoding: [0x03,0x05,0xc0,0xca,0x04,0x07,0x06,0xff] +0x03,0x05,0xc0,0xca,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v3, v2 :: v_dual_lshlrev_b32 v6, v4, v3 ; encoding: [0x03,0x05,0xe2,0xca,0x04,0x07,0x06,0xff] +0x03,0x05,0xe2,0xca,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v3, v2 :: v_dual_lshrrev_b32 v6, v4, v3 ; encoding: [0x03,0x05,0xea,0xca,0x04,0x07,0x06,0xff] +0x03,0x05,0xea,0xca,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v3, v2 :: v_dual_max_i32 v6, v4, v3 ; encoding: [0x03,0x05,0xee,0xca,0x04,0x07,0x06,0xff] +0x03,0x05,0xee,0xca,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v3, v2 :: v_dual_max_num_f32 v6, v4, v3 ; encoding: [0x03,0x05,0xd4,0xca,0x04,0x07,0x06,0xff] +0x03,0x05,0xd4,0xca,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v3, v2 :: v_dual_min_i32 v6, v4, v3 ; encoding: [0x03,0x05,0xf0,0xca,0x04,0x07,0x06,0xff] +0x03,0x05,0xf0,0xca,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v3, v2 :: v_dual_min_num_f32 v6, v4, v3 ; encoding: [0x03,0x05,0xd6,0xca,0x04,0x07,0x06,0xff] +0x03,0x05,0xd6,0xca,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v6, v4, v3 ; encoding: [0x03,0x05,0xce,0xca,0x04,0x07,0x06,0xff] +0x03,0x05,0xce,0xca,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v3, v2 :: v_dual_mul_f32 v6, v4, v3 ; encoding: [0x03,0x05,0xc6,0xca,0x04,0x07,0x06,0xff] +0x03,0x05,0xc6,0xca,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v3, v2 :: v_dual_sub_f32 v6, v4, v3 ; encoding: [0x03,0x05,0xca,0xca,0x04,0x07,0x06,0xff] 
+0x03,0x05,0xca,0xca,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v3, v2 :: v_dual_sub_nc_u32 v6, v4, v3 ; encoding: [0x03,0x05,0xe8,0xca,0x04,0x07,0x06,0xff] +0x03,0x05,0xe8,0xca,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v3, v2 :: v_dual_subrev_f32 v6, v4, v3 ; encoding: [0x03,0x05,0xcc,0xca,0x04,0x07,0x06,0xff] +0x03,0x05,0xcc,0xca,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v3, v255 :: v_dual_fmamk_f32 v6, v4, 0xaf123456, v255 ; encoding: [0x03,0xff,0xc5,0xca,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x03,0xff,0xc5,0xca,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, v3, v255 :: v_dual_mov_b32 v6, v4 ; encoding: [0x03,0xff,0xd1,0xca,0x04,0x01,0x06,0xff] +0x03,0xff,0xd1,0xca,0x04,0x01,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_add_f32 v6, v1, v3 ; encoding: [0x04,0x05,0xc8,0xca,0x01,0x07,0x06,0xff] +0x04,0x05,0xc8,0xca,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_add_nc_u32 v6, v1, v3 ; encoding: [0x04,0x05,0xe0,0xca,0x01,0x07,0x06,0xff] +0x04,0x05,0xe0,0xca,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_ashrrev_i32 v6, v1, v3 ; encoding: [0x04,0x05,0xec,0xca,0x01,0x07,0x06,0xff] +0x04,0x05,0xec,0xca,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_cndmask_b32 v6, v1, v3 ; encoding: [0x04,0x05,0xd2,0xca,0x01,0x07,0x06,0xff] +0x04,0x05,0xd2,0xca,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_fmaak_f32 v6, v1, v3, 0xaf123456 ; encoding: [0x04,0x05,0xc2,0xca,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x04,0x05,0xc2,0xca,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_fmac_f32 v6, v1, v3 ; encoding: [0x04,0x05,0xc0,0xca,0x01,0x07,0x06,0xff] +0x04,0x05,0xc0,0xca,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_lshlrev_b32 v6, v1, v3 ; encoding: 
[0x04,0x05,0xe2,0xca,0x01,0x07,0x06,0xff] +0x04,0x05,0xe2,0xca,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_lshrrev_b32 v6, v1, v3 ; encoding: [0x04,0x05,0xea,0xca,0x01,0x07,0x06,0xff] +0x04,0x05,0xea,0xca,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_max_i32 v6, v1, v3 ; encoding: [0x04,0x05,0xee,0xca,0x01,0x07,0x06,0xff] +0x04,0x05,0xee,0xca,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_max_num_f32 v6, v1, v3 ; encoding: [0x04,0x05,0xd4,0xca,0x01,0x07,0x06,0xff] +0x04,0x05,0xd4,0xca,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_min_i32 v6, v1, v3 ; encoding: [0x04,0x05,0xf0,0xca,0x01,0x07,0x06,0xff] +0x04,0x05,0xf0,0xca,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_min_num_f32 v6, v1, v3 ; encoding: [0x04,0x05,0xd6,0xca,0x01,0x07,0x06,0xff] +0x04,0x05,0xd6,0xca,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v6, v1, v3 ; encoding: [0x04,0x05,0xce,0xca,0x01,0x07,0x06,0xff] +0x04,0x05,0xce,0xca,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_mul_f32 v6, v1, v3 ; encoding: [0x04,0x05,0xc6,0xca,0x01,0x07,0x06,0xff] +0x04,0x05,0xc6,0xca,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_sub_f32 v6, v1, v3 ; encoding: [0x04,0x05,0xca,0xca,0x01,0x07,0x06,0xff] +0x04,0x05,0xca,0xca,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_sub_nc_u32 v6, v1, v3 ; encoding: [0x04,0x05,0xe8,0xca,0x01,0x07,0x06,0xff] +0x04,0x05,0xe8,0xca,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_subrev_f32 v6, v1, v3 ; encoding: [0x04,0x05,0xcc,0xca,0x01,0x07,0x06,0xff] +0x04,0x05,0xcc,0xca,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, v4, v255 :: v_dual_fmamk_f32 v6, v1, 0xaf123456, v255 ; encoding: [0x04,0xff,0xc5,0xca,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] 
+0x04,0xff,0xc5,0xca,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, v4, v255 :: v_dual_mov_b32 v6, v1 ; encoding: [0x04,0xff,0xd1,0xca,0x01,0x01,0x06,0xff] +0x04,0xff,0xd1,0xca,0x01,0x01,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_add_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xc8,0xca,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xc8,0xca,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xe0,0xca,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xe0,0xca,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xec,0xca,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xec,0xca,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0xd2,0xca,0x6b,0x06,0x06,0xff] +0x6b,0x04,0xd2,0xca,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_fmaak_f32 v6, vcc_hi, v3, 0xaf123456 ; encoding: [0x6b,0x04,0xc2,0xca,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6b,0x04,0xc2,0xca,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xc0,0xca,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xc0,0xca,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xe2,0xca,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xe2,0xca,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xea,0xca,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xea,0xca,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_max_i32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xee,0xca,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xee,0xca,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v6, exec_lo, v3 
; encoding: [0x6b,0x04,0xd4,0xca,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xd4,0xca,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_min_i32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xf0,0xca,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xf0,0xca,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xd6,0xca,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xd6,0xca,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xce,0xca,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xce,0xca,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_mul_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xc6,0xca,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xc6,0xca,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_sub_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xca,0xca,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xca,0xca,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xe8,0xca,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xe8,0xca,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xcc,0xca,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xcc,0xca,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, vcc_hi, v255 :: v_dual_fmamk_f32 v6, vcc_hi, 0xaf123456, v255 ; encoding: [0x6b,0xfe,0xc5,0xca,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6b,0xfe,0xc5,0xca,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, vcc_hi, v255 :: v_dual_mov_b32 v6, exec_lo ; encoding: [0x6b,0xfe,0xd1,0xca,0x7e,0x00,0x06,0xff] +0x6b,0xfe,0xd1,0xca,0x7e,0x00,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_add_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xc8,0xca,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xc8,0xca,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, 
vcc_lo, v2 :: v_dual_add_nc_u32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xe0,0xca,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xe0,0xca,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xec,0xca,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xec,0xca,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0xd2,0xca,0x6a,0x06,0x06,0xff] +0x6a,0x04,0xd2,0xca,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_fmaak_f32 v6, vcc_lo, v3, 0xaf123456 ; encoding: [0x6a,0x04,0xc2,0xca,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6a,0x04,0xc2,0xca,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xc0,0xca,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xc0,0xca,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xe2,0xca,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xe2,0xca,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xea,0xca,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xea,0xca,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_max_i32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xee,0xca,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xee,0xca,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xd4,0xca,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xd4,0xca,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_min_i32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xf0,0xca,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xf0,0xca,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xd6,0xca,0x7f,0x06,0x06,0xff] 
+0x6a,0x04,0xd6,0xca,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xce,0xca,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xce,0xca,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_mul_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xc6,0xca,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xc6,0xca,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_sub_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xca,0xca,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xca,0xca,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xe8,0xca,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xe8,0xca,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xcc,0xca,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xcc,0xca,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v255, vcc_lo, v255 :: v_dual_fmamk_f32 v6, vcc_lo, 0xaf123456, v255 ; encoding: [0x6a,0xfe,0xc5,0xca,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6a,0xfe,0xc5,0xca,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v255, vcc_lo, v255 :: v_dual_mov_b32 v6, exec_hi ; encoding: [0x6a,0xfe,0xd1,0xca,0x7f,0x00,0x06,0xff] +0x6a,0xfe,0xd1,0xca,0x7f,0x00,0x06,0xff + +# GFX1250: v_dual_min_num_f32 v6, null, v5 :: v_dual_add_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xc8,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xc8,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v6, null, v5 :: v_dual_add_nc_u32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xe0,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xe0,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v6, null, v5 :: v_dual_ashrrev_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xec,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] 
+0x7c,0x0a,0xec,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v6, null, v5 :: v_dual_cndmask_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xd2,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xd2,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v6, null, v5 :: v_dual_fmaak_f32 v255, 0xaf123456, v4, 0xaf123456 ; encoding: [0x7c,0x0a,0xc2,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xc2,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v6, null, v5 :: v_dual_fmac_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xc0,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xc0,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v6, null, v5 :: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xc4,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xc4,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v6, null, v5 :: v_dual_lshlrev_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xe2,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xe2,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v6, null, v5 :: v_dual_lshrrev_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xea,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xea,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v6, null, v5 :: v_dual_max_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xee,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xee,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v6, null, v5 :: v_dual_max_num_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xd4,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xd4,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v6, null, v5 :: v_dual_min_i32 v255, 0xaf123456, v4 ; encoding: 
[0x7c,0x0a,0xf0,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xf0,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v6, null, v5 :: v_dual_min_num_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xd6,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xd6,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v6, null, v5 :: v_dual_mov_b32 v255, 0xaf123456 ; encoding: [0x7c,0x0a,0xd0,0xca,0xff,0x00,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xd0,0xca,0xff,0x00,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v6, null, v5 :: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xce,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xce,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v6, null, v5 :: v_dual_mul_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xc6,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xc6,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v6, null, v5 :: v_dual_sub_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xca,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xca,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v6, null, v5 :: v_dual_sub_nc_u32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xe8,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xe8,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_min_num_f32 v6, null, v5 :: v_dual_subrev_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xcc,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xcc,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_add_f32 v6, src_scc, v4 ; encoding: [0xc1,0x00,0x08,0xca,0xfd,0x08,0x06,0xff] +0xc1,0x00,0x08,0xca,0xfd,0x08,0x06,0xff + +# GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_add_nc_u32 v6, src_scc, v4 ; encoding: [0xc1,0x00,0x20,0xca,0xfd,0x08,0x06,0xff] 
+0xc1,0x00,0x20,0xca,0xfd,0x08,0x06,0xff + +# GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_ashrrev_i32 v6, src_scc, v4 ; encoding: [0xc1,0x00,0x2c,0xca,0xfd,0x08,0x06,0xff] +0xc1,0x00,0x2c,0xca,0xfd,0x08,0x06,0xff + +# GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_cndmask_b32 v6, src_scc, v4 ; encoding: [0xc1,0x00,0x12,0xca,0xfd,0x08,0x06,0xff] +0xc1,0x00,0x12,0xca,0xfd,0x08,0x06,0xff + +# GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_fmaak_f32 v6, src_scc, v4, 0xaf123456 ; encoding: [0xc1,0x00,0x02,0xca,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf] +0xc1,0x00,0x02,0xca,0xfd,0x08,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_fmac_f32 v6, src_scc, v4 ; encoding: [0xc1,0x00,0x00,0xca,0xfd,0x08,0x06,0xff] +0xc1,0x00,0x00,0xca,0xfd,0x08,0x06,0xff + +# GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_fmamk_f32 v6, src_scc, 0xaf123456, v255 ; encoding: [0xc1,0x00,0x04,0xca,0xfd,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xc1,0x00,0x04,0xca,0xfd,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_lshlrev_b32 v6, src_scc, v4 ; encoding: [0xc1,0x00,0x22,0xca,0xfd,0x08,0x06,0xff] +0xc1,0x00,0x22,0xca,0xfd,0x08,0x06,0xff + +# GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_lshrrev_b32 v6, src_scc, v4 ; encoding: [0xc1,0x00,0x2a,0xca,0xfd,0x08,0x06,0xff] +0xc1,0x00,0x2a,0xca,0xfd,0x08,0x06,0xff + +# GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_max_i32 v6, src_scc, v4 ; encoding: [0xc1,0x00,0x2e,0xca,0xfd,0x08,0x06,0xff] +0xc1,0x00,0x2e,0xca,0xfd,0x08,0x06,0xff + +# GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_max_num_f32 v6, src_scc, v4 ; encoding: [0xc1,0x00,0x14,0xca,0xfd,0x08,0x06,0xff] +0xc1,0x00,0x14,0xca,0xfd,0x08,0x06,0xff + +# GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_min_i32 v6, src_scc, v4 ; encoding: [0xc1,0x00,0x30,0xca,0xfd,0x08,0x06,0xff] +0xc1,0x00,0x30,0xca,0xfd,0x08,0x06,0xff + +# GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_min_num_f32 v6, src_scc, v4 ; encoding: [0xc1,0x00,0x16,0xca,0xfd,0x08,0x06,0xff] 
+0xc1,0x00,0x16,0xca,0xfd,0x08,0x06,0xff + +# GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_mov_b32 v6, src_scc ; encoding: [0xc1,0x00,0x10,0xca,0xfd,0x00,0x06,0xff] +0xc1,0x00,0x10,0xca,0xfd,0x00,0x06,0xff + +# GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_mul_dx9_zero_f32 v6, src_scc, v4 ; encoding: [0xc1,0x00,0x0e,0xca,0xfd,0x08,0x06,0xff] +0xc1,0x00,0x0e,0xca,0xfd,0x08,0x06,0xff + +# GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_mul_f32 v6, src_scc, v4 ; encoding: [0xc1,0x00,0x06,0xca,0xfd,0x08,0x06,0xff] +0xc1,0x00,0x06,0xca,0xfd,0x08,0x06,0xff + +# GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_sub_f32 v6, src_scc, v4 ; encoding: [0xc1,0x00,0x0a,0xca,0xfd,0x08,0x06,0xff] +0xc1,0x00,0x0a,0xca,0xfd,0x08,0x06,0xff + +# GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_sub_nc_u32 v6, src_scc, v4 ; encoding: [0xc1,0x00,0x28,0xca,0xfd,0x08,0x06,0xff] +0xc1,0x00,0x28,0xca,0xfd,0x08,0x06,0xff + +# GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_subrev_f32 v6, src_scc, v4 ; encoding: [0xc1,0x00,0x0c,0xca,0xfd,0x08,0x06,0xff] +0xc1,0x00,0x0c,0xca,0xfd,0x08,0x06,0xff + +# GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_add_f32 v6, 0.5, v3 ; encoding: [0xf0,0x00,0x08,0xca,0xf0,0x06,0x06,0xff] +0xf0,0x00,0x08,0xca,0xf0,0x06,0x06,0xff + +# GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_add_nc_u32 v6, 0.5, v3 ; encoding: [0xf0,0x00,0x20,0xca,0xf0,0x06,0x06,0xff] +0xf0,0x00,0x20,0xca,0xf0,0x06,0x06,0xff + +# GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_ashrrev_i32 v6, 0.5, v3 ; encoding: [0xf0,0x00,0x2c,0xca,0xf0,0x06,0x06,0xff] +0xf0,0x00,0x2c,0xca,0xf0,0x06,0x06,0xff + +# GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_cndmask_b32 v6, 0.5, v3 ; encoding: [0xf0,0x00,0x12,0xca,0xf0,0x06,0x06,0xff] +0xf0,0x00,0x12,0xca,0xf0,0x06,0x06,0xff + +# GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_fmaak_f32 v6, 0.5, v3, 0xaf123456 ; encoding: [0xf0,0x00,0x02,0xca,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xf0,0x00,0x02,0xca,0xf0,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_fmac_f32 
v6, 0.5, v3 ; encoding: [0xf0,0x00,0x00,0xca,0xf0,0x06,0x06,0xff] +0xf0,0x00,0x00,0xca,0xf0,0x06,0x06,0xff + +# GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_fmamk_f32 v6, 0.5, 0xaf123456, v255 ; encoding: [0xf0,0x00,0x04,0xca,0xf0,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xf0,0x00,0x04,0xca,0xf0,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_lshlrev_b32 v6, 0.5, v3 ; encoding: [0xf0,0x00,0x22,0xca,0xf0,0x06,0x06,0xff] +0xf0,0x00,0x22,0xca,0xf0,0x06,0x06,0xff + +# GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_lshrrev_b32 v6, 0.5, v3 ; encoding: [0xf0,0x00,0x2a,0xca,0xf0,0x06,0x06,0xff] +0xf0,0x00,0x2a,0xca,0xf0,0x06,0x06,0xff + +# GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_max_i32 v6, 0.5, v3 ; encoding: [0xf0,0x00,0x2e,0xca,0xf0,0x06,0x06,0xff] +0xf0,0x00,0x2e,0xca,0xf0,0x06,0x06,0xff + +# GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_max_num_f32 v6, 0.5, v3 ; encoding: [0xf0,0x00,0x14,0xca,0xf0,0x06,0x06,0xff] +0xf0,0x00,0x14,0xca,0xf0,0x06,0x06,0xff + +# GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_min_i32 v6, 0.5, v3 ; encoding: [0xf0,0x00,0x30,0xca,0xf0,0x06,0x06,0xff] +0xf0,0x00,0x30,0xca,0xf0,0x06,0x06,0xff + +# GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_min_num_f32 v6, 0.5, v3 ; encoding: [0xf0,0x00,0x16,0xca,0xf0,0x06,0x06,0xff] +0xf0,0x00,0x16,0xca,0xf0,0x06,0x06,0xff + +# GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_mov_b32 v6, 0.5 ; encoding: [0xf0,0x00,0x10,0xca,0xf0,0x00,0x06,0xff] +0xf0,0x00,0x10,0xca,0xf0,0x00,0x06,0xff + +# GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_mul_dx9_zero_f32 v6, 0.5, v3 ; encoding: [0xf0,0x00,0x0e,0xca,0xf0,0x06,0x06,0xff] +0xf0,0x00,0x0e,0xca,0xf0,0x06,0x06,0xff + +# GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_mul_f32 v6, 0.5, v3 ; encoding: [0xf0,0x00,0x06,0xca,0xf0,0x06,0x06,0xff] +0xf0,0x00,0x06,0xca,0xf0,0x06,0x06,0xff + +# GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_sub_f32 v6, 0.5, v3 ; encoding: [0xf0,0x00,0x0a,0xca,0xf0,0x06,0x06,0xff] +0xf0,0x00,0x0a,0xca,0xf0,0x06,0x06,0xff + +# 
GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_sub_nc_u32 v6, 0.5, v3 ; encoding: [0xf0,0x00,0x28,0xca,0xf0,0x06,0x06,0xff] +0xf0,0x00,0x28,0xca,0xf0,0x06,0x06,0xff + +# GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_subrev_f32 v6, 0.5, v3 ; encoding: [0xf0,0x00,0x0c,0xca,0xf0,0x06,0x06,0xff] +0xf0,0x00,0x0c,0xca,0xf0,0x06,0x06,0xff + +# GFX1250: v_dual_mov_b32 v255, 0xaf123456 :: v_dual_add_f32 v6, null, v255 ; encoding: [0xff,0x00,0x08,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0x00,0x08,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, 0xaf123456 :: v_dual_add_nc_u32 v6, null, v255 ; encoding: [0xff,0x00,0x20,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0x00,0x20,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, 0xaf123456 :: v_dual_ashrrev_i32 v6, null, v255 ; encoding: [0xff,0x00,0x2c,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0x00,0x2c,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, 0xaf123456 :: v_dual_cndmask_b32 v6, null, v255 ; encoding: [0xff,0x00,0x12,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0x00,0x12,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, 0xaf123456 :: v_dual_fmaak_f32 v6, null, v255, 0xaf123456 ; encoding: [0xff,0x00,0x02,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0x00,0x02,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, 0xaf123456 :: v_dual_fmac_f32 v6, null, v255 ; encoding: [0xff,0x00,0x00,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0x00,0x00,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, 0xaf123456 :: v_dual_fmamk_f32 v6, null, 0xaf123456, v255 ; encoding: [0xff,0x00,0x04,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0x00,0x04,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, 0xaf123456 :: v_dual_lshlrev_b32 v6, null, v255 ; encoding: 
[0xff,0x00,0x22,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0x00,0x22,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, 0xaf123456 :: v_dual_lshrrev_b32 v6, null, v255 ; encoding: [0xff,0x00,0x2a,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0x00,0x2a,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, 0xaf123456 :: v_dual_max_i32 v6, null, v255 ; encoding: [0xff,0x00,0x2e,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0x00,0x2e,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, 0xaf123456 :: v_dual_max_num_f32 v6, null, v255 ; encoding: [0xff,0x00,0x14,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0x00,0x14,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, 0xaf123456 :: v_dual_min_i32 v6, null, v255 ; encoding: [0xff,0x00,0x30,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0x00,0x30,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, 0xaf123456 :: v_dual_min_num_f32 v6, null, v255 ; encoding: [0xff,0x00,0x16,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0x00,0x16,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, 0xaf123456 :: v_dual_mov_b32 v6, null ; encoding: [0xff,0x00,0x10,0xca,0x7c,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x00,0x10,0xca,0x7c,0x00,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, 0xaf123456 :: v_dual_mul_dx9_zero_f32 v6, null, v255 ; encoding: [0xff,0x00,0x0e,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0x00,0x0e,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, 0xaf123456 :: v_dual_mul_f32 v6, null, v255 ; encoding: [0xff,0x00,0x06,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0x00,0x06,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, 0xaf123456 :: v_dual_sub_f32 v6, null, v255 ; encoding: 
[0xff,0x00,0x0a,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0x00,0x0a,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, 0xaf123456 :: v_dual_sub_nc_u32 v6, null, v255 ; encoding: [0xff,0x00,0x28,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0x00,0x28,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, 0xaf123456 :: v_dual_subrev_f32 v6, null, v255 ; encoding: [0xff,0x00,0x0c,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0x00,0x0c,0xca,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_add_f32 v6, vcc_hi, v255 ; encoding: [0x7f,0x00,0x08,0xca,0x6b,0xfe,0x07,0xff] +0x7f,0x00,0x08,0xca,0x6b,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_add_nc_u32 v6, vcc_hi, v255 ; encoding: [0x7f,0x00,0x20,0xca,0x6b,0xfe,0x07,0xff] +0x7f,0x00,0x20,0xca,0x6b,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_ashrrev_i32 v6, vcc_hi, v255 ; encoding: [0x7f,0x00,0x2c,0xca,0x6b,0xfe,0x07,0xff] +0x7f,0x00,0x2c,0xca,0x6b,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_cndmask_b32 v6, exec_hi, v255 ; encoding: [0x7f,0x00,0x12,0xca,0x7f,0xfe,0x07,0xff] +0x7f,0x00,0x12,0xca,0x7f,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_fmaak_f32 v6, exec_hi, v255, 0xaf123456 ; encoding: [0x7f,0x00,0x02,0xca,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7f,0x00,0x02,0xca,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_fmac_f32 v6, vcc_hi, v255 ; encoding: [0x7f,0x00,0x00,0xca,0x6b,0xfe,0x07,0xff] +0x7f,0x00,0x00,0xca,0x6b,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_fmamk_f32 v6, exec_hi, 0xaf123456, v255 ; encoding: [0x7f,0x00,0x04,0xca,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7f,0x00,0x04,0xca,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_lshlrev_b32 v6, vcc_hi, v255 ; encoding: 
[0x7f,0x00,0x22,0xca,0x6b,0xfe,0x07,0xff] +0x7f,0x00,0x22,0xca,0x6b,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_lshrrev_b32 v6, vcc_hi, v255 ; encoding: [0x7f,0x00,0x2a,0xca,0x6b,0xfe,0x07,0xff] +0x7f,0x00,0x2a,0xca,0x6b,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_max_i32 v6, vcc_hi, v255 ; encoding: [0x7f,0x00,0x2e,0xca,0x6b,0xfe,0x07,0xff] +0x7f,0x00,0x2e,0xca,0x6b,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_max_num_f32 v6, vcc_hi, v255 ; encoding: [0x7f,0x00,0x14,0xca,0x6b,0xfe,0x07,0xff] +0x7f,0x00,0x14,0xca,0x6b,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_min_i32 v6, vcc_hi, v255 ; encoding: [0x7f,0x00,0x30,0xca,0x6b,0xfe,0x07,0xff] +0x7f,0x00,0x30,0xca,0x6b,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_min_num_f32 v6, vcc_hi, v255 ; encoding: [0x7f,0x00,0x16,0xca,0x6b,0xfe,0x07,0xff] +0x7f,0x00,0x16,0xca,0x6b,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_mov_b32 v6, vcc_hi ; encoding: [0x7f,0x00,0x10,0xca,0x6b,0x00,0x06,0xff] +0x7f,0x00,0x10,0xca,0x6b,0x00,0x06,0xff + +# GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_mul_dx9_zero_f32 v6, vcc_hi, v255 ; encoding: [0x7f,0x00,0x0e,0xca,0x6b,0xfe,0x07,0xff] +0x7f,0x00,0x0e,0xca,0x6b,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_mul_f32 v6, vcc_hi, v255 ; encoding: [0x7f,0x00,0x06,0xca,0x6b,0xfe,0x07,0xff] +0x7f,0x00,0x06,0xca,0x6b,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_sub_f32 v6, vcc_hi, v255 ; encoding: [0x7f,0x00,0x0a,0xca,0x6b,0xfe,0x07,0xff] +0x7f,0x00,0x0a,0xca,0x6b,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_sub_nc_u32 v6, vcc_hi, v255 ; encoding: [0x7f,0x00,0x28,0xca,0x6b,0xfe,0x07,0xff] +0x7f,0x00,0x28,0xca,0x6b,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_subrev_f32 v6, vcc_hi, v255 ; encoding: [0x7f,0x00,0x0c,0xca,0x6b,0xfe,0x07,0xff] 
+0x7f,0x00,0x0c,0xca,0x6b,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_add_f32 v6, ttmp15, v255 ; encoding: [0x7e,0x00,0x08,0xca,0x7b,0xfe,0x07,0xff] +0x7e,0x00,0x08,0xca,0x7b,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_add_nc_u32 v6, ttmp15, v255 ; encoding: [0x7e,0x00,0x20,0xca,0x7b,0xfe,0x07,0xff] +0x7e,0x00,0x20,0xca,0x7b,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_ashrrev_i32 v6, ttmp15, v255 ; encoding: [0x7e,0x00,0x2c,0xca,0x7b,0xfe,0x07,0xff] +0x7e,0x00,0x2c,0xca,0x7b,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_cndmask_b32 v6, exec_lo, v255 ; encoding: [0x7e,0x00,0x12,0xca,0x7e,0xfe,0x07,0xff] +0x7e,0x00,0x12,0xca,0x7e,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_fmaak_f32 v6, exec_lo, v255, 0xaf123456 ; encoding: [0x7e,0x00,0x02,0xca,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7e,0x00,0x02,0xca,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_fmac_f32 v6, ttmp15, v255 ; encoding: [0x7e,0x00,0x00,0xca,0x7b,0xfe,0x07,0xff] +0x7e,0x00,0x00,0xca,0x7b,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_fmamk_f32 v6, exec_lo, 0xaf123456, v255 ; encoding: [0x7e,0x00,0x04,0xca,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7e,0x00,0x04,0xca,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_lshlrev_b32 v6, ttmp15, v255 ; encoding: [0x7e,0x00,0x22,0xca,0x7b,0xfe,0x07,0xff] +0x7e,0x00,0x22,0xca,0x7b,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_lshrrev_b32 v6, ttmp15, v255 ; encoding: [0x7e,0x00,0x2a,0xca,0x7b,0xfe,0x07,0xff] +0x7e,0x00,0x2a,0xca,0x7b,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_max_i32 v6, ttmp15, v255 ; encoding: [0x7e,0x00,0x2e,0xca,0x7b,0xfe,0x07,0xff] +0x7e,0x00,0x2e,0xca,0x7b,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_max_num_f32 v6, ttmp15, v255 ; encoding: 
[0x7e,0x00,0x14,0xca,0x7b,0xfe,0x07,0xff] +0x7e,0x00,0x14,0xca,0x7b,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_min_i32 v6, ttmp15, v255 ; encoding: [0x7e,0x00,0x30,0xca,0x7b,0xfe,0x07,0xff] +0x7e,0x00,0x30,0xca,0x7b,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_min_num_f32 v6, ttmp15, v255 ; encoding: [0x7e,0x00,0x16,0xca,0x7b,0xfe,0x07,0xff] +0x7e,0x00,0x16,0xca,0x7b,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_mov_b32 v6, ttmp15 ; encoding: [0x7e,0x00,0x10,0xca,0x7b,0x00,0x06,0xff] +0x7e,0x00,0x10,0xca,0x7b,0x00,0x06,0xff + +# GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_mul_dx9_zero_f32 v6, ttmp15, v255 ; encoding: [0x7e,0x00,0x0e,0xca,0x7b,0xfe,0x07,0xff] +0x7e,0x00,0x0e,0xca,0x7b,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_mul_f32 v6, ttmp15, v255 ; encoding: [0x7e,0x00,0x06,0xca,0x7b,0xfe,0x07,0xff] +0x7e,0x00,0x06,0xca,0x7b,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_sub_f32 v6, ttmp15, v255 ; encoding: [0x7e,0x00,0x0a,0xca,0x7b,0xfe,0x07,0xff] +0x7e,0x00,0x0a,0xca,0x7b,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_sub_nc_u32 v6, ttmp15, v255 ; encoding: [0x7e,0x00,0x28,0xca,0x7b,0xfe,0x07,0xff] +0x7e,0x00,0x28,0xca,0x7b,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_subrev_f32 v6, ttmp15, v255 ; encoding: [0x7e,0x00,0x0c,0xca,0x7b,0xfe,0x07,0xff] +0x7e,0x00,0x0c,0xca,0x7b,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_add_f32 v6, m0, v255 ; encoding: [0x7d,0x00,0x08,0xca,0x7d,0xfe,0x07,0xff] +0x7d,0x00,0x08,0xca,0x7d,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_add_nc_u32 v6, m0, v255 ; encoding: [0x7d,0x00,0x20,0xca,0x7d,0xfe,0x07,0xff] +0x7d,0x00,0x20,0xca,0x7d,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_ashrrev_i32 v6, m0, v255 ; encoding: [0x7d,0x00,0x2c,0xca,0x7d,0xfe,0x07,0xff] +0x7d,0x00,0x2c,0xca,0x7d,0xfe,0x07,0xff + +# GFX1250: 
v_dual_mov_b32 v255, m0 :: v_dual_cndmask_b32 v6, m0, v255 ; encoding: [0x7d,0x00,0x12,0xca,0x7d,0xfe,0x07,0xff] +0x7d,0x00,0x12,0xca,0x7d,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_fmaak_f32 v6, m0, v255, 0xaf123456 ; encoding: [0x7d,0x00,0x02,0xca,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7d,0x00,0x02,0xca,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_fmac_f32 v6, m0, v255 ; encoding: [0x7d,0x00,0x00,0xca,0x7d,0xfe,0x07,0xff] +0x7d,0x00,0x00,0xca,0x7d,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_fmamk_f32 v6, m0, 0xaf123456, v255 ; encoding: [0x7d,0x00,0x04,0xca,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7d,0x00,0x04,0xca,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_lshlrev_b32 v6, m0, v255 ; encoding: [0x7d,0x00,0x22,0xca,0x7d,0xfe,0x07,0xff] +0x7d,0x00,0x22,0xca,0x7d,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_lshrrev_b32 v6, m0, v255 ; encoding: [0x7d,0x00,0x2a,0xca,0x7d,0xfe,0x07,0xff] +0x7d,0x00,0x2a,0xca,0x7d,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_max_i32 v6, m0, v255 ; encoding: [0x7d,0x00,0x2e,0xca,0x7d,0xfe,0x07,0xff] +0x7d,0x00,0x2e,0xca,0x7d,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_max_num_f32 v6, m0, v255 ; encoding: [0x7d,0x00,0x14,0xca,0x7d,0xfe,0x07,0xff] +0x7d,0x00,0x14,0xca,0x7d,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_min_i32 v6, m0, v255 ; encoding: [0x7d,0x00,0x30,0xca,0x7d,0xfe,0x07,0xff] +0x7d,0x00,0x30,0xca,0x7d,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_min_num_f32 v6, m0, v255 ; encoding: [0x7d,0x00,0x16,0xca,0x7d,0xfe,0x07,0xff] +0x7d,0x00,0x16,0xca,0x7d,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_mov_b32 v6, m0 ; encoding: [0x7d,0x00,0x10,0xca,0x7d,0x00,0x06,0xff] +0x7d,0x00,0x10,0xca,0x7d,0x00,0x06,0xff + +# GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_mul_dx9_zero_f32 v6, m0, v255 ; 
encoding: [0x7d,0x00,0x0e,0xca,0x7d,0xfe,0x07,0xff] +0x7d,0x00,0x0e,0xca,0x7d,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_mul_f32 v6, m0, v255 ; encoding: [0x7d,0x00,0x06,0xca,0x7d,0xfe,0x07,0xff] +0x7d,0x00,0x06,0xca,0x7d,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_sub_f32 v6, m0, v255 ; encoding: [0x7d,0x00,0x0a,0xca,0x7d,0xfe,0x07,0xff] +0x7d,0x00,0x0a,0xca,0x7d,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_sub_nc_u32 v6, m0, v255 ; encoding: [0x7d,0x00,0x28,0xca,0x7d,0xfe,0x07,0xff] +0x7d,0x00,0x28,0xca,0x7d,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_subrev_f32 v6, m0, v255 ; encoding: [0x7d,0x00,0x0c,0xca,0x7d,0xfe,0x07,0xff] +0x7d,0x00,0x0c,0xca,0x7d,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_add_f32 v6, s105, v255 ; encoding: [0x01,0x00,0x08,0xca,0x69,0xfe,0x07,0xff] +0x01,0x00,0x08,0xca,0x69,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_add_nc_u32 v6, s105, v255 ; encoding: [0x01,0x00,0x20,0xca,0x69,0xfe,0x07,0xff] +0x01,0x00,0x20,0xca,0x69,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_ashrrev_i32 v6, s105, v255 ; encoding: [0x01,0x00,0x2c,0xca,0x69,0xfe,0x07,0xff] +0x01,0x00,0x2c,0xca,0x69,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_cndmask_b32 v6, s1, v255 ; encoding: [0x01,0x00,0x12,0xca,0x01,0xfe,0x07,0xff] +0x01,0x00,0x12,0xca,0x01,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_fmaak_f32 v6, s1, v255, 0xaf123456 ; encoding: [0x01,0x00,0x02,0xca,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x01,0x00,0x02,0xca,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_fmac_f32 v6, s105, v255 ; encoding: [0x01,0x00,0x00,0xca,0x69,0xfe,0x07,0xff] +0x01,0x00,0x00,0xca,0x69,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_fmamk_f32 v6, s1, 0xaf123456, v255 ; encoding: [0x01,0x00,0x04,0xca,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] 
+0x01,0x00,0x04,0xca,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_lshlrev_b32 v6, s105, v255 ; encoding: [0x01,0x00,0x22,0xca,0x69,0xfe,0x07,0xff] +0x01,0x00,0x22,0xca,0x69,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_lshrrev_b32 v6, s105, v255 ; encoding: [0x01,0x00,0x2a,0xca,0x69,0xfe,0x07,0xff] +0x01,0x00,0x2a,0xca,0x69,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_max_i32 v6, s105, v255 ; encoding: [0x01,0x00,0x2e,0xca,0x69,0xfe,0x07,0xff] +0x01,0x00,0x2e,0xca,0x69,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_max_num_f32 v6, s105, v255 ; encoding: [0x01,0x00,0x14,0xca,0x69,0xfe,0x07,0xff] +0x01,0x00,0x14,0xca,0x69,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_min_i32 v6, s105, v255 ; encoding: [0x01,0x00,0x30,0xca,0x69,0xfe,0x07,0xff] +0x01,0x00,0x30,0xca,0x69,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_min_num_f32 v6, s105, v255 ; encoding: [0x01,0x00,0x16,0xca,0x69,0xfe,0x07,0xff] +0x01,0x00,0x16,0xca,0x69,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_mov_b32 v6, s105 ; encoding: [0x01,0x00,0x10,0xca,0x69,0x00,0x06,0xff] +0x01,0x00,0x10,0xca,0x69,0x00,0x06,0xff + +# GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_mul_dx9_zero_f32 v6, s105, v255 ; encoding: [0x01,0x00,0x0e,0xca,0x69,0xfe,0x07,0xff] +0x01,0x00,0x0e,0xca,0x69,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_mul_f32 v6, s105, v255 ; encoding: [0x01,0x00,0x06,0xca,0x69,0xfe,0x07,0xff] +0x01,0x00,0x06,0xca,0x69,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_sub_f32 v6, s105, v255 ; encoding: [0x01,0x00,0x0a,0xca,0x69,0xfe,0x07,0xff] +0x01,0x00,0x0a,0xca,0x69,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_sub_nc_u32 v6, s105, v255 ; encoding: [0x01,0x00,0x28,0xca,0x69,0xfe,0x07,0xff] +0x01,0x00,0x28,0xca,0x69,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_subrev_f32 v6, s105, v255 ; encoding: 
[0x01,0x00,0x0c,0xca,0x69,0xfe,0x07,0xff] +0x01,0x00,0x0c,0xca,0x69,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_add_f32 v6, s1, v255 ; encoding: [0x69,0x00,0x08,0xca,0x01,0xfe,0x07,0xff] +0x69,0x00,0x08,0xca,0x01,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_add_nc_u32 v6, s1, v255 ; encoding: [0x69,0x00,0x20,0xca,0x01,0xfe,0x07,0xff] +0x69,0x00,0x20,0xca,0x01,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_ashrrev_i32 v6, s1, v255 ; encoding: [0x69,0x00,0x2c,0xca,0x01,0xfe,0x07,0xff] +0x69,0x00,0x2c,0xca,0x01,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_cndmask_b32 v6, s105, v255 ; encoding: [0x69,0x00,0x12,0xca,0x69,0xfe,0x07,0xff] +0x69,0x00,0x12,0xca,0x69,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_fmaak_f32 v6, s105, v255, 0xaf123456 ; encoding: [0x69,0x00,0x02,0xca,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x69,0x00,0x02,0xca,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_fmac_f32 v6, s1, v255 ; encoding: [0x69,0x00,0x00,0xca,0x01,0xfe,0x07,0xff] +0x69,0x00,0x00,0xca,0x01,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_fmamk_f32 v6, s105, 0xaf123456, v255 ; encoding: [0x69,0x00,0x04,0xca,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x69,0x00,0x04,0xca,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_lshlrev_b32 v6, s1, v255 ; encoding: [0x69,0x00,0x22,0xca,0x01,0xfe,0x07,0xff] +0x69,0x00,0x22,0xca,0x01,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_lshrrev_b32 v6, s1, v255 ; encoding: [0x69,0x00,0x2a,0xca,0x01,0xfe,0x07,0xff] +0x69,0x00,0x2a,0xca,0x01,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_max_i32 v6, s1, v255 ; encoding: [0x69,0x00,0x2e,0xca,0x01,0xfe,0x07,0xff] +0x69,0x00,0x2e,0xca,0x01,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_max_num_f32 v6, s1, v255 ; encoding: 
[0x69,0x00,0x14,0xca,0x01,0xfe,0x07,0xff] +0x69,0x00,0x14,0xca,0x01,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_min_i32 v6, s1, v255 ; encoding: [0x69,0x00,0x30,0xca,0x01,0xfe,0x07,0xff] +0x69,0x00,0x30,0xca,0x01,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_min_num_f32 v6, s1, v255 ; encoding: [0x69,0x00,0x16,0xca,0x01,0xfe,0x07,0xff] +0x69,0x00,0x16,0xca,0x01,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_mov_b32 v6, s1 ; encoding: [0x69,0x00,0x10,0xca,0x01,0x00,0x06,0xff] +0x69,0x00,0x10,0xca,0x01,0x00,0x06,0xff + +# GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_mul_dx9_zero_f32 v6, s1, v255 ; encoding: [0x69,0x00,0x0e,0xca,0x01,0xfe,0x07,0xff] +0x69,0x00,0x0e,0xca,0x01,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_mul_f32 v6, s1, v255 ; encoding: [0x69,0x00,0x06,0xca,0x01,0xfe,0x07,0xff] +0x69,0x00,0x06,0xca,0x01,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_sub_f32 v6, s1, v255 ; encoding: [0x69,0x00,0x0a,0xca,0x01,0xfe,0x07,0xff] +0x69,0x00,0x0a,0xca,0x01,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_sub_nc_u32 v6, s1, v255 ; encoding: [0x69,0x00,0x28,0xca,0x01,0xfe,0x07,0xff] +0x69,0x00,0x28,0xca,0x01,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_subrev_f32 v6, s1, v255 ; encoding: [0x69,0x00,0x0c,0xca,0x01,0xfe,0x07,0xff] +0x69,0x00,0x0c,0xca,0x01,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_add_f32 v6, -1, v255 ; encoding: [0xfd,0x00,0x08,0xca,0xc1,0xfe,0x07,0xff] +0xfd,0x00,0x08,0xca,0xc1,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_add_nc_u32 v6, -1, v255 ; encoding: [0xfd,0x00,0x20,0xca,0xc1,0xfe,0x07,0xff] +0xfd,0x00,0x20,0xca,0xc1,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_ashrrev_i32 v6, -1, v255 ; encoding: [0xfd,0x00,0x2c,0xca,0xc1,0xfe,0x07,0xff] +0xfd,0x00,0x2c,0xca,0xc1,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, src_scc :: 
v_dual_cndmask_b32 v6, -1, v255 ; encoding: [0xfd,0x00,0x12,0xca,0xc1,0xfe,0x07,0xff] +0xfd,0x00,0x12,0xca,0xc1,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_fmaak_f32 v6, -1, v255, 0xaf123456 ; encoding: [0xfd,0x00,0x02,0xca,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xfd,0x00,0x02,0xca,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_fmac_f32 v6, -1, v255 ; encoding: [0xfd,0x00,0x00,0xca,0xc1,0xfe,0x07,0xff] +0xfd,0x00,0x00,0xca,0xc1,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_fmamk_f32 v6, -1, 0xaf123456, v255 ; encoding: [0xfd,0x00,0x04,0xca,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xfd,0x00,0x04,0xca,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_lshlrev_b32 v6, -1, v255 ; encoding: [0xfd,0x00,0x22,0xca,0xc1,0xfe,0x07,0xff] +0xfd,0x00,0x22,0xca,0xc1,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_lshrrev_b32 v6, -1, v255 ; encoding: [0xfd,0x00,0x2a,0xca,0xc1,0xfe,0x07,0xff] +0xfd,0x00,0x2a,0xca,0xc1,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_max_i32 v6, -1, v255 ; encoding: [0xfd,0x00,0x2e,0xca,0xc1,0xfe,0x07,0xff] +0xfd,0x00,0x2e,0xca,0xc1,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_max_num_f32 v6, -1, v255 ; encoding: [0xfd,0x00,0x14,0xca,0xc1,0xfe,0x07,0xff] +0xfd,0x00,0x14,0xca,0xc1,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_min_i32 v6, -1, v255 ; encoding: [0xfd,0x00,0x30,0xca,0xc1,0xfe,0x07,0xff] +0xfd,0x00,0x30,0xca,0xc1,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_min_num_f32 v6, -1, v255 ; encoding: [0xfd,0x00,0x16,0xca,0xc1,0xfe,0x07,0xff] +0xfd,0x00,0x16,0xca,0xc1,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_mov_b32 v6, -1 ; encoding: [0xfd,0x00,0x10,0xca,0xc1,0x00,0x06,0xff] +0xfd,0x00,0x10,0xca,0xc1,0x00,0x06,0xff + +# GFX1250: v_dual_mov_b32 v255, src_scc :: 
v_dual_mul_dx9_zero_f32 v6, -1, v255 ; encoding: [0xfd,0x00,0x0e,0xca,0xc1,0xfe,0x07,0xff] +0xfd,0x00,0x0e,0xca,0xc1,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_mul_f32 v6, -1, v255 ; encoding: [0xfd,0x00,0x06,0xca,0xc1,0xfe,0x07,0xff] +0xfd,0x00,0x06,0xca,0xc1,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_sub_f32 v6, -1, v255 ; encoding: [0xfd,0x00,0x0a,0xca,0xc1,0xfe,0x07,0xff] +0xfd,0x00,0x0a,0xca,0xc1,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_sub_nc_u32 v6, -1, v255 ; encoding: [0xfd,0x00,0x28,0xca,0xc1,0xfe,0x07,0xff] +0xfd,0x00,0x28,0xca,0xc1,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_subrev_f32 v6, -1, v255 ; encoding: [0xfd,0x00,0x0c,0xca,0xc1,0xfe,0x07,0xff] +0xfd,0x00,0x0c,0xca,0xc1,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_add_f32 v6, vcc_lo, v255 ; encoding: [0x7b,0x00,0x08,0xca,0x6a,0xfe,0x07,0xff] +0x7b,0x00,0x08,0xca,0x6a,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_add_nc_u32 v6, vcc_lo, v255 ; encoding: [0x7b,0x00,0x20,0xca,0x6a,0xfe,0x07,0xff] +0x7b,0x00,0x20,0xca,0x6a,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_ashrrev_i32 v6, vcc_lo, v255 ; encoding: [0x7b,0x00,0x2c,0xca,0x6a,0xfe,0x07,0xff] +0x7b,0x00,0x2c,0xca,0x6a,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_cndmask_b32 v6, ttmp15, v255 ; encoding: [0x7b,0x00,0x12,0xca,0x7b,0xfe,0x07,0xff] +0x7b,0x00,0x12,0xca,0x7b,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_fmaak_f32 v6, ttmp15, v255, 0xaf123456 ; encoding: [0x7b,0x00,0x02,0xca,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7b,0x00,0x02,0xca,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_fmac_f32 v6, vcc_lo, v255 ; encoding: [0x7b,0x00,0x00,0xca,0x6a,0xfe,0x07,0xff] +0x7b,0x00,0x00,0xca,0x6a,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_fmamk_f32 v6, ttmp15, 0xaf123456, 
v255 ; encoding: [0x7b,0x00,0x04,0xca,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7b,0x00,0x04,0xca,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_lshlrev_b32 v6, vcc_lo, v255 ; encoding: [0x7b,0x00,0x22,0xca,0x6a,0xfe,0x07,0xff] +0x7b,0x00,0x22,0xca,0x6a,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_lshrrev_b32 v6, vcc_lo, v255 ; encoding: [0x7b,0x00,0x2a,0xca,0x6a,0xfe,0x07,0xff] +0x7b,0x00,0x2a,0xca,0x6a,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_max_i32 v6, vcc_lo, v255 ; encoding: [0x7b,0x00,0x2e,0xca,0x6a,0xfe,0x07,0xff] +0x7b,0x00,0x2e,0xca,0x6a,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_max_num_f32 v6, vcc_lo, v255 ; encoding: [0x7b,0x00,0x14,0xca,0x6a,0xfe,0x07,0xff] +0x7b,0x00,0x14,0xca,0x6a,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_min_i32 v6, vcc_lo, v255 ; encoding: [0x7b,0x00,0x30,0xca,0x6a,0xfe,0x07,0xff] +0x7b,0x00,0x30,0xca,0x6a,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_min_num_f32 v6, vcc_lo, v255 ; encoding: [0x7b,0x00,0x16,0xca,0x6a,0xfe,0x07,0xff] +0x7b,0x00,0x16,0xca,0x6a,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_mov_b32 v6, vcc_lo ; encoding: [0x7b,0x00,0x10,0xca,0x6a,0x00,0x06,0xff] +0x7b,0x00,0x10,0xca,0x6a,0x00,0x06,0xff + +# GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_mul_dx9_zero_f32 v6, vcc_lo, v255 ; encoding: [0x7b,0x00,0x0e,0xca,0x6a,0xfe,0x07,0xff] +0x7b,0x00,0x0e,0xca,0x6a,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_mul_f32 v6, vcc_lo, v255 ; encoding: [0x7b,0x00,0x06,0xca,0x6a,0xfe,0x07,0xff] +0x7b,0x00,0x06,0xca,0x6a,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_sub_f32 v6, vcc_lo, v255 ; encoding: [0x7b,0x00,0x0a,0xca,0x6a,0xfe,0x07,0xff] +0x7b,0x00,0x0a,0xca,0x6a,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_sub_nc_u32 v6, vcc_lo, v255 ; encoding: 
[0x7b,0x00,0x28,0xca,0x6a,0xfe,0x07,0xff] +0x7b,0x00,0x28,0xca,0x6a,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_subrev_f32 v6, vcc_lo, v255 ; encoding: [0x7b,0x00,0x0c,0xca,0x6a,0xfe,0x07,0xff] +0x7b,0x00,0x0c,0xca,0x6a,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_add_f32 v6, v255, v255 ; encoding: [0x01,0x01,0x08,0xca,0xff,0xff,0x07,0xff] +0x01,0x01,0x08,0xca,0xff,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_add_nc_u32 v6, v255, v255 ; encoding: [0x01,0x01,0x20,0xca,0xff,0xff,0x07,0xff] +0x01,0x01,0x20,0xca,0xff,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_ashrrev_i32 v6, v255, v255 ; encoding: [0x01,0x01,0x2c,0xca,0xff,0xff,0x07,0xff] +0x01,0x01,0x2c,0xca,0xff,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_cndmask_b32 v6, v255, v255 ; encoding: [0x01,0x01,0x12,0xca,0xff,0xff,0x07,0xff] +0x01,0x01,0x12,0xca,0xff,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_fmaak_f32 v6, v255, v255, 0xaf123456 ; encoding: [0x01,0x01,0x02,0xca,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x01,0x01,0x02,0xca,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_fmac_f32 v6, v255, v255 ; encoding: [0x01,0x01,0x00,0xca,0xff,0xff,0x07,0xff] +0x01,0x01,0x00,0xca,0xff,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_fmamk_f32 v6, v255, 0xaf123456, v255 ; encoding: [0x01,0x01,0x04,0xca,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x01,0x01,0x04,0xca,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_lshlrev_b32 v6, v255, v255 ; encoding: [0x01,0x01,0x22,0xca,0xff,0xff,0x07,0xff] +0x01,0x01,0x22,0xca,0xff,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_lshrrev_b32 v6, v255, v255 ; encoding: [0x01,0x01,0x2a,0xca,0xff,0xff,0x07,0xff] +0x01,0x01,0x2a,0xca,0xff,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_max_i32 v6, v255, v255 ; encoding: 
[0x01,0x01,0x2e,0xca,0xff,0xff,0x07,0xff] +0x01,0x01,0x2e,0xca,0xff,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_max_num_f32 v6, v255, v255 ; encoding: [0x01,0x01,0x14,0xca,0xff,0xff,0x07,0xff] +0x01,0x01,0x14,0xca,0xff,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_min_i32 v6, v255, v255 ; encoding: [0x01,0x01,0x30,0xca,0xff,0xff,0x07,0xff] +0x01,0x01,0x30,0xca,0xff,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_min_num_f32 v6, v255, v255 ; encoding: [0x01,0x01,0x16,0xca,0xff,0xff,0x07,0xff] +0x01,0x01,0x16,0xca,0xff,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_mov_b32 v6, v255 ; encoding: [0x01,0x01,0x10,0xca,0xff,0x01,0x06,0xff] +0x01,0x01,0x10,0xca,0xff,0x01,0x06,0xff + +# GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_mul_dx9_zero_f32 v6, v255, v255 ; encoding: [0x01,0x01,0x0e,0xca,0xff,0xff,0x07,0xff] +0x01,0x01,0x0e,0xca,0xff,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_mul_f32 v6, v255, v255 ; encoding: [0x01,0x01,0x06,0xca,0xff,0xff,0x07,0xff] +0x01,0x01,0x06,0xca,0xff,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_sub_f32 v6, v255, v255 ; encoding: [0x01,0x01,0x0a,0xca,0xff,0xff,0x07,0xff] +0x01,0x01,0x0a,0xca,0xff,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_sub_nc_u32 v6, v255, v255 ; encoding: [0x01,0x01,0x28,0xca,0xff,0xff,0x07,0xff] +0x01,0x01,0x28,0xca,0xff,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_subrev_f32 v6, v255, v255 ; encoding: [0x01,0x01,0x0c,0xca,0xff,0xff,0x07,0xff] +0x01,0x01,0x0c,0xca,0xff,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_add_f32 v6, v3, v255 ; encoding: [0x02,0x01,0x08,0xca,0x03,0xff,0x07,0xff] +0x02,0x01,0x08,0xca,0x03,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_add_nc_u32 v6, v3, v255 ; encoding: [0x02,0x01,0x20,0xca,0x03,0xff,0x07,0xff] +0x02,0x01,0x20,0xca,0x03,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_ashrrev_i32 v6, v3, v255 ; 
encoding: [0x02,0x01,0x2c,0xca,0x03,0xff,0x07,0xff] +0x02,0x01,0x2c,0xca,0x03,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_cndmask_b32 v6, v3, v255 ; encoding: [0x02,0x01,0x12,0xca,0x03,0xff,0x07,0xff] +0x02,0x01,0x12,0xca,0x03,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_fmaak_f32 v6, v3, v255, 0xaf123456 ; encoding: [0x02,0x01,0x02,0xca,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x02,0x01,0x02,0xca,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_fmac_f32 v6, v3, v255 ; encoding: [0x02,0x01,0x00,0xca,0x03,0xff,0x07,0xff] +0x02,0x01,0x00,0xca,0x03,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_fmamk_f32 v6, v3, 0xaf123456, v255 ; encoding: [0x02,0x01,0x04,0xca,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x02,0x01,0x04,0xca,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_lshlrev_b32 v6, v3, v255 ; encoding: [0x02,0x01,0x22,0xca,0x03,0xff,0x07,0xff] +0x02,0x01,0x22,0xca,0x03,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_lshrrev_b32 v6, v3, v255 ; encoding: [0x02,0x01,0x2a,0xca,0x03,0xff,0x07,0xff] +0x02,0x01,0x2a,0xca,0x03,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_max_i32 v6, v3, v255 ; encoding: [0x02,0x01,0x2e,0xca,0x03,0xff,0x07,0xff] +0x02,0x01,0x2e,0xca,0x03,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_max_num_f32 v6, v3, v255 ; encoding: [0x02,0x01,0x14,0xca,0x03,0xff,0x07,0xff] +0x02,0x01,0x14,0xca,0x03,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_min_i32 v6, v3, v255 ; encoding: [0x02,0x01,0x30,0xca,0x03,0xff,0x07,0xff] +0x02,0x01,0x30,0xca,0x03,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_min_num_f32 v6, v3, v255 ; encoding: [0x02,0x01,0x16,0xca,0x03,0xff,0x07,0xff] +0x02,0x01,0x16,0xca,0x03,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_mov_b32 v6, v3 ; encoding: [0x02,0x01,0x10,0xca,0x03,0x01,0x06,0xff] 
+0x02,0x01,0x10,0xca,0x03,0x01,0x06,0xff + +# GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_mul_dx9_zero_f32 v6, v3, v255 ; encoding: [0x02,0x01,0x0e,0xca,0x03,0xff,0x07,0xff] +0x02,0x01,0x0e,0xca,0x03,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_mul_f32 v6, v3, v255 ; encoding: [0x02,0x01,0x06,0xca,0x03,0xff,0x07,0xff] +0x02,0x01,0x06,0xca,0x03,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_sub_f32 v6, v3, v255 ; encoding: [0x02,0x01,0x0a,0xca,0x03,0xff,0x07,0xff] +0x02,0x01,0x0a,0xca,0x03,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_sub_nc_u32 v6, v3, v255 ; encoding: [0x02,0x01,0x28,0xca,0x03,0xff,0x07,0xff] +0x02,0x01,0x28,0xca,0x03,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_subrev_f32 v6, v3, v255 ; encoding: [0x02,0x01,0x0c,0xca,0x03,0xff,0x07,0xff] +0x02,0x01,0x0c,0xca,0x03,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_add_f32 v6, v2, v255 ; encoding: [0xff,0x01,0x08,0xca,0x02,0xff,0x07,0xff] +0xff,0x01,0x08,0xca,0x02,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_add_nc_u32 v6, v2, v255 ; encoding: [0xff,0x01,0x20,0xca,0x02,0xff,0x07,0xff] +0xff,0x01,0x20,0xca,0x02,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_ashrrev_i32 v6, v2, v255 ; encoding: [0xff,0x01,0x2c,0xca,0x02,0xff,0x07,0xff] +0xff,0x01,0x2c,0xca,0x02,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_cndmask_b32 v6, v2, v255 ; encoding: [0xff,0x01,0x12,0xca,0x02,0xff,0x07,0xff] +0xff,0x01,0x12,0xca,0x02,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_fmaak_f32 v6, v2, v255, 0xaf123456 ; encoding: [0xff,0x01,0x02,0xca,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0x01,0x02,0xca,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_fmac_f32 v6, v2, v255 ; encoding: [0xff,0x01,0x00,0xca,0x02,0xff,0x07,0xff] +0xff,0x01,0x00,0xca,0x02,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_fmamk_f32 
v6, v2, 0xaf123456, v255 ; encoding: [0xff,0x01,0x04,0xca,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0x01,0x04,0xca,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_lshlrev_b32 v6, v2, v255 ; encoding: [0xff,0x01,0x22,0xca,0x02,0xff,0x07,0xff] +0xff,0x01,0x22,0xca,0x02,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_lshrrev_b32 v6, v2, v255 ; encoding: [0xff,0x01,0x2a,0xca,0x02,0xff,0x07,0xff] +0xff,0x01,0x2a,0xca,0x02,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_max_i32 v6, v2, v255 ; encoding: [0xff,0x01,0x2e,0xca,0x02,0xff,0x07,0xff] +0xff,0x01,0x2e,0xca,0x02,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_max_num_f32 v6, v2, v255 ; encoding: [0xff,0x01,0x14,0xca,0x02,0xff,0x07,0xff] +0xff,0x01,0x14,0xca,0x02,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_min_i32 v6, v2, v255 ; encoding: [0xff,0x01,0x30,0xca,0x02,0xff,0x07,0xff] +0xff,0x01,0x30,0xca,0x02,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_min_num_f32 v6, v2, v255 ; encoding: [0xff,0x01,0x16,0xca,0x02,0xff,0x07,0xff] +0xff,0x01,0x16,0xca,0x02,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_mov_b32 v6, v2 ; encoding: [0xff,0x01,0x10,0xca,0x02,0x01,0x06,0xff] +0xff,0x01,0x10,0xca,0x02,0x01,0x06,0xff + +# GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_mul_dx9_zero_f32 v6, v2, v255 ; encoding: [0xff,0x01,0x0e,0xca,0x02,0xff,0x07,0xff] +0xff,0x01,0x0e,0xca,0x02,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_mul_f32 v6, v2, v255 ; encoding: [0xff,0x01,0x06,0xca,0x02,0xff,0x07,0xff] +0xff,0x01,0x06,0xca,0x02,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_sub_f32 v6, v2, v255 ; encoding: [0xff,0x01,0x0a,0xca,0x02,0xff,0x07,0xff] +0xff,0x01,0x0a,0xca,0x02,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_sub_nc_u32 v6, v2, v255 ; encoding: [0xff,0x01,0x28,0xca,0x02,0xff,0x07,0xff] 
+0xff,0x01,0x28,0xca,0x02,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_subrev_f32 v6, v2, v255 ; encoding: [0xff,0x01,0x0c,0xca,0x02,0xff,0x07,0xff] +0xff,0x01,0x0c,0xca,0x02,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_add_f32 v6, v4, v255 ; encoding: [0x03,0x01,0x08,0xca,0x04,0xff,0x07,0xff] +0x03,0x01,0x08,0xca,0x04,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_add_nc_u32 v6, v4, v255 ; encoding: [0x03,0x01,0x20,0xca,0x04,0xff,0x07,0xff] +0x03,0x01,0x20,0xca,0x04,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_ashrrev_i32 v6, v4, v255 ; encoding: [0x03,0x01,0x2c,0xca,0x04,0xff,0x07,0xff] +0x03,0x01,0x2c,0xca,0x04,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_cndmask_b32 v6, v4, v255 ; encoding: [0x03,0x01,0x12,0xca,0x04,0xff,0x07,0xff] +0x03,0x01,0x12,0xca,0x04,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_fmaak_f32 v6, v4, v255, 0xaf123456 ; encoding: [0x03,0x01,0x02,0xca,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x03,0x01,0x02,0xca,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_fmac_f32 v6, v4, v255 ; encoding: [0x03,0x01,0x00,0xca,0x04,0xff,0x07,0xff] +0x03,0x01,0x00,0xca,0x04,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_fmamk_f32 v6, v4, 0xaf123456, v255 ; encoding: [0x03,0x01,0x04,0xca,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x03,0x01,0x04,0xca,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_lshlrev_b32 v6, v4, v255 ; encoding: [0x03,0x01,0x22,0xca,0x04,0xff,0x07,0xff] +0x03,0x01,0x22,0xca,0x04,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_lshrrev_b32 v6, v4, v255 ; encoding: [0x03,0x01,0x2a,0xca,0x04,0xff,0x07,0xff] +0x03,0x01,0x2a,0xca,0x04,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_max_i32 v6, v4, v255 ; encoding: [0x03,0x01,0x2e,0xca,0x04,0xff,0x07,0xff] +0x03,0x01,0x2e,0xca,0x04,0xff,0x07,0xff + +# GFX1250: 
v_dual_mov_b32 v255, v3 :: v_dual_max_num_f32 v6, v4, v255 ; encoding: [0x03,0x01,0x14,0xca,0x04,0xff,0x07,0xff] +0x03,0x01,0x14,0xca,0x04,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_min_i32 v6, v4, v255 ; encoding: [0x03,0x01,0x30,0xca,0x04,0xff,0x07,0xff] +0x03,0x01,0x30,0xca,0x04,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_min_num_f32 v6, v4, v255 ; encoding: [0x03,0x01,0x16,0xca,0x04,0xff,0x07,0xff] +0x03,0x01,0x16,0xca,0x04,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_mov_b32 v6, v4 ; encoding: [0x03,0x01,0x10,0xca,0x04,0x01,0x06,0xff] +0x03,0x01,0x10,0xca,0x04,0x01,0x06,0xff + +# GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_mul_dx9_zero_f32 v6, v4, v255 ; encoding: [0x03,0x01,0x0e,0xca,0x04,0xff,0x07,0xff] +0x03,0x01,0x0e,0xca,0x04,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_mul_f32 v6, v4, v255 ; encoding: [0x03,0x01,0x06,0xca,0x04,0xff,0x07,0xff] +0x03,0x01,0x06,0xca,0x04,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_sub_f32 v6, v4, v255 ; encoding: [0x03,0x01,0x0a,0xca,0x04,0xff,0x07,0xff] +0x03,0x01,0x0a,0xca,0x04,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_sub_nc_u32 v6, v4, v255 ; encoding: [0x03,0x01,0x28,0xca,0x04,0xff,0x07,0xff] +0x03,0x01,0x28,0xca,0x04,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_subrev_f32 v6, v4, v255 ; encoding: [0x03,0x01,0x0c,0xca,0x04,0xff,0x07,0xff] +0x03,0x01,0x0c,0xca,0x04,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_add_f32 v6, v1, v255 ; encoding: [0x04,0x01,0x08,0xca,0x01,0xff,0x07,0xff] +0x04,0x01,0x08,0xca,0x01,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_add_nc_u32 v6, v1, v255 ; encoding: [0x04,0x01,0x20,0xca,0x01,0xff,0x07,0xff] +0x04,0x01,0x20,0xca,0x01,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_ashrrev_i32 v6, v1, v255 ; encoding: [0x04,0x01,0x2c,0xca,0x01,0xff,0x07,0xff] +0x04,0x01,0x2c,0xca,0x01,0xff,0x07,0xff + +# GFX1250: 
v_dual_mov_b32 v255, v4 :: v_dual_cndmask_b32 v6, v1, v255 ; encoding: [0x04,0x01,0x12,0xca,0x01,0xff,0x07,0xff] +0x04,0x01,0x12,0xca,0x01,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_fmaak_f32 v6, v1, v255, 0xaf123456 ; encoding: [0x04,0x01,0x02,0xca,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x04,0x01,0x02,0xca,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_fmac_f32 v6, v1, v255 ; encoding: [0x04,0x01,0x00,0xca,0x01,0xff,0x07,0xff] +0x04,0x01,0x00,0xca,0x01,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_fmamk_f32 v6, v1, 0xaf123456, v255 ; encoding: [0x04,0x01,0x04,0xca,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x04,0x01,0x04,0xca,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_lshlrev_b32 v6, v1, v255 ; encoding: [0x04,0x01,0x22,0xca,0x01,0xff,0x07,0xff] +0x04,0x01,0x22,0xca,0x01,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_lshrrev_b32 v6, v1, v255 ; encoding: [0x04,0x01,0x2a,0xca,0x01,0xff,0x07,0xff] +0x04,0x01,0x2a,0xca,0x01,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_max_i32 v6, v1, v255 ; encoding: [0x04,0x01,0x2e,0xca,0x01,0xff,0x07,0xff] +0x04,0x01,0x2e,0xca,0x01,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_max_num_f32 v6, v1, v255 ; encoding: [0x04,0x01,0x14,0xca,0x01,0xff,0x07,0xff] +0x04,0x01,0x14,0xca,0x01,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_min_i32 v6, v1, v255 ; encoding: [0x04,0x01,0x30,0xca,0x01,0xff,0x07,0xff] +0x04,0x01,0x30,0xca,0x01,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_min_num_f32 v6, v1, v255 ; encoding: [0x04,0x01,0x16,0xca,0x01,0xff,0x07,0xff] +0x04,0x01,0x16,0xca,0x01,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_mov_b32 v6, v1 ; encoding: [0x04,0x01,0x10,0xca,0x01,0x01,0x06,0xff] +0x04,0x01,0x10,0xca,0x01,0x01,0x06,0xff + +# GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_mul_dx9_zero_f32 v6, v1, v255 ; 
encoding: [0x04,0x01,0x0e,0xca,0x01,0xff,0x07,0xff] +0x04,0x01,0x0e,0xca,0x01,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_mul_f32 v6, v1, v255 ; encoding: [0x04,0x01,0x06,0xca,0x01,0xff,0x07,0xff] +0x04,0x01,0x06,0xca,0x01,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_sub_f32 v6, v1, v255 ; encoding: [0x04,0x01,0x0a,0xca,0x01,0xff,0x07,0xff] +0x04,0x01,0x0a,0xca,0x01,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_sub_nc_u32 v6, v1, v255 ; encoding: [0x04,0x01,0x28,0xca,0x01,0xff,0x07,0xff] +0x04,0x01,0x28,0xca,0x01,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_subrev_f32 v6, v1, v255 ; encoding: [0x04,0x01,0x0c,0xca,0x01,0xff,0x07,0xff] +0x04,0x01,0x0c,0xca,0x01,0xff,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_add_f32 v6, exec_lo, v255 ; encoding: [0x6b,0x00,0x08,0xca,0x7e,0xfe,0x07,0xff] +0x6b,0x00,0x08,0xca,0x7e,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_add_nc_u32 v6, exec_lo, v255 ; encoding: [0x6b,0x00,0x20,0xca,0x7e,0xfe,0x07,0xff] +0x6b,0x00,0x20,0xca,0x7e,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_ashrrev_i32 v6, exec_lo, v255 ; encoding: [0x6b,0x00,0x2c,0xca,0x7e,0xfe,0x07,0xff] +0x6b,0x00,0x2c,0xca,0x7e,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_cndmask_b32 v6, vcc_hi, v255 ; encoding: [0x6b,0x00,0x12,0xca,0x6b,0xfe,0x07,0xff] +0x6b,0x00,0x12,0xca,0x6b,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_fmaak_f32 v6, vcc_hi, v255, 0xaf123456 ; encoding: [0x6b,0x00,0x02,0xca,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6b,0x00,0x02,0xca,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_fmac_f32 v6, exec_lo, v255 ; encoding: [0x6b,0x00,0x00,0xca,0x7e,0xfe,0x07,0xff] +0x6b,0x00,0x00,0xca,0x7e,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_fmamk_f32 v6, vcc_hi, 0xaf123456, v255 ; encoding: 
[0x6b,0x00,0x04,0xca,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6b,0x00,0x04,0xca,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_lshlrev_b32 v6, exec_lo, v255 ; encoding: [0x6b,0x00,0x22,0xca,0x7e,0xfe,0x07,0xff] +0x6b,0x00,0x22,0xca,0x7e,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_lshrrev_b32 v6, exec_lo, v255 ; encoding: [0x6b,0x00,0x2a,0xca,0x7e,0xfe,0x07,0xff] +0x6b,0x00,0x2a,0xca,0x7e,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_max_i32 v6, exec_lo, v255 ; encoding: [0x6b,0x00,0x2e,0xca,0x7e,0xfe,0x07,0xff] +0x6b,0x00,0x2e,0xca,0x7e,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_max_num_f32 v6, exec_lo, v255 ; encoding: [0x6b,0x00,0x14,0xca,0x7e,0xfe,0x07,0xff] +0x6b,0x00,0x14,0xca,0x7e,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_min_i32 v6, exec_lo, v255 ; encoding: [0x6b,0x00,0x30,0xca,0x7e,0xfe,0x07,0xff] +0x6b,0x00,0x30,0xca,0x7e,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_min_num_f32 v6, exec_lo, v255 ; encoding: [0x6b,0x00,0x16,0xca,0x7e,0xfe,0x07,0xff] +0x6b,0x00,0x16,0xca,0x7e,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_mov_b32 v6, exec_lo ; encoding: [0x6b,0x00,0x10,0xca,0x7e,0x00,0x06,0xff] +0x6b,0x00,0x10,0xca,0x7e,0x00,0x06,0xff + +# GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_mul_dx9_zero_f32 v6, exec_lo, v255 ; encoding: [0x6b,0x00,0x0e,0xca,0x7e,0xfe,0x07,0xff] +0x6b,0x00,0x0e,0xca,0x7e,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_mul_f32 v6, exec_lo, v255 ; encoding: [0x6b,0x00,0x06,0xca,0x7e,0xfe,0x07,0xff] +0x6b,0x00,0x06,0xca,0x7e,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_sub_f32 v6, exec_lo, v255 ; encoding: [0x6b,0x00,0x0a,0xca,0x7e,0xfe,0x07,0xff] +0x6b,0x00,0x0a,0xca,0x7e,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_sub_nc_u32 v6, exec_lo, v255 ; encoding: 
[0x6b,0x00,0x28,0xca,0x7e,0xfe,0x07,0xff] +0x6b,0x00,0x28,0xca,0x7e,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_subrev_f32 v6, exec_lo, v255 ; encoding: [0x6b,0x00,0x0c,0xca,0x7e,0xfe,0x07,0xff] +0x6b,0x00,0x0c,0xca,0x7e,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_add_f32 v6, exec_hi, v255 ; encoding: [0x6a,0x00,0x08,0xca,0x7f,0xfe,0x07,0xff] +0x6a,0x00,0x08,0xca,0x7f,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_add_nc_u32 v6, exec_hi, v255 ; encoding: [0x6a,0x00,0x20,0xca,0x7f,0xfe,0x07,0xff] +0x6a,0x00,0x20,0xca,0x7f,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_ashrrev_i32 v6, exec_hi, v255 ; encoding: [0x6a,0x00,0x2c,0xca,0x7f,0xfe,0x07,0xff] +0x6a,0x00,0x2c,0xca,0x7f,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_cndmask_b32 v6, vcc_lo, v255 ; encoding: [0x6a,0x00,0x12,0xca,0x6a,0xfe,0x07,0xff] +0x6a,0x00,0x12,0xca,0x6a,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_fmaak_f32 v6, vcc_lo, v255, 0xaf123456 ; encoding: [0x6a,0x00,0x02,0xca,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6a,0x00,0x02,0xca,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_fmac_f32 v6, exec_hi, v255 ; encoding: [0x6a,0x00,0x00,0xca,0x7f,0xfe,0x07,0xff] +0x6a,0x00,0x00,0xca,0x7f,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_fmamk_f32 v6, vcc_lo, 0xaf123456, v255 ; encoding: [0x6a,0x00,0x04,0xca,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6a,0x00,0x04,0xca,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_lshlrev_b32 v6, exec_hi, v255 ; encoding: [0x6a,0x00,0x22,0xca,0x7f,0xfe,0x07,0xff] +0x6a,0x00,0x22,0xca,0x7f,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_lshrrev_b32 v6, exec_hi, v255 ; encoding: [0x6a,0x00,0x2a,0xca,0x7f,0xfe,0x07,0xff] +0x6a,0x00,0x2a,0xca,0x7f,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, vcc_lo :: 
v_dual_max_i32 v6, exec_hi, v255 ; encoding: [0x6a,0x00,0x2e,0xca,0x7f,0xfe,0x07,0xff] +0x6a,0x00,0x2e,0xca,0x7f,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_max_num_f32 v6, exec_hi, v255 ; encoding: [0x6a,0x00,0x14,0xca,0x7f,0xfe,0x07,0xff] +0x6a,0x00,0x14,0xca,0x7f,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_min_i32 v6, exec_hi, v255 ; encoding: [0x6a,0x00,0x30,0xca,0x7f,0xfe,0x07,0xff] +0x6a,0x00,0x30,0xca,0x7f,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_min_num_f32 v6, exec_hi, v255 ; encoding: [0x6a,0x00,0x16,0xca,0x7f,0xfe,0x07,0xff] +0x6a,0x00,0x16,0xca,0x7f,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_mov_b32 v6, exec_hi ; encoding: [0x6a,0x00,0x10,0xca,0x7f,0x00,0x06,0xff] +0x6a,0x00,0x10,0xca,0x7f,0x00,0x06,0xff + +# GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_mul_dx9_zero_f32 v6, exec_hi, v255 ; encoding: [0x6a,0x00,0x0e,0xca,0x7f,0xfe,0x07,0xff] +0x6a,0x00,0x0e,0xca,0x7f,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_mul_f32 v6, exec_hi, v255 ; encoding: [0x6a,0x00,0x06,0xca,0x7f,0xfe,0x07,0xff] +0x6a,0x00,0x06,0xca,0x7f,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_sub_f32 v6, exec_hi, v255 ; encoding: [0x6a,0x00,0x0a,0xca,0x7f,0xfe,0x07,0xff] +0x6a,0x00,0x0a,0xca,0x7f,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_sub_nc_u32 v6, exec_hi, v255 ; encoding: [0x6a,0x00,0x28,0xca,0x7f,0xfe,0x07,0xff] +0x6a,0x00,0x28,0xca,0x7f,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_subrev_f32 v6, exec_hi, v255 ; encoding: [0x6a,0x00,0x0c,0xca,0x7f,0xfe,0x07,0xff] +0x6a,0x00,0x0c,0xca,0x7f,0xfe,0x07,0xff + +# GFX1250: v_dual_mov_b32 v6, null :: v_dual_add_f32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x00,0x08,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x00,0x08,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v6, null :: v_dual_add_nc_u32 v255, 0xaf123456, v5 ; 
encoding: [0x7c,0x00,0x20,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x00,0x20,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v6, null :: v_dual_ashrrev_i32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x00,0x2c,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x00,0x2c,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v6, null :: v_dual_cndmask_b32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x00,0x12,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x00,0x12,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v6, null :: v_dual_fmaak_f32 v255, 0xaf123456, v5, 0xaf123456 ; encoding: [0x7c,0x00,0x02,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x00,0x02,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v6, null :: v_dual_fmac_f32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x00,0x00,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x00,0x00,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v6, null :: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v4 ; encoding: [0x7c,0x00,0x04,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x00,0x04,0xca,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v6, null :: v_dual_lshlrev_b32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x00,0x22,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x00,0x22,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v6, null :: v_dual_lshrrev_b32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x00,0x2a,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x00,0x2a,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v6, null :: v_dual_max_i32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x00,0x2e,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x00,0x2e,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v6, null :: v_dual_max_num_f32 v255, 0xaf123456, v5 ; encoding: 
[0x7c,0x00,0x14,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x00,0x14,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v6, null :: v_dual_min_i32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x00,0x30,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x00,0x30,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v6, null :: v_dual_min_num_f32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x00,0x16,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x00,0x16,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v6, null :: v_dual_mov_b32 v255, 0xaf123456 ; encoding: [0x7c,0x00,0x10,0xca,0xff,0x00,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x00,0x10,0xca,0xff,0x00,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v6, null :: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x00,0x0e,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x00,0x0e,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v6, null :: v_dual_mul_f32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x00,0x06,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x00,0x06,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v6, null :: v_dual_sub_f32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x00,0x0a,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x00,0x0a,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v6, null :: v_dual_sub_nc_u32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x00,0x28,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x00,0x28,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mov_b32 v6, null :: v_dual_subrev_f32 v255, 0xaf123456, v5 ; encoding: [0x7c,0x00,0x0c,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x00,0x0c,0xca,0xff,0x0a,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_add_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xc8,0xc9,0xfd,0x0a,0x06,0xff] 
+0xc1,0x08,0xc8,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_add_nc_u32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xe0,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xe0,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_ashrrev_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xec,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xec,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_cndmask_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xd2,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xd2,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_fmaak_f32 v6, src_scc, v5, 0xaf123456 ; encoding: [0xc1,0x08,0xc2,0xc9,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +0xc1,0x08,0xc2,0xc9,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_fmac_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xc0,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xc0,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_fmamk_f32 v6, src_scc, 0xaf123456, v255 ; encoding: [0xc1,0x08,0xc4,0xc9,0xfd,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xc1,0x08,0xc4,0xc9,0xfd,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_lshlrev_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xe2,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xe2,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_lshrrev_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xea,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xea,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_max_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xee,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xee,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_max_num_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xd4,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xd4,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: 
v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_min_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xf0,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xf0,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_min_num_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xd6,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xd6,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_mov_b32 v6, src_scc ; encoding: [0xc1,0x08,0xd0,0xc9,0xfd,0x00,0x06,0xff] +0xc1,0x08,0xd0,0xc9,0xfd,0x00,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xce,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xce,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_mul_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xc6,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xc6,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_sub_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xca,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xca,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_sub_nc_u32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xe8,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xe8,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_subrev_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xcc,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xcc,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_add_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xc8,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xc8,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_add_nc_u32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xe0,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xe0,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xec,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xec,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: 
v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_cndmask_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xd2,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xd2,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_fmaak_f32 v6, 0.5, v2, 0xaf123456 ; encoding: [0xf0,0x06,0xc2,0xc9,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +0xf0,0x06,0xc2,0xc9,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_fmac_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xc0,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xc0,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_fmamk_f32 v6, 0.5, 0xaf123456, v255 ; encoding: [0xf0,0x06,0xc4,0xc9,0xf0,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xf0,0x06,0xc4,0xc9,0xf0,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xe2,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xe2,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xea,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xea,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_max_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xee,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xee,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_max_num_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xd4,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xd4,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_min_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xf0,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xf0,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_min_num_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xd6,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xd6,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_mov_b32 v6, 0.5 ; encoding: 
[0xf0,0x06,0xd0,0xc9,0xf0,0x00,0x06,0xff] +0xf0,0x06,0xd0,0xc9,0xf0,0x00,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xce,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xce,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_mul_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xc6,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xc6,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_sub_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xca,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xca,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xe8,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xe8,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_subrev_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xcc,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xcc,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_add_f32 v6, null, v3 ; encoding: [0xff,0x04,0xc8,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xc8,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_add_nc_u32 v6, null, v3 ; encoding: [0xff,0x04,0xe0,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xe0,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_ashrrev_i32 v6, null, v3 ; encoding: [0xff,0x04,0xec,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xec,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_cndmask_b32 v6, null, v3 ; encoding: [0xff,0x04,0xd2,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xd2,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_fmaak_f32 v6, null, v3, 0xaf123456 ; 
encoding: [0xff,0x04,0xc2,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xc2,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_fmac_f32 v6, null, v3 ; encoding: [0xff,0x04,0xc0,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xc0,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_lshlrev_b32 v6, null, v3 ; encoding: [0xff,0x04,0xe2,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xe2,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_lshrrev_b32 v6, null, v3 ; encoding: [0xff,0x04,0xea,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xea,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_max_i32 v6, null, v3 ; encoding: [0xff,0x04,0xee,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xee,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_max_num_f32 v6, null, v3 ; encoding: [0xff,0x04,0xd4,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xd4,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_min_i32 v6, null, v3 ; encoding: [0xff,0x04,0xf0,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xf0,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_min_num_f32 v6, null, v3 ; encoding: [0xff,0x04,0xd6,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xd6,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_mul_dx9_zero_f32 v6, null, v3 ; encoding: [0xff,0x04,0xce,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xce,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 
v255, 0xaf123456, v2 :: v_dual_mul_f32 v6, null, v3 ; encoding: [0xff,0x04,0xc6,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xc6,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_sub_f32 v6, null, v3 ; encoding: [0xff,0x04,0xca,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xca,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_sub_nc_u32 v6, null, v3 ; encoding: [0xff,0x04,0xe8,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xe8,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v2 :: v_dual_subrev_f32 v6, null, v3 ; encoding: [0xff,0x04,0xcc,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xcc,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, null, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xc5,0xc9,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0xfe,0xc5,0xc9,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v255 :: v_dual_mov_b32 v6, null ; encoding: [0xff,0xfe,0xd1,0xc9,0x7c,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0xfe,0xd1,0xc9,0x7c,0x00,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_add_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xc8,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xc8,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xe0,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xe0,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xec,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xec,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v6, exec_hi, v3 ; encoding: 
[0x7f,0x04,0xd2,0xc9,0x7f,0x06,0x06,0xff] +0x7f,0x04,0xd2,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_fmaak_f32 v6, exec_hi, v3, 0xaf123456 ; encoding: [0x7f,0x04,0xc2,0xc9,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7f,0x04,0xc2,0xc9,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_fmac_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xc0,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xc0,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xe2,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xe2,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xea,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xea,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_max_i32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xee,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xee,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_max_num_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xd4,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xd4,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_min_i32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xf0,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xf0,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_min_num_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xd6,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xd6,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xce,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xce,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_mul_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xc6,0xc9,0x6b,0x06,0x06,0xff] 
+0x7f,0x04,0xc6,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_sub_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xca,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xca,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xe8,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xe8,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_subrev_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xcc,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xcc,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v255 :: v_dual_fmamk_f32 v6, exec_hi, 0xaf123456, v255 ; encoding: [0x7f,0xfe,0xc5,0xc9,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7f,0xfe,0xc5,0xc9,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v255 :: v_dual_mov_b32 v6, vcc_hi ; encoding: [0x7f,0xfe,0xd1,0xc9,0x6b,0x00,0x06,0xff] +0x7f,0xfe,0xd1,0xc9,0x6b,0x00,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_add_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xc8,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xc8,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xe0,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xe0,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xec,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xec,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0xd2,0xc9,0x7e,0x06,0x06,0xff] +0x7e,0x04,0xd2,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_fmaak_f32 v6, exec_lo, v3, 0xaf123456 ; encoding: [0x7e,0x04,0xc2,0xc9,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] 
+0x7e,0x04,0xc2,0xc9,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_fmac_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xc0,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xc0,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xe2,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xe2,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_lshrrev_b32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xea,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xea,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_max_i32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xee,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xee,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_max_num_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xd4,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xd4,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_min_i32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xf0,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xf0,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_min_num_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xd6,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xd6,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xce,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xce,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_mul_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xc6,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xc6,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_sub_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xca,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xca,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: 
v_dual_sub_nc_u32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xe8,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xe8,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_subrev_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xcc,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xcc,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v255 :: v_dual_fmamk_f32 v6, exec_lo, 0xaf123456, v255 ; encoding: [0x7e,0xfe,0xc5,0xc9,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7e,0xfe,0xc5,0xc9,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v255 :: v_dual_mov_b32 v6, ttmp15 ; encoding: [0x7e,0xfe,0xd1,0xc9,0x7b,0x00,0x06,0xff] +0x7e,0xfe,0xd1,0xc9,0x7b,0x00,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_add_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0xc8,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xc8,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_add_nc_u32 v6, m0, v3 ; encoding: [0x7d,0x04,0xe0,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xe0,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_ashrrev_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0xec,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xec,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_cndmask_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0xd2,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xd2,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_fmaak_f32 v6, m0, v3, 0xaf123456 ; encoding: [0x7d,0x04,0xc2,0xc9,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7d,0x04,0xc2,0xc9,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_fmac_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0xc0,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xc0,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_lshlrev_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0xe2,0xc9,0x7d,0x06,0x06,0xff] 
+0x7d,0x04,0xe2,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_lshrrev_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0xea,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xea,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_max_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0xee,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xee,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_max_num_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0xd4,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xd4,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_min_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0xf0,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xf0,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_min_num_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0xd6,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xd6,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0xce,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xce,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_mul_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0xc6,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xc6,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_sub_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0xca,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xca,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_sub_nc_u32 v6, m0, v3 ; encoding: [0x7d,0x04,0xe8,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xe8,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_subrev_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0xcc,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xcc,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v255 :: v_dual_fmamk_f32 v6, m0, 0xaf123456, v255 ; encoding: [0x7d,0xfe,0xc5,0xc9,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] 
+0x7d,0xfe,0xc5,0xc9,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v255 :: v_dual_mov_b32 v6, m0 ; encoding: [0x7d,0xfe,0xd1,0xc9,0x7d,0x00,0x06,0xff] +0x7d,0xfe,0xd1,0xc9,0x7d,0x00,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_add_f32 v6, s105, v3 ; encoding: [0x01,0x04,0xc8,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0xc8,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_add_nc_u32 v6, s105, v3 ; encoding: [0x01,0x04,0xe0,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0xe0,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_ashrrev_i32 v6, s105, v3 ; encoding: [0x01,0x04,0xec,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0xec,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s1, v3 ; encoding: [0x01,0x04,0xd2,0xc9,0x01,0x06,0x06,0xff] +0x01,0x04,0xd2,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_fmaak_f32 v6, s1, v3, 0xaf123456 ; encoding: [0x01,0x04,0xc2,0xc9,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0x04,0xc2,0xc9,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_fmac_f32 v6, s105, v3 ; encoding: [0x01,0x04,0xc0,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0xc0,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_lshlrev_b32 v6, s105, v3 ; encoding: [0x01,0x04,0xe2,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0xe2,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_lshrrev_b32 v6, s105, v3 ; encoding: [0x01,0x04,0xea,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0xea,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_max_i32 v6, s105, v3 ; encoding: [0x01,0x04,0xee,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0xee,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_max_num_f32 v6, s105, v3 ; encoding: 
[0x01,0x04,0xd4,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0xd4,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_min_i32 v6, s105, v3 ; encoding: [0x01,0x04,0xf0,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0xf0,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_min_num_f32 v6, s105, v3 ; encoding: [0x01,0x04,0xd6,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0xd6,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v6, s105, v3 ; encoding: [0x01,0x04,0xce,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0xce,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_mul_f32 v6, s105, v3 ; encoding: [0x01,0x04,0xc6,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0xc6,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_sub_f32 v6, s105, v3 ; encoding: [0x01,0x04,0xca,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0xca,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_sub_nc_u32 v6, s105, v3 ; encoding: [0x01,0x04,0xe8,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0xe8,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_subrev_f32 v6, s105, v3 ; encoding: [0x01,0x04,0xcc,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0xcc,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v255 :: v_dual_fmamk_f32 v6, s1, 0xaf123456, v255 ; encoding: [0x01,0xfe,0xc5,0xc9,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x01,0xfe,0xc5,0xc9,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v255 :: v_dual_mov_b32 v6, s105 ; encoding: [0x01,0xfe,0xd1,0xc9,0x69,0x00,0x06,0xff] +0x01,0xfe,0xd1,0xc9,0x69,0x00,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_add_f32 v6, s1, v3 ; encoding: [0x69,0x04,0xc8,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0xc8,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_add_nc_u32 
v6, s1, v3 ; encoding: [0x69,0x04,0xe0,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0xe0,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_ashrrev_i32 v6, s1, v3 ; encoding: [0x69,0x04,0xec,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0xec,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_cndmask_b32 v6, s105, v3 ; encoding: [0x69,0x04,0xd2,0xc9,0x69,0x06,0x06,0xff] +0x69,0x04,0xd2,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_fmaak_f32 v6, s105, v3, 0xaf123456 ; encoding: [0x69,0x04,0xc2,0xc9,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x69,0x04,0xc2,0xc9,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_fmac_f32 v6, s1, v3 ; encoding: [0x69,0x04,0xc0,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0xc0,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_lshlrev_b32 v6, s1, v3 ; encoding: [0x69,0x04,0xe2,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0xe2,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_lshrrev_b32 v6, s1, v3 ; encoding: [0x69,0x04,0xea,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0xea,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_max_i32 v6, s1, v3 ; encoding: [0x69,0x04,0xee,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0xee,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_max_num_f32 v6, s1, v3 ; encoding: [0x69,0x04,0xd4,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0xd4,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_min_i32 v6, s1, v3 ; encoding: [0x69,0x04,0xf0,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0xf0,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_min_num_f32 v6, s1, v3 ; encoding: [0x69,0x04,0xd6,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0xd6,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 
v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v6, s1, v3 ; encoding: [0x69,0x04,0xce,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0xce,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_mul_f32 v6, s1, v3 ; encoding: [0x69,0x04,0xc6,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0xc6,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_sub_f32 v6, s1, v3 ; encoding: [0x69,0x04,0xca,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0xca,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_sub_nc_u32 v6, s1, v3 ; encoding: [0x69,0x04,0xe8,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0xe8,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_subrev_f32 v6, s1, v3 ; encoding: [0x69,0x04,0xcc,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0xcc,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v255 :: v_dual_fmamk_f32 v6, s105, 0xaf123456, v255 ; encoding: [0x69,0xfe,0xc5,0xc9,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x69,0xfe,0xc5,0xc9,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v255 :: v_dual_mov_b32 v6, s1 ; encoding: [0x69,0xfe,0xd1,0xc9,0x01,0x00,0x06,0xff] +0x69,0xfe,0xd1,0xc9,0x01,0x00,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_add_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0xc8,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xc8,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_add_nc_u32 v6, -1, v3 ; encoding: [0xfd,0x04,0xe0,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xe0,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0xec,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xec,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_cndmask_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0xd2,0xc9,0xc1,0x06,0x06,0xff] 
+0xfd,0x04,0xd2,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_fmaak_f32 v6, -1, v3, 0xaf123456 ; encoding: [0xfd,0x04,0xc2,0xc9,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xfd,0x04,0xc2,0xc9,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_fmac_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0xc0,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xc0,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0xe2,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xe2,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0xea,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xea,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_max_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0xee,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xee,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_max_num_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0xd4,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xd4,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_min_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0xf0,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xf0,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_min_num_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0xd6,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xd6,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0xce,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xce,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_mul_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0xc6,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xc6,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_sub_f32 v6, 
-1, v3 ; encoding: [0xfd,0x04,0xca,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xca,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v6, -1, v3 ; encoding: [0xfd,0x04,0xe8,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xe8,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_subrev_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0xcc,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xcc,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v255 :: v_dual_fmamk_f32 v6, -1, 0xaf123456, v255 ; encoding: [0xfd,0xfe,0xc5,0xc9,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xfd,0xfe,0xc5,0xc9,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v255 :: v_dual_mov_b32 v6, -1 ; encoding: [0xfd,0xfe,0xd1,0xc9,0xc1,0x00,0x06,0xff] +0xfd,0xfe,0xd1,0xc9,0xc1,0x00,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_add_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xc8,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xc8,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xe0,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xe0,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xec,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xec,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0xd2,0xc9,0x7b,0x06,0x06,0xff] +0x7b,0x04,0xd2,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_fmaak_f32 v6, ttmp15, v3, 0xaf123456 ; encoding: [0x7b,0x04,0xc2,0xc9,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7b,0x04,0xc2,0xc9,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_fmac_f32 v6, vcc_lo, v3 ; encoding: 
[0x7b,0x04,0xc0,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xc0,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xe2,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xe2,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xea,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xea,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_max_i32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xee,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xee,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_max_num_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xd4,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xd4,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_min_i32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xf0,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xf0,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_min_num_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xd6,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xd6,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xce,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xce,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_mul_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xc6,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xc6,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_sub_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xca,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xca,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_sub_nc_u32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xe8,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xe8,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: 
v_dual_subrev_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xcc,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xcc,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v255 :: v_dual_fmamk_f32 v6, ttmp15, 0xaf123456, v255 ; encoding: [0x7b,0xfe,0xc5,0xc9,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7b,0xfe,0xc5,0xc9,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v255 :: v_dual_mov_b32 v6, vcc_lo ; encoding: [0x7b,0xfe,0xd1,0xc9,0x6a,0x00,0x06,0xff] +0x7b,0xfe,0xd1,0xc9,0x6a,0x00,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_add_f32 v6, v255, v3 ; encoding: [0x01,0x05,0xc8,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0xc8,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_add_nc_u32 v6, v255, v3 ; encoding: [0x01,0x05,0xe0,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0xe0,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_ashrrev_i32 v6, v255, v3 ; encoding: [0x01,0x05,0xec,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0xec,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_cndmask_b32 v6, v255, v3 ; encoding: [0x01,0x05,0xd2,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0xd2,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_fmaak_f32 v6, v255, v3, 0xaf123456 ; encoding: [0x01,0x05,0xc2,0xc9,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0x05,0xc2,0xc9,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_fmac_f32 v6, v255, v3 ; encoding: [0x01,0x05,0xc0,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0xc0,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_lshlrev_b32 v6, v255, v3 ; encoding: [0x01,0x05,0xe2,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0xe2,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_lshrrev_b32 v6, v255, v3 ; encoding: [0x01,0x05,0xea,0xc9,0xff,0x07,0x06,0xff] 
+0x01,0x05,0xea,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_max_i32 v6, v255, v3 ; encoding: [0x01,0x05,0xee,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0xee,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_max_num_f32 v6, v255, v3 ; encoding: [0x01,0x05,0xd4,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0xd4,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_min_i32 v6, v255, v3 ; encoding: [0x01,0x05,0xf0,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0xf0,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_min_num_f32 v6, v255, v3 ; encoding: [0x01,0x05,0xd6,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0xd6,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v6, v255, v3 ; encoding: [0x01,0x05,0xce,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0xce,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_mul_f32 v6, v255, v3 ; encoding: [0x01,0x05,0xc6,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0xc6,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_sub_f32 v6, v255, v3 ; encoding: [0x01,0x05,0xca,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0xca,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_sub_nc_u32 v6, v255, v3 ; encoding: [0x01,0x05,0xe8,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0xe8,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_subrev_f32 v6, v255, v3 ; encoding: [0x01,0x05,0xcc,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0xcc,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v255 :: v_dual_fmamk_f32 v6, v255, 0xaf123456, v255 ; encoding: [0x01,0xff,0xc5,0xc9,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x01,0xff,0xc5,0xc9,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v255 :: v_dual_mov_b32 v6, v255 ; encoding: 
[0x01,0xff,0xd1,0xc9,0xff,0x01,0x06,0xff] +0x01,0xff,0xd1,0xc9,0xff,0x01,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_add_f32 v6, v3, v3 ; encoding: [0x02,0x05,0xc8,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0xc8,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_add_nc_u32 v6, v3, v3 ; encoding: [0x02,0x05,0xe0,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0xe0,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_ashrrev_i32 v6, v3, v3 ; encoding: [0x02,0x05,0xec,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0xec,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_cndmask_b32 v6, v3, v3 ; encoding: [0x02,0x05,0xd2,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0xd2,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_fmaak_f32 v6, v3, v3, 0xaf123456 ; encoding: [0x02,0x05,0xc2,0xc9,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x02,0x05,0xc2,0xc9,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_fmac_f32 v6, v3, v3 ; encoding: [0x02,0x05,0xc0,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0xc0,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_lshlrev_b32 v6, v3, v3 ; encoding: [0x02,0x05,0xe2,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0xe2,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_lshrrev_b32 v6, v3, v3 ; encoding: [0x02,0x05,0xea,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0xea,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_max_i32 v6, v3, v3 ; encoding: [0x02,0x05,0xee,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0xee,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_max_num_f32 v6, v3, v3 ; encoding: [0x02,0x05,0xd4,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0xd4,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_min_i32 v6, v3, v3 ; 
encoding: [0x02,0x05,0xf0,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0xf0,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_min_num_f32 v6, v3, v3 ; encoding: [0x02,0x05,0xd6,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0xd6,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v6, v3, v3 ; encoding: [0x02,0x05,0xce,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0xce,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_mul_f32 v6, v3, v3 ; encoding: [0x02,0x05,0xc6,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0xc6,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_sub_f32 v6, v3, v3 ; encoding: [0x02,0x05,0xca,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0xca,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_sub_nc_u32 v6, v3, v3 ; encoding: [0x02,0x05,0xe8,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0xe8,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_subrev_f32 v6, v3, v3 ; encoding: [0x02,0x05,0xcc,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0xcc,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v255 :: v_dual_fmamk_f32 v6, v3, 0xaf123456, v255 ; encoding: [0x02,0xff,0xc5,0xc9,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x02,0xff,0xc5,0xc9,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v255 :: v_dual_mov_b32 v6, v3 ; encoding: [0x02,0xff,0xd1,0xc9,0x03,0x01,0x06,0xff] +0x02,0xff,0xd1,0xc9,0x03,0x01,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_add_f32 v6, v2, v3 ; encoding: [0xff,0x05,0xc8,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0xc8,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_add_nc_u32 v6, v2, v3 ; encoding: [0xff,0x05,0xe0,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0xe0,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_ashrrev_i32 
v6, v2, v3 ; encoding: [0xff,0x05,0xec,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0xec,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_cndmask_b32 v6, v2, v3 ; encoding: [0xff,0x05,0xd2,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0xd2,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_fmaak_f32 v6, v2, v3, 0xaf123456 ; encoding: [0xff,0x05,0xc2,0xc9,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x05,0xc2,0xc9,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_fmac_f32 v6, v2, v3 ; encoding: [0xff,0x05,0xc0,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0xc0,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_lshlrev_b32 v6, v2, v3 ; encoding: [0xff,0x05,0xe2,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0xe2,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_lshrrev_b32 v6, v2, v3 ; encoding: [0xff,0x05,0xea,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0xea,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_max_i32 v6, v2, v3 ; encoding: [0xff,0x05,0xee,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0xee,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_max_num_f32 v6, v2, v3 ; encoding: [0xff,0x05,0xd4,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0xd4,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_min_i32 v6, v2, v3 ; encoding: [0xff,0x05,0xf0,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0xf0,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_min_num_f32 v6, v2, v3 ; encoding: [0xff,0x05,0xd6,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0xd6,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v6, v2, v3 ; encoding: [0xff,0x05,0xce,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0xce,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 
v255, v255, v2 :: v_dual_mul_f32 v6, v2, v3 ; encoding: [0xff,0x05,0xc6,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0xc6,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_sub_f32 v6, v2, v3 ; encoding: [0xff,0x05,0xca,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0xca,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_sub_nc_u32 v6, v2, v3 ; encoding: [0xff,0x05,0xe8,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0xe8,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_subrev_f32 v6, v2, v3 ; encoding: [0xff,0x05,0xcc,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0xcc,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v255 :: v_dual_fmamk_f32 v6, v2, 0xaf123456, v255 ; encoding: [0xff,0xff,0xc5,0xc9,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0xff,0xc5,0xc9,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v255 :: v_dual_mov_b32 v6, v2 ; encoding: [0xff,0xff,0xd1,0xc9,0x02,0x01,0x06,0xff] +0xff,0xff,0xd1,0xc9,0x02,0x01,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_add_f32 v6, v4, v3 ; encoding: [0x03,0x05,0xc8,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0xc8,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_add_nc_u32 v6, v4, v3 ; encoding: [0x03,0x05,0xe0,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0xe0,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_ashrrev_i32 v6, v4, v3 ; encoding: [0x03,0x05,0xec,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0xec,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_cndmask_b32 v6, v4, v3 ; encoding: [0x03,0x05,0xd2,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0xd2,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_fmaak_f32 v6, v4, v3, 0xaf123456 ; encoding: [0x03,0x05,0xc2,0xc9,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] 
+0x03,0x05,0xc2,0xc9,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_fmac_f32 v6, v4, v3 ; encoding: [0x03,0x05,0xc0,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0xc0,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_lshlrev_b32 v6, v4, v3 ; encoding: [0x03,0x05,0xe2,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0xe2,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_lshrrev_b32 v6, v4, v3 ; encoding: [0x03,0x05,0xea,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0xea,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_max_i32 v6, v4, v3 ; encoding: [0x03,0x05,0xee,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0xee,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_max_num_f32 v6, v4, v3 ; encoding: [0x03,0x05,0xd4,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0xd4,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_min_i32 v6, v4, v3 ; encoding: [0x03,0x05,0xf0,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0xf0,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_min_num_f32 v6, v4, v3 ; encoding: [0x03,0x05,0xd6,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0xd6,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v6, v4, v3 ; encoding: [0x03,0x05,0xce,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0xce,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_mul_f32 v6, v4, v3 ; encoding: [0x03,0x05,0xc6,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0xc6,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_sub_f32 v6, v4, v3 ; encoding: [0x03,0x05,0xca,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0xca,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_sub_nc_u32 v6, v4, v3 ; encoding: [0x03,0x05,0xe8,0xc9,0x04,0x07,0x06,0xff] 
+0x03,0x05,0xe8,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_subrev_f32 v6, v4, v3 ; encoding: [0x03,0x05,0xcc,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0xcc,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v255 :: v_dual_fmamk_f32 v6, v4, 0xaf123456, v255 ; encoding: [0x03,0xff,0xc5,0xc9,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x03,0xff,0xc5,0xc9,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v255 :: v_dual_mov_b32 v6, v4 ; encoding: [0x03,0xff,0xd1,0xc9,0x04,0x01,0x06,0xff] +0x03,0xff,0xd1,0xc9,0x04,0x01,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_add_f32 v6, v1, v3 ; encoding: [0x04,0x05,0xc8,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0xc8,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_add_nc_u32 v6, v1, v3 ; encoding: [0x04,0x05,0xe0,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0xe0,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_ashrrev_i32 v6, v1, v3 ; encoding: [0x04,0x05,0xec,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0xec,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_cndmask_b32 v6, v1, v3 ; encoding: [0x04,0x05,0xd2,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0xd2,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_fmaak_f32 v6, v1, v3, 0xaf123456 ; encoding: [0x04,0x05,0xc2,0xc9,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x04,0x05,0xc2,0xc9,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_fmac_f32 v6, v1, v3 ; encoding: [0x04,0x05,0xc0,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0xc0,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_lshlrev_b32 v6, v1, v3 ; encoding: [0x04,0x05,0xe2,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0xe2,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_lshrrev_b32 v6, 
v1, v3 ; encoding: [0x04,0x05,0xea,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0xea,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_max_i32 v6, v1, v3 ; encoding: [0x04,0x05,0xee,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0xee,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_max_num_f32 v6, v1, v3 ; encoding: [0x04,0x05,0xd4,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0xd4,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_min_i32 v6, v1, v3 ; encoding: [0x04,0x05,0xf0,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0xf0,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_min_num_f32 v6, v1, v3 ; encoding: [0x04,0x05,0xd6,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0xd6,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v6, v1, v3 ; encoding: [0x04,0x05,0xce,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0xce,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_mul_f32 v6, v1, v3 ; encoding: [0x04,0x05,0xc6,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0xc6,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_sub_f32 v6, v1, v3 ; encoding: [0x04,0x05,0xca,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0xca,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_sub_nc_u32 v6, v1, v3 ; encoding: [0x04,0x05,0xe8,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0xe8,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_subrev_f32 v6, v1, v3 ; encoding: [0x04,0x05,0xcc,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0xcc,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v255 :: v_dual_fmamk_f32 v6, v1, 0xaf123456, v255 ; encoding: [0x04,0xff,0xc5,0xc9,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x04,0xff,0xc5,0xc9,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v255 :: 
v_dual_mov_b32 v6, v1 ; encoding: [0x04,0xff,0xd1,0xc9,0x01,0x01,0x06,0xff] +0x04,0xff,0xd1,0xc9,0x01,0x01,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_add_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xc8,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xc8,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xe0,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xe0,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xec,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xec,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0xd2,0xc9,0x6b,0x06,0x06,0xff] +0x6b,0x04,0xd2,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_fmaak_f32 v6, vcc_hi, v3, 0xaf123456 ; encoding: [0x6b,0x04,0xc2,0xc9,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6b,0x04,0xc2,0xc9,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xc0,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xc0,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xe2,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xe2,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xea,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xea,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_max_i32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xee,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xee,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xd4,0xc9,0x7e,0x06,0x06,0xff] 
+0x6b,0x04,0xd4,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_min_i32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xf0,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xf0,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xd6,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xd6,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xce,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xce,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_mul_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xc6,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xc6,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_sub_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xca,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xca,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xe8,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xe8,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xcc,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xcc,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v255 :: v_dual_fmamk_f32 v6, vcc_hi, 0xaf123456, v255 ; encoding: [0x6b,0xfe,0xc5,0xc9,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6b,0xfe,0xc5,0xc9,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v255 :: v_dual_mov_b32 v6, exec_lo ; encoding: [0x6b,0xfe,0xd1,0xc9,0x7e,0x00,0x06,0xff] +0x6b,0xfe,0xd1,0xc9,0x7e,0x00,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_add_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xc8,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xc8,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 
vcc_lo, v2 :: v_dual_add_nc_u32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xe0,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xe0,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xec,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xec,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0xd2,0xc9,0x6a,0x06,0x06,0xff] +0x6a,0x04,0xd2,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_fmaak_f32 v6, vcc_lo, v3, 0xaf123456 ; encoding: [0x6a,0x04,0xc2,0xc9,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6a,0x04,0xc2,0xc9,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xc0,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xc0,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xe2,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xe2,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xea,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xea,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_max_i32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xee,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xee,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xd4,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xd4,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_min_i32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xf0,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xf0,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v6, exec_hi, v3 ; encoding: 
[0x6a,0x04,0xd6,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xd6,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xce,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xce,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_mul_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xc6,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xc6,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_sub_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xca,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xca,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xe8,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xe8,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xcc,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xcc,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v255 :: v_dual_fmamk_f32 v6, vcc_lo, 0xaf123456, v255 ; encoding: [0x6a,0xfe,0xc5,0xc9,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6a,0xfe,0xc5,0xc9,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v255 :: v_dual_mov_b32 v6, exec_hi ; encoding: [0x6a,0xfe,0xd1,0xc9,0x7f,0x00,0x06,0xff] +0x6a,0xfe,0xd1,0xc9,0x7f,0x00,0x06,0xff + +# GFX1250: v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_add_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xc8,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xc8,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_add_nc_u32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xe0,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xe0,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_ashrrev_i32 v255, 0xaf123456, v4 ; encoding: 
[0x7c,0x0a,0xec,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xec,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_cndmask_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xd2,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xd2,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_fmaak_f32 v255, 0xaf123456, v4, 0xaf123456 ; encoding: [0x7c,0x0a,0xc2,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xc2,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_fmac_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xc0,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xc0,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xc4,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xc4,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_lshlrev_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xe2,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xe2,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_lshrrev_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xea,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xea,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_max_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xee,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xee,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_max_num_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xd4,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xd4,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: 
v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_min_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xf0,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xf0,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_min_num_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xd6,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xd6,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_mov_b32 v255, 0xaf123456 ; encoding: [0x7c,0x0a,0xd0,0xc9,0xff,0x00,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xd0,0xc9,0xff,0x00,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xce,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xce,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_mul_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xc6,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xc6,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_sub_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xca,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xca,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_sub_nc_u32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xe8,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xe8,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_dx9_zero_f32 v6, null, v5 :: v_dual_subrev_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xcc,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xcc,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, -1, v4 :: v_dual_add_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xc8,0xc8,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xc8,0xc8,0xfd,0x0a,0x06,0xff + +# GFX1250: 
v_dual_mul_f32 v255, -1, v4 :: v_dual_add_nc_u32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xe0,0xc8,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xe0,0xc8,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, -1, v4 :: v_dual_ashrrev_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xec,0xc8,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xec,0xc8,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, -1, v4 :: v_dual_cndmask_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xd2,0xc8,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xd2,0xc8,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, -1, v4 :: v_dual_fmaak_f32 v6, src_scc, v5, 0xaf123456 ; encoding: [0xc1,0x08,0xc2,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +0xc1,0x08,0xc2,0xc8,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, -1, v4 :: v_dual_fmac_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xc0,0xc8,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xc0,0xc8,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, -1, v4 :: v_dual_fmamk_f32 v6, src_scc, 0xaf123456, v255 ; encoding: [0xc1,0x08,0xc4,0xc8,0xfd,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xc1,0x08,0xc4,0xc8,0xfd,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, -1, v4 :: v_dual_lshlrev_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xe2,0xc8,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xe2,0xc8,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, -1, v4 :: v_dual_lshrrev_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xea,0xc8,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xea,0xc8,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, -1, v4 :: v_dual_max_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xee,0xc8,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xee,0xc8,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, -1, v4 :: v_dual_max_num_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xd4,0xc8,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xd4,0xc8,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, -1, v4 :: v_dual_min_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xf0,0xc8,0xfd,0x0a,0x06,0xff] 
+0xc1,0x08,0xf0,0xc8,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, -1, v4 :: v_dual_min_num_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xd6,0xc8,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xd6,0xc8,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, -1, v4 :: v_dual_mov_b32 v6, src_scc ; encoding: [0xc1,0x08,0xd0,0xc8,0xfd,0x00,0x06,0xff] +0xc1,0x08,0xd0,0xc8,0xfd,0x00,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xce,0xc8,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xce,0xc8,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, -1, v4 :: v_dual_mul_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xc6,0xc8,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xc6,0xc8,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, -1, v4 :: v_dual_sub_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xca,0xc8,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xca,0xc8,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, -1, v4 :: v_dual_sub_nc_u32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xe8,0xc8,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xe8,0xc8,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, -1, v4 :: v_dual_subrev_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xcc,0xc8,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xcc,0xc8,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_add_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xc8,0xc8,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xc8,0xc8,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_add_nc_u32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xe0,0xc8,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xe0,0xc8,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xec,0xc8,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xec,0xc8,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_cndmask_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xd2,0xc8,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xd2,0xc8,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_fmaak_f32 
v6, 0.5, v2, 0xaf123456 ; encoding: [0xf0,0x06,0xc2,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +0xf0,0x06,0xc2,0xc8,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_fmac_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xc0,0xc8,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xc0,0xc8,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_fmamk_f32 v6, 0.5, 0xaf123456, v255 ; encoding: [0xf0,0x06,0xc4,0xc8,0xf0,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xf0,0x06,0xc4,0xc8,0xf0,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xe2,0xc8,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xe2,0xc8,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xea,0xc8,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xea,0xc8,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_max_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xee,0xc8,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xee,0xc8,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_max_num_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xd4,0xc8,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xd4,0xc8,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_min_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xf0,0xc8,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xf0,0xc8,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_min_num_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xd6,0xc8,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xd6,0xc8,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_mov_b32 v6, 0.5 ; encoding: [0xf0,0x06,0xd0,0xc8,0xf0,0x00,0x06,0xff] +0xf0,0x06,0xd0,0xc8,0xf0,0x00,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xce,0xc8,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xce,0xc8,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_mul_f32 v6, 0.5, v2 ; 
encoding: [0xf0,0x06,0xc6,0xc8,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xc6,0xc8,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_sub_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xca,0xc8,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xca,0xc8,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xe8,0xc8,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xe8,0xc8,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_subrev_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xcc,0xc8,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xcc,0xc8,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_add_f32 v6, null, v3 ; encoding: [0xff,0x04,0xc8,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xc8,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_add_nc_u32 v6, null, v3 ; encoding: [0xff,0x04,0xe0,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xe0,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_ashrrev_i32 v6, null, v3 ; encoding: [0xff,0x04,0xec,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xec,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_cndmask_b32 v6, null, v3 ; encoding: [0xff,0x04,0xd2,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xd2,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_fmaak_f32 v6, null, v3, 0xaf123456 ; encoding: [0xff,0x04,0xc2,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xc2,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_fmac_f32 v6, null, v3 ; encoding: [0xff,0x04,0xc0,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xc0,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_lshlrev_b32 
v6, null, v3 ; encoding: [0xff,0x04,0xe2,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xe2,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_lshrrev_b32 v6, null, v3 ; encoding: [0xff,0x04,0xea,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xea,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_max_i32 v6, null, v3 ; encoding: [0xff,0x04,0xee,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xee,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_max_num_f32 v6, null, v3 ; encoding: [0xff,0x04,0xd4,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xd4,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_min_i32 v6, null, v3 ; encoding: [0xff,0x04,0xf0,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xf0,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_min_num_f32 v6, null, v3 ; encoding: [0xff,0x04,0xd6,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xd6,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_mul_dx9_zero_f32 v6, null, v3 ; encoding: [0xff,0x04,0xce,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xce,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_mul_f32 v6, null, v3 ; encoding: [0xff,0x04,0xc6,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xc6,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_sub_f32 v6, null, v3 ; encoding: [0xff,0x04,0xca,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xca,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_sub_nc_u32 v6, null, v3 ; encoding: 
[0xff,0x04,0xe8,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xe8,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, 0xaf123456, v2 :: v_dual_subrev_f32 v6, null, v3 ; encoding: [0xff,0x04,0xcc,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xcc,0xc8,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, null, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xc5,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0xfe,0xc5,0xc8,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, 0xaf123456, v255 :: v_dual_mov_b32 v6, null ; encoding: [0xff,0xfe,0xd1,0xc8,0x7c,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0xfe,0xd1,0xc8,0x7c,0x00,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_add_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xc8,0xc8,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xc8,0xc8,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xe0,0xc8,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xe0,0xc8,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xec,0xc8,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xec,0xc8,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0xd2,0xc8,0x7f,0x06,0x06,0xff] +0x7f,0x04,0xd2,0xc8,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_fmaak_f32 v6, exec_hi, v3, 0xaf123456 ; encoding: [0x7f,0x04,0xc2,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7f,0x04,0xc2,0xc8,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_fmac_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xc0,0xc8,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xc0,0xc8,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v6, vcc_hi, v3 ; 
encoding: [0x7f,0x04,0xe2,0xc8,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xe2,0xc8,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xea,0xc8,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xea,0xc8,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_max_i32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xee,0xc8,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xee,0xc8,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_max_num_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xd4,0xc8,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xd4,0xc8,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_min_i32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xf0,0xc8,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xf0,0xc8,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_min_num_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xd6,0xc8,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xd6,0xc8,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xce,0xc8,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xce,0xc8,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_mul_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xc6,0xc8,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xc6,0xc8,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_sub_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xca,0xc8,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xca,0xc8,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xe8,0xc8,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xe8,0xc8,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_subrev_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xcc,0xc8,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xcc,0xc8,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, exec_hi, v255 :: v_dual_fmamk_f32 v6, exec_hi, 0xaf123456, v255 ; encoding: 
[0x7f,0xfe,0xc5,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7f,0xfe,0xc5,0xc8,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, exec_hi, v255 :: v_dual_mov_b32 v6, vcc_hi ; encoding: [0x7f,0xfe,0xd1,0xc8,0x6b,0x00,0x06,0xff] +0x7f,0xfe,0xd1,0xc8,0x6b,0x00,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_add_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xc8,0xc8,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xc8,0xc8,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xe0,0xc8,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xe0,0xc8,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xec,0xc8,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xec,0xc8,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0xd2,0xc8,0x7e,0x06,0x06,0xff] +0x7e,0x04,0xd2,0xc8,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_fmaak_f32 v6, exec_lo, v3, 0xaf123456 ; encoding: [0x7e,0x04,0xc2,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7e,0x04,0xc2,0xc8,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_fmac_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xc0,0xc8,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xc0,0xc8,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xe2,0xc8,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xe2,0xc8,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_lshrrev_b32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xea,0xc8,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xea,0xc8,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_max_i32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xee,0xc8,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xee,0xc8,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, exec_lo, v2 :: 
v_dual_max_num_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xd4,0xc8,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xd4,0xc8,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_min_i32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xf0,0xc8,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xf0,0xc8,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_min_num_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xd6,0xc8,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xd6,0xc8,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xce,0xc8,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xce,0xc8,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_mul_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xc6,0xc8,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xc6,0xc8,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_sub_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xca,0xc8,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xca,0xc8,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_sub_nc_u32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xe8,0xc8,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xe8,0xc8,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_subrev_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xcc,0xc8,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xcc,0xc8,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, exec_lo, v255 :: v_dual_fmamk_f32 v6, exec_lo, 0xaf123456, v255 ; encoding: [0x7e,0xfe,0xc5,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7e,0xfe,0xc5,0xc8,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, exec_lo, v255 :: v_dual_mov_b32 v6, ttmp15 ; encoding: [0x7e,0xfe,0xd1,0xc8,0x7b,0x00,0x06,0xff] +0x7e,0xfe,0xd1,0xc8,0x7b,0x00,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, m0, v2 :: v_dual_add_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0xc8,0xc8,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xc8,0xc8,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, m0, v2 :: 
v_dual_add_nc_u32 v6, m0, v3 ; encoding: [0x7d,0x04,0xe0,0xc8,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xe0,0xc8,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, m0, v2 :: v_dual_ashrrev_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0xec,0xc8,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xec,0xc8,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, m0, v2 :: v_dual_cndmask_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0xd2,0xc8,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xd2,0xc8,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, m0, v2 :: v_dual_fmaak_f32 v6, m0, v3, 0xaf123456 ; encoding: [0x7d,0x04,0xc2,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7d,0x04,0xc2,0xc8,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, m0, v2 :: v_dual_fmac_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0xc0,0xc8,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xc0,0xc8,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, m0, v2 :: v_dual_lshlrev_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0xe2,0xc8,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xe2,0xc8,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, m0, v2 :: v_dual_lshrrev_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0xea,0xc8,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xea,0xc8,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, m0, v2 :: v_dual_max_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0xee,0xc8,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xee,0xc8,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, m0, v2 :: v_dual_max_num_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0xd4,0xc8,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xd4,0xc8,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, m0, v2 :: v_dual_min_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0xf0,0xc8,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xf0,0xc8,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, m0, v2 :: v_dual_min_num_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0xd6,0xc8,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xd6,0xc8,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0xce,0xc8,0x7d,0x06,0x06,0xff] 
+0x7d,0x04,0xce,0xc8,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, m0, v2 :: v_dual_mul_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0xc6,0xc8,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xc6,0xc8,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, m0, v2 :: v_dual_sub_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0xca,0xc8,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xca,0xc8,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, m0, v2 :: v_dual_sub_nc_u32 v6, m0, v3 ; encoding: [0x7d,0x04,0xe8,0xc8,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xe8,0xc8,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, m0, v2 :: v_dual_subrev_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0xcc,0xc8,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xcc,0xc8,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, m0, v255 :: v_dual_fmamk_f32 v6, m0, 0xaf123456, v255 ; encoding: [0x7d,0xfe,0xc5,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7d,0xfe,0xc5,0xc8,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, m0, v255 :: v_dual_mov_b32 v6, m0 ; encoding: [0x7d,0xfe,0xd1,0xc8,0x7d,0x00,0x06,0xff] +0x7d,0xfe,0xd1,0xc8,0x7d,0x00,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, s1, v2 :: v_dual_add_f32 v6, s105, v3 ; encoding: [0x01,0x04,0xc8,0xc8,0x69,0x06,0x06,0xff] +0x01,0x04,0xc8,0xc8,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, s1, v2 :: v_dual_add_nc_u32 v6, s105, v3 ; encoding: [0x01,0x04,0xe0,0xc8,0x69,0x06,0x06,0xff] +0x01,0x04,0xe0,0xc8,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, s1, v2 :: v_dual_ashrrev_i32 v6, s105, v3 ; encoding: [0x01,0x04,0xec,0xc8,0x69,0x06,0x06,0xff] +0x01,0x04,0xec,0xc8,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s1, v3 ; encoding: [0x01,0x04,0xd2,0xc8,0x01,0x06,0x06,0xff] +0x01,0x04,0xd2,0xc8,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, s1, v2 :: v_dual_fmaak_f32 v6, s1, v3, 0xaf123456 ; encoding: [0x01,0x04,0xc2,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0x04,0xc2,0xc8,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf 
+ +# GFX1250: v_dual_mul_f32 v255, s1, v2 :: v_dual_fmac_f32 v6, s105, v3 ; encoding: [0x01,0x04,0xc0,0xc8,0x69,0x06,0x06,0xff] +0x01,0x04,0xc0,0xc8,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, s1, v2 :: v_dual_lshlrev_b32 v6, s105, v3 ; encoding: [0x01,0x04,0xe2,0xc8,0x69,0x06,0x06,0xff] +0x01,0x04,0xe2,0xc8,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, s1, v2 :: v_dual_lshrrev_b32 v6, s105, v3 ; encoding: [0x01,0x04,0xea,0xc8,0x69,0x06,0x06,0xff] +0x01,0x04,0xea,0xc8,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, s1, v2 :: v_dual_max_i32 v6, s105, v3 ; encoding: [0x01,0x04,0xee,0xc8,0x69,0x06,0x06,0xff] +0x01,0x04,0xee,0xc8,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, s1, v2 :: v_dual_max_num_f32 v6, s105, v3 ; encoding: [0x01,0x04,0xd4,0xc8,0x69,0x06,0x06,0xff] +0x01,0x04,0xd4,0xc8,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, s1, v2 :: v_dual_min_i32 v6, s105, v3 ; encoding: [0x01,0x04,0xf0,0xc8,0x69,0x06,0x06,0xff] +0x01,0x04,0xf0,0xc8,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, s1, v2 :: v_dual_min_num_f32 v6, s105, v3 ; encoding: [0x01,0x04,0xd6,0xc8,0x69,0x06,0x06,0xff] +0x01,0x04,0xd6,0xc8,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v6, s105, v3 ; encoding: [0x01,0x04,0xce,0xc8,0x69,0x06,0x06,0xff] +0x01,0x04,0xce,0xc8,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, s1, v2 :: v_dual_mul_f32 v6, s105, v3 ; encoding: [0x01,0x04,0xc6,0xc8,0x69,0x06,0x06,0xff] +0x01,0x04,0xc6,0xc8,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, s1, v2 :: v_dual_sub_f32 v6, s105, v3 ; encoding: [0x01,0x04,0xca,0xc8,0x69,0x06,0x06,0xff] +0x01,0x04,0xca,0xc8,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, s1, v2 :: v_dual_sub_nc_u32 v6, s105, v3 ; encoding: [0x01,0x04,0xe8,0xc8,0x69,0x06,0x06,0xff] +0x01,0x04,0xe8,0xc8,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, s1, v2 :: v_dual_subrev_f32 v6, s105, v3 ; encoding: 
[0x01,0x04,0xcc,0xc8,0x69,0x06,0x06,0xff] +0x01,0x04,0xcc,0xc8,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, s1, v255 :: v_dual_fmamk_f32 v6, s1, 0xaf123456, v255 ; encoding: [0x01,0xfe,0xc5,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x01,0xfe,0xc5,0xc8,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, s1, v255 :: v_dual_mov_b32 v6, s105 ; encoding: [0x01,0xfe,0xd1,0xc8,0x69,0x00,0x06,0xff] +0x01,0xfe,0xd1,0xc8,0x69,0x00,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, s105, v2 :: v_dual_add_f32 v6, s1, v3 ; encoding: [0x69,0x04,0xc8,0xc8,0x01,0x06,0x06,0xff] +0x69,0x04,0xc8,0xc8,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, s105, v2 :: v_dual_add_nc_u32 v6, s1, v3 ; encoding: [0x69,0x04,0xe0,0xc8,0x01,0x06,0x06,0xff] +0x69,0x04,0xe0,0xc8,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, s105, v2 :: v_dual_ashrrev_i32 v6, s1, v3 ; encoding: [0x69,0x04,0xec,0xc8,0x01,0x06,0x06,0xff] +0x69,0x04,0xec,0xc8,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, s105, v2 :: v_dual_cndmask_b32 v6, s105, v3 ; encoding: [0x69,0x04,0xd2,0xc8,0x69,0x06,0x06,0xff] +0x69,0x04,0xd2,0xc8,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, s105, v2 :: v_dual_fmaak_f32 v6, s105, v3, 0xaf123456 ; encoding: [0x69,0x04,0xc2,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x69,0x04,0xc2,0xc8,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, s105, v2 :: v_dual_fmac_f32 v6, s1, v3 ; encoding: [0x69,0x04,0xc0,0xc8,0x01,0x06,0x06,0xff] +0x69,0x04,0xc0,0xc8,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, s105, v2 :: v_dual_lshlrev_b32 v6, s1, v3 ; encoding: [0x69,0x04,0xe2,0xc8,0x01,0x06,0x06,0xff] +0x69,0x04,0xe2,0xc8,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, s105, v2 :: v_dual_lshrrev_b32 v6, s1, v3 ; encoding: [0x69,0x04,0xea,0xc8,0x01,0x06,0x06,0xff] +0x69,0x04,0xea,0xc8,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, s105, v2 :: v_dual_max_i32 v6, s1, v3 ; encoding: 
[0x69,0x04,0xee,0xc8,0x01,0x06,0x06,0xff] +0x69,0x04,0xee,0xc8,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, s105, v2 :: v_dual_max_num_f32 v6, s1, v3 ; encoding: [0x69,0x04,0xd4,0xc8,0x01,0x06,0x06,0xff] +0x69,0x04,0xd4,0xc8,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, s105, v2 :: v_dual_min_i32 v6, s1, v3 ; encoding: [0x69,0x04,0xf0,0xc8,0x01,0x06,0x06,0xff] +0x69,0x04,0xf0,0xc8,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, s105, v2 :: v_dual_min_num_f32 v6, s1, v3 ; encoding: [0x69,0x04,0xd6,0xc8,0x01,0x06,0x06,0xff] +0x69,0x04,0xd6,0xc8,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v6, s1, v3 ; encoding: [0x69,0x04,0xce,0xc8,0x01,0x06,0x06,0xff] +0x69,0x04,0xce,0xc8,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, s105, v2 :: v_dual_mul_f32 v6, s1, v3 ; encoding: [0x69,0x04,0xc6,0xc8,0x01,0x06,0x06,0xff] +0x69,0x04,0xc6,0xc8,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, s105, v2 :: v_dual_sub_f32 v6, s1, v3 ; encoding: [0x69,0x04,0xca,0xc8,0x01,0x06,0x06,0xff] +0x69,0x04,0xca,0xc8,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, s105, v2 :: v_dual_sub_nc_u32 v6, s1, v3 ; encoding: [0x69,0x04,0xe8,0xc8,0x01,0x06,0x06,0xff] +0x69,0x04,0xe8,0xc8,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, s105, v2 :: v_dual_subrev_f32 v6, s1, v3 ; encoding: [0x69,0x04,0xcc,0xc8,0x01,0x06,0x06,0xff] +0x69,0x04,0xcc,0xc8,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, s105, v255 :: v_dual_fmamk_f32 v6, s105, 0xaf123456, v255 ; encoding: [0x69,0xfe,0xc5,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x69,0xfe,0xc5,0xc8,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, s105, v255 :: v_dual_mov_b32 v6, s1 ; encoding: [0x69,0xfe,0xd1,0xc8,0x01,0x00,0x06,0xff] +0x69,0xfe,0xd1,0xc8,0x01,0x00,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_add_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0xc8,0xc8,0xc1,0x06,0x06,0xff] 
+0xfd,0x04,0xc8,0xc8,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_add_nc_u32 v6, -1, v3 ; encoding: [0xfd,0x04,0xe0,0xc8,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xe0,0xc8,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0xec,0xc8,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xec,0xc8,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_cndmask_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0xd2,0xc8,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xd2,0xc8,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_fmaak_f32 v6, -1, v3, 0xaf123456 ; encoding: [0xfd,0x04,0xc2,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xfd,0x04,0xc2,0xc8,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_fmac_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0xc0,0xc8,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xc0,0xc8,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0xe2,0xc8,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xe2,0xc8,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0xea,0xc8,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xea,0xc8,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_max_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0xee,0xc8,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xee,0xc8,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_max_num_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0xd4,0xc8,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xd4,0xc8,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_min_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0xf0,0xc8,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xf0,0xc8,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_min_num_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0xd6,0xc8,0xc1,0x06,0x06,0xff] 
+0xfd,0x04,0xd6,0xc8,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0xce,0xc8,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xce,0xc8,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_mul_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0xc6,0xc8,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xc6,0xc8,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_sub_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0xca,0xc8,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xca,0xc8,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v6, -1, v3 ; encoding: [0xfd,0x04,0xe8,0xc8,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xe8,0xc8,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_subrev_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0xcc,0xc8,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xcc,0xc8,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, src_scc, v255 :: v_dual_fmamk_f32 v6, -1, 0xaf123456, v255 ; encoding: [0xfd,0xfe,0xc5,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xfd,0xfe,0xc5,0xc8,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, src_scc, v255 :: v_dual_mov_b32 v6, -1 ; encoding: [0xfd,0xfe,0xd1,0xc8,0xc1,0x00,0x06,0xff] +0xfd,0xfe,0xd1,0xc8,0xc1,0x00,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_add_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xc8,0xc8,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xc8,0xc8,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xe0,0xc8,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xe0,0xc8,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xec,0xc8,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xec,0xc8,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0xd2,0xc8,0x7b,0x06,0x06,0xff] 
+0x7b,0x04,0xd2,0xc8,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_fmaak_f32 v6, ttmp15, v3, 0xaf123456 ; encoding: [0x7b,0x04,0xc2,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7b,0x04,0xc2,0xc8,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_fmac_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xc0,0xc8,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xc0,0xc8,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xe2,0xc8,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xe2,0xc8,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xea,0xc8,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xea,0xc8,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_max_i32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xee,0xc8,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xee,0xc8,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_max_num_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xd4,0xc8,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xd4,0xc8,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_min_i32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xf0,0xc8,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xf0,0xc8,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_min_num_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xd6,0xc8,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xd6,0xc8,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xce,0xc8,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xce,0xc8,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_mul_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xc6,0xc8,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xc6,0xc8,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_sub_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xca,0xc8,0x6a,0x06,0x06,0xff] 
+0x7b,0x04,0xca,0xc8,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_sub_nc_u32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xe8,0xc8,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xe8,0xc8,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_subrev_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xcc,0xc8,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xcc,0xc8,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, ttmp15, v255 :: v_dual_fmamk_f32 v6, ttmp15, 0xaf123456, v255 ; encoding: [0x7b,0xfe,0xc5,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7b,0xfe,0xc5,0xc8,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, ttmp15, v255 :: v_dual_mov_b32 v6, vcc_lo ; encoding: [0x7b,0xfe,0xd1,0xc8,0x6a,0x00,0x06,0xff] +0x7b,0xfe,0xd1,0xc8,0x6a,0x00,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v1, v2 :: v_dual_add_f32 v6, v255, v3 ; encoding: [0x01,0x05,0xc8,0xc8,0xff,0x07,0x06,0xff] +0x01,0x05,0xc8,0xc8,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v1, v2 :: v_dual_add_nc_u32 v6, v255, v3 ; encoding: [0x01,0x05,0xe0,0xc8,0xff,0x07,0x06,0xff] +0x01,0x05,0xe0,0xc8,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v1, v2 :: v_dual_ashrrev_i32 v6, v255, v3 ; encoding: [0x01,0x05,0xec,0xc8,0xff,0x07,0x06,0xff] +0x01,0x05,0xec,0xc8,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v1, v2 :: v_dual_cndmask_b32 v6, v255, v3 ; encoding: [0x01,0x05,0xd2,0xc8,0xff,0x07,0x06,0xff] +0x01,0x05,0xd2,0xc8,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v1, v2 :: v_dual_fmaak_f32 v6, v255, v3, 0xaf123456 ; encoding: [0x01,0x05,0xc2,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0x05,0xc2,0xc8,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, v1, v2 :: v_dual_fmac_f32 v6, v255, v3 ; encoding: [0x01,0x05,0xc0,0xc8,0xff,0x07,0x06,0xff] +0x01,0x05,0xc0,0xc8,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v1, v2 :: v_dual_lshlrev_b32 v6, v255, v3 ; encoding: 
[0x01,0x05,0xe2,0xc8,0xff,0x07,0x06,0xff] +0x01,0x05,0xe2,0xc8,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v1, v2 :: v_dual_lshrrev_b32 v6, v255, v3 ; encoding: [0x01,0x05,0xea,0xc8,0xff,0x07,0x06,0xff] +0x01,0x05,0xea,0xc8,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v1, v2 :: v_dual_max_i32 v6, v255, v3 ; encoding: [0x01,0x05,0xee,0xc8,0xff,0x07,0x06,0xff] +0x01,0x05,0xee,0xc8,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v1, v2 :: v_dual_max_num_f32 v6, v255, v3 ; encoding: [0x01,0x05,0xd4,0xc8,0xff,0x07,0x06,0xff] +0x01,0x05,0xd4,0xc8,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v1, v2 :: v_dual_min_i32 v6, v255, v3 ; encoding: [0x01,0x05,0xf0,0xc8,0xff,0x07,0x06,0xff] +0x01,0x05,0xf0,0xc8,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v1, v2 :: v_dual_min_num_f32 v6, v255, v3 ; encoding: [0x01,0x05,0xd6,0xc8,0xff,0x07,0x06,0xff] +0x01,0x05,0xd6,0xc8,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v6, v255, v3 ; encoding: [0x01,0x05,0xce,0xc8,0xff,0x07,0x06,0xff] +0x01,0x05,0xce,0xc8,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v1, v2 :: v_dual_mul_f32 v6, v255, v3 ; encoding: [0x01,0x05,0xc6,0xc8,0xff,0x07,0x06,0xff] +0x01,0x05,0xc6,0xc8,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v1, v2 :: v_dual_sub_f32 v6, v255, v3 ; encoding: [0x01,0x05,0xca,0xc8,0xff,0x07,0x06,0xff] +0x01,0x05,0xca,0xc8,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v1, v2 :: v_dual_sub_nc_u32 v6, v255, v3 ; encoding: [0x01,0x05,0xe8,0xc8,0xff,0x07,0x06,0xff] +0x01,0x05,0xe8,0xc8,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v1, v2 :: v_dual_subrev_f32 v6, v255, v3 ; encoding: [0x01,0x05,0xcc,0xc8,0xff,0x07,0x06,0xff] +0x01,0x05,0xcc,0xc8,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v1, v255 :: v_dual_fmamk_f32 v6, v255, 0xaf123456, v255 ; encoding: [0x01,0xff,0xc5,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] 
+0x01,0xff,0xc5,0xc8,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, v1, v255 :: v_dual_mov_b32 v6, v255 ; encoding: [0x01,0xff,0xd1,0xc8,0xff,0x01,0x06,0xff] +0x01,0xff,0xd1,0xc8,0xff,0x01,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v2, v2 :: v_dual_add_f32 v6, v3, v3 ; encoding: [0x02,0x05,0xc8,0xc8,0x03,0x07,0x06,0xff] +0x02,0x05,0xc8,0xc8,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v2, v2 :: v_dual_add_nc_u32 v6, v3, v3 ; encoding: [0x02,0x05,0xe0,0xc8,0x03,0x07,0x06,0xff] +0x02,0x05,0xe0,0xc8,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v2, v2 :: v_dual_ashrrev_i32 v6, v3, v3 ; encoding: [0x02,0x05,0xec,0xc8,0x03,0x07,0x06,0xff] +0x02,0x05,0xec,0xc8,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v2, v2 :: v_dual_cndmask_b32 v6, v3, v3 ; encoding: [0x02,0x05,0xd2,0xc8,0x03,0x07,0x06,0xff] +0x02,0x05,0xd2,0xc8,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v2, v2 :: v_dual_fmaak_f32 v6, v3, v3, 0xaf123456 ; encoding: [0x02,0x05,0xc2,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x02,0x05,0xc2,0xc8,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, v2, v2 :: v_dual_fmac_f32 v6, v3, v3 ; encoding: [0x02,0x05,0xc0,0xc8,0x03,0x07,0x06,0xff] +0x02,0x05,0xc0,0xc8,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v2, v2 :: v_dual_lshlrev_b32 v6, v3, v3 ; encoding: [0x02,0x05,0xe2,0xc8,0x03,0x07,0x06,0xff] +0x02,0x05,0xe2,0xc8,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v2, v2 :: v_dual_lshrrev_b32 v6, v3, v3 ; encoding: [0x02,0x05,0xea,0xc8,0x03,0x07,0x06,0xff] +0x02,0x05,0xea,0xc8,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v2, v2 :: v_dual_max_i32 v6, v3, v3 ; encoding: [0x02,0x05,0xee,0xc8,0x03,0x07,0x06,0xff] +0x02,0x05,0xee,0xc8,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v2, v2 :: v_dual_max_num_f32 v6, v3, v3 ; encoding: [0x02,0x05,0xd4,0xc8,0x03,0x07,0x06,0xff] +0x02,0x05,0xd4,0xc8,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, 
v2, v2 :: v_dual_min_i32 v6, v3, v3 ; encoding: [0x02,0x05,0xf0,0xc8,0x03,0x07,0x06,0xff] +0x02,0x05,0xf0,0xc8,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v2, v2 :: v_dual_min_num_f32 v6, v3, v3 ; encoding: [0x02,0x05,0xd6,0xc8,0x03,0x07,0x06,0xff] +0x02,0x05,0xd6,0xc8,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v6, v3, v3 ; encoding: [0x02,0x05,0xce,0xc8,0x03,0x07,0x06,0xff] +0x02,0x05,0xce,0xc8,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v2, v2 :: v_dual_mul_f32 v6, v3, v3 ; encoding: [0x02,0x05,0xc6,0xc8,0x03,0x07,0x06,0xff] +0x02,0x05,0xc6,0xc8,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v2, v2 :: v_dual_sub_f32 v6, v3, v3 ; encoding: [0x02,0x05,0xca,0xc8,0x03,0x07,0x06,0xff] +0x02,0x05,0xca,0xc8,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v2, v2 :: v_dual_sub_nc_u32 v6, v3, v3 ; encoding: [0x02,0x05,0xe8,0xc8,0x03,0x07,0x06,0xff] +0x02,0x05,0xe8,0xc8,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v2, v2 :: v_dual_subrev_f32 v6, v3, v3 ; encoding: [0x02,0x05,0xcc,0xc8,0x03,0x07,0x06,0xff] +0x02,0x05,0xcc,0xc8,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v2, v255 :: v_dual_fmamk_f32 v6, v3, 0xaf123456, v255 ; encoding: [0x02,0xff,0xc5,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x02,0xff,0xc5,0xc8,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, v2, v255 :: v_dual_mov_b32 v6, v3 ; encoding: [0x02,0xff,0xd1,0xc8,0x03,0x01,0x06,0xff] +0x02,0xff,0xd1,0xc8,0x03,0x01,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v255, v2 :: v_dual_add_f32 v6, v2, v3 ; encoding: [0xff,0x05,0xc8,0xc8,0x02,0x07,0x06,0xff] +0xff,0x05,0xc8,0xc8,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v255, v2 :: v_dual_add_nc_u32 v6, v2, v3 ; encoding: [0xff,0x05,0xe0,0xc8,0x02,0x07,0x06,0xff] +0xff,0x05,0xe0,0xc8,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v255, v2 :: v_dual_ashrrev_i32 v6, v2, v3 ; encoding: 
[0xff,0x05,0xec,0xc8,0x02,0x07,0x06,0xff] +0xff,0x05,0xec,0xc8,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v255, v2 :: v_dual_cndmask_b32 v6, v2, v3 ; encoding: [0xff,0x05,0xd2,0xc8,0x02,0x07,0x06,0xff] +0xff,0x05,0xd2,0xc8,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v255, v2 :: v_dual_fmaak_f32 v6, v2, v3, 0xaf123456 ; encoding: [0xff,0x05,0xc2,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x05,0xc2,0xc8,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, v255, v2 :: v_dual_fmac_f32 v6, v2, v3 ; encoding: [0xff,0x05,0xc0,0xc8,0x02,0x07,0x06,0xff] +0xff,0x05,0xc0,0xc8,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v255, v2 :: v_dual_lshlrev_b32 v6, v2, v3 ; encoding: [0xff,0x05,0xe2,0xc8,0x02,0x07,0x06,0xff] +0xff,0x05,0xe2,0xc8,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v255, v2 :: v_dual_lshrrev_b32 v6, v2, v3 ; encoding: [0xff,0x05,0xea,0xc8,0x02,0x07,0x06,0xff] +0xff,0x05,0xea,0xc8,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v255, v2 :: v_dual_max_i32 v6, v2, v3 ; encoding: [0xff,0x05,0xee,0xc8,0x02,0x07,0x06,0xff] +0xff,0x05,0xee,0xc8,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v255, v2 :: v_dual_max_num_f32 v6, v2, v3 ; encoding: [0xff,0x05,0xd4,0xc8,0x02,0x07,0x06,0xff] +0xff,0x05,0xd4,0xc8,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v255, v2 :: v_dual_min_i32 v6, v2, v3 ; encoding: [0xff,0x05,0xf0,0xc8,0x02,0x07,0x06,0xff] +0xff,0x05,0xf0,0xc8,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v255, v2 :: v_dual_min_num_f32 v6, v2, v3 ; encoding: [0xff,0x05,0xd6,0xc8,0x02,0x07,0x06,0xff] +0xff,0x05,0xd6,0xc8,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v6, v2, v3 ; encoding: [0xff,0x05,0xce,0xc8,0x02,0x07,0x06,0xff] +0xff,0x05,0xce,0xc8,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v255, v2 :: v_dual_mul_f32 v6, v2, v3 ; encoding: [0xff,0x05,0xc6,0xc8,0x02,0x07,0x06,0xff] 
+0xff,0x05,0xc6,0xc8,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v255, v2 :: v_dual_sub_f32 v6, v2, v3 ; encoding: [0xff,0x05,0xca,0xc8,0x02,0x07,0x06,0xff] +0xff,0x05,0xca,0xc8,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v255, v2 :: v_dual_sub_nc_u32 v6, v2, v3 ; encoding: [0xff,0x05,0xe8,0xc8,0x02,0x07,0x06,0xff] +0xff,0x05,0xe8,0xc8,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v255, v2 :: v_dual_subrev_f32 v6, v2, v3 ; encoding: [0xff,0x05,0xcc,0xc8,0x02,0x07,0x06,0xff] +0xff,0x05,0xcc,0xc8,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v255, v255 :: v_dual_fmamk_f32 v6, v2, 0xaf123456, v255 ; encoding: [0xff,0xff,0xc5,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0xff,0xc5,0xc8,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, v255, v255 :: v_dual_mov_b32 v6, v2 ; encoding: [0xff,0xff,0xd1,0xc8,0x02,0x01,0x06,0xff] +0xff,0xff,0xd1,0xc8,0x02,0x01,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v3, v2 :: v_dual_add_f32 v6, v4, v3 ; encoding: [0x03,0x05,0xc8,0xc8,0x04,0x07,0x06,0xff] +0x03,0x05,0xc8,0xc8,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v3, v2 :: v_dual_add_nc_u32 v6, v4, v3 ; encoding: [0x03,0x05,0xe0,0xc8,0x04,0x07,0x06,0xff] +0x03,0x05,0xe0,0xc8,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v3, v2 :: v_dual_ashrrev_i32 v6, v4, v3 ; encoding: [0x03,0x05,0xec,0xc8,0x04,0x07,0x06,0xff] +0x03,0x05,0xec,0xc8,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v3, v2 :: v_dual_cndmask_b32 v6, v4, v3 ; encoding: [0x03,0x05,0xd2,0xc8,0x04,0x07,0x06,0xff] +0x03,0x05,0xd2,0xc8,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v3, v2 :: v_dual_fmaak_f32 v6, v4, v3, 0xaf123456 ; encoding: [0x03,0x05,0xc2,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x03,0x05,0xc2,0xc8,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, v3, v2 :: v_dual_fmac_f32 v6, v4, v3 ; encoding: [0x03,0x05,0xc0,0xc8,0x04,0x07,0x06,0xff] 
+0x03,0x05,0xc0,0xc8,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v3, v2 :: v_dual_lshlrev_b32 v6, v4, v3 ; encoding: [0x03,0x05,0xe2,0xc8,0x04,0x07,0x06,0xff] +0x03,0x05,0xe2,0xc8,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v3, v2 :: v_dual_lshrrev_b32 v6, v4, v3 ; encoding: [0x03,0x05,0xea,0xc8,0x04,0x07,0x06,0xff] +0x03,0x05,0xea,0xc8,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v3, v2 :: v_dual_max_i32 v6, v4, v3 ; encoding: [0x03,0x05,0xee,0xc8,0x04,0x07,0x06,0xff] +0x03,0x05,0xee,0xc8,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v3, v2 :: v_dual_max_num_f32 v6, v4, v3 ; encoding: [0x03,0x05,0xd4,0xc8,0x04,0x07,0x06,0xff] +0x03,0x05,0xd4,0xc8,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v3, v2 :: v_dual_min_i32 v6, v4, v3 ; encoding: [0x03,0x05,0xf0,0xc8,0x04,0x07,0x06,0xff] +0x03,0x05,0xf0,0xc8,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v3, v2 :: v_dual_min_num_f32 v6, v4, v3 ; encoding: [0x03,0x05,0xd6,0xc8,0x04,0x07,0x06,0xff] +0x03,0x05,0xd6,0xc8,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v6, v4, v3 ; encoding: [0x03,0x05,0xce,0xc8,0x04,0x07,0x06,0xff] +0x03,0x05,0xce,0xc8,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v3, v2 :: v_dual_mul_f32 v6, v4, v3 ; encoding: [0x03,0x05,0xc6,0xc8,0x04,0x07,0x06,0xff] +0x03,0x05,0xc6,0xc8,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v3, v2 :: v_dual_sub_f32 v6, v4, v3 ; encoding: [0x03,0x05,0xca,0xc8,0x04,0x07,0x06,0xff] +0x03,0x05,0xca,0xc8,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v3, v2 :: v_dual_sub_nc_u32 v6, v4, v3 ; encoding: [0x03,0x05,0xe8,0xc8,0x04,0x07,0x06,0xff] +0x03,0x05,0xe8,0xc8,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v3, v2 :: v_dual_subrev_f32 v6, v4, v3 ; encoding: [0x03,0x05,0xcc,0xc8,0x04,0x07,0x06,0xff] +0x03,0x05,0xcc,0xc8,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v3, v255 :: v_dual_fmamk_f32 v6, v4, 0xaf123456, v255 ; encoding: 
[0x03,0xff,0xc5,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x03,0xff,0xc5,0xc8,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, v3, v255 :: v_dual_mov_b32 v6, v4 ; encoding: [0x03,0xff,0xd1,0xc8,0x04,0x01,0x06,0xff] +0x03,0xff,0xd1,0xc8,0x04,0x01,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_add_f32 v6, v1, v3 ; encoding: [0x04,0x05,0xc8,0xc8,0x01,0x07,0x06,0xff] +0x04,0x05,0xc8,0xc8,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_add_nc_u32 v6, v1, v3 ; encoding: [0x04,0x05,0xe0,0xc8,0x01,0x07,0x06,0xff] +0x04,0x05,0xe0,0xc8,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_ashrrev_i32 v6, v1, v3 ; encoding: [0x04,0x05,0xec,0xc8,0x01,0x07,0x06,0xff] +0x04,0x05,0xec,0xc8,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_cndmask_b32 v6, v1, v3 ; encoding: [0x04,0x05,0xd2,0xc8,0x01,0x07,0x06,0xff] +0x04,0x05,0xd2,0xc8,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_fmaak_f32 v6, v1, v3, 0xaf123456 ; encoding: [0x04,0x05,0xc2,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x04,0x05,0xc2,0xc8,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_fmac_f32 v6, v1, v3 ; encoding: [0x04,0x05,0xc0,0xc8,0x01,0x07,0x06,0xff] +0x04,0x05,0xc0,0xc8,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_lshlrev_b32 v6, v1, v3 ; encoding: [0x04,0x05,0xe2,0xc8,0x01,0x07,0x06,0xff] +0x04,0x05,0xe2,0xc8,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_lshrrev_b32 v6, v1, v3 ; encoding: [0x04,0x05,0xea,0xc8,0x01,0x07,0x06,0xff] +0x04,0x05,0xea,0xc8,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_max_i32 v6, v1, v3 ; encoding: [0x04,0x05,0xee,0xc8,0x01,0x07,0x06,0xff] +0x04,0x05,0xee,0xc8,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_max_num_f32 v6, v1, v3 ; encoding: [0x04,0x05,0xd4,0xc8,0x01,0x07,0x06,0xff] 
+0x04,0x05,0xd4,0xc8,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_min_i32 v6, v1, v3 ; encoding: [0x04,0x05,0xf0,0xc8,0x01,0x07,0x06,0xff] +0x04,0x05,0xf0,0xc8,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_min_num_f32 v6, v1, v3 ; encoding: [0x04,0x05,0xd6,0xc8,0x01,0x07,0x06,0xff] +0x04,0x05,0xd6,0xc8,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v6, v1, v3 ; encoding: [0x04,0x05,0xce,0xc8,0x01,0x07,0x06,0xff] +0x04,0x05,0xce,0xc8,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_mul_f32 v6, v1, v3 ; encoding: [0x04,0x05,0xc6,0xc8,0x01,0x07,0x06,0xff] +0x04,0x05,0xc6,0xc8,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_sub_f32 v6, v1, v3 ; encoding: [0x04,0x05,0xca,0xc8,0x01,0x07,0x06,0xff] +0x04,0x05,0xca,0xc8,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_sub_nc_u32 v6, v1, v3 ; encoding: [0x04,0x05,0xe8,0xc8,0x01,0x07,0x06,0xff] +0x04,0x05,0xe8,0xc8,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_subrev_f32 v6, v1, v3 ; encoding: [0x04,0x05,0xcc,0xc8,0x01,0x07,0x06,0xff] +0x04,0x05,0xcc,0xc8,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, v4, v255 :: v_dual_fmamk_f32 v6, v1, 0xaf123456, v255 ; encoding: [0x04,0xff,0xc5,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x04,0xff,0xc5,0xc8,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, v4, v255 :: v_dual_mov_b32 v6, v1 ; encoding: [0x04,0xff,0xd1,0xc8,0x01,0x01,0x06,0xff] +0x04,0xff,0xd1,0xc8,0x01,0x01,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_add_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xc8,0xc8,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xc8,0xc8,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xe0,0xc8,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xe0,0xc8,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, 
vcc_hi, v2 :: v_dual_ashrrev_i32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xec,0xc8,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xec,0xc8,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0xd2,0xc8,0x6b,0x06,0x06,0xff] +0x6b,0x04,0xd2,0xc8,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_fmaak_f32 v6, vcc_hi, v3, 0xaf123456 ; encoding: [0x6b,0x04,0xc2,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6b,0x04,0xc2,0xc8,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xc0,0xc8,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xc0,0xc8,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xe2,0xc8,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xe2,0xc8,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xea,0xc8,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xea,0xc8,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_max_i32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xee,0xc8,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xee,0xc8,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xd4,0xc8,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xd4,0xc8,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_min_i32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xf0,0xc8,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xf0,0xc8,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xd6,0xc8,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xd6,0xc8,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xce,0xc8,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xce,0xc8,0x7e,0x06,0x06,0xff + +# GFX1250: 
v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_mul_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xc6,0xc8,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xc6,0xc8,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_sub_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xca,0xc8,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xca,0xc8,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xe8,0xc8,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xe8,0xc8,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xcc,0xc8,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xcc,0xc8,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, vcc_hi, v255 :: v_dual_fmamk_f32 v6, vcc_hi, 0xaf123456, v255 ; encoding: [0x6b,0xfe,0xc5,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6b,0xfe,0xc5,0xc8,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, vcc_hi, v255 :: v_dual_mov_b32 v6, exec_lo ; encoding: [0x6b,0xfe,0xd1,0xc8,0x7e,0x00,0x06,0xff] +0x6b,0xfe,0xd1,0xc8,0x7e,0x00,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_add_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xc8,0xc8,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xc8,0xc8,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xe0,0xc8,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xe0,0xc8,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xec,0xc8,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xec,0xc8,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0xd2,0xc8,0x6a,0x06,0x06,0xff] +0x6a,0x04,0xd2,0xc8,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_fmaak_f32 v6, vcc_lo, v3, 0xaf123456 ; encoding: [0x6a,0x04,0xc2,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] 
+0x6a,0x04,0xc2,0xc8,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xc0,0xc8,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xc0,0xc8,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xe2,0xc8,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xe2,0xc8,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xea,0xc8,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xea,0xc8,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_max_i32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xee,0xc8,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xee,0xc8,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xd4,0xc8,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xd4,0xc8,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_min_i32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xf0,0xc8,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xf0,0xc8,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xd6,0xc8,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xd6,0xc8,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xce,0xc8,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xce,0xc8,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_mul_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xc6,0xc8,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xc6,0xc8,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_sub_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xca,0xc8,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xca,0xc8,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xe8,0xc8,0x7f,0x06,0x06,0xff] 
+0x6a,0x04,0xe8,0xc8,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xcc,0xc8,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xcc,0xc8,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_mul_f32 v255, vcc_lo, v255 :: v_dual_fmamk_f32 v6, vcc_lo, 0xaf123456, v255 ; encoding: [0x6a,0xfe,0xc5,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6a,0xfe,0xc5,0xc8,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v255, vcc_lo, v255 :: v_dual_mov_b32 v6, exec_hi ; encoding: [0x6a,0xfe,0xd1,0xc8,0x7f,0x00,0x06,0xff] +0x6a,0xfe,0xd1,0xc8,0x7f,0x00,0x06,0xff + +# GFX1250: v_dual_mul_f32 v6, null, v5 :: v_dual_add_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xc8,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xc8,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v6, null, v5 :: v_dual_add_nc_u32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xe0,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xe0,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v6, null, v5 :: v_dual_ashrrev_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xec,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xec,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v6, null, v5 :: v_dual_cndmask_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xd2,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xd2,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v6, null, v5 :: v_dual_fmaak_f32 v255, 0xaf123456, v4, 0xaf123456 ; encoding: [0x7c,0x0a,0xc2,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xc2,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v6, null, v5 :: v_dual_fmac_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xc0,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xc0,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v6, null, v5 :: 
v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xc4,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xc4,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v6, null, v5 :: v_dual_lshlrev_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xe2,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xe2,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v6, null, v5 :: v_dual_lshrrev_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xea,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xea,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v6, null, v5 :: v_dual_max_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xee,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xee,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v6, null, v5 :: v_dual_max_num_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xd4,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xd4,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v6, null, v5 :: v_dual_min_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xf0,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xf0,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v6, null, v5 :: v_dual_min_num_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xd6,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xd6,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v6, null, v5 :: v_dual_mov_b32 v255, 0xaf123456 ; encoding: [0x7c,0x0a,0xd0,0xc8,0xff,0x00,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xd0,0xc8,0xff,0x00,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v6, null, v5 :: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xce,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xce,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v6, null, v5 :: v_dual_mul_f32 v255, 
0xaf123456, v4 ; encoding: [0x7c,0x0a,0xc6,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xc6,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v6, null, v5 :: v_dual_sub_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xca,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xca,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v6, null, v5 :: v_dual_sub_nc_u32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xe8,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xe8,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_mul_f32 v6, null, v5 :: v_dual_subrev_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xcc,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xcc,0xc8,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_add_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x48,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x48,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_add_nc_u32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x60,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x60,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_ashrrev_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x6c,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x6c,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_cndmask_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x52,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x52,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_fmaak_f32 v6, src_scc, v5, 0xaf123456 ; encoding: [0xc1,0x08,0x42,0xc9,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +0xc1,0x08,0x42,0xc9,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_fmac_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x40,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x40,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_fmamk_f32 v6, src_scc, 0xaf123456, v255 ; encoding: 
[0xc1,0x08,0x44,0xc9,0xfd,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xc1,0x08,0x44,0xc9,0xfd,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_lshlrev_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x62,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x62,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_lshrrev_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x6a,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x6a,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_max_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x6e,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x6e,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_max_num_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x54,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x54,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_min_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x70,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x70,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_min_num_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x56,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x56,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_mov_b32 v6, src_scc ; encoding: [0xc1,0x08,0x50,0xc9,0xfd,0x00,0x06,0xff] +0xc1,0x08,0x50,0xc9,0xfd,0x00,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x4e,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x4e,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_mul_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x46,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x46,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_sub_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x4a,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x4a,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_sub_nc_u32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x68,0xc9,0xfd,0x0a,0x06,0xff] 
+0xc1,0x08,0x68,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_subrev_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x4c,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x4c,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_add_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x48,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x48,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_add_nc_u32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x60,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x60,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x6c,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x6c,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_cndmask_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x52,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x52,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_fmaak_f32 v6, 0.5, v2, 0xaf123456 ; encoding: [0xf0,0x06,0x42,0xc9,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +0xf0,0x06,0x42,0xc9,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_fmac_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x40,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x40,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_fmamk_f32 v6, 0.5, 0xaf123456, v255 ; encoding: [0xf0,0x06,0x44,0xc9,0xf0,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xf0,0x06,0x44,0xc9,0xf0,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x62,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x62,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x6a,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x6a,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_max_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x6e,0xc9,0xf0,0x04,0x06,0xff] 
+0xf0,0x06,0x6e,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_max_num_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x54,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x54,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_min_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x70,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x70,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_min_num_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x56,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x56,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_mov_b32 v6, 0.5 ; encoding: [0xf0,0x06,0x50,0xc9,0xf0,0x00,0x06,0xff] +0xf0,0x06,0x50,0xc9,0xf0,0x00,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x4e,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x4e,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_mul_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x46,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x46,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_sub_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x4a,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x4a,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x68,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x68,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_subrev_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x4c,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x4c,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, 0xaf123456, v2 :: v_dual_add_f32 v6, null, v3 ; encoding: [0xff,0x04,0x48,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x48,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, 0xaf123456, v2 :: v_dual_add_nc_u32 v6, null, v3 ; encoding: [0xff,0x04,0x60,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] 
+0xff,0x04,0x60,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, 0xaf123456, v2 :: v_dual_ashrrev_i32 v6, null, v3 ; encoding: [0xff,0x04,0x6c,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x6c,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, 0xaf123456, v2 :: v_dual_cndmask_b32 v6, null, v3 ; encoding: [0xff,0x04,0x52,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x52,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, 0xaf123456, v2 :: v_dual_fmaak_f32 v6, null, v3, 0xaf123456 ; encoding: [0xff,0x04,0x42,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x42,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, 0xaf123456, v2 :: v_dual_fmac_f32 v6, null, v3 ; encoding: [0xff,0x04,0x40,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x40,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, 0xaf123456, v2 :: v_dual_lshlrev_b32 v6, null, v3 ; encoding: [0xff,0x04,0x62,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x62,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, 0xaf123456, v2 :: v_dual_lshrrev_b32 v6, null, v3 ; encoding: [0xff,0x04,0x6a,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x6a,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, 0xaf123456, v2 :: v_dual_max_i32 v6, null, v3 ; encoding: [0xff,0x04,0x6e,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x6e,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, 0xaf123456, v2 :: v_dual_max_num_f32 v6, null, v3 ; encoding: [0xff,0x04,0x54,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x54,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, 0xaf123456, v2 :: v_dual_min_i32 v6, null, v3 ; encoding: [0xff,0x04,0x70,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] 
+0xff,0x04,0x70,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, 0xaf123456, v2 :: v_dual_min_num_f32 v6, null, v3 ; encoding: [0xff,0x04,0x56,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x56,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, 0xaf123456, v2 :: v_dual_mul_dx9_zero_f32 v6, null, v3 ; encoding: [0xff,0x04,0x4e,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x4e,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, 0xaf123456, v2 :: v_dual_mul_f32 v6, null, v3 ; encoding: [0xff,0x04,0x46,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x46,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, 0xaf123456, v2 :: v_dual_sub_f32 v6, null, v3 ; encoding: [0xff,0x04,0x4a,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x4a,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, 0xaf123456, v2 :: v_dual_sub_nc_u32 v6, null, v3 ; encoding: [0xff,0x04,0x68,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x68,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, 0xaf123456, v2 :: v_dual_subrev_f32 v6, null, v3 ; encoding: [0xff,0x04,0x4c,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x4c,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, null, 0xaf123456, v255 ; encoding: [0xff,0xfe,0x45,0xc9,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0xfe,0x45,0xc9,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, 0xaf123456, v255 :: v_dual_mov_b32 v6, null ; encoding: [0xff,0xfe,0x51,0xc9,0x7c,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0xfe,0x51,0xc9,0x7c,0x00,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_add_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x48,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x48,0xc9,0x6b,0x06,0x06,0xff 
+ +# GFX1250: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x60,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x60,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x6c,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x6c,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x52,0xc9,0x7f,0x06,0x06,0xff] +0x7f,0x04,0x52,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_fmaak_f32 v6, exec_hi, v3, 0xaf123456 ; encoding: [0x7f,0x04,0x42,0xc9,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7f,0x04,0x42,0xc9,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_fmac_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x40,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x40,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x62,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x62,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x6a,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x6a,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_max_i32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x6e,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x6e,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_max_num_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x54,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x54,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_min_i32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x70,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x70,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_min_num_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x56,0xc9,0x6b,0x06,0x06,0xff] 
+0x7f,0x04,0x56,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x4e,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x4e,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_mul_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x46,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x46,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_sub_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x4a,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x4a,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x68,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x68,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_subrev_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x4c,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x4c,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, exec_hi, v255 :: v_dual_fmamk_f32 v6, exec_hi, 0xaf123456, v255 ; encoding: [0x7f,0xfe,0x45,0xc9,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7f,0xfe,0x45,0xc9,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, exec_hi, v255 :: v_dual_mov_b32 v6, vcc_hi ; encoding: [0x7f,0xfe,0x51,0xc9,0x6b,0x00,0x06,0xff] +0x7f,0xfe,0x51,0xc9,0x6b,0x00,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_add_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x48,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x48,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x60,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x60,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x6c,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x6c,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v6, exec_lo, v3 ; encoding: 
[0x7e,0x04,0x52,0xc9,0x7e,0x06,0x06,0xff] +0x7e,0x04,0x52,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_fmaak_f32 v6, exec_lo, v3, 0xaf123456 ; encoding: [0x7e,0x04,0x42,0xc9,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7e,0x04,0x42,0xc9,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_fmac_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x40,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x40,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x62,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x62,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_lshrrev_b32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x6a,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x6a,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_max_i32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x6e,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x6e,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_max_num_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x54,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x54,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_min_i32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x70,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x70,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_min_num_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x56,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x56,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x4e,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x4e,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_mul_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x46,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x46,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_sub_f32 v6, ttmp15, v3 ; 
encoding: [0x7e,0x04,0x4a,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x4a,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_sub_nc_u32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x68,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x68,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_subrev_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x4c,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x4c,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, exec_lo, v255 :: v_dual_fmamk_f32 v6, exec_lo, 0xaf123456, v255 ; encoding: [0x7e,0xfe,0x45,0xc9,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7e,0xfe,0x45,0xc9,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, exec_lo, v255 :: v_dual_mov_b32 v6, ttmp15 ; encoding: [0x7e,0xfe,0x51,0xc9,0x7b,0x00,0x06,0xff] +0x7e,0xfe,0x51,0xc9,0x7b,0x00,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, m0, v2 :: v_dual_add_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x48,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x48,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, m0, v2 :: v_dual_add_nc_u32 v6, m0, v3 ; encoding: [0x7d,0x04,0x60,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x60,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, m0, v2 :: v_dual_ashrrev_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0x6c,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x6c,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, m0, v2 :: v_dual_cndmask_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0x52,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x52,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, m0, v2 :: v_dual_fmaak_f32 v6, m0, v3, 0xaf123456 ; encoding: [0x7d,0x04,0x42,0xc9,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7d,0x04,0x42,0xc9,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, m0, v2 :: v_dual_fmac_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x40,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x40,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, m0, v2 :: v_dual_lshlrev_b32 v6, m0, v3 ; 
encoding: [0x7d,0x04,0x62,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x62,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, m0, v2 :: v_dual_lshrrev_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0x6a,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x6a,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, m0, v2 :: v_dual_max_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0x6e,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x6e,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, m0, v2 :: v_dual_max_num_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x54,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x54,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, m0, v2 :: v_dual_min_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0x70,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x70,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, m0, v2 :: v_dual_min_num_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x56,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x56,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x4e,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x4e,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, m0, v2 :: v_dual_mul_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x46,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x46,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, m0, v2 :: v_dual_sub_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x4a,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x4a,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, m0, v2 :: v_dual_sub_nc_u32 v6, m0, v3 ; encoding: [0x7d,0x04,0x68,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x68,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, m0, v2 :: v_dual_subrev_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x4c,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x4c,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, m0, v255 :: v_dual_fmamk_f32 v6, m0, 0xaf123456, v255 ; encoding: [0x7d,0xfe,0x45,0xc9,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] 
+0x7d,0xfe,0x45,0xc9,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, m0, v255 :: v_dual_mov_b32 v6, m0 ; encoding: [0x7d,0xfe,0x51,0xc9,0x7d,0x00,0x06,0xff] +0x7d,0xfe,0x51,0xc9,0x7d,0x00,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, s1, v2 :: v_dual_add_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x48,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0x48,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, s1, v2 :: v_dual_add_nc_u32 v6, s105, v3 ; encoding: [0x01,0x04,0x60,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0x60,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, s1, v2 :: v_dual_ashrrev_i32 v6, s105, v3 ; encoding: [0x01,0x04,0x6c,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0x6c,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s1, v3 ; encoding: [0x01,0x04,0x52,0xc9,0x01,0x06,0x06,0xff] +0x01,0x04,0x52,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, s1, v2 :: v_dual_fmaak_f32 v6, s1, v3, 0xaf123456 ; encoding: [0x01,0x04,0x42,0xc9,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0x04,0x42,0xc9,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, s1, v2 :: v_dual_fmac_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x40,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0x40,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, s1, v2 :: v_dual_lshlrev_b32 v6, s105, v3 ; encoding: [0x01,0x04,0x62,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0x62,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, s1, v2 :: v_dual_lshrrev_b32 v6, s105, v3 ; encoding: [0x01,0x04,0x6a,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0x6a,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, s1, v2 :: v_dual_max_i32 v6, s105, v3 ; encoding: [0x01,0x04,0x6e,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0x6e,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, s1, v2 :: v_dual_max_num_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x54,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0x54,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: 
v_dual_sub_f32 v255, s1, v2 :: v_dual_min_i32 v6, s105, v3 ; encoding: [0x01,0x04,0x70,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0x70,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, s1, v2 :: v_dual_min_num_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x56,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0x56,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x4e,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0x4e,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, s1, v2 :: v_dual_mul_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x46,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0x46,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, s1, v2 :: v_dual_sub_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x4a,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0x4a,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, s1, v2 :: v_dual_sub_nc_u32 v6, s105, v3 ; encoding: [0x01,0x04,0x68,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0x68,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, s1, v2 :: v_dual_subrev_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x4c,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0x4c,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, s1, v255 :: v_dual_fmamk_f32 v6, s1, 0xaf123456, v255 ; encoding: [0x01,0xfe,0x45,0xc9,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x01,0xfe,0x45,0xc9,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, s1, v255 :: v_dual_mov_b32 v6, s105 ; encoding: [0x01,0xfe,0x51,0xc9,0x69,0x00,0x06,0xff] +0x01,0xfe,0x51,0xc9,0x69,0x00,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, s105, v2 :: v_dual_add_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x48,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0x48,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, s105, v2 :: v_dual_add_nc_u32 v6, s1, v3 ; encoding: [0x69,0x04,0x60,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0x60,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, s105, v2 :: v_dual_ashrrev_i32 v6, s1, v3 ; encoding: 
[0x69,0x04,0x6c,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0x6c,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, s105, v2 :: v_dual_cndmask_b32 v6, s105, v3 ; encoding: [0x69,0x04,0x52,0xc9,0x69,0x06,0x06,0xff] +0x69,0x04,0x52,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, s105, v2 :: v_dual_fmaak_f32 v6, s105, v3, 0xaf123456 ; encoding: [0x69,0x04,0x42,0xc9,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x69,0x04,0x42,0xc9,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, s105, v2 :: v_dual_fmac_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x40,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0x40,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, s105, v2 :: v_dual_lshlrev_b32 v6, s1, v3 ; encoding: [0x69,0x04,0x62,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0x62,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, s105, v2 :: v_dual_lshrrev_b32 v6, s1, v3 ; encoding: [0x69,0x04,0x6a,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0x6a,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, s105, v2 :: v_dual_max_i32 v6, s1, v3 ; encoding: [0x69,0x04,0x6e,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0x6e,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, s105, v2 :: v_dual_max_num_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x54,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0x54,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, s105, v2 :: v_dual_min_i32 v6, s1, v3 ; encoding: [0x69,0x04,0x70,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0x70,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, s105, v2 :: v_dual_min_num_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x56,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0x56,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x4e,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0x4e,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, s105, v2 :: v_dual_mul_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x46,0xc9,0x01,0x06,0x06,0xff] 
+0x69,0x04,0x46,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, s105, v2 :: v_dual_sub_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x4a,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0x4a,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, s105, v2 :: v_dual_sub_nc_u32 v6, s1, v3 ; encoding: [0x69,0x04,0x68,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0x68,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, s105, v2 :: v_dual_subrev_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x4c,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0x4c,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, s105, v255 :: v_dual_fmamk_f32 v6, s105, 0xaf123456, v255 ; encoding: [0x69,0xfe,0x45,0xc9,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x69,0xfe,0x45,0xc9,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, s105, v255 :: v_dual_mov_b32 v6, s1 ; encoding: [0x69,0xfe,0x51,0xc9,0x01,0x00,0x06,0xff] +0x69,0xfe,0x51,0xc9,0x01,0x00,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_add_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x48,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x48,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_add_nc_u32 v6, -1, v3 ; encoding: [0xfd,0x04,0x60,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x60,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0x6c,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x6c,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_cndmask_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0x52,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x52,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_fmaak_f32 v6, -1, v3, 0xaf123456 ; encoding: [0xfd,0x04,0x42,0xc9,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xfd,0x04,0x42,0xc9,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_fmac_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x40,0xc9,0xc1,0x06,0x06,0xff] 
+0xfd,0x04,0x40,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0x62,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x62,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0x6a,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x6a,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_max_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0x6e,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x6e,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_max_num_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x54,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x54,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_min_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0x70,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x70,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_min_num_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x56,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x56,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x4e,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x4e,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_mul_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x46,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x46,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_sub_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x4a,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x4a,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v6, -1, v3 ; encoding: [0xfd,0x04,0x68,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x68,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_subrev_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x4c,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x4c,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, src_scc, 
v255 :: v_dual_fmamk_f32 v6, -1, 0xaf123456, v255 ; encoding: [0xfd,0xfe,0x45,0xc9,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xfd,0xfe,0x45,0xc9,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, src_scc, v255 :: v_dual_mov_b32 v6, -1 ; encoding: [0xfd,0xfe,0x51,0xc9,0xc1,0x00,0x06,0xff] +0xfd,0xfe,0x51,0xc9,0xc1,0x00,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_add_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x48,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x48,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x60,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x60,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x6c,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x6c,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x52,0xc9,0x7b,0x06,0x06,0xff] +0x7b,0x04,0x52,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_fmaak_f32 v6, ttmp15, v3, 0xaf123456 ; encoding: [0x7b,0x04,0x42,0xc9,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7b,0x04,0x42,0xc9,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_fmac_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x40,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x40,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x62,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x62,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x6a,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x6a,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_max_i32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x6e,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x6e,0xc9,0x6a,0x06,0x06,0xff + +# 
GFX1250: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_max_num_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x54,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x54,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_min_i32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x70,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x70,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_min_num_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x56,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x56,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x4e,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x4e,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_mul_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x46,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x46,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_sub_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x4a,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x4a,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_sub_nc_u32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x68,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x68,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_subrev_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x4c,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x4c,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, ttmp15, v255 :: v_dual_fmamk_f32 v6, ttmp15, 0xaf123456, v255 ; encoding: [0x7b,0xfe,0x45,0xc9,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7b,0xfe,0x45,0xc9,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, ttmp15, v255 :: v_dual_mov_b32 v6, vcc_lo ; encoding: [0x7b,0xfe,0x51,0xc9,0x6a,0x00,0x06,0xff] +0x7b,0xfe,0x51,0xc9,0x6a,0x00,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v1, v2 :: v_dual_add_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x48,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0x48,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: 
v_dual_sub_f32 v255, v1, v2 :: v_dual_add_nc_u32 v6, v255, v3 ; encoding: [0x01,0x05,0x60,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0x60,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v1, v2 :: v_dual_ashrrev_i32 v6, v255, v3 ; encoding: [0x01,0x05,0x6c,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0x6c,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v1, v2 :: v_dual_cndmask_b32 v6, v255, v3 ; encoding: [0x01,0x05,0x52,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0x52,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v1, v2 :: v_dual_fmaak_f32 v6, v255, v3, 0xaf123456 ; encoding: [0x01,0x05,0x42,0xc9,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0x05,0x42,0xc9,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, v1, v2 :: v_dual_fmac_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x40,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0x40,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v1, v2 :: v_dual_lshlrev_b32 v6, v255, v3 ; encoding: [0x01,0x05,0x62,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0x62,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v1, v2 :: v_dual_lshrrev_b32 v6, v255, v3 ; encoding: [0x01,0x05,0x6a,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0x6a,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v1, v2 :: v_dual_max_i32 v6, v255, v3 ; encoding: [0x01,0x05,0x6e,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0x6e,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v1, v2 :: v_dual_max_num_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x54,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0x54,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v1, v2 :: v_dual_min_i32 v6, v255, v3 ; encoding: [0x01,0x05,0x70,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0x70,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v1, v2 :: v_dual_min_num_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x56,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0x56,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v6, v255, v3 ; 
encoding: [0x01,0x05,0x4e,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0x4e,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v1, v2 :: v_dual_mul_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x46,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0x46,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v1, v2 :: v_dual_sub_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x4a,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0x4a,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v1, v2 :: v_dual_sub_nc_u32 v6, v255, v3 ; encoding: [0x01,0x05,0x68,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0x68,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v1, v2 :: v_dual_subrev_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x4c,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0x4c,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v1, v255 :: v_dual_fmamk_f32 v6, v255, 0xaf123456, v255 ; encoding: [0x01,0xff,0x45,0xc9,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x01,0xff,0x45,0xc9,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, v1, v255 :: v_dual_mov_b32 v6, v255 ; encoding: [0x01,0xff,0x51,0xc9,0xff,0x01,0x06,0xff] +0x01,0xff,0x51,0xc9,0xff,0x01,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v2, v2 :: v_dual_add_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x48,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0x48,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v2, v2 :: v_dual_add_nc_u32 v6, v3, v3 ; encoding: [0x02,0x05,0x60,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0x60,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v2, v2 :: v_dual_ashrrev_i32 v6, v3, v3 ; encoding: [0x02,0x05,0x6c,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0x6c,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v2, v2 :: v_dual_cndmask_b32 v6, v3, v3 ; encoding: [0x02,0x05,0x52,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0x52,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v2, v2 :: v_dual_fmaak_f32 v6, v3, v3, 0xaf123456 ; encoding: [0x02,0x05,0x42,0xc9,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] 
+0x02,0x05,0x42,0xc9,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, v2, v2 :: v_dual_fmac_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x40,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0x40,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v2, v2 :: v_dual_lshlrev_b32 v6, v3, v3 ; encoding: [0x02,0x05,0x62,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0x62,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v2, v2 :: v_dual_lshrrev_b32 v6, v3, v3 ; encoding: [0x02,0x05,0x6a,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0x6a,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v2, v2 :: v_dual_max_i32 v6, v3, v3 ; encoding: [0x02,0x05,0x6e,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0x6e,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v2, v2 :: v_dual_max_num_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x54,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0x54,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v2, v2 :: v_dual_min_i32 v6, v3, v3 ; encoding: [0x02,0x05,0x70,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0x70,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v2, v2 :: v_dual_min_num_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x56,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0x56,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x4e,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0x4e,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v2, v2 :: v_dual_mul_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x46,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0x46,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v2, v2 :: v_dual_sub_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x4a,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0x4a,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v2, v2 :: v_dual_sub_nc_u32 v6, v3, v3 ; encoding: [0x02,0x05,0x68,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0x68,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v2, v2 :: v_dual_subrev_f32 v6, v3, v3 ; 
encoding: [0x02,0x05,0x4c,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0x4c,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v2, v255 :: v_dual_fmamk_f32 v6, v3, 0xaf123456, v255 ; encoding: [0x02,0xff,0x45,0xc9,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x02,0xff,0x45,0xc9,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, v2, v255 :: v_dual_mov_b32 v6, v3 ; encoding: [0x02,0xff,0x51,0xc9,0x03,0x01,0x06,0xff] +0x02,0xff,0x51,0xc9,0x03,0x01,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v255, v2 :: v_dual_add_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x48,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0x48,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v255, v2 :: v_dual_add_nc_u32 v6, v2, v3 ; encoding: [0xff,0x05,0x60,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0x60,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v255, v2 :: v_dual_ashrrev_i32 v6, v2, v3 ; encoding: [0xff,0x05,0x6c,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0x6c,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v255, v2 :: v_dual_cndmask_b32 v6, v2, v3 ; encoding: [0xff,0x05,0x52,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0x52,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v255, v2 :: v_dual_fmaak_f32 v6, v2, v3, 0xaf123456 ; encoding: [0xff,0x05,0x42,0xc9,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x05,0x42,0xc9,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, v255, v2 :: v_dual_fmac_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x40,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0x40,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v255, v2 :: v_dual_lshlrev_b32 v6, v2, v3 ; encoding: [0xff,0x05,0x62,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0x62,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v255, v2 :: v_dual_lshrrev_b32 v6, v2, v3 ; encoding: [0xff,0x05,0x6a,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0x6a,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v255, v2 :: v_dual_max_i32 v6, v2, v3 ; encoding: 
[0xff,0x05,0x6e,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0x6e,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v255, v2 :: v_dual_max_num_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x54,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0x54,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v255, v2 :: v_dual_min_i32 v6, v2, v3 ; encoding: [0xff,0x05,0x70,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0x70,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v255, v2 :: v_dual_min_num_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x56,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0x56,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x4e,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0x4e,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v255, v2 :: v_dual_mul_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x46,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0x46,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v255, v2 :: v_dual_sub_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x4a,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0x4a,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v255, v2 :: v_dual_sub_nc_u32 v6, v2, v3 ; encoding: [0xff,0x05,0x68,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0x68,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v255, v2 :: v_dual_subrev_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x4c,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0x4c,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v255, v255 :: v_dual_fmamk_f32 v6, v2, 0xaf123456, v255 ; encoding: [0xff,0xff,0x45,0xc9,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0xff,0x45,0xc9,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, v255, v255 :: v_dual_mov_b32 v6, v2 ; encoding: [0xff,0xff,0x51,0xc9,0x02,0x01,0x06,0xff] +0xff,0xff,0x51,0xc9,0x02,0x01,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v3, v2 :: v_dual_add_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x48,0xc9,0x04,0x07,0x06,0xff] 
+0x03,0x05,0x48,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v3, v2 :: v_dual_add_nc_u32 v6, v4, v3 ; encoding: [0x03,0x05,0x60,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0x60,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v3, v2 :: v_dual_ashrrev_i32 v6, v4, v3 ; encoding: [0x03,0x05,0x6c,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0x6c,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v3, v2 :: v_dual_cndmask_b32 v6, v4, v3 ; encoding: [0x03,0x05,0x52,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0x52,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v3, v2 :: v_dual_fmaak_f32 v6, v4, v3, 0xaf123456 ; encoding: [0x03,0x05,0x42,0xc9,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x03,0x05,0x42,0xc9,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, v3, v2 :: v_dual_fmac_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x40,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0x40,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v3, v2 :: v_dual_lshlrev_b32 v6, v4, v3 ; encoding: [0x03,0x05,0x62,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0x62,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v3, v2 :: v_dual_lshrrev_b32 v6, v4, v3 ; encoding: [0x03,0x05,0x6a,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0x6a,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v3, v2 :: v_dual_max_i32 v6, v4, v3 ; encoding: [0x03,0x05,0x6e,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0x6e,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v3, v2 :: v_dual_max_num_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x54,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0x54,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v3, v2 :: v_dual_min_i32 v6, v4, v3 ; encoding: [0x03,0x05,0x70,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0x70,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v3, v2 :: v_dual_min_num_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x56,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0x56,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v3, v2 :: 
v_dual_mul_dx9_zero_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x4e,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0x4e,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v3, v2 :: v_dual_mul_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x46,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0x46,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v3, v2 :: v_dual_sub_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x4a,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0x4a,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v3, v2 :: v_dual_sub_nc_u32 v6, v4, v3 ; encoding: [0x03,0x05,0x68,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0x68,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v3, v2 :: v_dual_subrev_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x4c,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0x4c,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v3, v255 :: v_dual_fmamk_f32 v6, v4, 0xaf123456, v255 ; encoding: [0x03,0xff,0x45,0xc9,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x03,0xff,0x45,0xc9,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, v3, v255 :: v_dual_mov_b32 v6, v4 ; encoding: [0x03,0xff,0x51,0xc9,0x04,0x01,0x06,0xff] +0x03,0xff,0x51,0xc9,0x04,0x01,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_add_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x48,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0x48,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_add_nc_u32 v6, v1, v3 ; encoding: [0x04,0x05,0x60,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0x60,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_ashrrev_i32 v6, v1, v3 ; encoding: [0x04,0x05,0x6c,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0x6c,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_cndmask_b32 v6, v1, v3 ; encoding: [0x04,0x05,0x52,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0x52,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_fmaak_f32 v6, v1, v3, 0xaf123456 ; encoding: 
[0x04,0x05,0x42,0xc9,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x04,0x05,0x42,0xc9,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_fmac_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x40,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0x40,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_lshlrev_b32 v6, v1, v3 ; encoding: [0x04,0x05,0x62,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0x62,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_lshrrev_b32 v6, v1, v3 ; encoding: [0x04,0x05,0x6a,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0x6a,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_max_i32 v6, v1, v3 ; encoding: [0x04,0x05,0x6e,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0x6e,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_max_num_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x54,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0x54,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_min_i32 v6, v1, v3 ; encoding: [0x04,0x05,0x70,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0x70,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_min_num_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x56,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0x56,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x4e,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0x4e,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_mul_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x46,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0x46,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_sub_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x4a,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0x4a,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_sub_nc_u32 v6, v1, v3 ; encoding: [0x04,0x05,0x68,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0x68,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: 
v_dual_sub_f32 v255, v4, v2 :: v_dual_subrev_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x4c,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0x4c,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, v4, v255 :: v_dual_fmamk_f32 v6, v1, 0xaf123456, v255 ; encoding: [0x04,0xff,0x45,0xc9,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x04,0xff,0x45,0xc9,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, v4, v255 :: v_dual_mov_b32 v6, v1 ; encoding: [0x04,0xff,0x51,0xc9,0x01,0x01,0x06,0xff] +0x04,0xff,0x51,0xc9,0x01,0x01,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_add_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x48,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x48,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x60,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x60,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x6c,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x6c,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x52,0xc9,0x6b,0x06,0x06,0xff] +0x6b,0x04,0x52,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_fmaak_f32 v6, vcc_hi, v3, 0xaf123456 ; encoding: [0x6b,0x04,0x42,0xc9,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6b,0x04,0x42,0xc9,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x40,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x40,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x62,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x62,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x6a,0xc9,0x7e,0x06,0x06,0xff] 
+0x6b,0x04,0x6a,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_max_i32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x6e,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x6e,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x54,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x54,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_min_i32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x70,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x70,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x56,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x56,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x4e,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x4e,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_mul_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x46,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x46,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_sub_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x4a,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x4a,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x68,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x68,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x4c,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x4c,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, vcc_hi, v255 :: v_dual_fmamk_f32 v6, vcc_hi, 0xaf123456, v255 ; encoding: [0x6b,0xfe,0x45,0xc9,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6b,0xfe,0x45,0xc9,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, vcc_hi, v255 :: v_dual_mov_b32 v6, exec_lo ; encoding: 
[0x6b,0xfe,0x51,0xc9,0x7e,0x00,0x06,0xff] +0x6b,0xfe,0x51,0xc9,0x7e,0x00,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_add_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x48,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x48,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x60,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x60,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x6c,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x6c,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x52,0xc9,0x6a,0x06,0x06,0xff] +0x6a,0x04,0x52,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_fmaak_f32 v6, vcc_lo, v3, 0xaf123456 ; encoding: [0x6a,0x04,0x42,0xc9,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6a,0x04,0x42,0xc9,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x40,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x40,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x62,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x62,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x6a,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x6a,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_max_i32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x6e,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x6e,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x54,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x54,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_min_i32 v6, exec_hi, v3 ; 
encoding: [0x6a,0x04,0x70,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x70,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x56,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x56,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x4e,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x4e,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_mul_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x46,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x46,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_sub_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x4a,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x4a,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x68,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x68,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x4c,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x4c,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_sub_f32 v255, vcc_lo, v255 :: v_dual_fmamk_f32 v6, vcc_lo, 0xaf123456, v255 ; encoding: [0x6a,0xfe,0x45,0xc9,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6a,0xfe,0x45,0xc9,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v255, vcc_lo, v255 :: v_dual_mov_b32 v6, exec_hi ; encoding: [0x6a,0xfe,0x51,0xc9,0x7f,0x00,0x06,0xff] +0x6a,0xfe,0x51,0xc9,0x7f,0x00,0x06,0xff + +# GFX1250: v_dual_sub_f32 v6, null, v5 :: v_dual_add_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x48,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x48,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v6, null, v5 :: v_dual_add_nc_u32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x60,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] 
+0x7c,0x0a,0x60,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v6, null, v5 :: v_dual_ashrrev_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x6c,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x6c,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v6, null, v5 :: v_dual_cndmask_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x52,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x52,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v6, null, v5 :: v_dual_fmaak_f32 v255, 0xaf123456, v4, 0xaf123456 ; encoding: [0x7c,0x0a,0x42,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x42,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v6, null, v5 :: v_dual_fmac_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x40,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x40,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v6, null, v5 :: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x44,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x44,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v6, null, v5 :: v_dual_lshlrev_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x62,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x62,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v6, null, v5 :: v_dual_lshrrev_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x6a,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x6a,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v6, null, v5 :: v_dual_max_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x6e,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x6e,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v6, null, v5 :: v_dual_max_num_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x54,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] 
+0x7c,0x0a,0x54,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v6, null, v5 :: v_dual_min_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x70,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x70,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v6, null, v5 :: v_dual_min_num_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x56,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x56,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v6, null, v5 :: v_dual_mov_b32 v255, 0xaf123456 ; encoding: [0x7c,0x0a,0x50,0xc9,0xff,0x00,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x50,0xc9,0xff,0x00,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v6, null, v5 :: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x4e,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x4e,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v6, null, v5 :: v_dual_mul_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x46,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x46,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v6, null, v5 :: v_dual_sub_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x4a,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x4a,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v6, null, v5 :: v_dual_sub_nc_u32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x68,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x68,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_sub_f32 v6, null, v5 :: v_dual_subrev_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x4c,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x4c,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_add_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x88,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x88,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: 
v_dual_subrev_f32 v255, -1, v4 :: v_dual_add_nc_u32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xa0,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xa0,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_ashrrev_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xac,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xac,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_cndmask_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x92,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x92,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_fmaak_f32 v6, src_scc, v5, 0xaf123456 ; encoding: [0xc1,0x08,0x82,0xc9,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf] +0xc1,0x08,0x82,0xc9,0xfd,0x0a,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_fmac_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x80,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x80,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_fmamk_f32 v6, src_scc, 0xaf123456, v255 ; encoding: [0xc1,0x08,0x84,0xc9,0xfd,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xc1,0x08,0x84,0xc9,0xfd,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_lshlrev_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xa2,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xa2,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_lshrrev_b32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xaa,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xaa,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_max_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xae,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xae,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_max_num_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x94,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x94,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_min_i32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xb0,0xc9,0xfd,0x0a,0x06,0xff] 
+0xc1,0x08,0xb0,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_min_num_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x96,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x96,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_mov_b32 v6, src_scc ; encoding: [0xc1,0x08,0x90,0xc9,0xfd,0x00,0x06,0xff] +0xc1,0x08,0x90,0xc9,0xfd,0x00,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x8e,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x8e,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_mul_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x86,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x86,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_sub_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x8a,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x8a,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_sub_nc_u32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0xa8,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0xa8,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_subrev_f32 v6, src_scc, v5 ; encoding: [0xc1,0x08,0x8c,0xc9,0xfd,0x0a,0x06,0xff] +0xc1,0x08,0x8c,0xc9,0xfd,0x0a,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_add_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x88,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x88,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_add_nc_u32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xa0,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xa0,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xac,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xac,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_cndmask_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x92,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x92,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: 
v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_fmaak_f32 v6, 0.5, v2, 0xaf123456 ; encoding: [0xf0,0x06,0x82,0xc9,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf] +0xf0,0x06,0x82,0xc9,0xf0,0x04,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_fmac_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x80,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x80,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_fmamk_f32 v6, 0.5, 0xaf123456, v255 ; encoding: [0xf0,0x06,0x84,0xc9,0xf0,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xf0,0x06,0x84,0xc9,0xf0,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xa2,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xa2,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xaa,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xaa,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_max_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xae,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xae,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_max_num_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x94,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x94,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_min_i32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xb0,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xb0,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_min_num_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x96,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x96,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_mov_b32 v6, 0.5 ; encoding: [0xf0,0x06,0x90,0xc9,0xf0,0x00,0x06,0xff] +0xf0,0x06,0x90,0xc9,0xf0,0x00,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x8e,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x8e,0xc9,0xf0,0x04,0x06,0xff 
+ +# GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_mul_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x86,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x86,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_sub_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x8a,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x8a,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0xa8,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0xa8,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_subrev_f32 v6, 0.5, v2 ; encoding: [0xf0,0x06,0x8c,0xc9,0xf0,0x04,0x06,0xff] +0xf0,0x06,0x8c,0xc9,0xf0,0x04,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, 0xaf123456, v2 :: v_dual_add_f32 v6, null, v3 ; encoding: [0xff,0x04,0x88,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x88,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, 0xaf123456, v2 :: v_dual_add_nc_u32 v6, null, v3 ; encoding: [0xff,0x04,0xa0,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xa0,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, 0xaf123456, v2 :: v_dual_ashrrev_i32 v6, null, v3 ; encoding: [0xff,0x04,0xac,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xac,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, 0xaf123456, v2 :: v_dual_cndmask_b32 v6, null, v3 ; encoding: [0xff,0x04,0x92,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x92,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, 0xaf123456, v2 :: v_dual_fmaak_f32 v6, null, v3, 0xaf123456 ; encoding: [0xff,0x04,0x82,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x82,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, 0xaf123456, v2 :: v_dual_fmac_f32 v6, null, v3 ; encoding: [0xff,0x04,0x80,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] 
+0xff,0x04,0x80,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, 0xaf123456, v2 :: v_dual_lshlrev_b32 v6, null, v3 ; encoding: [0xff,0x04,0xa2,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xa2,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, 0xaf123456, v2 :: v_dual_lshrrev_b32 v6, null, v3 ; encoding: [0xff,0x04,0xaa,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xaa,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, 0xaf123456, v2 :: v_dual_max_i32 v6, null, v3 ; encoding: [0xff,0x04,0xae,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xae,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, 0xaf123456, v2 :: v_dual_max_num_f32 v6, null, v3 ; encoding: [0xff,0x04,0x94,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x94,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, 0xaf123456, v2 :: v_dual_min_i32 v6, null, v3 ; encoding: [0xff,0x04,0xb0,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xb0,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, 0xaf123456, v2 :: v_dual_min_num_f32 v6, null, v3 ; encoding: [0xff,0x04,0x96,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x96,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, 0xaf123456, v2 :: v_dual_mul_dx9_zero_f32 v6, null, v3 ; encoding: [0xff,0x04,0x8e,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x8e,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, 0xaf123456, v2 :: v_dual_mul_f32 v6, null, v3 ; encoding: [0xff,0x04,0x86,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x86,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, 0xaf123456, v2 :: v_dual_sub_f32 v6, null, v3 ; encoding: [0xff,0x04,0x8a,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] 
+0xff,0x04,0x8a,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, 0xaf123456, v2 :: v_dual_sub_nc_u32 v6, null, v3 ; encoding: [0xff,0x04,0xa8,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0xa8,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, 0xaf123456, v2 :: v_dual_subrev_f32 v6, null, v3 ; encoding: [0xff,0x04,0x8c,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x04,0x8c,0xc9,0x7c,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, 0xaf123456, v255 :: v_dual_fmamk_f32 v6, null, 0xaf123456, v255 ; encoding: [0xff,0xfe,0x85,0xc9,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0xfe,0x85,0xc9,0x7c,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, 0xaf123456, v255 :: v_dual_mov_b32 v6, null ; encoding: [0xff,0xfe,0x91,0xc9,0x7c,0x00,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0xfe,0x91,0xc9,0x7c,0x00,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_add_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x88,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x88,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xa0,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xa0,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xac,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xac,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v6, exec_hi, v3 ; encoding: [0x7f,0x04,0x92,0xc9,0x7f,0x06,0x06,0xff] +0x7f,0x04,0x92,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_fmaak_f32 v6, exec_hi, v3, 0xaf123456 ; encoding: [0x7f,0x04,0x82,0xc9,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7f,0x04,0x82,0xc9,0x7f,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_fmac_f32 v6, vcc_hi, v3 
; encoding: [0x7f,0x04,0x80,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x80,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xa2,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xa2,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xaa,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xaa,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_max_i32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xae,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xae,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_max_num_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x94,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x94,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_min_i32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xb0,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xb0,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_min_num_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x96,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x96,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x8e,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x8e,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_mul_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x86,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x86,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_sub_f32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0x8a,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x8a,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v6, vcc_hi, v3 ; encoding: [0x7f,0x04,0xa8,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0xa8,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_subrev_f32 v6, vcc_hi, v3 ; 
encoding: [0x7f,0x04,0x8c,0xc9,0x6b,0x06,0x06,0xff] +0x7f,0x04,0x8c,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, exec_hi, v255 :: v_dual_fmamk_f32 v6, exec_hi, 0xaf123456, v255 ; encoding: [0x7f,0xfe,0x85,0xc9,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7f,0xfe,0x85,0xc9,0x7f,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, exec_hi, v255 :: v_dual_mov_b32 v6, vcc_hi ; encoding: [0x7f,0xfe,0x91,0xc9,0x6b,0x00,0x06,0xff] +0x7f,0xfe,0x91,0xc9,0x6b,0x00,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_add_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x88,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x88,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xa0,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xa0,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xac,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xac,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v6, exec_lo, v3 ; encoding: [0x7e,0x04,0x92,0xc9,0x7e,0x06,0x06,0xff] +0x7e,0x04,0x92,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_fmaak_f32 v6, exec_lo, v3, 0xaf123456 ; encoding: [0x7e,0x04,0x82,0xc9,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7e,0x04,0x82,0xc9,0x7e,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_fmac_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x80,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x80,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xa2,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xa2,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_lshrrev_b32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xaa,0xc9,0x7b,0x06,0x06,0xff] 
+0x7e,0x04,0xaa,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_max_i32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xae,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xae,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_max_num_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x94,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x94,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_min_i32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xb0,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xb0,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_min_num_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x96,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x96,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x8e,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x8e,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_mul_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x86,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x86,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_sub_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x8a,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x8a,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_sub_nc_u32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0xa8,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0xa8,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_subrev_f32 v6, ttmp15, v3 ; encoding: [0x7e,0x04,0x8c,0xc9,0x7b,0x06,0x06,0xff] +0x7e,0x04,0x8c,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, exec_lo, v255 :: v_dual_fmamk_f32 v6, exec_lo, 0xaf123456, v255 ; encoding: [0x7e,0xfe,0x85,0xc9,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7e,0xfe,0x85,0xc9,0x7e,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, exec_lo, v255 :: v_dual_mov_b32 v6, ttmp15 ; encoding: 
[0x7e,0xfe,0x91,0xc9,0x7b,0x00,0x06,0xff] +0x7e,0xfe,0x91,0xc9,0x7b,0x00,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, m0, v2 :: v_dual_add_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x88,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x88,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, m0, v2 :: v_dual_add_nc_u32 v6, m0, v3 ; encoding: [0x7d,0x04,0xa0,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xa0,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, m0, v2 :: v_dual_ashrrev_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0xac,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xac,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, m0, v2 :: v_dual_cndmask_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0x92,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x92,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, m0, v2 :: v_dual_fmaak_f32 v6, m0, v3, 0xaf123456 ; encoding: [0x7d,0x04,0x82,0xc9,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7d,0x04,0x82,0xc9,0x7d,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, m0, v2 :: v_dual_fmac_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x80,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x80,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, m0, v2 :: v_dual_lshlrev_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0xa2,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xa2,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, m0, v2 :: v_dual_lshrrev_b32 v6, m0, v3 ; encoding: [0x7d,0x04,0xaa,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xaa,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, m0, v2 :: v_dual_max_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0xae,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xae,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, m0, v2 :: v_dual_max_num_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x94,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x94,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, m0, v2 :: v_dual_min_i32 v6, m0, v3 ; encoding: [0x7d,0x04,0xb0,0xc9,0x7d,0x06,0x06,0xff] 
+0x7d,0x04,0xb0,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, m0, v2 :: v_dual_min_num_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x96,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x96,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x8e,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x8e,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, m0, v2 :: v_dual_mul_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x86,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x86,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, m0, v2 :: v_dual_sub_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x8a,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x8a,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, m0, v2 :: v_dual_sub_nc_u32 v6, m0, v3 ; encoding: [0x7d,0x04,0xa8,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0xa8,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, m0, v2 :: v_dual_subrev_f32 v6, m0, v3 ; encoding: [0x7d,0x04,0x8c,0xc9,0x7d,0x06,0x06,0xff] +0x7d,0x04,0x8c,0xc9,0x7d,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, m0, v255 :: v_dual_fmamk_f32 v6, m0, 0xaf123456, v255 ; encoding: [0x7d,0xfe,0x85,0xc9,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7d,0xfe,0x85,0xc9,0x7d,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, m0, v255 :: v_dual_mov_b32 v6, m0 ; encoding: [0x7d,0xfe,0x91,0xc9,0x7d,0x00,0x06,0xff] +0x7d,0xfe,0x91,0xc9,0x7d,0x00,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, s1, v2 :: v_dual_add_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x88,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0x88,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, s1, v2 :: v_dual_add_nc_u32 v6, s105, v3 ; encoding: [0x01,0x04,0xa0,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0xa0,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, s1, v2 :: v_dual_ashrrev_i32 v6, s105, v3 ; encoding: [0x01,0x04,0xac,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0xac,0xc9,0x69,0x06,0x06,0xff + +# 
GFX1250: v_dual_subrev_f32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s1, v3 ; encoding: [0x01,0x04,0x92,0xc9,0x01,0x06,0x06,0xff] +0x01,0x04,0x92,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, s1, v2 :: v_dual_fmaak_f32 v6, s1, v3, 0xaf123456 ; encoding: [0x01,0x04,0x82,0xc9,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0x04,0x82,0xc9,0x01,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, s1, v2 :: v_dual_fmac_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x80,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0x80,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, s1, v2 :: v_dual_lshlrev_b32 v6, s105, v3 ; encoding: [0x01,0x04,0xa2,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0xa2,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, s1, v2 :: v_dual_lshrrev_b32 v6, s105, v3 ; encoding: [0x01,0x04,0xaa,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0xaa,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, s1, v2 :: v_dual_max_i32 v6, s105, v3 ; encoding: [0x01,0x04,0xae,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0xae,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, s1, v2 :: v_dual_max_num_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x94,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0x94,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, s1, v2 :: v_dual_min_i32 v6, s105, v3 ; encoding: [0x01,0x04,0xb0,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0xb0,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, s1, v2 :: v_dual_min_num_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x96,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0x96,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x8e,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0x8e,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, s1, v2 :: v_dual_mul_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x86,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0x86,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, s1, 
v2 :: v_dual_sub_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x8a,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0x8a,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, s1, v2 :: v_dual_sub_nc_u32 v6, s105, v3 ; encoding: [0x01,0x04,0xa8,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0xa8,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, s1, v2 :: v_dual_subrev_f32 v6, s105, v3 ; encoding: [0x01,0x04,0x8c,0xc9,0x69,0x06,0x06,0xff] +0x01,0x04,0x8c,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, s1, v255 :: v_dual_fmamk_f32 v6, s1, 0xaf123456, v255 ; encoding: [0x01,0xfe,0x85,0xc9,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x01,0xfe,0x85,0xc9,0x01,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, s1, v255 :: v_dual_mov_b32 v6, s105 ; encoding: [0x01,0xfe,0x91,0xc9,0x69,0x00,0x06,0xff] +0x01,0xfe,0x91,0xc9,0x69,0x00,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, s105, v2 :: v_dual_add_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x88,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0x88,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, s105, v2 :: v_dual_add_nc_u32 v6, s1, v3 ; encoding: [0x69,0x04,0xa0,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0xa0,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, s105, v2 :: v_dual_ashrrev_i32 v6, s1, v3 ; encoding: [0x69,0x04,0xac,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0xac,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, s105, v2 :: v_dual_cndmask_b32 v6, s105, v3 ; encoding: [0x69,0x04,0x92,0xc9,0x69,0x06,0x06,0xff] +0x69,0x04,0x92,0xc9,0x69,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, s105, v2 :: v_dual_fmaak_f32 v6, s105, v3, 0xaf123456 ; encoding: [0x69,0x04,0x82,0xc9,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x69,0x04,0x82,0xc9,0x69,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, s105, v2 :: v_dual_fmac_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x80,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0x80,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: 
v_dual_subrev_f32 v255, s105, v2 :: v_dual_lshlrev_b32 v6, s1, v3 ; encoding: [0x69,0x04,0xa2,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0xa2,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, s105, v2 :: v_dual_lshrrev_b32 v6, s1, v3 ; encoding: [0x69,0x04,0xaa,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0xaa,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, s105, v2 :: v_dual_max_i32 v6, s1, v3 ; encoding: [0x69,0x04,0xae,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0xae,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, s105, v2 :: v_dual_max_num_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x94,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0x94,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, s105, v2 :: v_dual_min_i32 v6, s1, v3 ; encoding: [0x69,0x04,0xb0,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0xb0,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, s105, v2 :: v_dual_min_num_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x96,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0x96,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x8e,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0x8e,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, s105, v2 :: v_dual_mul_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x86,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0x86,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, s105, v2 :: v_dual_sub_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x8a,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0x8a,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, s105, v2 :: v_dual_sub_nc_u32 v6, s1, v3 ; encoding: [0x69,0x04,0xa8,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0xa8,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, s105, v2 :: v_dual_subrev_f32 v6, s1, v3 ; encoding: [0x69,0x04,0x8c,0xc9,0x01,0x06,0x06,0xff] +0x69,0x04,0x8c,0xc9,0x01,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, s105, v255 :: v_dual_fmamk_f32 v6, s105, 0xaf123456, v255 ; 
encoding: [0x69,0xfe,0x85,0xc9,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x69,0xfe,0x85,0xc9,0x69,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, s105, v255 :: v_dual_mov_b32 v6, s1 ; encoding: [0x69,0xfe,0x91,0xc9,0x01,0x00,0x06,0xff] +0x69,0xfe,0x91,0xc9,0x01,0x00,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_add_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x88,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x88,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_add_nc_u32 v6, -1, v3 ; encoding: [0xfd,0x04,0xa0,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xa0,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0xac,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xac,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_cndmask_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0x92,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x92,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_fmaak_f32 v6, -1, v3, 0xaf123456 ; encoding: [0xfd,0x04,0x82,0xc9,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0xfd,0x04,0x82,0xc9,0xc1,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_fmac_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x80,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x80,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0xa2,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xa2,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v6, -1, v3 ; encoding: [0xfd,0x04,0xaa,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xaa,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_max_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0xae,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xae,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, src_scc, v2 :: 
v_dual_max_num_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x94,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x94,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_min_i32 v6, -1, v3 ; encoding: [0xfd,0x04,0xb0,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xb0,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_min_num_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x96,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x96,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x8e,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x8e,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_mul_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x86,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x86,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_sub_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x8a,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x8a,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v6, -1, v3 ; encoding: [0xfd,0x04,0xa8,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0xa8,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_subrev_f32 v6, -1, v3 ; encoding: [0xfd,0x04,0x8c,0xc9,0xc1,0x06,0x06,0xff] +0xfd,0x04,0x8c,0xc9,0xc1,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, src_scc, v255 :: v_dual_fmamk_f32 v6, -1, 0xaf123456, v255 ; encoding: [0xfd,0xfe,0x85,0xc9,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0xfd,0xfe,0x85,0xc9,0xc1,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, src_scc, v255 :: v_dual_mov_b32 v6, -1 ; encoding: [0xfd,0xfe,0x91,0xc9,0xc1,0x00,0x06,0xff] +0xfd,0xfe,0x91,0xc9,0xc1,0x00,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_add_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x88,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x88,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, ttmp15, v2 :: 
v_dual_add_nc_u32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xa0,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xa0,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xac,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xac,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v6, ttmp15, v3 ; encoding: [0x7b,0x04,0x92,0xc9,0x7b,0x06,0x06,0xff] +0x7b,0x04,0x92,0xc9,0x7b,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_fmaak_f32 v6, ttmp15, v3, 0xaf123456 ; encoding: [0x7b,0x04,0x82,0xc9,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x7b,0x04,0x82,0xc9,0x7b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_fmac_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x80,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x80,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xa2,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xa2,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xaa,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xaa,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_max_i32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xae,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xae,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_max_num_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x94,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x94,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_min_i32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xb0,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xb0,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_min_num_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x96,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x96,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: 
v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x8e,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x8e,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_mul_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x86,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x86,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_sub_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x8a,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x8a,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_sub_nc_u32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0xa8,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0xa8,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_subrev_f32 v6, vcc_lo, v3 ; encoding: [0x7b,0x04,0x8c,0xc9,0x6a,0x06,0x06,0xff] +0x7b,0x04,0x8c,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, ttmp15, v255 :: v_dual_fmamk_f32 v6, ttmp15, 0xaf123456, v255 ; encoding: [0x7b,0xfe,0x85,0xc9,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x7b,0xfe,0x85,0xc9,0x7b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, ttmp15, v255 :: v_dual_mov_b32 v6, vcc_lo ; encoding: [0x7b,0xfe,0x91,0xc9,0x6a,0x00,0x06,0xff] +0x7b,0xfe,0x91,0xc9,0x6a,0x00,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v1, v2 :: v_dual_add_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x88,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0x88,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v1, v2 :: v_dual_add_nc_u32 v6, v255, v3 ; encoding: [0x01,0x05,0xa0,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0xa0,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v1, v2 :: v_dual_ashrrev_i32 v6, v255, v3 ; encoding: [0x01,0x05,0xac,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0xac,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v1, v2 :: v_dual_cndmask_b32 v6, v255, v3 ; encoding: [0x01,0x05,0x92,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0x92,0xc9,0xff,0x07,0x06,0xff + +# 
GFX1250: v_dual_subrev_f32 v255, v1, v2 :: v_dual_fmaak_f32 v6, v255, v3, 0xaf123456 ; encoding: [0x01,0x05,0x82,0xc9,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x01,0x05,0x82,0xc9,0xff,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, v1, v2 :: v_dual_fmac_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x80,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0x80,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v1, v2 :: v_dual_lshlrev_b32 v6, v255, v3 ; encoding: [0x01,0x05,0xa2,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0xa2,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v1, v2 :: v_dual_lshrrev_b32 v6, v255, v3 ; encoding: [0x01,0x05,0xaa,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0xaa,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v1, v2 :: v_dual_max_i32 v6, v255, v3 ; encoding: [0x01,0x05,0xae,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0xae,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v1, v2 :: v_dual_max_num_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x94,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0x94,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v1, v2 :: v_dual_min_i32 v6, v255, v3 ; encoding: [0x01,0x05,0xb0,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0xb0,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v1, v2 :: v_dual_min_num_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x96,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0x96,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x8e,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0x8e,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v1, v2 :: v_dual_mul_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x86,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0x86,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v1, v2 :: v_dual_sub_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x8a,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0x8a,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v1, 
v2 :: v_dual_sub_nc_u32 v6, v255, v3 ; encoding: [0x01,0x05,0xa8,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0xa8,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v1, v2 :: v_dual_subrev_f32 v6, v255, v3 ; encoding: [0x01,0x05,0x8c,0xc9,0xff,0x07,0x06,0xff] +0x01,0x05,0x8c,0xc9,0xff,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v1, v255 :: v_dual_fmamk_f32 v6, v255, 0xaf123456, v255 ; encoding: [0x01,0xff,0x85,0xc9,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x01,0xff,0x85,0xc9,0xff,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, v1, v255 :: v_dual_mov_b32 v6, v255 ; encoding: [0x01,0xff,0x91,0xc9,0xff,0x01,0x06,0xff] +0x01,0xff,0x91,0xc9,0xff,0x01,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v2, v2 :: v_dual_add_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x88,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0x88,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v2, v2 :: v_dual_add_nc_u32 v6, v3, v3 ; encoding: [0x02,0x05,0xa0,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0xa0,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v2, v2 :: v_dual_ashrrev_i32 v6, v3, v3 ; encoding: [0x02,0x05,0xac,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0xac,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v2, v2 :: v_dual_cndmask_b32 v6, v3, v3 ; encoding: [0x02,0x05,0x92,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0x92,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v2, v2 :: v_dual_fmaak_f32 v6, v3, v3, 0xaf123456 ; encoding: [0x02,0x05,0x82,0xc9,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x02,0x05,0x82,0xc9,0x03,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, v2, v2 :: v_dual_fmac_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x80,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0x80,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v2, v2 :: v_dual_lshlrev_b32 v6, v3, v3 ; encoding: [0x02,0x05,0xa2,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0xa2,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v2, 
v2 :: v_dual_lshrrev_b32 v6, v3, v3 ; encoding: [0x02,0x05,0xaa,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0xaa,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v2, v2 :: v_dual_max_i32 v6, v3, v3 ; encoding: [0x02,0x05,0xae,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0xae,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v2, v2 :: v_dual_max_num_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x94,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0x94,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v2, v2 :: v_dual_min_i32 v6, v3, v3 ; encoding: [0x02,0x05,0xb0,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0xb0,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v2, v2 :: v_dual_min_num_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x96,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0x96,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x8e,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0x8e,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v2, v2 :: v_dual_mul_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x86,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0x86,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v2, v2 :: v_dual_sub_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x8a,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0x8a,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v2, v2 :: v_dual_sub_nc_u32 v6, v3, v3 ; encoding: [0x02,0x05,0xa8,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0xa8,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v2, v2 :: v_dual_subrev_f32 v6, v3, v3 ; encoding: [0x02,0x05,0x8c,0xc9,0x03,0x07,0x06,0xff] +0x02,0x05,0x8c,0xc9,0x03,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v2, v255 :: v_dual_fmamk_f32 v6, v3, 0xaf123456, v255 ; encoding: [0x02,0xff,0x85,0xc9,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x02,0xff,0x85,0xc9,0x03,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, v2, v255 :: v_dual_mov_b32 v6, v3 ; encoding: 
[0x02,0xff,0x91,0xc9,0x03,0x01,0x06,0xff] +0x02,0xff,0x91,0xc9,0x03,0x01,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v255, v2 :: v_dual_add_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x88,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0x88,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v255, v2 :: v_dual_add_nc_u32 v6, v2, v3 ; encoding: [0xff,0x05,0xa0,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0xa0,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v255, v2 :: v_dual_ashrrev_i32 v6, v2, v3 ; encoding: [0xff,0x05,0xac,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0xac,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v255, v2 :: v_dual_cndmask_b32 v6, v2, v3 ; encoding: [0xff,0x05,0x92,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0x92,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v255, v2 :: v_dual_fmaak_f32 v6, v2, v3, 0xaf123456 ; encoding: [0xff,0x05,0x82,0xc9,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0xff,0x05,0x82,0xc9,0x02,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, v255, v2 :: v_dual_fmac_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x80,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0x80,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v255, v2 :: v_dual_lshlrev_b32 v6, v2, v3 ; encoding: [0xff,0x05,0xa2,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0xa2,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v255, v2 :: v_dual_lshrrev_b32 v6, v2, v3 ; encoding: [0xff,0x05,0xaa,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0xaa,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v255, v2 :: v_dual_max_i32 v6, v2, v3 ; encoding: [0xff,0x05,0xae,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0xae,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v255, v2 :: v_dual_max_num_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x94,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0x94,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v255, v2 :: v_dual_min_i32 v6, v2, v3 ; encoding: 
[0xff,0x05,0xb0,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0xb0,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v255, v2 :: v_dual_min_num_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x96,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0x96,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x8e,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0x8e,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v255, v2 :: v_dual_mul_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x86,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0x86,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v255, v2 :: v_dual_sub_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x8a,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0x8a,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v255, v2 :: v_dual_sub_nc_u32 v6, v2, v3 ; encoding: [0xff,0x05,0xa8,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0xa8,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v255, v2 :: v_dual_subrev_f32 v6, v2, v3 ; encoding: [0xff,0x05,0x8c,0xc9,0x02,0x07,0x06,0xff] +0xff,0x05,0x8c,0xc9,0x02,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v255, v255 :: v_dual_fmamk_f32 v6, v2, 0xaf123456, v255 ; encoding: [0xff,0xff,0x85,0xc9,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0xff,0xff,0x85,0xc9,0x02,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, v255, v255 :: v_dual_mov_b32 v6, v2 ; encoding: [0xff,0xff,0x91,0xc9,0x02,0x01,0x06,0xff] +0xff,0xff,0x91,0xc9,0x02,0x01,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v3, v2 :: v_dual_add_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x88,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0x88,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v3, v2 :: v_dual_add_nc_u32 v6, v4, v3 ; encoding: [0x03,0x05,0xa0,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0xa0,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v3, v2 :: v_dual_ashrrev_i32 v6, v4, v3 ; encoding: [0x03,0x05,0xac,0xc9,0x04,0x07,0x06,0xff] 
+0x03,0x05,0xac,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v3, v2 :: v_dual_cndmask_b32 v6, v4, v3 ; encoding: [0x03,0x05,0x92,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0x92,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v3, v2 :: v_dual_fmaak_f32 v6, v4, v3, 0xaf123456 ; encoding: [0x03,0x05,0x82,0xc9,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x03,0x05,0x82,0xc9,0x04,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, v3, v2 :: v_dual_fmac_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x80,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0x80,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v3, v2 :: v_dual_lshlrev_b32 v6, v4, v3 ; encoding: [0x03,0x05,0xa2,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0xa2,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v3, v2 :: v_dual_lshrrev_b32 v6, v4, v3 ; encoding: [0x03,0x05,0xaa,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0xaa,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v3, v2 :: v_dual_max_i32 v6, v4, v3 ; encoding: [0x03,0x05,0xae,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0xae,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v3, v2 :: v_dual_max_num_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x94,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0x94,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v3, v2 :: v_dual_min_i32 v6, v4, v3 ; encoding: [0x03,0x05,0xb0,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0xb0,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v3, v2 :: v_dual_min_num_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x96,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0x96,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x8e,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0x8e,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v3, v2 :: v_dual_mul_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x86,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0x86,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: 
v_dual_subrev_f32 v255, v3, v2 :: v_dual_sub_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x8a,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0x8a,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v3, v2 :: v_dual_sub_nc_u32 v6, v4, v3 ; encoding: [0x03,0x05,0xa8,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0xa8,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v3, v2 :: v_dual_subrev_f32 v6, v4, v3 ; encoding: [0x03,0x05,0x8c,0xc9,0x04,0x07,0x06,0xff] +0x03,0x05,0x8c,0xc9,0x04,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v3, v255 :: v_dual_fmamk_f32 v6, v4, 0xaf123456, v255 ; encoding: [0x03,0xff,0x85,0xc9,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x03,0xff,0x85,0xc9,0x04,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, v3, v255 :: v_dual_mov_b32 v6, v4 ; encoding: [0x03,0xff,0x91,0xc9,0x04,0x01,0x06,0xff] +0x03,0xff,0x91,0xc9,0x04,0x01,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_add_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x88,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0x88,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_add_nc_u32 v6, v1, v3 ; encoding: [0x04,0x05,0xa0,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0xa0,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_ashrrev_i32 v6, v1, v3 ; encoding: [0x04,0x05,0xac,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0xac,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_cndmask_b32 v6, v1, v3 ; encoding: [0x04,0x05,0x92,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0x92,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_fmaak_f32 v6, v1, v3, 0xaf123456 ; encoding: [0x04,0x05,0x82,0xc9,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf] +0x04,0x05,0x82,0xc9,0x01,0x07,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_fmac_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x80,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0x80,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: 
v_dual_subrev_f32 v255, v4, v2 :: v_dual_lshlrev_b32 v6, v1, v3 ; encoding: [0x04,0x05,0xa2,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0xa2,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_lshrrev_b32 v6, v1, v3 ; encoding: [0x04,0x05,0xaa,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0xaa,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_max_i32 v6, v1, v3 ; encoding: [0x04,0x05,0xae,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0xae,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_max_num_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x94,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0x94,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_min_i32 v6, v1, v3 ; encoding: [0x04,0x05,0xb0,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0xb0,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_min_num_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x96,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0x96,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x8e,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0x8e,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_mul_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x86,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0x86,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_sub_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x8a,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0x8a,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_sub_nc_u32 v6, v1, v3 ; encoding: [0x04,0x05,0xa8,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0xa8,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_subrev_f32 v6, v1, v3 ; encoding: [0x04,0x05,0x8c,0xc9,0x01,0x07,0x06,0xff] +0x04,0x05,0x8c,0xc9,0x01,0x07,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, v4, v255 :: v_dual_fmamk_f32 v6, v1, 0xaf123456, v255 ; encoding: 
[0x04,0xff,0x85,0xc9,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf] +0x04,0xff,0x85,0xc9,0x01,0xff,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, v4, v255 :: v_dual_mov_b32 v6, v1 ; encoding: [0x04,0xff,0x91,0xc9,0x01,0x01,0x06,0xff] +0x04,0xff,0x91,0xc9,0x01,0x01,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_add_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x88,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x88,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xa0,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xa0,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xac,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xac,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v6, vcc_hi, v3 ; encoding: [0x6b,0x04,0x92,0xc9,0x6b,0x06,0x06,0xff] +0x6b,0x04,0x92,0xc9,0x6b,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_fmaak_f32 v6, vcc_hi, v3, 0xaf123456 ; encoding: [0x6b,0x04,0x82,0xc9,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6b,0x04,0x82,0xc9,0x6b,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x80,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x80,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xa2,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xa2,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xaa,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xaa,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_max_i32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xae,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xae,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 
v255, vcc_hi, v2 :: v_dual_max_num_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x94,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x94,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_min_i32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xb0,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xb0,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x96,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x96,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x8e,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x8e,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_mul_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x86,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x86,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_sub_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x8a,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x8a,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0xa8,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0xa8,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v6, exec_lo, v3 ; encoding: [0x6b,0x04,0x8c,0xc9,0x7e,0x06,0x06,0xff] +0x6b,0x04,0x8c,0xc9,0x7e,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, vcc_hi, v255 :: v_dual_fmamk_f32 v6, vcc_hi, 0xaf123456, v255 ; encoding: [0x6b,0xfe,0x85,0xc9,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6b,0xfe,0x85,0xc9,0x6b,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, vcc_hi, v255 :: v_dual_mov_b32 v6, exec_lo ; encoding: [0x6b,0xfe,0x91,0xc9,0x7e,0x00,0x06,0xff] +0x6b,0xfe,0x91,0xc9,0x7e,0x00,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_add_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x88,0xc9,0x7f,0x06,0x06,0xff] 
+0x6a,0x04,0x88,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xa0,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xa0,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xac,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xac,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v6, vcc_lo, v3 ; encoding: [0x6a,0x04,0x92,0xc9,0x6a,0x06,0x06,0xff] +0x6a,0x04,0x92,0xc9,0x6a,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_fmaak_f32 v6, vcc_lo, v3, 0xaf123456 ; encoding: [0x6a,0x04,0x82,0xc9,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf] +0x6a,0x04,0x82,0xc9,0x6a,0x06,0x06,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x80,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x80,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xa2,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xa2,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xaa,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xaa,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_max_i32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xae,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xae,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x94,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x94,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_min_i32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xb0,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xb0,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v6, exec_hi, v3 ; encoding: 
[0x6a,0x04,0x96,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x96,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x8e,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x8e,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_mul_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x86,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x86,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_sub_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x8a,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x8a,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0xa8,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0xa8,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v6, exec_hi, v3 ; encoding: [0x6a,0x04,0x8c,0xc9,0x7f,0x06,0x06,0xff] +0x6a,0x04,0x8c,0xc9,0x7f,0x06,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v255, vcc_lo, v255 :: v_dual_fmamk_f32 v6, vcc_lo, 0xaf123456, v255 ; encoding: [0x6a,0xfe,0x85,0xc9,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf] +0x6a,0xfe,0x85,0xc9,0x6a,0xfe,0x07,0xff,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v255, vcc_lo, v255 :: v_dual_mov_b32 v6, exec_hi ; encoding: [0x6a,0xfe,0x91,0xc9,0x7f,0x00,0x06,0xff] +0x6a,0xfe,0x91,0xc9,0x7f,0x00,0x06,0xff + +# GFX1250: v_dual_subrev_f32 v6, null, v5 :: v_dual_add_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x88,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x88,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v6, null, v5 :: v_dual_add_nc_u32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xa0,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xa0,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v6, null, v5 :: v_dual_ashrrev_i32 v255, 0xaf123456, v4 ; encoding: 
[0x7c,0x0a,0xac,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xac,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v6, null, v5 :: v_dual_cndmask_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x92,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x92,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v6, null, v5 :: v_dual_fmaak_f32 v255, 0xaf123456, v4, 0xaf123456 ; encoding: [0x7c,0x0a,0x82,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x82,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v6, null, v5 :: v_dual_fmac_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x80,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x80,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v6, null, v5 :: v_dual_fmamk_f32 v255, 0xaf123456, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x84,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x84,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v6, null, v5 :: v_dual_lshlrev_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xa2,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xa2,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v6, null, v5 :: v_dual_lshrrev_b32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xaa,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xaa,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v6, null, v5 :: v_dual_max_i32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xae,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xae,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v6, null, v5 :: v_dual_max_num_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x94,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x94,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v6, null, v5 :: v_dual_min_i32 v255, 
0xaf123456, v4 ; encoding: [0x7c,0x0a,0xb0,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xb0,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v6, null, v5 :: v_dual_min_num_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x96,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x96,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v6, null, v5 :: v_dual_mov_b32 v255, 0xaf123456 ; encoding: [0x7c,0x0a,0x90,0xc9,0xff,0x00,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x90,0xc9,0xff,0x00,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v6, null, v5 :: v_dual_mul_dx9_zero_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x8e,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x8e,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v6, null, v5 :: v_dual_mul_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x86,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x86,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v6, null, v5 :: v_dual_sub_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x8a,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x8a,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v6, null, v5 :: v_dual_sub_nc_u32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0xa8,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0xa8,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf + +# GFX1250: v_dual_subrev_f32 v6, null, v5 :: v_dual_subrev_f32 v255, 0xaf123456, v4 ; encoding: [0x7c,0x0a,0x8c,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf] +0x7c,0x0a,0x8c,0xc9,0xff,0x08,0xfe,0x06,0x56,0x34,0x12,0xaf diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vopd3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vopd3.txt new file mode 100644 index 0000000000000..5fc7152d19ec0 --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vopd3.txt @@ -0,0 +1,14278 @@ +# RUN: llvm-mc 
-triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding < %s | FileCheck %s -strict-whitespace --check-prefix=GFX1250 + +# GFX1250: v_dual_add_f32 v0, -v1, v2 :: v_dual_add_nc_u32 v5, v6, v7 ; encoding: [0x01,0x01,0x11,0xcf,0x06,0x03,0x02,0x00,0x00,0x07,0x00,0x05] +0x01,0x01,0x11,0xcf,0x06,0x03,0x02,0x00,0x00,0x07,0x00,0x05 + +# GFX1250: v_dual_add_f32 v0, v1, v2 :: v_dual_add_f32 v5, -s6, v7 ; encoding: [0x01,0x41,0x10,0xcf,0x06,0x10,0x02,0x00,0x00,0x07,0x00,0x05] +0x01,0x41,0x10,0xcf,0x06,0x10,0x02,0x00,0x00,0x07,0x00,0x05 + +# GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_add_f32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x10,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x40,0x10,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_add_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x11,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x00,0x11,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_ashrrev_i32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x11,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x60,0x11,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo ; encoding: [0xc1,0x90,0x10,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x6a,0x07] +0xc1,0x90,0x10,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x6a,0x07 + +# GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_fmac_f32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x10,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x00,0x10,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_lshlrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x10,0x11,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x10,0x11,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_lshrrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x11,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] 
+0xc1,0x50,0x11,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_max_i32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x11,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x70,0x11,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_max_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xa0,0x10,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0xa0,0x10,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_min_i32 v7, src_scc, v5 ; encoding: [0xc1,0x80,0x11,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x80,0x11,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_min_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xb0,0x10,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0xb0,0x10,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_mov_b32 v7, src_scc ; encoding: [0xc1,0x80,0x10,0xcf,0xfd,0x00,0x04,0x00,0xff,0x00,0x00,0x07] +0xc1,0x80,0x10,0xcf,0xfd,0x00,0x04,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x10,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x70,0x10,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_mul_f32 v7, src_scc, v5 ; encoding: [0xc1,0x30,0x10,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x30,0x10,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_sub_f32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x10,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x50,0x10,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, -1, v4 :: v_dual_sub_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x11,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x40,0x11,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: 
v_dual_add_f32 v255, -1, v4 :: v_dual_subrev_f32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x10,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x60,0x10,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_add_f32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x10,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x40,0x10,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_add_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x11,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x00,0x11,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x11,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x60,0x11,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo ; encoding: [0xf0,0x90,0x10,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x6a,0x07] +0xf0,0x90,0x10,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x6a,0x07 + +# GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_fmac_f32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x10,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x00,0x10,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x10,0x11,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x10,0x11,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x11,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x50,0x11,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_max_i32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x11,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x70,0x11,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_max_num_f32 v7, 0.5, v2 ; encoding: 
[0xf0,0xa0,0x10,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0xa0,0x10,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_min_i32 v7, 0.5, v2 ; encoding: [0xf0,0x80,0x11,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x80,0x11,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_min_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xb0,0x10,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0xb0,0x10,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_mov_b32 v7, 0.5 ; encoding: [0xf0,0x80,0x10,0xcf,0xf0,0x00,0x03,0x00,0xff,0x00,0x00,0x07] +0xf0,0x80,0x10,0xcf,0xf0,0x00,0x03,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x10,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x70,0x10,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_mul_f32 v7, 0.5, v2 ; encoding: [0xf0,0x30,0x10,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x30,0x10,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_sub_f32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x10,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x50,0x10,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x11,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x40,0x11,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, 0.5, v3 :: v_dual_subrev_f32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x10,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x60,0x10,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_add_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x40,0x10,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x7f,0x40,0x10,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x11,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x00,0x11,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v7, vcc_hi, v3 ; encoding: [0x7f,0x60,0x11,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x60,0x11,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v7, exec_hi, v3, vcc_lo ; encoding: [0x7f,0x90,0x10,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7f,0x90,0x10,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_fmac_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x10,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x00,0x10,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v7, vcc_hi, v3 ; encoding: [0x7f,0x10,0x11,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x10,0x11,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v7, vcc_hi, v3 ; encoding: [0x7f,0x50,0x11,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x50,0x11,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_max_i32 v7, vcc_hi, v3 ; encoding: [0x7f,0x70,0x11,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x70,0x11,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_max_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xa0,0x10,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0xa0,0x10,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_min_i32 v7, vcc_hi, v3 ; encoding: [0x7f,0x80,0x11,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x7f,0x80,0x11,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_min_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xb0,0x10,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0xb0,0x10,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x70,0x10,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x70,0x10,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_mul_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x30,0x10,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x30,0x10,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_sub_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x50,0x10,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x50,0x10,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v7, vcc_hi, v3 ; encoding: [0x7f,0x40,0x11,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x40,0x11,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, exec_hi, v2 :: v_dual_subrev_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x60,0x10,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x60,0x10,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, exec_hi, v255 :: v_dual_mov_b32 v7, vcc_hi ; encoding: [0x7f,0x80,0x10,0xcf,0x6b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7f,0x80,0x10,0xcf,0x6b,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_add_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x40,0x10,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x40,0x10,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x11,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x7e,0x00,0x11,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v7, ttmp15, v3 ; encoding: [0x7e,0x60,0x11,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x60,0x11,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v7, exec_lo, v3, vcc_lo ; encoding: [0x7e,0x90,0x10,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7e,0x90,0x10,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_fmac_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x10,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x00,0x10,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v7, ttmp15, v3 ; encoding: [0x7e,0x10,0x11,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x10,0x11,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_lshrrev_b32 v7, ttmp15, v3 ; encoding: [0x7e,0x50,0x11,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x50,0x11,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_max_i32 v7, ttmp15, v3 ; encoding: [0x7e,0x70,0x11,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x70,0x11,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_max_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xa0,0x10,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0xa0,0x10,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_min_i32 v7, ttmp15, v3 ; encoding: [0x7e,0x80,0x11,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x80,0x11,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_min_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xb0,0x10,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x7e,0xb0,0x10,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x70,0x10,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x70,0x10,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_mul_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x30,0x10,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x30,0x10,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_sub_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x50,0x10,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x50,0x10,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_sub_nc_u32 v7, ttmp15, v3 ; encoding: [0x7e,0x40,0x11,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x40,0x11,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, exec_lo, v2 :: v_dual_subrev_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x60,0x10,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x60,0x10,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, exec_lo, v255 :: v_dual_mov_b32 v7, ttmp15 ; encoding: [0x7e,0x80,0x10,0xcf,0x7b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7e,0x80,0x10,0xcf,0x7b,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, m0, v2 :: v_dual_add_f32 v7, m0, v3 ; encoding: [0x7d,0x40,0x10,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x40,0x10,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, m0, v2 :: v_dual_add_nc_u32 v7, m0, v3 ; encoding: [0x7d,0x00,0x11,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x00,0x11,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, m0, v2 :: v_dual_ashrrev_i32 v7, m0, v3 ; encoding: [0x7d,0x60,0x11,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x60,0x11,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# 
GFX1250: v_dual_add_f32 v255, m0, v2 :: v_dual_cndmask_b32 v7, m0, v3, vcc_lo ; encoding: [0x7d,0x90,0x10,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7d,0x90,0x10,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_add_f32 v255, m0, v2 :: v_dual_fmac_f32 v7, m0, v3 ; encoding: [0x7d,0x00,0x10,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x00,0x10,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, m0, v2 :: v_dual_lshlrev_b32 v7, m0, v3 ; encoding: [0x7d,0x10,0x11,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x10,0x11,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, m0, v2 :: v_dual_lshrrev_b32 v7, m0, v3 ; encoding: [0x7d,0x50,0x11,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x50,0x11,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, m0, v2 :: v_dual_max_i32 v7, m0, v3 ; encoding: [0x7d,0x70,0x11,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x70,0x11,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, m0, v2 :: v_dual_max_num_f32 v7, m0, v3 ; encoding: [0x7d,0xa0,0x10,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0xa0,0x10,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, m0, v2 :: v_dual_min_i32 v7, m0, v3 ; encoding: [0x7d,0x80,0x11,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x80,0x11,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, m0, v2 :: v_dual_min_num_f32 v7, m0, v3 ; encoding: [0x7d,0xb0,0x10,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0xb0,0x10,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v7, m0, v3 ; encoding: [0x7d,0x70,0x10,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x70,0x10,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, m0, v2 :: v_dual_mul_f32 v7, m0, v3 ; encoding: 
[0x7d,0x30,0x10,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x30,0x10,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, m0, v2 :: v_dual_sub_f32 v7, m0, v3 ; encoding: [0x7d,0x50,0x10,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x50,0x10,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, m0, v2 :: v_dual_sub_nc_u32 v7, m0, v3 ; encoding: [0x7d,0x40,0x11,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x40,0x11,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, m0, v2 :: v_dual_subrev_f32 v7, m0, v3 ; encoding: [0x7d,0x60,0x10,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x60,0x10,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, m0, v255 :: v_dual_mov_b32 v7, m0 ; encoding: [0x7d,0x80,0x10,0xcf,0x7d,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7d,0x80,0x10,0xcf,0x7d,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, s1, v2 :: v_dual_add_f32 v7, s105, v3 ; encoding: [0x01,0x40,0x10,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x40,0x10,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, s1, v2 :: v_dual_add_nc_u32 v7, s105, v3 ; encoding: [0x01,0x00,0x11,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x00,0x11,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, s1, v2 :: v_dual_ashrrev_i32 v7, s105, v3 ; encoding: [0x01,0x60,0x11,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x60,0x11,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, s1, v2 :: v_dual_cndmask_b32 v7, s1, v3, vcc_lo ; encoding: [0x01,0x90,0x10,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x01,0x90,0x10,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_add_f32 v255, s1, v2 :: v_dual_fmac_f32 v7, s105, v3 ; encoding: [0x01,0x00,0x10,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x01,0x00,0x10,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, s1, v2 :: v_dual_lshlrev_b32 v7, s105, v3 ; encoding: [0x01,0x10,0x11,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x10,0x11,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, s1, v2 :: v_dual_lshrrev_b32 v7, s105, v3 ; encoding: [0x01,0x50,0x11,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x50,0x11,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, s1, v2 :: v_dual_max_i32 v7, s105, v3 ; encoding: [0x01,0x70,0x11,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x70,0x11,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, s1, v2 :: v_dual_max_num_f32 v7, s105, v3 ; encoding: [0x01,0xa0,0x10,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xa0,0x10,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, s1, v2 :: v_dual_min_i32 v7, s105, v3 ; encoding: [0x01,0x80,0x11,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x80,0x11,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, s1, v2 :: v_dual_min_num_f32 v7, s105, v3 ; encoding: [0x01,0xb0,0x10,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xb0,0x10,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v7, s105, v3 ; encoding: [0x01,0x70,0x10,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x70,0x10,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, s1, v2 :: v_dual_mul_f32 v7, s105, v3 ; encoding: [0x01,0x30,0x10,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x30,0x10,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, s1, v2 :: v_dual_sub_f32 v7, s105, v3 ; encoding: [0x01,0x50,0x10,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x50,0x10,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, s1, v2 
:: v_dual_sub_nc_u32 v7, s105, v3 ; encoding: [0x01,0x40,0x11,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x40,0x11,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, s1, v2 :: v_dual_subrev_f32 v7, s105, v3 ; encoding: [0x01,0x60,0x10,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x60,0x10,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, s1, v255 :: v_dual_mov_b32 v7, s105 ; encoding: [0x01,0x80,0x10,0xcf,0x69,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x01,0x80,0x10,0xcf,0x69,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, s105, v2 :: v_dual_add_f32 v7, s1, v3 ; encoding: [0x69,0x40,0x10,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x40,0x10,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, s105, v2 :: v_dual_add_nc_u32 v7, s1, v3 ; encoding: [0x69,0x00,0x11,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x00,0x11,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, s105, v2 :: v_dual_ashrrev_i32 v7, s1, v3 ; encoding: [0x69,0x60,0x11,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x60,0x11,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, s105, v2 :: v_dual_cndmask_b32 v7, s105, v3, vcc_lo ; encoding: [0x69,0x90,0x10,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x69,0x90,0x10,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_add_f32 v255, s105, v2 :: v_dual_fmac_f32 v7, s1, v3 ; encoding: [0x69,0x00,0x10,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x00,0x10,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, s105, v2 :: v_dual_lshlrev_b32 v7, s1, v3 ; encoding: [0x69,0x10,0x11,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x10,0x11,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, s105, v2 :: v_dual_lshrrev_b32 v7, s1, v3 ; encoding: 
[0x69,0x50,0x11,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x50,0x11,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, s105, v2 :: v_dual_max_i32 v7, s1, v3 ; encoding: [0x69,0x70,0x11,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x70,0x11,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, s105, v2 :: v_dual_max_num_f32 v7, s1, v3 ; encoding: [0x69,0xa0,0x10,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0xa0,0x10,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, s105, v2 :: v_dual_min_i32 v7, s1, v3 ; encoding: [0x69,0x80,0x11,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x80,0x11,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, s105, v2 :: v_dual_min_num_f32 v7, s1, v3 ; encoding: [0x69,0xb0,0x10,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0xb0,0x10,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v7, s1, v3 ; encoding: [0x69,0x70,0x10,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x70,0x10,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, s105, v2 :: v_dual_mul_f32 v7, s1, v3 ; encoding: [0x69,0x30,0x10,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x30,0x10,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, s105, v2 :: v_dual_sub_f32 v7, s1, v3 ; encoding: [0x69,0x50,0x10,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x50,0x10,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, s105, v2 :: v_dual_sub_nc_u32 v7, s1, v3 ; encoding: [0x69,0x40,0x11,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x40,0x11,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, s105, v2 :: v_dual_subrev_f32 v7, s1, v3 ; encoding: [0x69,0x60,0x10,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x69,0x60,0x10,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, s105, v255 :: v_dual_mov_b32 v7, s1 ; encoding: [0x69,0x80,0x10,0xcf,0x01,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x69,0x80,0x10,0xcf,0x01,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, src_scc, v2 :: v_dual_add_f32 v7, -1, v3 ; encoding: [0xfd,0x40,0x10,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x40,0x10,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, src_scc, v2 :: v_dual_add_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x00,0x11,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x00,0x11,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v7, -1, v3 ; encoding: [0xfd,0x60,0x11,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x60,0x11,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, src_scc, v2 :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo ; encoding: [0xfd,0x90,0x10,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0xfd,0x90,0x10,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_add_f32 v255, src_scc, v2 :: v_dual_fmac_f32 v7, -1, v3 ; encoding: [0xfd,0x00,0x10,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x00,0x10,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v7, -1, v3 ; encoding: [0xfd,0x10,0x11,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x10,0x11,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v7, -1, v3 ; encoding: [0xfd,0x50,0x11,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x50,0x11,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, src_scc, v2 :: v_dual_max_i32 v7, -1, v3 ; encoding: [0xfd,0x70,0x11,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x70,0x11,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: 
v_dual_add_f32 v255, src_scc, v2 :: v_dual_max_num_f32 v7, -1, v3 ; encoding: [0xfd,0xa0,0x10,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0xa0,0x10,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, src_scc, v2 :: v_dual_min_i32 v7, -1, v3 ; encoding: [0xfd,0x80,0x11,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x80,0x11,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, src_scc, v2 :: v_dual_min_num_f32 v7, -1, v3 ; encoding: [0xfd,0xb0,0x10,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0xb0,0x10,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v7, -1, v3 ; encoding: [0xfd,0x70,0x10,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x70,0x10,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, src_scc, v2 :: v_dual_mul_f32 v7, -1, v3 ; encoding: [0xfd,0x30,0x10,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x30,0x10,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, src_scc, v2 :: v_dual_sub_f32 v7, -1, v3 ; encoding: [0xfd,0x50,0x10,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x50,0x10,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x40,0x11,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x40,0x11,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, src_scc, v2 :: v_dual_subrev_f32 v7, -1, v3 ; encoding: [0xfd,0x60,0x10,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x60,0x10,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, src_scc, v255 :: v_dual_mov_b32 v7, -1 ; encoding: [0xfd,0x80,0x10,0xcf,0xc1,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0xfd,0x80,0x10,0xcf,0xc1,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_add_f32 v7, vcc_lo, v3 ; encoding: 
[0x7b,0x40,0x10,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x40,0x10,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x11,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x00,0x11,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v7, vcc_lo, v3 ; encoding: [0x7b,0x60,0x11,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x60,0x11,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v7, ttmp15, v3, vcc_lo ; encoding: [0x7b,0x90,0x10,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7b,0x90,0x10,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_fmac_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x10,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x00,0x10,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v7, vcc_lo, v3 ; encoding: [0x7b,0x10,0x11,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x10,0x11,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v7, vcc_lo, v3 ; encoding: [0x7b,0x50,0x11,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x50,0x11,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_max_i32 v7, vcc_lo, v3 ; encoding: [0x7b,0x70,0x11,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x70,0x11,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_max_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xa0,0x10,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0xa0,0x10,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_min_i32 v7, vcc_lo, v3 ; encoding: 
[0x7b,0x80,0x11,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x80,0x11,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_min_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xb0,0x10,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0xb0,0x10,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x70,0x10,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x70,0x10,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_mul_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x30,0x10,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x30,0x10,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_sub_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x50,0x10,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x50,0x10,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_sub_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7b,0x40,0x11,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x40,0x11,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, ttmp15, v2 :: v_dual_subrev_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x60,0x10,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x60,0x10,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, ttmp15, v255 :: v_dual_mov_b32 v7, vcc_lo ; encoding: [0x7b,0x80,0x10,0xcf,0x6a,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7b,0x80,0x10,0xcf,0x6a,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v1, v2 :: v_dual_add_f32 v7, v255, v3 ; encoding: [0x01,0x41,0x10,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x41,0x10,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v1, v2 :: v_dual_add_nc_u32 v7, v255, v3 ; encoding: [0x01,0x01,0x11,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x01,0x01,0x11,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v1, v2 :: v_dual_ashrrev_i32 v7, v255, v3 ; encoding: [0x01,0x61,0x11,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x61,0x11,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v1, v2 :: v_dual_cndmask_b32 v7, v255, v3, vcc_lo ; encoding: [0x01,0x91,0x10,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0x01,0x91,0x10,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_add_f32 v255, v1, v2 :: v_dual_fmac_f32 v7, v255, v3 ; encoding: [0x01,0x01,0x10,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x01,0x10,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v1, v2 :: v_dual_lshlrev_b32 v7, v255, v3 ; encoding: [0x01,0x11,0x11,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x11,0x11,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v1, v2 :: v_dual_lshrrev_b32 v7, v255, v3 ; encoding: [0x01,0x51,0x11,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x51,0x11,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v1, v2 :: v_dual_max_i32 v7, v255, v3 ; encoding: [0x01,0x71,0x11,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x71,0x11,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v1, v2 :: v_dual_max_num_f32 v7, v255, v3 ; encoding: [0x01,0xa1,0x10,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xa1,0x10,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v1, v2 :: v_dual_min_i32 v7, v255, v3 ; encoding: [0x01,0x81,0x11,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x81,0x11,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v1, v2 :: v_dual_min_num_f32 v7, v255, v3 ; encoding: [0x01,0xb1,0x10,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xb1,0x10,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 
v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v7, v255, v3 ; encoding: [0x01,0x71,0x10,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x71,0x10,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v1, v2 :: v_dual_mul_f32 v7, v255, v3 ; encoding: [0x01,0x31,0x10,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x31,0x10,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v1, v2 :: v_dual_sub_f32 v7, v255, v3 ; encoding: [0x01,0x51,0x10,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x51,0x10,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v1, v2 :: v_dual_sub_nc_u32 v7, v255, v3 ; encoding: [0x01,0x41,0x11,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x41,0x11,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v1, v2 :: v_dual_subrev_f32 v7, v255, v3 ; encoding: [0x01,0x61,0x10,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x61,0x10,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v1, v255 :: v_dual_mov_b32 v7, v255 ; encoding: [0x01,0x81,0x10,0xcf,0xff,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0x01,0x81,0x10,0xcf,0xff,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v2, v2 :: v_dual_add_f32 v7, v3, v3 ; encoding: [0x02,0x41,0x10,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x41,0x10,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v2, v2 :: v_dual_add_nc_u32 v7, v3, v3 ; encoding: [0x02,0x01,0x11,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x01,0x11,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v2, v2 :: v_dual_ashrrev_i32 v7, v3, v3 ; encoding: [0x02,0x61,0x11,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x61,0x11,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v2, v2 :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo ; encoding: 
[0x02,0x91,0x10,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0x02,0x91,0x10,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_add_f32 v255, v2, v2 :: v_dual_fmac_f32 v7, v3, v3 ; encoding: [0x02,0x01,0x10,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x01,0x10,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v2, v2 :: v_dual_lshlrev_b32 v7, v3, v3 ; encoding: [0x02,0x11,0x11,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x11,0x11,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v2, v2 :: v_dual_lshrrev_b32 v7, v3, v3 ; encoding: [0x02,0x51,0x11,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x51,0x11,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v2, v2 :: v_dual_max_i32 v7, v3, v3 ; encoding: [0x02,0x71,0x11,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x71,0x11,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v2, v2 :: v_dual_max_num_f32 v7, v3, v3 ; encoding: [0x02,0xa1,0x10,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0xa1,0x10,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v2, v2 :: v_dual_min_i32 v7, v3, v3 ; encoding: [0x02,0x81,0x11,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x81,0x11,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v2, v2 :: v_dual_min_num_f32 v7, v3, v3 ; encoding: [0x02,0xb1,0x10,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0xb1,0x10,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v7, v3, v3 ; encoding: [0x02,0x71,0x10,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x71,0x10,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v2, v2 :: v_dual_mul_f32 v7, v3, v3 ; encoding: [0x02,0x31,0x10,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x02,0x31,0x10,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v2, v2 :: v_dual_sub_f32 v7, v3, v3 ; encoding: [0x02,0x51,0x10,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x51,0x10,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v2, v2 :: v_dual_sub_nc_u32 v7, v3, v3 ; encoding: [0x02,0x41,0x11,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x41,0x11,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v2, v2 :: v_dual_subrev_f32 v7, v3, v3 ; encoding: [0x02,0x61,0x10,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x61,0x10,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v2, v255 :: v_dual_mov_b32 v7, v3 ; encoding: [0x02,0x81,0x10,0xcf,0x03,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0x02,0x81,0x10,0xcf,0x03,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v255, v2 :: v_dual_add_f32 v7, v2, v3 ; encoding: [0xff,0x41,0x10,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x41,0x10,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v255, v2 :: v_dual_add_nc_u32 v7, v2, v3 ; encoding: [0xff,0x01,0x11,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x01,0x11,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v255, v2 :: v_dual_ashrrev_i32 v7, v2, v3 ; encoding: [0xff,0x61,0x11,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x61,0x11,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v255, v2 :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo ; encoding: [0xff,0x91,0x10,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0xff,0x91,0x10,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_add_f32 v255, v255, v2 :: v_dual_fmac_f32 v7, v2, v3 ; encoding: [0xff,0x01,0x10,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x01,0x10,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v255, v2 :: 
v_dual_lshlrev_b32 v7, v2, v3 ; encoding: [0xff,0x11,0x11,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x11,0x11,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v255, v2 :: v_dual_lshrrev_b32 v7, v2, v3 ; encoding: [0xff,0x51,0x11,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x51,0x11,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v255, v2 :: v_dual_max_i32 v7, v2, v3 ; encoding: [0xff,0x71,0x11,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x71,0x11,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v255, v2 :: v_dual_max_num_f32 v7, v2, v3 ; encoding: [0xff,0xa1,0x10,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0xa1,0x10,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v255, v2 :: v_dual_min_i32 v7, v2, v3 ; encoding: [0xff,0x81,0x11,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x81,0x11,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v255, v2 :: v_dual_min_num_f32 v7, v2, v3 ; encoding: [0xff,0xb1,0x10,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0xb1,0x10,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v7, v2, v3 ; encoding: [0xff,0x71,0x10,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x71,0x10,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v255, v2 :: v_dual_mul_f32 v7, v2, v3 ; encoding: [0xff,0x31,0x10,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x31,0x10,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v255, v2 :: v_dual_sub_f32 v7, v2, v3 ; encoding: [0xff,0x51,0x10,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x51,0x10,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v255, v2 :: v_dual_sub_nc_u32 v7, v2, v3 ; encoding: [0xff,0x41,0x11,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] 
+0xff,0x41,0x11,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v255, v2 :: v_dual_subrev_f32 v7, v2, v3 ; encoding: [0xff,0x61,0x10,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x61,0x10,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v255, v255 :: v_dual_mov_b32 v7, v2 ; encoding: [0xff,0x81,0x10,0xcf,0x02,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0xff,0x81,0x10,0xcf,0x02,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v3, v2 :: v_dual_add_f32 v7, v4, v3 ; encoding: [0x03,0x41,0x10,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x41,0x10,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v3, v2 :: v_dual_add_nc_u32 v7, v4, v3 ; encoding: [0x03,0x01,0x11,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x01,0x11,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v3, v2 :: v_dual_ashrrev_i32 v7, v4, v3 ; encoding: [0x03,0x61,0x11,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x61,0x11,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v3, v2 :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo ; encoding: [0x03,0x91,0x10,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0x03,0x91,0x10,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_add_f32 v255, v3, v2 :: v_dual_fmac_f32 v7, v4, v3 ; encoding: [0x03,0x01,0x10,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x01,0x10,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v3, v2 :: v_dual_lshlrev_b32 v7, v4, v3 ; encoding: [0x03,0x11,0x11,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x11,0x11,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v3, v2 :: v_dual_lshrrev_b32 v7, v4, v3 ; encoding: [0x03,0x51,0x11,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x51,0x11,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v3, v2 :: 
v_dual_max_i32 v7, v4, v3 ; encoding: [0x03,0x71,0x11,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x71,0x11,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v3, v2 :: v_dual_max_num_f32 v7, v4, v3 ; encoding: [0x03,0xa1,0x10,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0xa1,0x10,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v3, v2 :: v_dual_min_i32 v7, v4, v3 ; encoding: [0x03,0x81,0x11,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x81,0x11,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v3, v2 :: v_dual_min_num_f32 v7, v4, v3 ; encoding: [0x03,0xb1,0x10,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0xb1,0x10,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v7, v4, v3 ; encoding: [0x03,0x71,0x10,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x71,0x10,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v3, v2 :: v_dual_mul_f32 v7, v4, v3 ; encoding: [0x03,0x31,0x10,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x31,0x10,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v3, v2 :: v_dual_sub_f32 v7, v4, v3 ; encoding: [0x03,0x51,0x10,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x51,0x10,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v3, v2 :: v_dual_sub_nc_u32 v7, v4, v3 ; encoding: [0x03,0x41,0x11,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x41,0x11,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v3, v2 :: v_dual_subrev_f32 v7, v4, v3 ; encoding: [0x03,0x61,0x10,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x61,0x10,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v3, v255 :: v_dual_mov_b32 v7, v4 ; encoding: [0x03,0x81,0x10,0xcf,0x04,0x01,0xff,0x00,0xff,0x00,0x00,0x07] 
+0x03,0x81,0x10,0xcf,0x04,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_add_f32 v7, v1, v3 ; encoding: [0x04,0x41,0x10,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x41,0x10,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_add_nc_u32 v7, v1, v3 ; encoding: [0x04,0x01,0x11,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x01,0x11,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_ashrrev_i32 v7, v1, v3 ; encoding: [0x04,0x61,0x11,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x61,0x11,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_bitop2_b32 v7, v1, v3 ; encoding: [0x04,0x21,0x11,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x21,0x11,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, s96 ; encoding: [0x04,0x91,0x10,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x60,0x07] +0x04,0x91,0x10,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x60,0x07 + +# GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo ; encoding: [0x04,0x91,0x10,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0x04,0x91,0x10,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_fma_f32 v7, v1, v3, v4 ; encoding: [0x04,0x31,0x11,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x04,0x07] +0x04,0x31,0x11,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x04,0x07 + +# GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_fmac_f32 v7, v1, v3 ; encoding: [0x04,0x01,0x10,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x01,0x10,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_lshlrev_b32 v7, v1, v3 ; encoding: [0x04,0x11,0x11,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x11,0x11,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v4, v2 :: 
v_dual_lshrrev_b32 v7, v1, v3 ; encoding: [0x04,0x51,0x11,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x51,0x11,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_max_i32 v7, v1, v3 ; encoding: [0x04,0x71,0x11,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x71,0x11,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_max_num_f32 v7, v1, v3 ; encoding: [0x04,0xa1,0x10,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0xa1,0x10,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_min_i32 v7, v1, v3 ; encoding: [0x04,0x81,0x11,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x81,0x11,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_min_num_f32 v7, v1, v3 ; encoding: [0x04,0xb1,0x10,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0xb1,0x10,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v7, v1, v3 ; encoding: [0x04,0x71,0x10,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x71,0x10,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_mul_f32 v7, v1, v3 ; encoding: [0x04,0x31,0x10,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x31,0x10,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_sub_f32 v7, v1, v3 ; encoding: [0x04,0x51,0x10,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x51,0x10,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_sub_nc_u32 v7, v1, v3 ; encoding: [0x04,0x41,0x11,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x41,0x11,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v4, v2 :: v_dual_subrev_f32 v7, v1, v3 ; encoding: [0x04,0x61,0x10,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x04,0x61,0x10,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, v4, v255 :: v_dual_mov_b32 v7, v1 ; encoding: [0x04,0x81,0x10,0xcf,0x01,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0x04,0x81,0x10,0xcf,0x01,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_add_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x40,0x10,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x40,0x10,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x11,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x00,0x11,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v7, exec_lo, v3 ; encoding: [0x6b,0x60,0x11,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x60,0x11,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v7, vcc_hi, v3, vcc_lo ; encoding: [0x6b,0x90,0x10,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x6b,0x90,0x10,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x10,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x00,0x10,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v7, exec_lo, v3 ; encoding: [0x6b,0x10,0x11,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x10,0x11,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v7, exec_lo, v3 ; encoding: [0x6b,0x50,0x11,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x50,0x11,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_max_i32 v7, exec_lo, v3 ; encoding: [0x6b,0x70,0x11,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x6b,0x70,0x11,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xa0,0x10,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0xa0,0x10,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_min_i32 v7, exec_lo, v3 ; encoding: [0x6b,0x80,0x11,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x80,0x11,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xb0,0x10,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0xb0,0x10,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x70,0x10,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x70,0x10,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_mul_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x30,0x10,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x30,0x10,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_sub_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x50,0x10,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x50,0x10,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v7, exec_lo, v3 ; encoding: [0x6b,0x40,0x11,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x40,0x11,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x60,0x10,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x60,0x10,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, vcc_hi, v255 :: v_dual_mov_b32 v7, exec_lo ; encoding: [0x6b,0x80,0x10,0xcf,0x7e,0x00,0xff,0x00,0xff,0x00,0x00,0x07] 
+0x6b,0x80,0x10,0xcf,0x7e,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_add_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x10,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x40,0x10,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x11,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x00,0x11,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x11,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x60,0x11,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo ; encoding: [0x6a,0x90,0x10,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x6a,0x90,0x10,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x10,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x00,0x10,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x10,0x11,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x10,0x11,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x11,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x50,0x11,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_max_i32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x11,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x70,0x11,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xa0,0x10,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x6a,0xa0,0x10,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_min_i32 v7, exec_hi, v3 ; encoding: [0x6a,0x80,0x11,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x80,0x11,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xb0,0x10,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0xb0,0x10,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x10,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x70,0x10,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_mul_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x30,0x10,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x30,0x10,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_sub_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x10,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x50,0x10,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x11,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x40,0x11,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x10,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x60,0x10,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f32 v255, vcc_lo, v255 :: v_dual_mov_b32 v7, exec_hi ; encoding: [0x6a,0x80,0x10,0xcf,0x7f,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x6a,0x80,0x10,0xcf,0x7f,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[0:1], -s[8:9], -v[4:5] :: v_dual_subrev_f32 v5, v6, -v7 ; encoding: [0x08,0x60,0x84,0xcf,0x06,0x27,0x04,0x00,0x00,0x07,0x00,0x05] 
+0x08,0x60,0x84,0xcf,0x06,0x27,0x04,0x00,0x00,0x07,0x00,0x05 + +# GFX1250: v_dual_add_f64 v[0:1], -v[8:9], v[4:5] :: v_dual_mov_b32 v5, v6 ; encoding: [0x08,0x81,0x84,0xcf,0x06,0x03,0x04,0x00,0x00,0x00,0x00,0x05] +0x08,0x81,0x84,0xcf,0x06,0x03,0x04,0x00,0x00,0x00,0x00,0x05 + +# GFX1250: v_dual_add_f64 v[252:253], v[6:7], v[4:5] :: v_dual_add_f32 v8, v1, v3 ; encoding: [0x06,0x41,0x84,0xcf,0x01,0x01,0x04,0x00,0xfc,0x03,0x00,0x08] +0x06,0x41,0x84,0xcf,0x01,0x01,0x04,0x00,0xfc,0x03,0x00,0x08 + +# GFX1250: v_dual_add_f64 v[254:255], -1, v[6:7] :: v_dual_add_f32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x84,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +0xc1,0x40,0x84,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], -1, v[6:7] :: v_dual_add_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x85,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +0xc1,0x00,0x85,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], -1, v[6:7] :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo ; encoding: [0xc1,0x90,0x84,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x6a,0x07] +0xc1,0x90,0x84,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x6a,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], -1, v[6:7] :: v_dual_fmac_f32 v9, src_scc, v5 ; encoding: [0xc1,0x00,0x84,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x09] +0xc1,0x00,0x84,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x09 + +# GFX1250: v_dual_add_f64 v[254:255], -1, v[6:7] :: v_dual_lshlrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x10,0x85,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +0xc1,0x10,0x85,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], -1, v[6:7] :: v_dual_max_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xa0,0x84,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +0xc1,0xa0,0x84,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], -1, v[6:7] :: v_dual_min_num_f32 v7, src_scc, v5 ; encoding: 
[0xc1,0xb0,0x84,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +0xc1,0xb0,0x84,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], -1, v[6:7] :: v_dual_mov_b32 v7, src_scc ; encoding: [0xc1,0x80,0x84,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x00,0x00,0x07] +0xc1,0x80,0x84,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], -1, v[6:7] :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x84,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +0xc1,0x70,0x84,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], -1, v[6:7] :: v_dual_mul_f32 v7, src_scc, v5 ; encoding: [0xc1,0x30,0x84,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +0xc1,0x30,0x84,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], -1, v[6:7] :: v_dual_sub_f32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x84,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +0xc1,0x50,0x84,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], -1, v[6:7] :: v_dual_subrev_f32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x84,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +0xc1,0x60,0x84,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], 0.5, v[8:9] :: v_dual_add_f32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x84,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +0xf0,0x40,0x84,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], 0.5, v[8:9] :: v_dual_add_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x85,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +0xf0,0x00,0x85,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], 0.5, v[8:9] :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo ; encoding: [0xf0,0x90,0x84,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x6a,0x07] +0xf0,0x90,0x84,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x6a,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], 0.5, v[8:9] :: v_dual_fmac_f32 v9, 0.5, v2 ; 
encoding: [0xf0,0x00,0x84,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x09] +0xf0,0x00,0x84,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x09 + +# GFX1250: v_dual_add_f64 v[254:255], 0.5, v[8:9] :: v_dual_lshlrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x10,0x85,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +0xf0,0x10,0x85,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], 0.5, v[8:9] :: v_dual_max_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xa0,0x84,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +0xf0,0xa0,0x84,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], 0.5, v[8:9] :: v_dual_min_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xb0,0x84,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +0xf0,0xb0,0x84,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], 0.5, v[8:9] :: v_dual_mov_b32 v7, 0.5 ; encoding: [0xf0,0x80,0x84,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x00,0x00,0x07] +0xf0,0x80,0x84,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], 0.5, v[8:9] :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x84,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +0xf0,0x70,0x84,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], 0.5, v[8:9] :: v_dual_mul_f32 v7, 0.5, v2 ; encoding: [0xf0,0x30,0x84,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +0xf0,0x30,0x84,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], 0.5, v[8:9] :: v_dual_sub_f32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x84,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +0xf0,0x50,0x84,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], 0.5, v[8:9] :: v_dual_subrev_f32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x84,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +0xf0,0x60,0x84,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], exec, v[254:255] :: v_dual_mov_b32 v7, ttmp15 ; encoding: 
[0x7e,0x80,0x84,0xcf,0x7b,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07] +0x7e,0x80,0x84,0xcf,0x7b,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], exec, v[4:5] :: v_dual_add_f32 v7, v15, v3 ; encoding: [0x7e,0x40,0x84,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7e,0x40,0x84,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], exec, v[4:5] :: v_dual_add_nc_u32 v7, v15, v3 ; encoding: [0x7e,0x00,0x85,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7e,0x00,0x85,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], exec, v[4:5] :: v_dual_cndmask_b32 v7, v10, v3, vcc_lo ; encoding: [0x7e,0x90,0x84,0xcf,0x0a,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +0x7e,0x90,0x84,0xcf,0x0a,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], exec, v[4:5] :: v_dual_fmac_f32 v9, v15, v3 ; encoding: [0x7e,0x00,0x84,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +0x7e,0x00,0x84,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_add_f64 v[254:255], exec, v[4:5] :: v_dual_lshlrev_b32 v7, v15, v3 ; encoding: [0x7e,0x10,0x85,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7e,0x10,0x85,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], exec, v[4:5] :: v_dual_max_num_f32 v7, v15, v3 ; encoding: [0x7e,0xa0,0x84,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7e,0xa0,0x84,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], exec, v[4:5] :: v_dual_min_num_f32 v7, v15, v3 ; encoding: [0x7e,0xb0,0x84,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7e,0xb0,0x84,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], exec, v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v15, v3 ; encoding: [0x7e,0x70,0x84,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7e,0x70,0x84,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], exec, v[4:5] :: v_dual_mul_f32 v7, v15, v3 ; 
encoding: [0x7e,0x30,0x84,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7e,0x30,0x84,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], exec, v[4:5] :: v_dual_sub_f32 v7, v15, v3 ; encoding: [0x7e,0x50,0x84,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7e,0x50,0x84,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], exec, v[4:5] :: v_dual_subrev_f32 v7, v15, v3 ; encoding: [0x7e,0x60,0x84,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7e,0x60,0x84,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], s[104:105], v[254:255] :: v_dual_mov_b32 v7, s1 ; encoding: [0x68,0x80,0x84,0xcf,0x01,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07] +0x68,0x80,0x84,0xcf,0x01,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], s[104:105], v[4:5] :: v_dual_add_f32 v7, v1, v3 ; encoding: [0x68,0x40,0x84,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x68,0x40,0x84,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], s[104:105], v[4:5] :: v_dual_add_nc_u32 v7, v1, v3 ; encoding: [0x68,0x00,0x85,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x68,0x00,0x85,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], s[104:105], v[4:5] :: v_dual_fmac_f32 v9, v1, v3 ; encoding: [0x68,0x00,0x84,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +0x68,0x00,0x84,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_add_f64 v[254:255], s[104:105], v[4:5] :: v_dual_lshlrev_b32 v7, v1, v3 ; encoding: [0x68,0x10,0x85,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x68,0x10,0x85,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], s[104:105], v[4:5] :: v_dual_max_num_f32 v7, v1, v3 ; encoding: [0x68,0xa0,0x84,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x68,0xa0,0x84,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], s[104:105], v[4:5] :: 
v_dual_min_num_f32 v7, v1, v3 ; encoding: [0x68,0xb0,0x84,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x68,0xb0,0x84,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], s[104:105], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v1, v3 ; encoding: [0x68,0x70,0x84,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x68,0x70,0x84,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], s[104:105], v[4:5] :: v_dual_mul_f32 v7, v1, v3 ; encoding: [0x68,0x30,0x84,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x68,0x30,0x84,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], s[104:105], v[4:5] :: v_dual_sub_f32 v7, v1, v3 ; encoding: [0x68,0x50,0x84,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x68,0x50,0x84,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], s[104:105], v[4:5] :: v_dual_subrev_f32 v7, v1, v3 ; encoding: [0x68,0x60,0x84,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x68,0x60,0x84,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], s[2:3], v[254:255] :: v_dual_mov_b32 v7, s105 ; encoding: [0x02,0x80,0x84,0xcf,0x69,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07] +0x02,0x80,0x84,0xcf,0x69,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], s[2:3], v[4:5] :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo ; encoding: [0x02,0x90,0x84,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +0x02,0x90,0x84,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], src_scc, v[254:255] :: v_dual_mov_b32 v7, -1 ; encoding: [0xfd,0x80,0x84,0xcf,0xc1,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07] +0xfd,0x80,0x84,0xcf,0xc1,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], src_scc, v[4:5] :: v_dual_add_f32 v7, -1, v3 ; encoding: [0xfd,0x40,0x84,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfd,0x40,0x84,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 
v[254:255], src_scc, v[4:5] :: v_dual_add_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x00,0x85,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfd,0x00,0x85,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], src_scc, v[4:5] :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo ; encoding: [0xfd,0x90,0x84,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x6a,0x07] +0xfd,0x90,0x84,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x6a,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], src_scc, v[4:5] :: v_dual_fmac_f32 v9, -1, v3 ; encoding: [0xfd,0x00,0x84,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x09] +0xfd,0x00,0x84,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_add_f64 v[254:255], src_scc, v[4:5] :: v_dual_lshlrev_b32 v7, -1, v3 ; encoding: [0xfd,0x10,0x85,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfd,0x10,0x85,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], src_scc, v[4:5] :: v_dual_max_num_f32 v7, -1, v3 ; encoding: [0xfd,0xa0,0x84,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfd,0xa0,0x84,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], src_scc, v[4:5] :: v_dual_min_num_f32 v7, -1, v3 ; encoding: [0xfd,0xb0,0x84,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfd,0xb0,0x84,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], src_scc, v[4:5] :: v_dual_mul_dx9_zero_f32 v7, -1, v3 ; encoding: [0xfd,0x70,0x84,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfd,0x70,0x84,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], src_scc, v[4:5] :: v_dual_mul_f32 v7, -1, v3 ; encoding: [0xfd,0x30,0x84,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfd,0x30,0x84,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], src_scc, v[4:5] :: v_dual_sub_f32 v7, -1, v3 ; encoding: [0xfd,0x50,0x84,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfd,0x50,0x84,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + 
+# GFX1250: v_dual_add_f64 v[254:255], src_scc, v[4:5] :: v_dual_subrev_f32 v7, -1, v3 ; encoding: [0xfd,0x60,0x84,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfd,0x60,0x84,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], ttmp[14:15], v[254:255] :: v_dual_mov_b32 v7, vcc_lo ; encoding: [0x7a,0x80,0x84,0xcf,0x6a,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07] +0x7a,0x80,0x84,0xcf,0x6a,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_add_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0x40,0x84,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7a,0x40,0x84,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_add_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7a,0x00,0x85,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7a,0x00,0x85,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_cndmask_b32 v7, v15, v3, vcc_lo ; encoding: [0x7a,0x90,0x84,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +0x7a,0x90,0x84,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_fmac_f32 v9, vcc_lo, v3 ; encoding: [0x7a,0x00,0x84,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x09] +0x7a,0x00,0x84,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_add_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_lshlrev_b32 v7, vcc_lo, v3 ; encoding: [0x7a,0x10,0x85,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7a,0x10,0x85,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_max_num_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0xa0,0x84,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7a,0xa0,0x84,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_min_num_f32 v7, vcc_lo, v3 ; encoding: 
[0x7a,0xb0,0x84,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7a,0xb0,0x84,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0x70,0x84,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7a,0x70,0x84,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_mul_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0x30,0x84,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7a,0x30,0x84,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_sub_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0x50,0x84,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7a,0x50,0x84,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_subrev_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0x60,0x84,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7a,0x60,0x84,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[254:255], v[254:255] :: v_dual_mov_b32 v7, v4 ; encoding: [0xfe,0x81,0x84,0xcf,0x04,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07] +0xfe,0x81,0x84,0xcf,0x04,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[254:255], v[4:5] :: v_dual_add_f32 v7, v5, v3 ; encoding: [0xfe,0x41,0x84,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfe,0x41,0x84,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[254:255], v[4:5] :: v_dual_add_nc_u32 v7, v5, v3 ; encoding: [0xfe,0x01,0x85,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfe,0x01,0x85,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[254:255], v[4:5] :: v_dual_cndmask_b32 v7, v5, v3, vcc_lo ; encoding: [0xfe,0x91,0x84,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +0xfe,0x91,0x84,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07 + +# GFX1250: v_dual_add_f64 
v[254:255], v[254:255], v[4:5] :: v_dual_fmac_f32 v9, v5, v3 ; encoding: [0xfe,0x01,0x84,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +0xfe,0x01,0x84,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_add_f64 v[254:255], v[254:255], v[4:5] :: v_dual_lshlrev_b32 v7, v5, v3 ; encoding: [0xfe,0x11,0x85,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfe,0x11,0x85,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[254:255], v[4:5] :: v_dual_max_num_f32 v7, v5, v3 ; encoding: [0xfe,0xa1,0x84,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfe,0xa1,0x84,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[254:255], v[4:5] :: v_dual_min_num_f32 v7, v5, v3 ; encoding: [0xfe,0xb1,0x84,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfe,0xb1,0x84,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[254:255], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v5, v3 ; encoding: [0xfe,0x71,0x84,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfe,0x71,0x84,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[254:255], v[4:5] :: v_dual_mul_f32 v7, v5, v3 ; encoding: [0xfe,0x31,0x84,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfe,0x31,0x84,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[254:255], v[4:5] :: v_dual_sub_f32 v7, v5, v3 ; encoding: [0xfe,0x51,0x84,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfe,0x51,0x84,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[254:255], v[4:5] :: v_dual_subrev_f32 v7, v5, v3 ; encoding: [0xfe,0x61,0x84,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfe,0x61,0x84,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[2:3], v[254:255] :: v_dual_mov_b32 v7, v253 ; encoding: [0x02,0x81,0x84,0xcf,0xfd,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07] 
+0x02,0x81,0x84,0xcf,0xfd,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[2:3], v[4:5] :: v_dual_add_f32 v7, v253, v3 ; encoding: [0x02,0x41,0x84,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x02,0x41,0x84,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[2:3], v[4:5] :: v_dual_add_nc_u32 v7, v253, v3 ; encoding: [0x02,0x01,0x85,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x02,0x01,0x85,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[2:3], v[4:5] :: v_dual_cndmask_b32 v7, v253, v3, vcc_lo ; encoding: [0x02,0x91,0x84,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +0x02,0x91,0x84,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[2:3], v[4:5] :: v_dual_fmac_f32 v9, v253, v3 ; encoding: [0x02,0x01,0x84,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +0x02,0x01,0x84,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_add_f64 v[254:255], v[2:3], v[4:5] :: v_dual_lshlrev_b32 v7, v253, v3 ; encoding: [0x02,0x11,0x85,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x02,0x11,0x85,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[2:3], v[4:5] :: v_dual_max_num_f32 v7, v253, v3 ; encoding: [0x02,0xa1,0x84,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x02,0xa1,0x84,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[2:3], v[4:5] :: v_dual_min_num_f32 v7, v253, v3 ; encoding: [0x02,0xb1,0x84,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x02,0xb1,0x84,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[2:3], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v253, v3 ; encoding: [0x02,0x71,0x84,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x02,0x71,0x84,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[2:3], v[4:5] :: v_dual_mul_f32 v7, v253, v3 ; encoding: 
[0x02,0x31,0x84,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x02,0x31,0x84,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[2:3], v[4:5] :: v_dual_sub_f32 v7, v253, v3 ; encoding: [0x02,0x51,0x84,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x02,0x51,0x84,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[2:3], v[4:5] :: v_dual_subrev_f32 v7, v253, v3 ; encoding: [0x02,0x61,0x84,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x02,0x61,0x84,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[4:5], v[254:255] :: v_dual_mov_b32 v7, v3 ; encoding: [0x04,0x81,0x84,0xcf,0x03,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07] +0x04,0x81,0x84,0xcf,0x03,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_add_f32 v7, v3, v3 ; encoding: [0x04,0x41,0x84,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x41,0x84,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_add_nc_u32 v7, v2, v3 ; encoding: [0x04,0x01,0x85,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x01,0x85,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_add_nc_u32 v7, v3, v3 ; encoding: [0x04,0x01,0x85,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x01,0x85,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_cndmask_b32 v7, v2, v3, s96 ; encoding: [0x04,0x91,0x84,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x60,0x07] +0x04,0x91,0x84,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x60,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo ; encoding: [0x04,0x91,0x84,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +0x04,0x91,0x84,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_cndmask_b32 v7, 
v3, v3, vcc_lo ; encoding: [0x04,0x91,0x84,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +0x04,0x91,0x84,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_fmac_f32 v9, v2, v3 ; encoding: [0x04,0x01,0x84,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +0x04,0x01,0x84,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_fmac_f32 v9, v3, v3 ; encoding: [0x04,0x01,0x84,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +0x04,0x01,0x84,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_lshlrev_b32 v7, v2, v3 ; encoding: [0x04,0x11,0x85,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x11,0x85,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_lshlrev_b32 v7, v3, v3 ; encoding: [0x04,0x11,0x85,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x11,0x85,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_max_num_f32 v7, v2, v3 ; encoding: [0x04,0xa1,0x84,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0xa1,0x84,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_max_num_f32 v7, v3, v3 ; encoding: [0x04,0xa1,0x84,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0xa1,0x84,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_min_num_f32 v7, v2, v3 ; encoding: [0x04,0xb1,0x84,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0xb1,0x84,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_min_num_f32 v7, v3, v3 ; encoding: [0x04,0xb1,0x84,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0xb1,0x84,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: 
v_dual_mul_dx9_zero_f32 v7, v2, v3 ; encoding: [0x04,0x71,0x84,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x71,0x84,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v3, v3 ; encoding: [0x04,0x71,0x84,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x71,0x84,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_mul_f32 v7, v2, v3 ; encoding: [0x04,0x31,0x84,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x31,0x84,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_mul_f32 v7, v3, v3 ; encoding: [0x04,0x31,0x84,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x31,0x84,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_sub_f32 v7, v2, v3 ; encoding: [0x04,0x51,0x84,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x51,0x84,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_sub_f32 v7, v3, v3 ; encoding: [0x04,0x51,0x84,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x51,0x84,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_subrev_f32 v7, v2, v3 ; encoding: [0x04,0x61,0x84,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x61,0x84,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[4:5], v[4:5] :: v_dual_subrev_f32 v7, v3, v3 ; encoding: [0x04,0x61,0x84,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x61,0x84,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[6:7], v[254:255] :: v_dual_mov_b32 v7, v4 ; encoding: [0x06,0x81,0x84,0xcf,0x04,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07] +0x06,0x81,0x84,0xcf,0x04,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[6:7], 
v[4:5] :: v_dual_add_f32 v7, v4, v3 ; encoding: [0x06,0x41,0x84,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x06,0x41,0x84,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[6:7], v[4:5] :: v_dual_add_nc_u32 v7, v4, v3 ; encoding: [0x06,0x01,0x85,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x06,0x01,0x85,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[6:7], v[4:5] :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo ; encoding: [0x06,0x91,0x84,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +0x06,0x91,0x84,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[6:7], v[4:5] :: v_dual_fmac_f32 v9, v4, v3 ; encoding: [0x06,0x01,0x84,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +0x06,0x01,0x84,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_add_f64 v[254:255], v[6:7], v[4:5] :: v_dual_lshlrev_b32 v7, v4, v3 ; encoding: [0x06,0x11,0x85,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x06,0x11,0x85,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[6:7], v[4:5] :: v_dual_max_num_f32 v7, v4, v3 ; encoding: [0x06,0xa1,0x84,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x06,0xa1,0x84,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[6:7], v[4:5] :: v_dual_min_num_f32 v7, v4, v3 ; encoding: [0x06,0xb1,0x84,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x06,0xb1,0x84,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[6:7], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v4, v3 ; encoding: [0x06,0x71,0x84,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x06,0x71,0x84,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[6:7], v[4:5] :: v_dual_mul_f32 v7, v4, v3 ; encoding: [0x06,0x31,0x84,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x06,0x31,0x84,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 
v[254:255], v[6:7], v[4:5] :: v_dual_sub_f32 v7, v4, v3 ; encoding: [0x06,0x51,0x84,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x06,0x51,0x84,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], v[6:7], v[4:5] :: v_dual_subrev_f32 v7, v4, v3 ; encoding: [0x06,0x61,0x84,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x06,0x61,0x84,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], vcc, v[254:255] :: v_dual_mov_b32 v7, exec_hi ; encoding: [0x6a,0x80,0x84,0xcf,0x7f,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07] +0x6a,0x80,0x84,0xcf,0x7f,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], vcc, v[4:5] :: v_dual_add_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x84,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x6a,0x40,0x84,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], vcc, v[4:5] :: v_dual_add_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x85,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x6a,0x00,0x85,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], vcc, v[4:5] :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo ; encoding: [0x6a,0x90,0x84,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x6a,0x07] +0x6a,0x90,0x84,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x6a,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], vcc, v[4:5] :: v_dual_fmac_f32 v9, exec_hi, v3 ; encoding: [0x6a,0x00,0x84,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x09] +0x6a,0x00,0x84,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_add_f64 v[254:255], vcc, v[4:5] :: v_dual_lshlrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x10,0x85,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x6a,0x10,0x85,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], vcc, v[4:5] :: v_dual_max_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xa0,0x84,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x6a,0xa0,0x84,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# 
GFX1250: v_dual_add_f64 v[254:255], vcc, v[4:5] :: v_dual_min_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xb0,0x84,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x6a,0xb0,0x84,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], vcc, v[4:5] :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x84,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x6a,0x70,0x84,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], vcc, v[4:5] :: v_dual_mul_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x30,0x84,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x6a,0x30,0x84,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], vcc, v[4:5] :: v_dual_sub_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x84,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x6a,0x50,0x84,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[254:255], vcc, v[4:5] :: v_dual_subrev_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x84,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x6a,0x60,0x84,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_f64 v[2:3], v[6:7], v[4:5] :: v_dual_ashrrev_i32 v9, v1, v14 ; encoding: [0x06,0x61,0x85,0xcf,0x01,0x01,0x04,0x00,0x02,0x0e,0x00,0x09] +0x06,0x61,0x85,0xcf,0x01,0x01,0x04,0x00,0x02,0x0e,0x00,0x09 + +# GFX1250: v_dual_add_f64 v[2:3], v[6:7], v[4:5] :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0x92 ; encoding: [0x06,0x21,0x85,0xcf,0x01,0x01,0x04,0x00,0x02,0x03,0x92,0x07] +0x06,0x21,0x85,0xcf,0x01,0x01,0x04,0x00,0x02,0x03,0x92,0x07 + +# GFX1250: v_dual_add_f64 v[2:3], v[6:7], v[4:5] :: v_dual_fma_f32 v9, v1, v14, v4 ; encoding: [0x06,0x31,0x85,0xcf,0x01,0x01,0x04,0x00,0x02,0x0e,0x04,0x09] +0x06,0x31,0x85,0xcf,0x01,0x01,0x04,0x00,0x02,0x0e,0x04,0x09 + +# GFX1250: v_dual_add_f64 v[2:3], v[6:7], v[4:5] :: v_dual_lshrrev_b32 v9, v1, v14 ; encoding: [0x06,0x51,0x85,0xcf,0x01,0x01,0x04,0x00,0x02,0x0e,0x00,0x09] 
+0x06,0x51,0x85,0xcf,0x01,0x01,0x04,0x00,0x02,0x0e,0x00,0x09 + +# GFX1250: v_dual_add_f64 v[2:3], v[6:7], v[4:5] :: v_dual_sub_nc_u32 v9, v1, v14 ; encoding: [0x06,0x41,0x85,0xcf,0x01,0x01,0x04,0x00,0x02,0x0e,0x00,0x09] +0x06,0x41,0x85,0xcf,0x01,0x01,0x04,0x00,0x02,0x0e,0x00,0x09 + +# GFX1250: v_dual_add_nc_u32 v0, v1, v2 :: v_dual_fma_f32 v5, -v6, v7, -v8 ; encoding: [0x01,0x31,0x41,0xcf,0x06,0x51,0x02,0x00,0x00,0x07,0x08,0x05] +0x01,0x31,0x41,0xcf,0x06,0x51,0x02,0x00,0x00,0x07,0x08,0x05 + +# GFX1250: v_dual_add_nc_u32 v254, v4, v2 :: v_dual_add_f32 v7, v1, v3 ; encoding: [0x04,0x41,0x40,0xcf,0x01,0x01,0x02,0x00,0xfe,0x03,0x00,0x07] +0x04,0x41,0x40,0xcf,0x01,0x01,0x02,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v254, v4, v2 :: v_dual_ashrrev_i32 v9, v1, v13 ; encoding: [0x04,0x61,0x41,0xcf,0x01,0x01,0x02,0x00,0xfe,0x0d,0x00,0x09] +0x04,0x61,0x41,0xcf,0x01,0x01,0x02,0x00,0xfe,0x0d,0x00,0x09 + +# GFX1250: v_dual_add_nc_u32 v254, v4, v2 :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0x84 ; encoding: [0x04,0x21,0x41,0xcf,0x01,0x01,0x02,0x00,0xfe,0x03,0x84,0x07] +0x04,0x21,0x41,0xcf,0x01,0x01,0x02,0x00,0xfe,0x03,0x84,0x07 + +# GFX1250: v_dual_add_nc_u32 v254, v4, v2 :: v_dual_fma_f32 v7, v1, v3, v4 ; encoding: [0x04,0x31,0x41,0xcf,0x01,0x01,0x02,0x00,0xfe,0x03,0x04,0x07] +0x04,0x31,0x41,0xcf,0x01,0x01,0x02,0x00,0xfe,0x03,0x04,0x07 + +# GFX1250: v_dual_add_nc_u32 v254, v4, v2 :: v_dual_lshrrev_b32 v9, v1, v13 ; encoding: [0x04,0x51,0x41,0xcf,0x01,0x01,0x02,0x00,0xfe,0x0d,0x00,0x09] +0x04,0x51,0x41,0xcf,0x01,0x01,0x02,0x00,0xfe,0x0d,0x00,0x09 + +# GFX1250: v_dual_add_nc_u32 v254, v4, v2 :: v_dual_sub_nc_u32 v9, v1, v13 ; encoding: [0x04,0x41,0x41,0xcf,0x01,0x01,0x02,0x00,0xfe,0x0d,0x00,0x09] +0x04,0x41,0x41,0xcf,0x01,0x01,0x02,0x00,0xfe,0x0d,0x00,0x09 + +# GFX1250: v_dual_add_nc_u32 v255, -1, v4 :: v_dual_add_f32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x40,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x40,0x40,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# 
GFX1250: v_dual_add_nc_u32 v255, -1, v4 :: v_dual_add_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x41,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x00,0x41,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, -1, v4 :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo ; encoding: [0xc1,0x90,0x40,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x6a,0x07] +0xc1,0x90,0x40,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x6a,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, -1, v4 :: v_dual_fmac_f32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x40,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x00,0x40,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, -1, v4 :: v_dual_lshlrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x10,0x41,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x10,0x41,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, -1, v4 :: v_dual_max_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xa0,0x40,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0xa0,0x40,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, -1, v4 :: v_dual_min_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xb0,0x40,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0xb0,0x40,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, -1, v4 :: v_dual_mov_b32 v7, src_scc ; encoding: [0xc1,0x80,0x40,0xcf,0xfd,0x00,0x04,0x00,0xff,0x00,0x00,0x07] +0xc1,0x80,0x40,0xcf,0xfd,0x00,0x04,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x40,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x70,0x40,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, -1, v4 :: v_dual_mul_f32 v7, src_scc, v5 ; encoding: [0xc1,0x30,0x40,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x30,0x40,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, -1, v4 
:: v_dual_sub_f32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x40,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x50,0x40,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, -1, v4 :: v_dual_subrev_f32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x40,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x60,0x40,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, 0.5, v3 :: v_dual_add_f32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x40,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x40,0x40,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, 0.5, v3 :: v_dual_add_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x41,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x00,0x41,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, 0.5, v3 :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo ; encoding: [0xf0,0x90,0x40,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x6a,0x07] +0xf0,0x90,0x40,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x6a,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, 0.5, v3 :: v_dual_fmac_f32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x40,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x00,0x40,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x10,0x41,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x10,0x41,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, 0.5, v3 :: v_dual_max_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xa0,0x40,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0xa0,0x40,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, 0.5, v3 :: v_dual_min_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xb0,0x40,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0xb0,0x40,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, 0.5, v3 :: v_dual_mov_b32 v7, 0.5 ; encoding: 
[0xf0,0x80,0x40,0xcf,0xf0,0x00,0x03,0x00,0xff,0x00,0x00,0x07] +0xf0,0x80,0x40,0xcf,0xf0,0x00,0x03,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x40,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x70,0x40,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, 0.5, v3 :: v_dual_mul_f32 v7, 0.5, v2 ; encoding: [0xf0,0x30,0x40,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x30,0x40,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, 0.5, v3 :: v_dual_sub_f32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x40,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x50,0x40,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, 0.5, v3 :: v_dual_subrev_f32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x40,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x60,0x40,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, exec_hi, v2 :: v_dual_add_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x40,0x40,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x40,0x40,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x41,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x00,0x41,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v7, exec_hi, v3, vcc_lo ; encoding: [0x7f,0x90,0x40,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7f,0x90,0x40,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, exec_hi, v2 :: v_dual_fmac_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x40,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x00,0x40,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v7, vcc_hi, v3 ; encoding: 
[0x7f,0x10,0x41,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x10,0x41,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, exec_hi, v2 :: v_dual_max_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xa0,0x40,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0xa0,0x40,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, exec_hi, v2 :: v_dual_min_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xb0,0x40,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0xb0,0x40,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x70,0x40,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x70,0x40,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, exec_hi, v2 :: v_dual_mul_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x30,0x40,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x30,0x40,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, exec_hi, v2 :: v_dual_sub_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x50,0x40,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x50,0x40,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, exec_hi, v2 :: v_dual_subrev_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x60,0x40,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x60,0x40,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, exec_hi, v255 :: v_dual_mov_b32 v7, vcc_hi ; encoding: [0x7f,0x80,0x40,0xcf,0x6b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7f,0x80,0x40,0xcf,0x6b,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, exec_lo, v2 :: v_dual_add_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x40,0x40,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x40,0x40,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v7, ttmp15, v3 ; encoding: 
[0x7e,0x00,0x41,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x00,0x41,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v7, exec_lo, v3, vcc_lo ; encoding: [0x7e,0x90,0x40,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7e,0x90,0x40,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, exec_lo, v2 :: v_dual_fmac_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x40,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x00,0x40,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v7, ttmp15, v3 ; encoding: [0x7e,0x10,0x41,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x10,0x41,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, exec_lo, v2 :: v_dual_max_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xa0,0x40,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0xa0,0x40,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, exec_lo, v2 :: v_dual_min_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xb0,0x40,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0xb0,0x40,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x70,0x40,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x70,0x40,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, exec_lo, v2 :: v_dual_mul_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x30,0x40,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x30,0x40,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, exec_lo, v2 :: v_dual_sub_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x50,0x40,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x50,0x40,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, exec_lo, v2 :: v_dual_subrev_f32 v7, ttmp15, v3 ; encoding: 
[0x7e,0x60,0x40,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x60,0x40,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, exec_lo, v255 :: v_dual_mov_b32 v7, ttmp15 ; encoding: [0x7e,0x80,0x40,0xcf,0x7b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7e,0x80,0x40,0xcf,0x7b,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, m0, v2 :: v_dual_add_f32 v7, m0, v3 ; encoding: [0x7d,0x40,0x40,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x40,0x40,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, m0, v2 :: v_dual_add_nc_u32 v7, m0, v3 ; encoding: [0x7d,0x00,0x41,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x00,0x41,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, m0, v2 :: v_dual_cndmask_b32 v7, m0, v3, vcc_lo ; encoding: [0x7d,0x90,0x40,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7d,0x90,0x40,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, m0, v2 :: v_dual_fmac_f32 v7, m0, v3 ; encoding: [0x7d,0x00,0x40,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x00,0x40,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, m0, v2 :: v_dual_lshlrev_b32 v7, m0, v3 ; encoding: [0x7d,0x10,0x41,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x10,0x41,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, m0, v2 :: v_dual_max_num_f32 v7, m0, v3 ; encoding: [0x7d,0xa0,0x40,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0xa0,0x40,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, m0, v2 :: v_dual_min_num_f32 v7, m0, v3 ; encoding: [0x7d,0xb0,0x40,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0xb0,0x40,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v7, m0, v3 ; encoding: [0x7d,0x70,0x40,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x7d,0x70,0x40,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, m0, v2 :: v_dual_mul_f32 v7, m0, v3 ; encoding: [0x7d,0x30,0x40,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x30,0x40,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, m0, v2 :: v_dual_sub_f32 v7, m0, v3 ; encoding: [0x7d,0x50,0x40,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x50,0x40,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, m0, v2 :: v_dual_subrev_f32 v7, m0, v3 ; encoding: [0x7d,0x60,0x40,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x60,0x40,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, m0, v255 :: v_dual_mov_b32 v7, m0 ; encoding: [0x7d,0x80,0x40,0xcf,0x7d,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7d,0x80,0x40,0xcf,0x7d,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, s1, v2 :: v_dual_add_f32 v7, s105, v3 ; encoding: [0x01,0x40,0x40,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x40,0x40,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, s1, v2 :: v_dual_add_nc_u32 v7, s105, v3 ; encoding: [0x01,0x00,0x41,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x00,0x41,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, s1, v2 :: v_dual_cndmask_b32 v7, s1, v3, vcc_lo ; encoding: [0x01,0x90,0x40,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x01,0x90,0x40,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, s1, v2 :: v_dual_fmac_f32 v7, s105, v3 ; encoding: [0x01,0x00,0x40,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x00,0x40,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, s1, v2 :: v_dual_lshlrev_b32 v7, s105, v3 ; encoding: [0x01,0x10,0x41,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x10,0x41,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: 
v_dual_add_nc_u32 v255, s1, v2 :: v_dual_max_num_f32 v7, s105, v3 ; encoding: [0x01,0xa0,0x40,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xa0,0x40,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, s1, v2 :: v_dual_min_num_f32 v7, s105, v3 ; encoding: [0x01,0xb0,0x40,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xb0,0x40,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v7, s105, v3 ; encoding: [0x01,0x70,0x40,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x70,0x40,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, s1, v2 :: v_dual_mul_f32 v7, s105, v3 ; encoding: [0x01,0x30,0x40,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x30,0x40,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, s1, v2 :: v_dual_sub_f32 v7, s105, v3 ; encoding: [0x01,0x50,0x40,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x50,0x40,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, s1, v2 :: v_dual_subrev_f32 v7, s105, v3 ; encoding: [0x01,0x60,0x40,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x60,0x40,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, s1, v255 :: v_dual_mov_b32 v7, s105 ; encoding: [0x01,0x80,0x40,0xcf,0x69,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x01,0x80,0x40,0xcf,0x69,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, s105, v2 :: v_dual_add_f32 v7, s1, v3 ; encoding: [0x69,0x40,0x40,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x40,0x40,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, s105, v2 :: v_dual_add_nc_u32 v7, s1, v3 ; encoding: [0x69,0x00,0x41,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x00,0x41,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, s105, v2 :: v_dual_cndmask_b32 v7, s105, v3, vcc_lo ; 
encoding: [0x69,0x90,0x40,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x69,0x90,0x40,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, s105, v2 :: v_dual_fmac_f32 v7, s1, v3 ; encoding: [0x69,0x00,0x40,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x00,0x40,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, s105, v2 :: v_dual_lshlrev_b32 v7, s1, v3 ; encoding: [0x69,0x10,0x41,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x10,0x41,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, s105, v2 :: v_dual_max_num_f32 v7, s1, v3 ; encoding: [0x69,0xa0,0x40,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0xa0,0x40,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, s105, v2 :: v_dual_min_num_f32 v7, s1, v3 ; encoding: [0x69,0xb0,0x40,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0xb0,0x40,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v7, s1, v3 ; encoding: [0x69,0x70,0x40,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x70,0x40,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, s105, v2 :: v_dual_mul_f32 v7, s1, v3 ; encoding: [0x69,0x30,0x40,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x30,0x40,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, s105, v2 :: v_dual_sub_f32 v7, s1, v3 ; encoding: [0x69,0x50,0x40,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x50,0x40,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, s105, v2 :: v_dual_subrev_f32 v7, s1, v3 ; encoding: [0x69,0x60,0x40,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x60,0x40,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, s105, v255 :: v_dual_mov_b32 v7, s1 ; encoding: [0x69,0x80,0x40,0xcf,0x01,0x00,0xff,0x00,0xff,0x00,0x00,0x07] 
+0x69,0x80,0x40,0xcf,0x01,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, src_scc, v2 :: v_dual_add_f32 v7, -1, v3 ; encoding: [0xfd,0x40,0x40,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x40,0x40,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, src_scc, v2 :: v_dual_add_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x00,0x41,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x00,0x41,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, src_scc, v2 :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo ; encoding: [0xfd,0x90,0x40,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0xfd,0x90,0x40,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, src_scc, v2 :: v_dual_fmac_f32 v7, -1, v3 ; encoding: [0xfd,0x00,0x40,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x00,0x40,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v7, -1, v3 ; encoding: [0xfd,0x10,0x41,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x10,0x41,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, src_scc, v2 :: v_dual_max_num_f32 v7, -1, v3 ; encoding: [0xfd,0xa0,0x40,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0xa0,0x40,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, src_scc, v2 :: v_dual_min_num_f32 v7, -1, v3 ; encoding: [0xfd,0xb0,0x40,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0xb0,0x40,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v7, -1, v3 ; encoding: [0xfd,0x70,0x40,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x70,0x40,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, src_scc, v2 :: v_dual_mul_f32 v7, -1, v3 ; encoding: [0xfd,0x30,0x40,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0xfd,0x30,0x40,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, src_scc, v2 :: v_dual_sub_f32 v7, -1, v3 ; encoding: [0xfd,0x50,0x40,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x50,0x40,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, src_scc, v2 :: v_dual_subrev_f32 v7, -1, v3 ; encoding: [0xfd,0x60,0x40,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x60,0x40,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, src_scc, v255 :: v_dual_mov_b32 v7, -1 ; encoding: [0xfd,0x80,0x40,0xcf,0xc1,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0xfd,0x80,0x40,0xcf,0xc1,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, ttmp15, v2 :: v_dual_add_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x40,0x40,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x40,0x40,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x41,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x00,0x41,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v7, ttmp15, v3, vcc_lo ; encoding: [0x7b,0x90,0x40,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7b,0x90,0x40,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, ttmp15, v2 :: v_dual_fmac_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x40,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x00,0x40,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v7, vcc_lo, v3 ; encoding: [0x7b,0x10,0x41,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x10,0x41,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, ttmp15, v2 :: v_dual_max_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xa0,0x40,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x7b,0xa0,0x40,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, ttmp15, v2 :: v_dual_min_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xb0,0x40,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0xb0,0x40,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x70,0x40,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x70,0x40,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, ttmp15, v2 :: v_dual_mul_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x30,0x40,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x30,0x40,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, ttmp15, v2 :: v_dual_sub_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x50,0x40,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x50,0x40,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, ttmp15, v2 :: v_dual_subrev_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x60,0x40,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x60,0x40,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, ttmp15, v255 :: v_dual_mov_b32 v7, vcc_lo ; encoding: [0x7b,0x80,0x40,0xcf,0x6a,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7b,0x80,0x40,0xcf,0x6a,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v1, v2 :: v_dual_add_f32 v7, v255, v3 ; encoding: [0x01,0x41,0x40,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x41,0x40,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v1, v2 :: v_dual_add_nc_u32 v7, v255, v3 ; encoding: [0x01,0x01,0x41,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x01,0x41,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v1, v2 :: v_dual_cndmask_b32 v7, v255, v3, vcc_lo ; encoding: [0x01,0x91,0x40,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] 
+0x01,0x91,0x40,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v1, v2 :: v_dual_fmac_f32 v7, v255, v3 ; encoding: [0x01,0x01,0x40,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x01,0x40,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v1, v2 :: v_dual_lshlrev_b32 v7, v255, v3 ; encoding: [0x01,0x11,0x41,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x11,0x41,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v1, v2 :: v_dual_max_num_f32 v7, v255, v3 ; encoding: [0x01,0xa1,0x40,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xa1,0x40,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v1, v2 :: v_dual_min_num_f32 v7, v255, v3 ; encoding: [0x01,0xb1,0x40,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xb1,0x40,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v7, v255, v3 ; encoding: [0x01,0x71,0x40,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x71,0x40,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v1, v2 :: v_dual_mul_f32 v7, v255, v3 ; encoding: [0x01,0x31,0x40,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x31,0x40,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v1, v2 :: v_dual_sub_f32 v7, v255, v3 ; encoding: [0x01,0x51,0x40,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x51,0x40,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v1, v2 :: v_dual_subrev_f32 v7, v255, v3 ; encoding: [0x01,0x61,0x40,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x61,0x40,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v1, v255 :: v_dual_mov_b32 v7, v255 ; encoding: [0x01,0x81,0x40,0xcf,0xff,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0x01,0x81,0x40,0xcf,0xff,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: 
v_dual_add_nc_u32 v255, v2, v2 :: v_dual_add_f32 v7, v3, v3 ; encoding: [0x02,0x41,0x40,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x41,0x40,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v2, v2 :: v_dual_add_nc_u32 v7, v3, v3 ; encoding: [0x02,0x01,0x41,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x01,0x41,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v2, v2 :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo ; encoding: [0x02,0x91,0x40,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0x02,0x91,0x40,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v2, v2 :: v_dual_fmac_f32 v7, v3, v3 ; encoding: [0x02,0x01,0x40,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x01,0x40,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v2, v2 :: v_dual_lshlrev_b32 v7, v3, v3 ; encoding: [0x02,0x11,0x41,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x11,0x41,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v2, v2 :: v_dual_max_num_f32 v7, v3, v3 ; encoding: [0x02,0xa1,0x40,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0xa1,0x40,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v2, v2 :: v_dual_min_num_f32 v7, v3, v3 ; encoding: [0x02,0xb1,0x40,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0xb1,0x40,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v7, v3, v3 ; encoding: [0x02,0x71,0x40,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x71,0x40,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v2, v2 :: v_dual_mul_f32 v7, v3, v3 ; encoding: [0x02,0x31,0x40,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x31,0x40,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v2, v2 :: v_dual_sub_f32 v7, v3, v3 ; encoding: 
[0x02,0x51,0x40,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x51,0x40,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v2, v2 :: v_dual_subrev_f32 v7, v3, v3 ; encoding: [0x02,0x61,0x40,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x61,0x40,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v2, v255 :: v_dual_mov_b32 v7, v3 ; encoding: [0x02,0x81,0x40,0xcf,0x03,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0x02,0x81,0x40,0xcf,0x03,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v255, v2 :: v_dual_add_f32 v7, v2, v3 ; encoding: [0xff,0x41,0x40,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x41,0x40,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v255, v2 :: v_dual_add_nc_u32 v7, v2, v3 ; encoding: [0xff,0x01,0x41,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x01,0x41,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v255, v2 :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo ; encoding: [0xff,0x91,0x40,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0xff,0x91,0x40,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v255, v2 :: v_dual_fmac_f32 v7, v2, v3 ; encoding: [0xff,0x01,0x40,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x01,0x40,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v255, v2 :: v_dual_lshlrev_b32 v7, v2, v3 ; encoding: [0xff,0x11,0x41,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x11,0x41,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v255, v2 :: v_dual_max_num_f32 v7, v2, v3 ; encoding: [0xff,0xa1,0x40,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0xa1,0x40,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v255, v2 :: v_dual_min_num_f32 v7, v2, v3 ; encoding: [0xff,0xb1,0x40,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] 
+0xff,0xb1,0x40,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v7, v2, v3 ; encoding: [0xff,0x71,0x40,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x71,0x40,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v255, v2 :: v_dual_mul_f32 v7, v2, v3 ; encoding: [0xff,0x31,0x40,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x31,0x40,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v255, v2 :: v_dual_sub_f32 v7, v2, v3 ; encoding: [0xff,0x51,0x40,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x51,0x40,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v255, v2 :: v_dual_subrev_f32 v7, v2, v3 ; encoding: [0xff,0x61,0x40,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x61,0x40,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v255, v255 :: v_dual_mov_b32 v7, v2 ; encoding: [0xff,0x81,0x40,0xcf,0x02,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0xff,0x81,0x40,0xcf,0x02,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v3, v2 :: v_dual_add_f32 v7, v4, v3 ; encoding: [0x03,0x41,0x40,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x41,0x40,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v3, v2 :: v_dual_add_nc_u32 v7, v4, v3 ; encoding: [0x03,0x01,0x41,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x01,0x41,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v3, v2 :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo ; encoding: [0x03,0x91,0x40,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0x03,0x91,0x40,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v3, v2 :: v_dual_fmac_f32 v7, v4, v3 ; encoding: [0x03,0x01,0x40,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x01,0x40,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: 
v_dual_add_nc_u32 v255, v3, v2 :: v_dual_lshlrev_b32 v7, v4, v3 ; encoding: [0x03,0x11,0x41,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x11,0x41,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v3, v2 :: v_dual_max_num_f32 v7, v4, v3 ; encoding: [0x03,0xa1,0x40,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0xa1,0x40,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v3, v2 :: v_dual_min_num_f32 v7, v4, v3 ; encoding: [0x03,0xb1,0x40,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0xb1,0x40,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v7, v4, v3 ; encoding: [0x03,0x71,0x40,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x71,0x40,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v3, v2 :: v_dual_mul_f32 v7, v4, v3 ; encoding: [0x03,0x31,0x40,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x31,0x40,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v3, v2 :: v_dual_sub_f32 v7, v4, v3 ; encoding: [0x03,0x51,0x40,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x51,0x40,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v3, v2 :: v_dual_subrev_f32 v7, v4, v3 ; encoding: [0x03,0x61,0x40,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x61,0x40,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v3, v255 :: v_dual_mov_b32 v7, v4 ; encoding: [0x03,0x81,0x40,0xcf,0x04,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0x03,0x81,0x40,0xcf,0x04,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v4, v2 :: v_dual_add_nc_u32 v7, v1, v3 ; encoding: [0x04,0x01,0x41,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x01,0x41,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, s96 ; encoding: 
[0x04,0x91,0x40,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x60,0x07] +0x04,0x91,0x40,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x60,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo ; encoding: [0x04,0x91,0x40,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0x04,0x91,0x40,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v4, v2 :: v_dual_fmac_f32 v7, v1, v3 ; encoding: [0x04,0x01,0x40,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x01,0x40,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v4, v2 :: v_dual_lshlrev_b32 v7, v1, v3 ; encoding: [0x04,0x11,0x41,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x11,0x41,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v4, v2 :: v_dual_max_num_f32 v7, v1, v3 ; encoding: [0x04,0xa1,0x40,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0xa1,0x40,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v4, v2 :: v_dual_min_num_f32 v7, v1, v3 ; encoding: [0x04,0xb1,0x40,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0xb1,0x40,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v7, v1, v3 ; encoding: [0x04,0x71,0x40,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x71,0x40,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v4, v2 :: v_dual_mul_f32 v7, v1, v3 ; encoding: [0x04,0x31,0x40,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x31,0x40,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v4, v2 :: v_dual_sub_f32 v7, v1, v3 ; encoding: [0x04,0x51,0x40,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x51,0x40,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v4, v2 :: v_dual_subrev_f32 v7, v1, v3 ; encoding: [0x04,0x61,0x40,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x04,0x61,0x40,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, v4, v255 :: v_dual_mov_b32 v7, v1 ; encoding: [0x04,0x81,0x40,0xcf,0x01,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0x04,0x81,0x40,0xcf,0x01,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, vcc_hi, v2 :: v_dual_add_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x40,0x40,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x40,0x40,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x41,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x00,0x41,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v7, vcc_hi, v3, vcc_lo ; encoding: [0x6b,0x90,0x40,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x6b,0x90,0x40,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x40,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x00,0x40,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v7, exec_lo, v3 ; encoding: [0x6b,0x10,0x41,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x10,0x41,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xa0,0x40,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0xa0,0x40,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xb0,0x40,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0xb0,0x40,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x70,0x40,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x6b,0x70,0x40,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, vcc_hi, v2 :: v_dual_mul_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x30,0x40,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x30,0x40,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, vcc_hi, v2 :: v_dual_sub_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x50,0x40,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x50,0x40,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x60,0x40,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x60,0x40,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, vcc_hi, v255 :: v_dual_mov_b32 v7, exec_lo ; encoding: [0x6b,0x80,0x40,0xcf,0x7e,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x6b,0x80,0x40,0xcf,0x7e,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, vcc_lo, v2 :: v_dual_add_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x40,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x40,0x40,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x41,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x00,0x41,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo ; encoding: [0x6a,0x90,0x40,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x6a,0x90,0x40,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x40,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x00,0x40,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x10,0x41,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x6a,0x10,0x41,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xa0,0x40,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0xa0,0x40,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xb0,0x40,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0xb0,0x40,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x40,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x70,0x40,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, vcc_lo, v2 :: v_dual_mul_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x30,0x40,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x30,0x40,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, vcc_lo, v2 :: v_dual_sub_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x40,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x50,0x40,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x40,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x60,0x40,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_add_nc_u32 v255, vcc_lo, v255 :: v_dual_mov_b32 v7, exec_hi ; encoding: [0x6a,0x80,0x40,0xcf,0x7f,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x6a,0x80,0x40,0xcf,0x7f,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v254, v4, v2 :: v_dual_add_f32 v7, v1, v3 ; encoding: [0x04,0x41,0x58,0xcf,0x01,0x01,0x02,0x00,0xfe,0x03,0x00,0x07] +0x04,0x41,0x58,0xcf,0x01,0x01,0x02,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v254, v4, v2 :: v_dual_ashrrev_i32 v9, v1, v13 ; encoding: [0x04,0x61,0x59,0xcf,0x01,0x01,0x02,0x00,0xfe,0x0d,0x00,0x09] 
+0x04,0x61,0x59,0xcf,0x01,0x01,0x02,0x00,0xfe,0x0d,0x00,0x09 + +# GFX1250: v_dual_ashrrev_i32 v254, v4, v2 :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0x87 ; encoding: [0x04,0x21,0x59,0xcf,0x01,0x01,0x02,0x00,0xfe,0x03,0x87,0x07] +0x04,0x21,0x59,0xcf,0x01,0x01,0x02,0x00,0xfe,0x03,0x87,0x07 + +# GFX1250: v_dual_ashrrev_i32 v254, v4, v2 :: v_dual_fma_f32 v7, v1, v3, v4 ; encoding: [0x04,0x31,0x59,0xcf,0x01,0x01,0x02,0x00,0xfe,0x03,0x04,0x07] +0x04,0x31,0x59,0xcf,0x01,0x01,0x02,0x00,0xfe,0x03,0x04,0x07 + +# GFX1250: v_dual_ashrrev_i32 v254, v4, v2 :: v_dual_lshrrev_b32 v9, v1, v13 ; encoding: [0x04,0x51,0x59,0xcf,0x01,0x01,0x02,0x00,0xfe,0x0d,0x00,0x09] +0x04,0x51,0x59,0xcf,0x01,0x01,0x02,0x00,0xfe,0x0d,0x00,0x09 + +# GFX1250: v_dual_ashrrev_i32 v254, v4, v2 :: v_dual_sub_nc_u32 v9, v1, v13 ; encoding: [0x04,0x41,0x59,0xcf,0x01,0x01,0x02,0x00,0xfe,0x0d,0x00,0x09] +0x04,0x41,0x59,0xcf,0x01,0x01,0x02,0x00,0xfe,0x0d,0x00,0x09 + +# GFX1250: v_dual_ashrrev_i32 v255, -1, v4 :: v_dual_add_f32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x58,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x40,0x58,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, -1, v4 :: v_dual_add_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x59,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x00,0x59,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, -1, v4 :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo ; encoding: [0xc1,0x90,0x58,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x6a,0x07] +0xc1,0x90,0x58,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x6a,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, -1, v4 :: v_dual_fmac_f32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x58,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x00,0x58,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, -1, v4 :: v_dual_lshlrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x10,0x59,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] 
+0xc1,0x10,0x59,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, -1, v4 :: v_dual_max_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xa0,0x58,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0xa0,0x58,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, -1, v4 :: v_dual_min_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xb0,0x58,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0xb0,0x58,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, -1, v4 :: v_dual_mov_b32 v7, src_scc ; encoding: [0xc1,0x80,0x58,0xcf,0xfd,0x00,0x04,0x00,0xff,0x00,0x00,0x07] +0xc1,0x80,0x58,0xcf,0xfd,0x00,0x04,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x58,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x70,0x58,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, -1, v4 :: v_dual_mul_f32 v7, src_scc, v5 ; encoding: [0xc1,0x30,0x58,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x30,0x58,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, -1, v4 :: v_dual_sub_f32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x58,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x50,0x58,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, -1, v4 :: v_dual_subrev_f32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x58,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x60,0x58,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, 0.5, v3 :: v_dual_add_f32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x58,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x40,0x58,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, 0.5, v3 :: v_dual_add_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x59,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] 
+0xf0,0x00,0x59,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, 0.5, v3 :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo ; encoding: [0xf0,0x90,0x58,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x6a,0x07] +0xf0,0x90,0x58,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x6a,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, 0.5, v3 :: v_dual_fmac_f32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x58,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x00,0x58,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x10,0x59,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x10,0x59,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, 0.5, v3 :: v_dual_max_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xa0,0x58,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0xa0,0x58,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, 0.5, v3 :: v_dual_min_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xb0,0x58,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0xb0,0x58,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, 0.5, v3 :: v_dual_mov_b32 v7, 0.5 ; encoding: [0xf0,0x80,0x58,0xcf,0xf0,0x00,0x03,0x00,0xff,0x00,0x00,0x07] +0xf0,0x80,0x58,0xcf,0xf0,0x00,0x03,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x58,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x70,0x58,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, 0.5, v3 :: v_dual_mul_f32 v7, 0.5, v2 ; encoding: [0xf0,0x30,0x58,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x30,0x58,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, 0.5, v3 :: v_dual_sub_f32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x58,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x50,0x58,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 
+ +# GFX1250: v_dual_ashrrev_i32 v255, 0.5, v3 :: v_dual_subrev_f32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x58,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x60,0x58,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, exec_hi, v2 :: v_dual_add_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x40,0x58,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x40,0x58,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x59,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x00,0x59,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v7, exec_hi, v3, vcc_lo ; encoding: [0x7f,0x90,0x58,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7f,0x90,0x58,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, exec_hi, v2 :: v_dual_fmac_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x58,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x00,0x58,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v7, vcc_hi, v3 ; encoding: [0x7f,0x10,0x59,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x10,0x59,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, exec_hi, v2 :: v_dual_max_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xa0,0x58,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0xa0,0x58,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, exec_hi, v2 :: v_dual_min_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xb0,0x58,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0xb0,0x58,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x70,0x58,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x7f,0x70,0x58,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, exec_hi, v2 :: v_dual_mul_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x30,0x58,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x30,0x58,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, exec_hi, v2 :: v_dual_sub_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x50,0x58,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x50,0x58,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, exec_hi, v2 :: v_dual_subrev_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x60,0x58,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x60,0x58,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, exec_hi, v255 :: v_dual_mov_b32 v7, vcc_hi ; encoding: [0x7f,0x80,0x58,0xcf,0x6b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7f,0x80,0x58,0xcf,0x6b,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, exec_lo, v2 :: v_dual_add_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x40,0x58,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x40,0x58,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x59,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x00,0x59,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v7, exec_lo, v3, vcc_lo ; encoding: [0x7e,0x90,0x58,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7e,0x90,0x58,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, exec_lo, v2 :: v_dual_fmac_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x58,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x00,0x58,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v7, ttmp15, v3 ; encoding: [0x7e,0x10,0x59,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x7e,0x10,0x59,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, exec_lo, v2 :: v_dual_max_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xa0,0x58,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0xa0,0x58,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, exec_lo, v2 :: v_dual_min_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xb0,0x58,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0xb0,0x58,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x70,0x58,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x70,0x58,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, exec_lo, v2 :: v_dual_mul_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x30,0x58,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x30,0x58,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, exec_lo, v2 :: v_dual_sub_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x50,0x58,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x50,0x58,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, exec_lo, v2 :: v_dual_subrev_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x60,0x58,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x60,0x58,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, exec_lo, v255 :: v_dual_mov_b32 v7, ttmp15 ; encoding: [0x7e,0x80,0x58,0xcf,0x7b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7e,0x80,0x58,0xcf,0x7b,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, m0, v2 :: v_dual_add_f32 v7, m0, v3 ; encoding: [0x7d,0x40,0x58,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x40,0x58,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, m0, v2 :: v_dual_add_nc_u32 v7, m0, v3 ; encoding: [0x7d,0x00,0x59,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x7d,0x00,0x59,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, m0, v2 :: v_dual_cndmask_b32 v7, m0, v3, vcc_lo ; encoding: [0x7d,0x90,0x58,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7d,0x90,0x58,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, m0, v2 :: v_dual_fmac_f32 v7, m0, v3 ; encoding: [0x7d,0x00,0x58,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x00,0x58,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, m0, v2 :: v_dual_lshlrev_b32 v7, m0, v3 ; encoding: [0x7d,0x10,0x59,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x10,0x59,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, m0, v2 :: v_dual_max_num_f32 v7, m0, v3 ; encoding: [0x7d,0xa0,0x58,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0xa0,0x58,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, m0, v2 :: v_dual_min_num_f32 v7, m0, v3 ; encoding: [0x7d,0xb0,0x58,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0xb0,0x58,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v7, m0, v3 ; encoding: [0x7d,0x70,0x58,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x70,0x58,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, m0, v2 :: v_dual_mul_f32 v7, m0, v3 ; encoding: [0x7d,0x30,0x58,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x30,0x58,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, m0, v2 :: v_dual_sub_f32 v7, m0, v3 ; encoding: [0x7d,0x50,0x58,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x50,0x58,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, m0, v2 :: v_dual_subrev_f32 v7, m0, v3 ; encoding: [0x7d,0x60,0x58,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x60,0x58,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# 
GFX1250: v_dual_ashrrev_i32 v255, m0, v255 :: v_dual_mov_b32 v7, m0 ; encoding: [0x7d,0x80,0x58,0xcf,0x7d,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7d,0x80,0x58,0xcf,0x7d,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, s1, v2 :: v_dual_add_f32 v7, s105, v3 ; encoding: [0x01,0x40,0x58,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x40,0x58,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, s1, v2 :: v_dual_add_nc_u32 v7, s105, v3 ; encoding: [0x01,0x00,0x59,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x00,0x59,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, s1, v2 :: v_dual_cndmask_b32 v7, s1, v3, vcc_lo ; encoding: [0x01,0x90,0x58,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x01,0x90,0x58,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, s1, v2 :: v_dual_fmac_f32 v7, s105, v3 ; encoding: [0x01,0x00,0x58,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x00,0x58,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, s1, v2 :: v_dual_lshlrev_b32 v7, s105, v3 ; encoding: [0x01,0x10,0x59,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x10,0x59,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, s1, v2 :: v_dual_max_num_f32 v7, s105, v3 ; encoding: [0x01,0xa0,0x58,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xa0,0x58,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, s1, v2 :: v_dual_min_num_f32 v7, s105, v3 ; encoding: [0x01,0xb0,0x58,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xb0,0x58,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v7, s105, v3 ; encoding: [0x01,0x70,0x58,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x70,0x58,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, s1, v2 :: v_dual_mul_f32 
v7, s105, v3 ; encoding: [0x01,0x30,0x58,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x30,0x58,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, s1, v2 :: v_dual_sub_f32 v7, s105, v3 ; encoding: [0x01,0x50,0x58,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x50,0x58,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, s1, v2 :: v_dual_subrev_f32 v7, s105, v3 ; encoding: [0x01,0x60,0x58,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x60,0x58,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, s1, v255 :: v_dual_mov_b32 v7, s105 ; encoding: [0x01,0x80,0x58,0xcf,0x69,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x01,0x80,0x58,0xcf,0x69,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, s105, v2 :: v_dual_add_f32 v7, s1, v3 ; encoding: [0x69,0x40,0x58,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x40,0x58,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, s105, v2 :: v_dual_add_nc_u32 v7, s1, v3 ; encoding: [0x69,0x00,0x59,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x00,0x59,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, s105, v2 :: v_dual_cndmask_b32 v7, s105, v3, vcc_lo ; encoding: [0x69,0x90,0x58,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x69,0x90,0x58,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, s105, v2 :: v_dual_fmac_f32 v7, s1, v3 ; encoding: [0x69,0x00,0x58,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x00,0x58,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, s105, v2 :: v_dual_lshlrev_b32 v7, s1, v3 ; encoding: [0x69,0x10,0x59,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x10,0x59,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, s105, v2 :: v_dual_max_num_f32 v7, s1, v3 ; encoding: 
[0x69,0xa0,0x58,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0xa0,0x58,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, s105, v2 :: v_dual_min_num_f32 v7, s1, v3 ; encoding: [0x69,0xb0,0x58,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0xb0,0x58,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v7, s1, v3 ; encoding: [0x69,0x70,0x58,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x70,0x58,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, s105, v2 :: v_dual_mul_f32 v7, s1, v3 ; encoding: [0x69,0x30,0x58,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x30,0x58,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, s105, v2 :: v_dual_sub_f32 v7, s1, v3 ; encoding: [0x69,0x50,0x58,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x50,0x58,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, s105, v2 :: v_dual_subrev_f32 v7, s1, v3 ; encoding: [0x69,0x60,0x58,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x60,0x58,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, s105, v255 :: v_dual_mov_b32 v7, s1 ; encoding: [0x69,0x80,0x58,0xcf,0x01,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x69,0x80,0x58,0xcf,0x01,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, src_scc, v2 :: v_dual_add_f32 v7, -1, v3 ; encoding: [0xfd,0x40,0x58,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x40,0x58,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, src_scc, v2 :: v_dual_add_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x00,0x59,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x00,0x59,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, src_scc, v2 :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo ; encoding: 
[0xfd,0x90,0x58,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0xfd,0x90,0x58,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, src_scc, v2 :: v_dual_fmac_f32 v7, -1, v3 ; encoding: [0xfd,0x00,0x58,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x00,0x58,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v7, -1, v3 ; encoding: [0xfd,0x10,0x59,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x10,0x59,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, src_scc, v2 :: v_dual_max_num_f32 v7, -1, v3 ; encoding: [0xfd,0xa0,0x58,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0xa0,0x58,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, src_scc, v2 :: v_dual_min_num_f32 v7, -1, v3 ; encoding: [0xfd,0xb0,0x58,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0xb0,0x58,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v7, -1, v3 ; encoding: [0xfd,0x70,0x58,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x70,0x58,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, src_scc, v2 :: v_dual_mul_f32 v7, -1, v3 ; encoding: [0xfd,0x30,0x58,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x30,0x58,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, src_scc, v2 :: v_dual_sub_f32 v7, -1, v3 ; encoding: [0xfd,0x50,0x58,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x50,0x58,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, src_scc, v2 :: v_dual_subrev_f32 v7, -1, v3 ; encoding: [0xfd,0x60,0x58,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x60,0x58,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, src_scc, v255 :: v_dual_mov_b32 v7, -1 ; encoding: 
[0xfd,0x80,0x58,0xcf,0xc1,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0xfd,0x80,0x58,0xcf,0xc1,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, ttmp15, v2 :: v_dual_add_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x40,0x58,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x40,0x58,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x59,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x00,0x59,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v7, ttmp15, v3, vcc_lo ; encoding: [0x7b,0x90,0x58,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7b,0x90,0x58,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, ttmp15, v2 :: v_dual_fmac_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x58,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x00,0x58,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v7, vcc_lo, v3 ; encoding: [0x7b,0x10,0x59,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x10,0x59,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, ttmp15, v2 :: v_dual_max_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xa0,0x58,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0xa0,0x58,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, ttmp15, v2 :: v_dual_min_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xb0,0x58,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0xb0,0x58,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x70,0x58,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x70,0x58,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, ttmp15, v2 :: v_dual_mul_f32 v7, vcc_lo, v3 ; encoding: 
[0x7b,0x30,0x58,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x30,0x58,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, ttmp15, v2 :: v_dual_sub_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x50,0x58,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x50,0x58,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, ttmp15, v2 :: v_dual_subrev_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x60,0x58,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x60,0x58,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, ttmp15, v255 :: v_dual_mov_b32 v7, vcc_lo ; encoding: [0x7b,0x80,0x58,0xcf,0x6a,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7b,0x80,0x58,0xcf,0x6a,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v1, v2 :: v_dual_add_f32 v7, v255, v3 ; encoding: [0x01,0x41,0x58,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x41,0x58,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v1, v2 :: v_dual_add_nc_u32 v7, v255, v3 ; encoding: [0x01,0x01,0x59,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x01,0x59,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v1, v2 :: v_dual_cndmask_b32 v7, v255, v3, vcc_lo ; encoding: [0x01,0x91,0x58,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0x01,0x91,0x58,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v1, v2 :: v_dual_fmac_f32 v7, v255, v3 ; encoding: [0x01,0x01,0x58,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x01,0x58,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v1, v2 :: v_dual_lshlrev_b32 v7, v255, v3 ; encoding: [0x01,0x11,0x59,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x11,0x59,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v1, v2 :: v_dual_max_num_f32 v7, v255, v3 ; encoding: 
[0x01,0xa1,0x58,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xa1,0x58,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v1, v2 :: v_dual_min_num_f32 v7, v255, v3 ; encoding: [0x01,0xb1,0x58,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xb1,0x58,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v7, v255, v3 ; encoding: [0x01,0x71,0x58,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x71,0x58,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v1, v2 :: v_dual_mul_f32 v7, v255, v3 ; encoding: [0x01,0x31,0x58,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x31,0x58,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v1, v2 :: v_dual_sub_f32 v7, v255, v3 ; encoding: [0x01,0x51,0x58,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x51,0x58,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v1, v2 :: v_dual_subrev_f32 v7, v255, v3 ; encoding: [0x01,0x61,0x58,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x61,0x58,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v1, v255 :: v_dual_mov_b32 v7, v255 ; encoding: [0x01,0x81,0x58,0xcf,0xff,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0x01,0x81,0x58,0xcf,0xff,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v2, v2 :: v_dual_add_f32 v7, v3, v3 ; encoding: [0x02,0x41,0x58,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x41,0x58,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v2, v2 :: v_dual_add_nc_u32 v7, v3, v3 ; encoding: [0x02,0x01,0x59,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x01,0x59,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v2, v2 :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo ; encoding: [0x02,0x91,0x58,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] 
+0x02,0x91,0x58,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v2, v2 :: v_dual_fmac_f32 v7, v3, v3 ; encoding: [0x02,0x01,0x58,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x01,0x58,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v2, v2 :: v_dual_lshlrev_b32 v7, v3, v3 ; encoding: [0x02,0x11,0x59,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x11,0x59,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v2, v2 :: v_dual_max_num_f32 v7, v3, v3 ; encoding: [0x02,0xa1,0x58,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0xa1,0x58,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v2, v2 :: v_dual_min_num_f32 v7, v3, v3 ; encoding: [0x02,0xb1,0x58,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0xb1,0x58,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v7, v3, v3 ; encoding: [0x02,0x71,0x58,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x71,0x58,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v2, v2 :: v_dual_mul_f32 v7, v3, v3 ; encoding: [0x02,0x31,0x58,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x31,0x58,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v2, v2 :: v_dual_sub_f32 v7, v3, v3 ; encoding: [0x02,0x51,0x58,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x51,0x58,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v2, v2 :: v_dual_subrev_f32 v7, v3, v3 ; encoding: [0x02,0x61,0x58,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x61,0x58,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v2, v255 :: v_dual_mov_b32 v7, v3 ; encoding: [0x02,0x81,0x58,0xcf,0x03,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0x02,0x81,0x58,0xcf,0x03,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: 
v_dual_ashrrev_i32 v255, v255, v2 :: v_dual_add_f32 v7, v2, v3 ; encoding: [0xff,0x41,0x58,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x41,0x58,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v255, v2 :: v_dual_add_nc_u32 v7, v2, v3 ; encoding: [0xff,0x01,0x59,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x01,0x59,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v255, v2 :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo ; encoding: [0xff,0x91,0x58,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0xff,0x91,0x58,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v255, v2 :: v_dual_fmac_f32 v7, v2, v3 ; encoding: [0xff,0x01,0x58,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x01,0x58,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v255, v2 :: v_dual_lshlrev_b32 v7, v2, v3 ; encoding: [0xff,0x11,0x59,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x11,0x59,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v255, v2 :: v_dual_max_num_f32 v7, v2, v3 ; encoding: [0xff,0xa1,0x58,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0xa1,0x58,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v255, v2 :: v_dual_min_num_f32 v7, v2, v3 ; encoding: [0xff,0xb1,0x58,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0xb1,0x58,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v7, v2, v3 ; encoding: [0xff,0x71,0x58,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x71,0x58,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v255, v2 :: v_dual_mul_f32 v7, v2, v3 ; encoding: [0xff,0x31,0x58,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x31,0x58,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v255, v2 :: v_dual_sub_f32 
v7, v2, v3 ; encoding: [0xff,0x51,0x58,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x51,0x58,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v255, v2 :: v_dual_subrev_f32 v7, v2, v3 ; encoding: [0xff,0x61,0x58,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x61,0x58,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v255, v255 :: v_dual_mov_b32 v7, v2 ; encoding: [0xff,0x81,0x58,0xcf,0x02,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0xff,0x81,0x58,0xcf,0x02,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v3, v2 :: v_dual_add_f32 v7, v4, v3 ; encoding: [0x03,0x41,0x58,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x41,0x58,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v3, v2 :: v_dual_add_nc_u32 v7, v4, v3 ; encoding: [0x03,0x01,0x59,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x01,0x59,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v3, v2 :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo ; encoding: [0x03,0x91,0x58,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0x03,0x91,0x58,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v3, v2 :: v_dual_fmac_f32 v7, v4, v3 ; encoding: [0x03,0x01,0x58,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x01,0x58,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v3, v2 :: v_dual_lshlrev_b32 v7, v4, v3 ; encoding: [0x03,0x11,0x59,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x11,0x59,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v3, v2 :: v_dual_max_num_f32 v7, v4, v3 ; encoding: [0x03,0xa1,0x58,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0xa1,0x58,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v3, v2 :: v_dual_min_num_f32 v7, v4, v3 ; encoding: 
[0x03,0xb1,0x58,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0xb1,0x58,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v7, v4, v3 ; encoding: [0x03,0x71,0x58,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x71,0x58,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v3, v2 :: v_dual_mul_f32 v7, v4, v3 ; encoding: [0x03,0x31,0x58,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x31,0x58,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v3, v2 :: v_dual_sub_f32 v7, v4, v3 ; encoding: [0x03,0x51,0x58,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x51,0x58,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v3, v2 :: v_dual_subrev_f32 v7, v4, v3 ; encoding: [0x03,0x61,0x58,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x61,0x58,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v3, v255 :: v_dual_mov_b32 v7, v4 ; encoding: [0x03,0x81,0x58,0xcf,0x04,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0x03,0x81,0x58,0xcf,0x04,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v4, v2 :: v_dual_add_nc_u32 v7, v1, v3 ; encoding: [0x04,0x01,0x59,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x01,0x59,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, s96 ; encoding: [0x04,0x91,0x58,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x60,0x07] +0x04,0x91,0x58,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x60,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo ; encoding: [0x04,0x91,0x58,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0x04,0x91,0x58,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v4, v2 :: v_dual_fmac_f32 v7, v1, v3 ; encoding: [0x04,0x01,0x58,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x04,0x01,0x58,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v4, v2 :: v_dual_lshlrev_b32 v7, v1, v3 ; encoding: [0x04,0x11,0x59,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x11,0x59,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v4, v2 :: v_dual_max_num_f32 v7, v1, v3 ; encoding: [0x04,0xa1,0x58,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0xa1,0x58,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v4, v2 :: v_dual_min_num_f32 v7, v1, v3 ; encoding: [0x04,0xb1,0x58,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0xb1,0x58,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v7, v1, v3 ; encoding: [0x04,0x71,0x58,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x71,0x58,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v4, v2 :: v_dual_mul_f32 v7, v1, v3 ; encoding: [0x04,0x31,0x58,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x31,0x58,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v4, v2 :: v_dual_sub_f32 v7, v1, v3 ; encoding: [0x04,0x51,0x58,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x51,0x58,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v4, v2 :: v_dual_subrev_f32 v7, v1, v3 ; encoding: [0x04,0x61,0x58,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x61,0x58,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, v4, v255 :: v_dual_mov_b32 v7, v1 ; encoding: [0x04,0x81,0x58,0xcf,0x01,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0x04,0x81,0x58,0xcf,0x01,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, vcc_hi, v2 :: v_dual_add_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x40,0x58,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x40,0x58,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: 
v_dual_ashrrev_i32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x59,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x00,0x59,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v7, vcc_hi, v3, vcc_lo ; encoding: [0x6b,0x90,0x58,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x6b,0x90,0x58,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x58,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x00,0x58,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v7, exec_lo, v3 ; encoding: [0x6b,0x10,0x59,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x10,0x59,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xa0,0x58,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0xa0,0x58,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xb0,0x58,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0xb0,0x58,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x70,0x58,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x70,0x58,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, vcc_hi, v2 :: v_dual_mul_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x30,0x58,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x30,0x58,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, vcc_hi, v2 :: v_dual_sub_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x50,0x58,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x50,0x58,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# 
GFX1250: v_dual_ashrrev_i32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x60,0x58,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x60,0x58,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, vcc_hi, v255 :: v_dual_mov_b32 v7, exec_lo ; encoding: [0x6b,0x80,0x58,0xcf,0x7e,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x6b,0x80,0x58,0xcf,0x7e,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, vcc_lo, v2 :: v_dual_add_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x58,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x40,0x58,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x59,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x00,0x59,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo ; encoding: [0x6a,0x90,0x58,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x6a,0x90,0x58,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x58,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x00,0x58,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x10,0x59,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x10,0x59,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xa0,0x58,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0xa0,0x58,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xb0,0x58,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0xb0,0x58,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# 
GFX1250: v_dual_ashrrev_i32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x58,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x70,0x58,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, vcc_lo, v2 :: v_dual_mul_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x30,0x58,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x30,0x58,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, vcc_lo, v2 :: v_dual_sub_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x58,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x50,0x58,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x58,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x60,0x58,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_ashrrev_i32 v255, vcc_lo, v255 :: v_dual_mov_b32 v7, exec_hi ; encoding: [0x6a,0x80,0x58,0xcf,0x7f,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x6a,0x80,0x58,0xcf,0x7f,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v0, v1, v2, s96 :: v_dual_add_f32 v5, -s6, -v7 ; encoding: [0x01,0x41,0x24,0xcf,0x06,0x30,0x02,0x60,0x00,0x07,0x00,0x05] +0x01,0x41,0x24,0xcf,0x06,0x30,0x02,0x60,0x00,0x07,0x00,0x05 + +# GFX1250: v_dual_cndmask_b32 v0, v1, v2, s96 :: v_dual_fmac_f32 v5, -v6, -v7 ; encoding: [0x01,0x01,0x24,0xcf,0x06,0x31,0x02,0x60,0x00,0x07,0x00,0x05] +0x01,0x01,0x24,0xcf,0x06,0x31,0x02,0x60,0x00,0x07,0x00,0x05 + +# GFX1250: v_dual_cndmask_b32 v0, v1, v2, vcc_lo :: v_dual_add_f32 v5, -s6, -v7 ; encoding: [0x01,0x41,0x24,0xcf,0x06,0x30,0x02,0x6a,0x00,0x07,0x00,0x05] +0x01,0x41,0x24,0xcf,0x06,0x30,0x02,0x6a,0x00,0x07,0x00,0x05 + +# GFX1250: v_dual_cndmask_b32 v0, v1, v2, vcc_lo :: v_dual_fmac_f32 v5, -v6, -v7 ; encoding: [0x01,0x01,0x24,0xcf,0x06,0x31,0x02,0x6a,0x00,0x07,0x00,0x05] +0x01,0x01,0x24,0xcf,0x06,0x31,0x02,0x6a,0x00,0x07,0x00,0x05 + +# GFX1250: 
v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_add_f32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x24,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x05,0x00,0x07] +0xc1,0x40,0x24,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_add_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x25,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x05,0x00,0x07] +0xc1,0x00,0x25,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_ashrrev_i32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x25,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x05,0x00,0x07] +0xc1,0x60,0x25,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo ; encoding: [0xc1,0x90,0x24,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x05,0x6a,0x07] +0xc1,0x90,0x24,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x05,0x6a,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_fmac_f32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x24,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x05,0x00,0x07] +0xc1,0x00,0x24,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_lshlrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x10,0x25,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x05,0x00,0x07] +0xc1,0x10,0x25,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_lshrrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x25,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x05,0x00,0x07] +0xc1,0x50,0x25,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_max_i32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x25,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x05,0x00,0x07] +0xc1,0x70,0x25,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_max_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xa0,0x24,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x05,0x00,0x07] 
+0xc1,0xa0,0x24,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_min_i32 v7, src_scc, v5 ; encoding: [0xc1,0x80,0x25,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x05,0x00,0x07] +0xc1,0x80,0x25,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_min_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xb0,0x24,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x05,0x00,0x07] +0xc1,0xb0,0x24,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_mov_b32 v7, src_scc ; encoding: [0xc1,0x80,0x24,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x00,0x00,0x07] +0xc1,0x80,0x24,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x24,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x05,0x00,0x07] +0xc1,0x70,0x24,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_mul_f32 v7, src_scc, v5 ; encoding: [0xc1,0x30,0x24,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x05,0x00,0x07] +0xc1,0x30,0x24,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_sub_f32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x24,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x05,0x00,0x07] +0xc1,0x50,0x24,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_sub_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x25,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x05,0x00,0x07] +0xc1,0x40,0x25,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, -1, v4, vcc_lo :: v_dual_subrev_f32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x24,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x05,0x00,0x07] +0xc1,0x60,0x24,0xcf,0xfd,0x00,0x04,0x6a,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_add_f32 v7, 0.5, v2 ; encoding: 
[0xf0,0x40,0x24,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x02,0x00,0x07] +0xf0,0x40,0x24,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_add_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x25,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x02,0x00,0x07] +0xf0,0x00,0x25,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_ashrrev_i32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x25,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x02,0x00,0x07] +0xf0,0x60,0x25,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo ; encoding: [0xf0,0x90,0x24,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x02,0x6a,0x07] +0xf0,0x90,0x24,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x02,0x6a,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_fmac_f32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x24,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x02,0x00,0x07] +0xf0,0x00,0x24,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_lshlrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x10,0x25,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x02,0x00,0x07] +0xf0,0x10,0x25,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_lshrrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x25,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x02,0x00,0x07] +0xf0,0x50,0x25,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_max_i32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x25,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x02,0x00,0x07] +0xf0,0x70,0x25,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_max_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xa0,0x24,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x02,0x00,0x07] +0xf0,0xa0,0x24,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_min_i32 v7, 0.5, v2 
; encoding: [0xf0,0x80,0x25,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x02,0x00,0x07] +0xf0,0x80,0x25,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_min_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xb0,0x24,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x02,0x00,0x07] +0xf0,0xb0,0x24,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_mov_b32 v7, 0.5 ; encoding: [0xf0,0x80,0x24,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x00,0x00,0x07] +0xf0,0x80,0x24,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x24,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x02,0x00,0x07] +0xf0,0x70,0x24,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_mul_f32 v7, 0.5, v2 ; encoding: [0xf0,0x30,0x24,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x02,0x00,0x07] +0xf0,0x30,0x24,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_sub_f32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x24,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x02,0x00,0x07] +0xf0,0x50,0x24,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_sub_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x25,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x02,0x00,0x07] +0xf0,0x40,0x25,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, 0.5, v3, vcc_lo :: v_dual_subrev_f32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x24,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x02,0x00,0x07] +0xf0,0x60,0x24,0xcf,0xf0,0x00,0x03,0x6a,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, exec_hi, v2, vcc_lo :: v_dual_add_f32 v7, exec_hi, v3 ; encoding: [0x7f,0x40,0x24,0xcf,0x7f,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7f,0x40,0x24,0xcf,0x7f,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, exec_hi, v2, vcc_lo :: v_dual_add_nc_u32 
v7, exec_hi, v3 ; encoding: [0x7f,0x00,0x25,0xcf,0x7f,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7f,0x00,0x25,0xcf,0x7f,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, exec_hi, v2, vcc_lo :: v_dual_ashrrev_i32 v7, exec_hi, v3 ; encoding: [0x7f,0x60,0x25,0xcf,0x7f,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7f,0x60,0x25,0xcf,0x7f,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, exec_hi, v2, vcc_lo :: v_dual_cndmask_b32 v7, exec_hi, v3, vcc_lo ; encoding: [0x7f,0x90,0x24,0xcf,0x7f,0x00,0x02,0x6a,0xff,0x03,0x6a,0x07] +0x7f,0x90,0x24,0xcf,0x7f,0x00,0x02,0x6a,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, exec_hi, v2, vcc_lo :: v_dual_fmac_f32 v7, exec_hi, v3 ; encoding: [0x7f,0x00,0x24,0xcf,0x7f,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7f,0x00,0x24,0xcf,0x7f,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, exec_hi, v2, vcc_lo :: v_dual_lshlrev_b32 v7, exec_hi, v3 ; encoding: [0x7f,0x10,0x25,0xcf,0x7f,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7f,0x10,0x25,0xcf,0x7f,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, exec_hi, v2, vcc_lo :: v_dual_lshrrev_b32 v7, exec_hi, v3 ; encoding: [0x7f,0x50,0x25,0xcf,0x7f,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7f,0x50,0x25,0xcf,0x7f,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, exec_hi, v2, vcc_lo :: v_dual_max_i32 v7, exec_hi, v3 ; encoding: [0x7f,0x70,0x25,0xcf,0x7f,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7f,0x70,0x25,0xcf,0x7f,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, exec_hi, v2, vcc_lo :: v_dual_max_num_f32 v7, exec_hi, v3 ; encoding: [0x7f,0xa0,0x24,0xcf,0x7f,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7f,0xa0,0x24,0xcf,0x7f,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, exec_hi, v2, vcc_lo :: v_dual_min_i32 v7, exec_hi, v3 ; encoding: [0x7f,0x80,0x25,0xcf,0x7f,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] 
+0x7f,0x80,0x25,0xcf,0x7f,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, exec_hi, v2, vcc_lo :: v_dual_min_num_f32 v7, exec_hi, v3 ; encoding: [0x7f,0xb0,0x24,0xcf,0x7f,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7f,0xb0,0x24,0xcf,0x7f,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, exec_hi, v2, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 ; encoding: [0x7f,0x70,0x24,0xcf,0x7f,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7f,0x70,0x24,0xcf,0x7f,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, exec_hi, v2, vcc_lo :: v_dual_mul_f32 v7, exec_hi, v3 ; encoding: [0x7f,0x30,0x24,0xcf,0x7f,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7f,0x30,0x24,0xcf,0x7f,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, exec_hi, v2, vcc_lo :: v_dual_sub_f32 v7, exec_hi, v3 ; encoding: [0x7f,0x50,0x24,0xcf,0x7f,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7f,0x50,0x24,0xcf,0x7f,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, exec_hi, v2, vcc_lo :: v_dual_sub_nc_u32 v7, exec_hi, v3 ; encoding: [0x7f,0x40,0x25,0xcf,0x7f,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7f,0x40,0x25,0xcf,0x7f,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, exec_hi, v2, vcc_lo :: v_dual_subrev_f32 v7, exec_hi, v3 ; encoding: [0x7f,0x60,0x24,0xcf,0x7f,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7f,0x60,0x24,0xcf,0x7f,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, exec_hi, v255, vcc_lo :: v_dual_mov_b32 v7, exec_hi ; encoding: [0x7f,0x80,0x24,0xcf,0x7f,0x00,0xff,0x6a,0xff,0x00,0x00,0x07] +0x7f,0x80,0x24,0xcf,0x7f,0x00,0xff,0x6a,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, exec_lo, v2, vcc_lo :: v_dual_add_f32 v7, exec_lo, v3 ; encoding: [0x7e,0x40,0x24,0xcf,0x7e,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7e,0x40,0x24,0xcf,0x7e,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, exec_lo, v2, vcc_lo :: v_dual_add_nc_u32 v7, exec_lo, v3 ; 
encoding: [0x7e,0x00,0x25,0xcf,0x7e,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7e,0x00,0x25,0xcf,0x7e,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, exec_lo, v2, vcc_lo :: v_dual_ashrrev_i32 v7, exec_lo, v3 ; encoding: [0x7e,0x60,0x25,0xcf,0x7e,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7e,0x60,0x25,0xcf,0x7e,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, exec_lo, v2, vcc_lo :: v_dual_cndmask_b32 v7, exec_lo, v3, vcc_lo ; encoding: [0x7e,0x90,0x24,0xcf,0x7e,0x00,0x02,0x6a,0xff,0x03,0x6a,0x07] +0x7e,0x90,0x24,0xcf,0x7e,0x00,0x02,0x6a,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, exec_lo, v2, vcc_lo :: v_dual_fmac_f32 v7, exec_lo, v3 ; encoding: [0x7e,0x00,0x24,0xcf,0x7e,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7e,0x00,0x24,0xcf,0x7e,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, exec_lo, v2, vcc_lo :: v_dual_lshlrev_b32 v7, exec_lo, v3 ; encoding: [0x7e,0x10,0x25,0xcf,0x7e,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7e,0x10,0x25,0xcf,0x7e,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, exec_lo, v2, vcc_lo :: v_dual_lshrrev_b32 v7, exec_lo, v3 ; encoding: [0x7e,0x50,0x25,0xcf,0x7e,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7e,0x50,0x25,0xcf,0x7e,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, exec_lo, v2, vcc_lo :: v_dual_max_i32 v7, exec_lo, v3 ; encoding: [0x7e,0x70,0x25,0xcf,0x7e,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7e,0x70,0x25,0xcf,0x7e,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, exec_lo, v2, vcc_lo :: v_dual_max_num_f32 v7, exec_lo, v3 ; encoding: [0x7e,0xa0,0x24,0xcf,0x7e,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7e,0xa0,0x24,0xcf,0x7e,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, exec_lo, v2, vcc_lo :: v_dual_min_i32 v7, exec_lo, v3 ; encoding: [0x7e,0x80,0x25,0xcf,0x7e,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7e,0x80,0x25,0xcf,0x7e,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: 
v_dual_cndmask_b32 v255, exec_lo, v2, vcc_lo :: v_dual_min_num_f32 v7, exec_lo, v3 ; encoding: [0x7e,0xb0,0x24,0xcf,0x7e,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7e,0xb0,0x24,0xcf,0x7e,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, exec_lo, v2, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, exec_lo, v3 ; encoding: [0x7e,0x70,0x24,0xcf,0x7e,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7e,0x70,0x24,0xcf,0x7e,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, exec_lo, v2, vcc_lo :: v_dual_mul_f32 v7, exec_lo, v3 ; encoding: [0x7e,0x30,0x24,0xcf,0x7e,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7e,0x30,0x24,0xcf,0x7e,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, exec_lo, v2, vcc_lo :: v_dual_sub_f32 v7, exec_lo, v3 ; encoding: [0x7e,0x50,0x24,0xcf,0x7e,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7e,0x50,0x24,0xcf,0x7e,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, exec_lo, v2, vcc_lo :: v_dual_sub_nc_u32 v7, exec_lo, v3 ; encoding: [0x7e,0x40,0x25,0xcf,0x7e,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7e,0x40,0x25,0xcf,0x7e,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, exec_lo, v2, vcc_lo :: v_dual_subrev_f32 v7, exec_lo, v3 ; encoding: [0x7e,0x60,0x24,0xcf,0x7e,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7e,0x60,0x24,0xcf,0x7e,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, exec_lo, v255, vcc_lo :: v_dual_mov_b32 v7, exec_lo ; encoding: [0x7e,0x80,0x24,0xcf,0x7e,0x00,0xff,0x6a,0xff,0x00,0x00,0x07] +0x7e,0x80,0x24,0xcf,0x7e,0x00,0xff,0x6a,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, m0, v2, vcc_lo :: v_dual_add_f32 v7, m0, v3 ; encoding: [0x7d,0x40,0x24,0xcf,0x7d,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7d,0x40,0x24,0xcf,0x7d,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, m0, v2, vcc_lo :: v_dual_add_nc_u32 v7, m0, v3 ; encoding: [0x7d,0x00,0x25,0xcf,0x7d,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] 
+0x7d,0x00,0x25,0xcf,0x7d,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, m0, v2, vcc_lo :: v_dual_ashrrev_i32 v7, m0, v3 ; encoding: [0x7d,0x60,0x25,0xcf,0x7d,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7d,0x60,0x25,0xcf,0x7d,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, m0, v2, vcc_lo :: v_dual_cndmask_b32 v7, m0, v3, vcc_lo ; encoding: [0x7d,0x90,0x24,0xcf,0x7d,0x00,0x02,0x6a,0xff,0x03,0x6a,0x07] +0x7d,0x90,0x24,0xcf,0x7d,0x00,0x02,0x6a,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, m0, v2, vcc_lo :: v_dual_fmac_f32 v7, m0, v3 ; encoding: [0x7d,0x00,0x24,0xcf,0x7d,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7d,0x00,0x24,0xcf,0x7d,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, m0, v2, vcc_lo :: v_dual_lshlrev_b32 v7, m0, v3 ; encoding: [0x7d,0x10,0x25,0xcf,0x7d,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7d,0x10,0x25,0xcf,0x7d,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, m0, v2, vcc_lo :: v_dual_lshrrev_b32 v7, m0, v3 ; encoding: [0x7d,0x50,0x25,0xcf,0x7d,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7d,0x50,0x25,0xcf,0x7d,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, m0, v2, vcc_lo :: v_dual_max_i32 v7, m0, v3 ; encoding: [0x7d,0x70,0x25,0xcf,0x7d,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7d,0x70,0x25,0xcf,0x7d,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, m0, v2, vcc_lo :: v_dual_max_num_f32 v7, m0, v3 ; encoding: [0x7d,0xa0,0x24,0xcf,0x7d,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7d,0xa0,0x24,0xcf,0x7d,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, m0, v2, vcc_lo :: v_dual_min_i32 v7, m0, v3 ; encoding: [0x7d,0x80,0x25,0xcf,0x7d,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7d,0x80,0x25,0xcf,0x7d,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, m0, v2, vcc_lo :: v_dual_min_num_f32 v7, m0, v3 ; encoding: [0x7d,0xb0,0x24,0xcf,0x7d,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] 
+0x7d,0xb0,0x24,0xcf,0x7d,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, m0, v2, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, m0, v3 ; encoding: [0x7d,0x70,0x24,0xcf,0x7d,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7d,0x70,0x24,0xcf,0x7d,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, m0, v2, vcc_lo :: v_dual_mul_f32 v7, m0, v3 ; encoding: [0x7d,0x30,0x24,0xcf,0x7d,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7d,0x30,0x24,0xcf,0x7d,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, m0, v2, vcc_lo :: v_dual_sub_f32 v7, m0, v3 ; encoding: [0x7d,0x50,0x24,0xcf,0x7d,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7d,0x50,0x24,0xcf,0x7d,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, m0, v2, vcc_lo :: v_dual_sub_nc_u32 v7, m0, v3 ; encoding: [0x7d,0x40,0x25,0xcf,0x7d,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7d,0x40,0x25,0xcf,0x7d,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, m0, v2, vcc_lo :: v_dual_subrev_f32 v7, m0, v3 ; encoding: [0x7d,0x60,0x24,0xcf,0x7d,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7d,0x60,0x24,0xcf,0x7d,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, m0, v255, vcc_lo :: v_dual_mov_b32 v7, m0 ; encoding: [0x7d,0x80,0x24,0xcf,0x7d,0x00,0xff,0x6a,0xff,0x00,0x00,0x07] +0x7d,0x80,0x24,0xcf,0x7d,0x00,0xff,0x6a,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, s1, v2, vcc_lo :: v_dual_add_f32 v7, s1, v3 ; encoding: [0x01,0x40,0x24,0xcf,0x01,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x01,0x40,0x24,0xcf,0x01,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, s1, v2, vcc_lo :: v_dual_add_nc_u32 v7, s1, v3 ; encoding: [0x01,0x00,0x25,0xcf,0x01,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x01,0x00,0x25,0xcf,0x01,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, s1, v2, vcc_lo :: v_dual_ashrrev_i32 v7, s1, v3 ; encoding: [0x01,0x60,0x25,0xcf,0x01,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] 
+0x01,0x60,0x25,0xcf,0x01,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, s1, v2, vcc_lo :: v_dual_cndmask_b32 v7, s1, v3, vcc_lo ; encoding: [0x01,0x90,0x24,0xcf,0x01,0x00,0x02,0x6a,0xff,0x03,0x6a,0x07] +0x01,0x90,0x24,0xcf,0x01,0x00,0x02,0x6a,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, s1, v2, vcc_lo :: v_dual_fmac_f32 v7, s1, v3 ; encoding: [0x01,0x00,0x24,0xcf,0x01,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x01,0x00,0x24,0xcf,0x01,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, s1, v2, vcc_lo :: v_dual_lshlrev_b32 v7, s1, v3 ; encoding: [0x01,0x10,0x25,0xcf,0x01,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x01,0x10,0x25,0xcf,0x01,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, s1, v2, vcc_lo :: v_dual_lshrrev_b32 v7, s1, v3 ; encoding: [0x01,0x50,0x25,0xcf,0x01,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x01,0x50,0x25,0xcf,0x01,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, s1, v2, vcc_lo :: v_dual_max_i32 v7, s1, v3 ; encoding: [0x01,0x70,0x25,0xcf,0x01,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x01,0x70,0x25,0xcf,0x01,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, s1, v2, vcc_lo :: v_dual_max_num_f32 v7, s1, v3 ; encoding: [0x01,0xa0,0x24,0xcf,0x01,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x01,0xa0,0x24,0xcf,0x01,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, s1, v2, vcc_lo :: v_dual_min_i32 v7, s1, v3 ; encoding: [0x01,0x80,0x25,0xcf,0x01,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x01,0x80,0x25,0xcf,0x01,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, s1, v2, vcc_lo :: v_dual_min_num_f32 v7, s1, v3 ; encoding: [0x01,0xb0,0x24,0xcf,0x01,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x01,0xb0,0x24,0xcf,0x01,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, s1, v2, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, s1, v3 ; encoding: [0x01,0x70,0x24,0xcf,0x01,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] 
+0x01,0x70,0x24,0xcf,0x01,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, s1, v2, vcc_lo :: v_dual_mul_f32 v7, s1, v3 ; encoding: [0x01,0x30,0x24,0xcf,0x01,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x01,0x30,0x24,0xcf,0x01,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, s1, v2, vcc_lo :: v_dual_sub_f32 v7, s1, v3 ; encoding: [0x01,0x50,0x24,0xcf,0x01,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x01,0x50,0x24,0xcf,0x01,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, s1, v2, vcc_lo :: v_dual_sub_nc_u32 v7, s1, v3 ; encoding: [0x01,0x40,0x25,0xcf,0x01,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x01,0x40,0x25,0xcf,0x01,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, s1, v2, vcc_lo :: v_dual_subrev_f32 v7, s1, v3 ; encoding: [0x01,0x60,0x24,0xcf,0x01,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x01,0x60,0x24,0xcf,0x01,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, s1, v255, vcc_lo :: v_dual_mov_b32 v7, s1 ; encoding: [0x01,0x80,0x24,0xcf,0x01,0x00,0xff,0x6a,0xff,0x00,0x00,0x07] +0x01,0x80,0x24,0xcf,0x01,0x00,0xff,0x6a,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, s105, v2, vcc_lo :: v_dual_add_f32 v7, s105, v3 ; encoding: [0x69,0x40,0x24,0xcf,0x69,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x69,0x40,0x24,0xcf,0x69,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, s105, v2, vcc_lo :: v_dual_add_nc_u32 v7, s105, v3 ; encoding: [0x69,0x00,0x25,0xcf,0x69,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x69,0x00,0x25,0xcf,0x69,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, s105, v2, vcc_lo :: v_dual_ashrrev_i32 v7, s105, v3 ; encoding: [0x69,0x60,0x25,0xcf,0x69,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x69,0x60,0x25,0xcf,0x69,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, s105, v2, vcc_lo :: v_dual_cndmask_b32 v7, s105, v3, vcc_lo ; encoding: 
[0x69,0x90,0x24,0xcf,0x69,0x00,0x02,0x6a,0xff,0x03,0x6a,0x07] +0x69,0x90,0x24,0xcf,0x69,0x00,0x02,0x6a,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, s105, v2, vcc_lo :: v_dual_fmac_f32 v7, s105, v3 ; encoding: [0x69,0x00,0x24,0xcf,0x69,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x69,0x00,0x24,0xcf,0x69,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, s105, v2, vcc_lo :: v_dual_lshlrev_b32 v7, s105, v3 ; encoding: [0x69,0x10,0x25,0xcf,0x69,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x69,0x10,0x25,0xcf,0x69,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, s105, v2, vcc_lo :: v_dual_lshrrev_b32 v7, s105, v3 ; encoding: [0x69,0x50,0x25,0xcf,0x69,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x69,0x50,0x25,0xcf,0x69,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, s105, v2, vcc_lo :: v_dual_max_i32 v7, s105, v3 ; encoding: [0x69,0x70,0x25,0xcf,0x69,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x69,0x70,0x25,0xcf,0x69,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, s105, v2, vcc_lo :: v_dual_max_num_f32 v7, s105, v3 ; encoding: [0x69,0xa0,0x24,0xcf,0x69,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x69,0xa0,0x24,0xcf,0x69,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, s105, v2, vcc_lo :: v_dual_min_i32 v7, s105, v3 ; encoding: [0x69,0x80,0x25,0xcf,0x69,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x69,0x80,0x25,0xcf,0x69,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, s105, v2, vcc_lo :: v_dual_min_num_f32 v7, s105, v3 ; encoding: [0x69,0xb0,0x24,0xcf,0x69,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x69,0xb0,0x24,0xcf,0x69,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, s105, v2, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, s105, v3 ; encoding: [0x69,0x70,0x24,0xcf,0x69,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x69,0x70,0x24,0xcf,0x69,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, s105, v2, vcc_lo :: v_dual_mul_f32 
v7, s105, v3 ; encoding: [0x69,0x30,0x24,0xcf,0x69,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x69,0x30,0x24,0xcf,0x69,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, s105, v2, vcc_lo :: v_dual_sub_f32 v7, s105, v3 ; encoding: [0x69,0x50,0x24,0xcf,0x69,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x69,0x50,0x24,0xcf,0x69,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, s105, v2, vcc_lo :: v_dual_sub_nc_u32 v7, s105, v3 ; encoding: [0x69,0x40,0x25,0xcf,0x69,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x69,0x40,0x25,0xcf,0x69,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, s105, v2, vcc_lo :: v_dual_subrev_f32 v7, s105, v3 ; encoding: [0x69,0x60,0x24,0xcf,0x69,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x69,0x60,0x24,0xcf,0x69,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, s105, v255, vcc_lo :: v_dual_mov_b32 v7, s105 ; encoding: [0x69,0x80,0x24,0xcf,0x69,0x00,0xff,0x6a,0xff,0x00,0x00,0x07] +0x69,0x80,0x24,0xcf,0x69,0x00,0xff,0x6a,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, src_scc, v2, vcc_lo :: v_dual_add_f32 v7, -1, v3 ; encoding: [0xfd,0x40,0x24,0xcf,0xc1,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0xfd,0x40,0x24,0xcf,0xc1,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, src_scc, v2, vcc_lo :: v_dual_add_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x00,0x25,0xcf,0xc1,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0xfd,0x00,0x25,0xcf,0xc1,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, src_scc, v2, vcc_lo :: v_dual_ashrrev_i32 v7, -1, v3 ; encoding: [0xfd,0x60,0x25,0xcf,0xc1,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0xfd,0x60,0x25,0xcf,0xc1,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, src_scc, v2, vcc_lo :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo ; encoding: [0xfd,0x90,0x24,0xcf,0xc1,0x00,0x02,0x6a,0xff,0x03,0x6a,0x07] +0xfd,0x90,0x24,0xcf,0xc1,0x00,0x02,0x6a,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, src_scc, 
v2, vcc_lo :: v_dual_fmac_f32 v7, -1, v3 ; encoding: [0xfd,0x00,0x24,0xcf,0xc1,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0xfd,0x00,0x24,0xcf,0xc1,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, src_scc, v2, vcc_lo :: v_dual_lshlrev_b32 v7, -1, v3 ; encoding: [0xfd,0x10,0x25,0xcf,0xc1,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0xfd,0x10,0x25,0xcf,0xc1,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, src_scc, v2, vcc_lo :: v_dual_lshrrev_b32 v7, -1, v3 ; encoding: [0xfd,0x50,0x25,0xcf,0xc1,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0xfd,0x50,0x25,0xcf,0xc1,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, src_scc, v2, vcc_lo :: v_dual_max_i32 v7, -1, v3 ; encoding: [0xfd,0x70,0x25,0xcf,0xc1,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0xfd,0x70,0x25,0xcf,0xc1,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, src_scc, v2, vcc_lo :: v_dual_max_num_f32 v7, -1, v3 ; encoding: [0xfd,0xa0,0x24,0xcf,0xc1,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0xfd,0xa0,0x24,0xcf,0xc1,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, src_scc, v2, vcc_lo :: v_dual_min_i32 v7, -1, v3 ; encoding: [0xfd,0x80,0x25,0xcf,0xc1,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0xfd,0x80,0x25,0xcf,0xc1,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, src_scc, v2, vcc_lo :: v_dual_min_num_f32 v7, -1, v3 ; encoding: [0xfd,0xb0,0x24,0xcf,0xc1,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0xfd,0xb0,0x24,0xcf,0xc1,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, src_scc, v2, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, -1, v3 ; encoding: [0xfd,0x70,0x24,0xcf,0xc1,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0xfd,0x70,0x24,0xcf,0xc1,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, src_scc, v2, vcc_lo :: v_dual_mul_f32 v7, -1, v3 ; encoding: [0xfd,0x30,0x24,0xcf,0xc1,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0xfd,0x30,0x24,0xcf,0xc1,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: 
v_dual_cndmask_b32 v255, src_scc, v2, vcc_lo :: v_dual_sub_f32 v7, -1, v3 ; encoding: [0xfd,0x50,0x24,0xcf,0xc1,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0xfd,0x50,0x24,0xcf,0xc1,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, src_scc, v2, vcc_lo :: v_dual_sub_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x40,0x25,0xcf,0xc1,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0xfd,0x40,0x25,0xcf,0xc1,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, src_scc, v2, vcc_lo :: v_dual_subrev_f32 v7, -1, v3 ; encoding: [0xfd,0x60,0x24,0xcf,0xc1,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0xfd,0x60,0x24,0xcf,0xc1,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, src_scc, v255, vcc_lo :: v_dual_mov_b32 v7, -1 ; encoding: [0xfd,0x80,0x24,0xcf,0xc1,0x00,0xff,0x6a,0xff,0x00,0x00,0x07] +0xfd,0x80,0x24,0xcf,0xc1,0x00,0xff,0x6a,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, ttmp15, v2, vcc_lo :: v_dual_add_f32 v7, ttmp15, v3 ; encoding: [0x7b,0x40,0x24,0xcf,0x7b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7b,0x40,0x24,0xcf,0x7b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, ttmp15, v2, vcc_lo :: v_dual_add_nc_u32 v7, ttmp15, v3 ; encoding: [0x7b,0x00,0x25,0xcf,0x7b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7b,0x00,0x25,0xcf,0x7b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, ttmp15, v2, vcc_lo :: v_dual_ashrrev_i32 v7, ttmp15, v3 ; encoding: [0x7b,0x60,0x25,0xcf,0x7b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7b,0x60,0x25,0xcf,0x7b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, ttmp15, v2, vcc_lo :: v_dual_cndmask_b32 v7, ttmp15, v3, vcc_lo ; encoding: [0x7b,0x90,0x24,0xcf,0x7b,0x00,0x02,0x6a,0xff,0x03,0x6a,0x07] +0x7b,0x90,0x24,0xcf,0x7b,0x00,0x02,0x6a,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, ttmp15, v2, vcc_lo :: v_dual_fmac_f32 v7, ttmp15, v3 ; encoding: [0x7b,0x00,0x24,0xcf,0x7b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] 
+0x7b,0x00,0x24,0xcf,0x7b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, ttmp15, v2, vcc_lo :: v_dual_lshlrev_b32 v7, ttmp15, v3 ; encoding: [0x7b,0x10,0x25,0xcf,0x7b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7b,0x10,0x25,0xcf,0x7b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, ttmp15, v2, vcc_lo :: v_dual_lshrrev_b32 v7, ttmp15, v3 ; encoding: [0x7b,0x50,0x25,0xcf,0x7b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7b,0x50,0x25,0xcf,0x7b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, ttmp15, v2, vcc_lo :: v_dual_max_i32 v7, ttmp15, v3 ; encoding: [0x7b,0x70,0x25,0xcf,0x7b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7b,0x70,0x25,0xcf,0x7b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, ttmp15, v2, vcc_lo :: v_dual_max_num_f32 v7, ttmp15, v3 ; encoding: [0x7b,0xa0,0x24,0xcf,0x7b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7b,0xa0,0x24,0xcf,0x7b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, ttmp15, v2, vcc_lo :: v_dual_min_i32 v7, ttmp15, v3 ; encoding: [0x7b,0x80,0x25,0xcf,0x7b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7b,0x80,0x25,0xcf,0x7b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, ttmp15, v2, vcc_lo :: v_dual_min_num_f32 v7, ttmp15, v3 ; encoding: [0x7b,0xb0,0x24,0xcf,0x7b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7b,0xb0,0x24,0xcf,0x7b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, ttmp15, v2, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, ttmp15, v3 ; encoding: [0x7b,0x70,0x24,0xcf,0x7b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7b,0x70,0x24,0xcf,0x7b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, ttmp15, v2, vcc_lo :: v_dual_mul_f32 v7, ttmp15, v3 ; encoding: [0x7b,0x30,0x24,0xcf,0x7b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7b,0x30,0x24,0xcf,0x7b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, ttmp15, v2, vcc_lo :: v_dual_sub_f32 v7, ttmp15, v3 ; encoding: 
[0x7b,0x50,0x24,0xcf,0x7b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7b,0x50,0x24,0xcf,0x7b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, ttmp15, v2, vcc_lo :: v_dual_sub_nc_u32 v7, ttmp15, v3 ; encoding: [0x7b,0x40,0x25,0xcf,0x7b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7b,0x40,0x25,0xcf,0x7b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, ttmp15, v2, vcc_lo :: v_dual_subrev_f32 v7, ttmp15, v3 ; encoding: [0x7b,0x60,0x24,0xcf,0x7b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x7b,0x60,0x24,0xcf,0x7b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, ttmp15, v255, vcc_lo :: v_dual_mov_b32 v7, ttmp15 ; encoding: [0x7b,0x80,0x24,0xcf,0x7b,0x00,0xff,0x6a,0xff,0x00,0x00,0x07] +0x7b,0x80,0x24,0xcf,0x7b,0x00,0xff,0x6a,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v1, v2, vcc_lo :: v_dual_add_f32 v7, v255, v3 ; encoding: [0x01,0x41,0x24,0xcf,0xff,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x01,0x41,0x24,0xcf,0xff,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v1, v2, vcc_lo :: v_dual_add_nc_u32 v7, v255, v3 ; encoding: [0x01,0x01,0x25,0xcf,0xff,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x01,0x01,0x25,0xcf,0xff,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v1, v2, vcc_lo :: v_dual_ashrrev_i32 v7, v255, v3 ; encoding: [0x01,0x61,0x25,0xcf,0xff,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x01,0x61,0x25,0xcf,0xff,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v1, v2, vcc_lo :: v_dual_cndmask_b32 v7, v255, v3, vcc_lo ; encoding: [0x01,0x91,0x24,0xcf,0xff,0x01,0x02,0x6a,0xff,0x03,0x6a,0x07] +0x01,0x91,0x24,0xcf,0xff,0x01,0x02,0x6a,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v1, v2, vcc_lo :: v_dual_fmac_f32 v7, v255, v3 ; encoding: [0x01,0x01,0x24,0xcf,0xff,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x01,0x01,0x24,0xcf,0xff,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v1, v2, vcc_lo :: 
v_dual_lshlrev_b32 v7, v255, v3 ; encoding: [0x01,0x11,0x25,0xcf,0xff,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x01,0x11,0x25,0xcf,0xff,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v1, v2, vcc_lo :: v_dual_lshrrev_b32 v7, v255, v3 ; encoding: [0x01,0x51,0x25,0xcf,0xff,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x01,0x51,0x25,0xcf,0xff,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v1, v2, vcc_lo :: v_dual_max_i32 v7, v255, v3 ; encoding: [0x01,0x71,0x25,0xcf,0xff,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x01,0x71,0x25,0xcf,0xff,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v1, v2, vcc_lo :: v_dual_max_num_f32 v7, v255, v3 ; encoding: [0x01,0xa1,0x24,0xcf,0xff,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x01,0xa1,0x24,0xcf,0xff,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v1, v2, vcc_lo :: v_dual_min_i32 v7, v255, v3 ; encoding: [0x01,0x81,0x25,0xcf,0xff,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x01,0x81,0x25,0xcf,0xff,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v1, v2, vcc_lo :: v_dual_min_num_f32 v7, v255, v3 ; encoding: [0x01,0xb1,0x24,0xcf,0xff,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x01,0xb1,0x24,0xcf,0xff,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v1, v2, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, v255, v3 ; encoding: [0x01,0x71,0x24,0xcf,0xff,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x01,0x71,0x24,0xcf,0xff,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v1, v2, vcc_lo :: v_dual_mul_f32 v7, v255, v3 ; encoding: [0x01,0x31,0x24,0xcf,0xff,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x01,0x31,0x24,0xcf,0xff,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v1, v2, vcc_lo :: v_dual_sub_f32 v7, v255, v3 ; encoding: [0x01,0x51,0x24,0xcf,0xff,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x01,0x51,0x24,0xcf,0xff,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v1, v2, 
vcc_lo :: v_dual_sub_nc_u32 v7, v255, v3 ; encoding: [0x01,0x41,0x25,0xcf,0xff,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x01,0x41,0x25,0xcf,0xff,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v1, v2, vcc_lo :: v_dual_subrev_f32 v7, v255, v3 ; encoding: [0x01,0x61,0x24,0xcf,0xff,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x01,0x61,0x24,0xcf,0xff,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v1, v255, vcc_lo :: v_dual_mov_b32 v7, v255 ; encoding: [0x01,0x81,0x24,0xcf,0xff,0x01,0xff,0x6a,0xff,0x00,0x00,0x07] +0x01,0x81,0x24,0xcf,0xff,0x01,0xff,0x6a,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v2, v2, vcc_lo :: v_dual_add_f32 v7, v3, v3 ; encoding: [0x02,0x41,0x24,0xcf,0x03,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x02,0x41,0x24,0xcf,0x03,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v2, v2, vcc_lo :: v_dual_add_nc_u32 v7, v3, v3 ; encoding: [0x02,0x01,0x25,0xcf,0x03,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x02,0x01,0x25,0xcf,0x03,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v2, v2, vcc_lo :: v_dual_ashrrev_i32 v7, v3, v3 ; encoding: [0x02,0x61,0x25,0xcf,0x03,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x02,0x61,0x25,0xcf,0x03,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v2, v2, vcc_lo :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo ; encoding: [0x02,0x91,0x24,0xcf,0x03,0x01,0x02,0x6a,0xff,0x03,0x6a,0x07] +0x02,0x91,0x24,0xcf,0x03,0x01,0x02,0x6a,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v2, v2, vcc_lo :: v_dual_fmac_f32 v7, v3, v3 ; encoding: [0x02,0x01,0x24,0xcf,0x03,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x02,0x01,0x24,0xcf,0x03,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v2, v2, vcc_lo :: v_dual_lshlrev_b32 v7, v3, v3 ; encoding: [0x02,0x11,0x25,0xcf,0x03,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x02,0x11,0x25,0xcf,0x03,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v2, v2, 
vcc_lo :: v_dual_lshrrev_b32 v7, v3, v3 ; encoding: [0x02,0x51,0x25,0xcf,0x03,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x02,0x51,0x25,0xcf,0x03,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v2, v2, vcc_lo :: v_dual_max_i32 v7, v3, v3 ; encoding: [0x02,0x71,0x25,0xcf,0x03,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x02,0x71,0x25,0xcf,0x03,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v2, v2, vcc_lo :: v_dual_max_num_f32 v7, v3, v3 ; encoding: [0x02,0xa1,0x24,0xcf,0x03,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x02,0xa1,0x24,0xcf,0x03,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v2, v2, vcc_lo :: v_dual_min_i32 v7, v3, v3 ; encoding: [0x02,0x81,0x25,0xcf,0x03,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x02,0x81,0x25,0xcf,0x03,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v2, v2, vcc_lo :: v_dual_min_num_f32 v7, v3, v3 ; encoding: [0x02,0xb1,0x24,0xcf,0x03,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x02,0xb1,0x24,0xcf,0x03,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v2, v2, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, v3, v3 ; encoding: [0x02,0x71,0x24,0xcf,0x03,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x02,0x71,0x24,0xcf,0x03,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v2, v2, vcc_lo :: v_dual_mul_f32 v7, v3, v3 ; encoding: [0x02,0x31,0x24,0xcf,0x03,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x02,0x31,0x24,0xcf,0x03,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v2, v2, vcc_lo :: v_dual_sub_f32 v7, v3, v3 ; encoding: [0x02,0x51,0x24,0xcf,0x03,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x02,0x51,0x24,0xcf,0x03,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v2, v2, vcc_lo :: v_dual_sub_nc_u32 v7, v3, v3 ; encoding: [0x02,0x41,0x25,0xcf,0x03,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x02,0x41,0x25,0xcf,0x03,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v2, v2, vcc_lo :: 
v_dual_subrev_f32 v7, v3, v3 ; encoding: [0x02,0x61,0x24,0xcf,0x03,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x02,0x61,0x24,0xcf,0x03,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v2, v255, vcc_lo :: v_dual_mov_b32 v7, v3 ; encoding: [0x02,0x81,0x24,0xcf,0x03,0x01,0xff,0x6a,0xff,0x00,0x00,0x07] +0x02,0x81,0x24,0xcf,0x03,0x01,0xff,0x6a,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v255, v2, vcc_lo :: v_dual_add_f32 v7, v2, v3 ; encoding: [0xff,0x41,0x24,0xcf,0x02,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0xff,0x41,0x24,0xcf,0x02,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v255, v2, vcc_lo :: v_dual_add_nc_u32 v7, v2, v3 ; encoding: [0xff,0x01,0x25,0xcf,0x02,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0xff,0x01,0x25,0xcf,0x02,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v255, v2, vcc_lo :: v_dual_ashrrev_i32 v7, v2, v3 ; encoding: [0xff,0x61,0x25,0xcf,0x02,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0xff,0x61,0x25,0xcf,0x02,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v255, v2, vcc_lo :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo ; encoding: [0xff,0x91,0x24,0xcf,0x02,0x01,0x02,0x6a,0xff,0x03,0x6a,0x07] +0xff,0x91,0x24,0xcf,0x02,0x01,0x02,0x6a,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v255, v2, vcc_lo :: v_dual_fmac_f32 v7, v2, v3 ; encoding: [0xff,0x01,0x24,0xcf,0x02,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0xff,0x01,0x24,0xcf,0x02,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v255, v2, vcc_lo :: v_dual_lshlrev_b32 v7, v2, v3 ; encoding: [0xff,0x11,0x25,0xcf,0x02,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0xff,0x11,0x25,0xcf,0x02,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v255, v2, vcc_lo :: v_dual_lshrrev_b32 v7, v2, v3 ; encoding: [0xff,0x51,0x25,0xcf,0x02,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0xff,0x51,0x25,0xcf,0x02,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v255, v2, 
vcc_lo :: v_dual_max_i32 v7, v2, v3 ; encoding: [0xff,0x71,0x25,0xcf,0x02,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0xff,0x71,0x25,0xcf,0x02,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v255, v2, vcc_lo :: v_dual_max_num_f32 v7, v2, v3 ; encoding: [0xff,0xa1,0x24,0xcf,0x02,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0xff,0xa1,0x24,0xcf,0x02,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v255, v2, vcc_lo :: v_dual_min_i32 v7, v2, v3 ; encoding: [0xff,0x81,0x25,0xcf,0x02,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0xff,0x81,0x25,0xcf,0x02,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v255, v2, vcc_lo :: v_dual_min_num_f32 v7, v2, v3 ; encoding: [0xff,0xb1,0x24,0xcf,0x02,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0xff,0xb1,0x24,0xcf,0x02,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v255, v2, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, v2, v3 ; encoding: [0xff,0x71,0x24,0xcf,0x02,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0xff,0x71,0x24,0xcf,0x02,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v255, v2, vcc_lo :: v_dual_mul_f32 v7, v2, v3 ; encoding: [0xff,0x31,0x24,0xcf,0x02,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0xff,0x31,0x24,0xcf,0x02,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v255, v2, vcc_lo :: v_dual_sub_f32 v7, v2, v3 ; encoding: [0xff,0x51,0x24,0xcf,0x02,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0xff,0x51,0x24,0xcf,0x02,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v255, v2, vcc_lo :: v_dual_sub_nc_u32 v7, v2, v3 ; encoding: [0xff,0x41,0x25,0xcf,0x02,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0xff,0x41,0x25,0xcf,0x02,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v255, v2, vcc_lo :: v_dual_subrev_f32 v7, v2, v3 ; encoding: [0xff,0x61,0x24,0xcf,0x02,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0xff,0x61,0x24,0xcf,0x02,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v255, 
v255, vcc_lo :: v_dual_mov_b32 v7, v2 ; encoding: [0xff,0x81,0x24,0xcf,0x02,0x01,0xff,0x6a,0xff,0x00,0x00,0x07] +0xff,0x81,0x24,0xcf,0x02,0x01,0xff,0x6a,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v3, v2, vcc_lo :: v_dual_add_f32 v7, v4, v3 ; encoding: [0x03,0x41,0x24,0xcf,0x04,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x03,0x41,0x24,0xcf,0x04,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v3, v2, vcc_lo :: v_dual_add_nc_u32 v7, v4, v3 ; encoding: [0x03,0x01,0x25,0xcf,0x04,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x03,0x01,0x25,0xcf,0x04,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v3, v2, vcc_lo :: v_dual_ashrrev_i32 v7, v4, v3 ; encoding: [0x03,0x61,0x25,0xcf,0x04,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x03,0x61,0x25,0xcf,0x04,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v3, v2, vcc_lo :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo ; encoding: [0x03,0x91,0x24,0xcf,0x04,0x01,0x02,0x6a,0xff,0x03,0x6a,0x07] +0x03,0x91,0x24,0xcf,0x04,0x01,0x02,0x6a,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v3, v2, vcc_lo :: v_dual_fmac_f32 v7, v4, v3 ; encoding: [0x03,0x01,0x24,0xcf,0x04,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x03,0x01,0x24,0xcf,0x04,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v3, v2, vcc_lo :: v_dual_lshlrev_b32 v7, v4, v3 ; encoding: [0x03,0x11,0x25,0xcf,0x04,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x03,0x11,0x25,0xcf,0x04,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v3, v2, vcc_lo :: v_dual_lshrrev_b32 v7, v4, v3 ; encoding: [0x03,0x51,0x25,0xcf,0x04,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x03,0x51,0x25,0xcf,0x04,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v3, v2, vcc_lo :: v_dual_max_i32 v7, v4, v3 ; encoding: [0x03,0x71,0x25,0xcf,0x04,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x03,0x71,0x25,0xcf,0x04,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v3, v2, vcc_lo 
:: v_dual_max_num_f32 v7, v4, v3 ; encoding: [0x03,0xa1,0x24,0xcf,0x04,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x03,0xa1,0x24,0xcf,0x04,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v3, v2, vcc_lo :: v_dual_min_i32 v7, v4, v3 ; encoding: [0x03,0x81,0x25,0xcf,0x04,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x03,0x81,0x25,0xcf,0x04,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v3, v2, vcc_lo :: v_dual_min_num_f32 v7, v4, v3 ; encoding: [0x03,0xb1,0x24,0xcf,0x04,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x03,0xb1,0x24,0xcf,0x04,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v3, v2, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, v4, v3 ; encoding: [0x03,0x71,0x24,0xcf,0x04,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x03,0x71,0x24,0xcf,0x04,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v3, v2, vcc_lo :: v_dual_mul_f32 v7, v4, v3 ; encoding: [0x03,0x31,0x24,0xcf,0x04,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x03,0x31,0x24,0xcf,0x04,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v3, v2, vcc_lo :: v_dual_sub_f32 v7, v4, v3 ; encoding: [0x03,0x51,0x24,0xcf,0x04,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x03,0x51,0x24,0xcf,0x04,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v3, v2, vcc_lo :: v_dual_sub_nc_u32 v7, v4, v3 ; encoding: [0x03,0x41,0x25,0xcf,0x04,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x03,0x41,0x25,0xcf,0x04,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v3, v2, vcc_lo :: v_dual_subrev_f32 v7, v4, v3 ; encoding: [0x03,0x61,0x24,0xcf,0x04,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x03,0x61,0x24,0xcf,0x04,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v3, v255, vcc_lo :: v_dual_mov_b32 v7, v4 ; encoding: [0x03,0x81,0x24,0xcf,0x04,0x01,0xff,0x6a,0xff,0x00,0x00,0x07] +0x03,0x81,0x24,0xcf,0x04,0x01,0xff,0x6a,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_add_f32 
v7, v1, v3 ; encoding: [0x04,0x41,0x24,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x00,0x07] +0x04,0x41,0x24,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_add_nc_u32 v7, v1, v3 ; encoding: [0x04,0x01,0x25,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x00,0x07] +0x04,0x01,0x25,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_ashrrev_i32 v7, v1, v3 ; encoding: [0x04,0x61,0x25,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x00,0x07] +0x04,0x61,0x25,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_bitop2_b32 v7, v1, v3 bitop3:1 ; encoding: [0x04,0x21,0x25,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x01,0x07] +0x04,0x21,0x25,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x01,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_fma_f32 v7, v1, v3, v4 ; encoding: [0x04,0x31,0x25,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x04,0x07] +0x04,0x31,0x25,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x04,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_fmac_f32 v7, v1, v3 ; encoding: [0x04,0x01,0x24,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x00,0x07] +0x04,0x01,0x24,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_lshlrev_b32 v7, v1, v3 ; encoding: [0x04,0x11,0x25,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x00,0x07] +0x04,0x11,0x25,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_lshrrev_b32 v7, v1, v3 ; encoding: [0x04,0x51,0x25,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x00,0x07] +0x04,0x51,0x25,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_max_i32 v7, v1, v3 ; encoding: [0x04,0x71,0x25,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x00,0x07] +0x04,0x71,0x25,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_max_num_f32 v7, v1, v3 ; encoding: 
[0x04,0xa1,0x24,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x00,0x07] +0x04,0xa1,0x24,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_min_i32 v7, v1, v3 ; encoding: [0x04,0x81,0x25,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x00,0x07] +0x04,0x81,0x25,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_min_num_f32 v7, v1, v3 ; encoding: [0x04,0xb1,0x24,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x00,0x07] +0x04,0xb1,0x24,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_mul_dx9_zero_f32 v7, v1, v3 ; encoding: [0x04,0x71,0x24,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x00,0x07] +0x04,0x71,0x24,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_mul_f32 v7, v1, v3 ; encoding: [0x04,0x31,0x24,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x00,0x07] +0x04,0x31,0x24,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_sub_f32 v7, v1, v3 ; encoding: [0x04,0x51,0x24,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x00,0x07] +0x04,0x51,0x24,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_sub_nc_u32 v7, v1, v3 ; encoding: [0x04,0x41,0x25,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x00,0x07] +0x04,0x41,0x25,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2, s96 :: v_dual_subrev_f32 v7, v1, v3 ; encoding: [0x04,0x61,0x24,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x00,0x07] +0x04,0x61,0x24,0xcf,0x01,0x01,0x02,0x60,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2, s97 :: v_dual_cndmask_b32 v7, v1, v3, s96 ; encoding: [0x04,0x91,0x24,0xcf,0x01,0x01,0x02,0x61,0xff,0x03,0x60,0x07] +0x04,0x91,0x24,0xcf,0x01,0x01,0x02,0x61,0xff,0x03,0x60,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_add_f32 v7, v1, v3 ; encoding: 
[0x04,0x41,0x24,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x04,0x41,0x24,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_add_nc_u32 v7, v1, v3 ; encoding: [0x04,0x01,0x25,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x04,0x01,0x25,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_ashrrev_i32 v7, v1, v3 ; encoding: [0x04,0x61,0x25,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x04,0x61,0x25,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_bitop2_b32 v7, v1, v3 bitop3:1 ; encoding: [0x04,0x21,0x25,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x01,0x07] +0x04,0x21,0x25,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x01,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo ; encoding: [0x04,0x91,0x24,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x6a,0x07] +0x04,0x91,0x24,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_fma_f32 v7, v1, v3, v4 ; encoding: [0x04,0x31,0x25,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x04,0x07] +0x04,0x31,0x25,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x04,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_fmac_f32 v7, v1, v3 ; encoding: [0x04,0x01,0x24,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x04,0x01,0x24,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_lshlrev_b32 v7, v1, v3 ; encoding: [0x04,0x11,0x25,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x04,0x11,0x25,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_lshrrev_b32 v7, v1, v3 ; encoding: [0x04,0x51,0x25,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x04,0x51,0x25,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_max_i32 v7, v1, v3 ; 
encoding: [0x04,0x71,0x25,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x04,0x71,0x25,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_max_num_f32 v7, v1, v3 ; encoding: [0x04,0xa1,0x24,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x04,0xa1,0x24,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_min_i32 v7, v1, v3 ; encoding: [0x04,0x81,0x25,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x04,0x81,0x25,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_min_num_f32 v7, v1, v3 ; encoding: [0x04,0xb1,0x24,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x04,0xb1,0x24,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, v1, v3 ; encoding: [0x04,0x71,0x24,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x04,0x71,0x24,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_mul_f32 v7, v1, v3 ; encoding: [0x04,0x31,0x24,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x04,0x31,0x24,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_sub_f32 v7, v1, v3 ; encoding: [0x04,0x51,0x24,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x04,0x51,0x24,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_sub_nc_u32 v7, v1, v3 ; encoding: [0x04,0x41,0x25,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x04,0x41,0x25,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v4, v2, vcc_lo :: v_dual_subrev_f32 v7, v1, v3 ; encoding: [0x04,0x61,0x24,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x00,0x07] +0x04,0x61,0x24,0xcf,0x01,0x01,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v4, v255, s96 :: v_dual_mov_b32 v7, v1 ; encoding: 
[0x04,0x81,0x24,0xcf,0x01,0x01,0xff,0x60,0xff,0x00,0x00,0x07] +0x04,0x81,0x24,0xcf,0x01,0x01,0xff,0x60,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, v4, v255, vcc_lo :: v_dual_mov_b32 v7, v1 ; encoding: [0x04,0x81,0x24,0xcf,0x01,0x01,0xff,0x6a,0xff,0x00,0x00,0x07] +0x04,0x81,0x24,0xcf,0x01,0x01,0xff,0x6a,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v2, vcc_lo :: v_dual_add_f32 v7, vcc_hi, v3 ; encoding: [0x6b,0x40,0x24,0xcf,0x6b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x6b,0x40,0x24,0xcf,0x6b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v2, vcc_lo :: v_dual_add_nc_u32 v7, vcc_hi, v3 ; encoding: [0x6b,0x00,0x25,0xcf,0x6b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x6b,0x00,0x25,0xcf,0x6b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v2, vcc_lo :: v_dual_ashrrev_i32 v7, vcc_hi, v3 ; encoding: [0x6b,0x60,0x25,0xcf,0x6b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x6b,0x60,0x25,0xcf,0x6b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v2, vcc_lo :: v_dual_cndmask_b32 v7, vcc_hi, v3, vcc_lo ; encoding: [0x6b,0x90,0x24,0xcf,0x6b,0x00,0x02,0x6a,0xff,0x03,0x6a,0x07] +0x6b,0x90,0x24,0xcf,0x6b,0x00,0x02,0x6a,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v2, vcc_lo :: v_dual_fmac_f32 v7, vcc_hi, v3 ; encoding: [0x6b,0x00,0x24,0xcf,0x6b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x6b,0x00,0x24,0xcf,0x6b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v2, vcc_lo :: v_dual_lshlrev_b32 v7, vcc_hi, v3 ; encoding: [0x6b,0x10,0x25,0xcf,0x6b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x6b,0x10,0x25,0xcf,0x6b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v2, vcc_lo :: v_dual_lshrrev_b32 v7, vcc_hi, v3 ; encoding: [0x6b,0x50,0x25,0xcf,0x6b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x6b,0x50,0x25,0xcf,0x6b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v2, 
vcc_lo :: v_dual_max_i32 v7, vcc_hi, v3 ; encoding: [0x6b,0x70,0x25,0xcf,0x6b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x6b,0x70,0x25,0xcf,0x6b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v2, vcc_lo :: v_dual_max_num_f32 v7, vcc_hi, v3 ; encoding: [0x6b,0xa0,0x24,0xcf,0x6b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x6b,0xa0,0x24,0xcf,0x6b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v2, vcc_lo :: v_dual_min_i32 v7, vcc_hi, v3 ; encoding: [0x6b,0x80,0x25,0xcf,0x6b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x6b,0x80,0x25,0xcf,0x6b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v2, vcc_lo :: v_dual_min_num_f32 v7, vcc_hi, v3 ; encoding: [0x6b,0xb0,0x24,0xcf,0x6b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x6b,0xb0,0x24,0xcf,0x6b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v2, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, vcc_hi, v3 ; encoding: [0x6b,0x70,0x24,0xcf,0x6b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x6b,0x70,0x24,0xcf,0x6b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v2, vcc_lo :: v_dual_mul_f32 v7, vcc_hi, v3 ; encoding: [0x6b,0x30,0x24,0xcf,0x6b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x6b,0x30,0x24,0xcf,0x6b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v2, vcc_lo :: v_dual_sub_f32 v7, vcc_hi, v3 ; encoding: [0x6b,0x50,0x24,0xcf,0x6b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x6b,0x50,0x24,0xcf,0x6b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v2, vcc_lo :: v_dual_sub_nc_u32 v7, vcc_hi, v3 ; encoding: [0x6b,0x40,0x25,0xcf,0x6b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x6b,0x40,0x25,0xcf,0x6b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v2, vcc_lo :: v_dual_subrev_f32 v7, vcc_hi, v3 ; encoding: [0x6b,0x60,0x24,0xcf,0x6b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] 
+0x6b,0x60,0x24,0xcf,0x6b,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, vcc_hi, v255, vcc_lo :: v_dual_mov_b32 v7, vcc_hi ; encoding: [0x6b,0x80,0x24,0xcf,0x6b,0x00,0xff,0x6a,0xff,0x00,0x00,0x07] +0x6b,0x80,0x24,0xcf,0x6b,0x00,0xff,0x6a,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, vcc_lo, v2, vcc_lo :: v_dual_add_f32 v7, vcc_lo, v3 ; encoding: [0x6a,0x40,0x24,0xcf,0x6a,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x6a,0x40,0x24,0xcf,0x6a,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, vcc_lo, v2, vcc_lo :: v_dual_add_nc_u32 v7, vcc_lo, v3 ; encoding: [0x6a,0x00,0x25,0xcf,0x6a,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x6a,0x00,0x25,0xcf,0x6a,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, vcc_lo, v2, vcc_lo :: v_dual_ashrrev_i32 v7, vcc_lo, v3 ; encoding: [0x6a,0x60,0x25,0xcf,0x6a,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x6a,0x60,0x25,0xcf,0x6a,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, vcc_lo, v2, vcc_lo :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo ; encoding: [0x6a,0x90,0x24,0xcf,0x6a,0x00,0x02,0x6a,0xff,0x03,0x6a,0x07] +0x6a,0x90,0x24,0xcf,0x6a,0x00,0x02,0x6a,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_cndmask_b32 v28, -v15, v15, s46 :: v_dual_cndmask_b32 v29, -v13, -v13, s46 ; encoding: [0x0f,0x91,0x24,0xcf,0x0d,0x33,0x0f,0x2e,0x1c,0x0d,0x2e,0x1d] +0x0f,0x91,0x24,0xcf,0x0d,0x33,0x0f,0x2e,0x1c,0x0d,0x2e,0x1d + +# GFX1250: v_dual_cndmask_b32 v255, vcc_lo, v2, vcc_lo :: v_dual_fmac_f32 v7, vcc_lo, v3 ; encoding: [0x6a,0x00,0x24,0xcf,0x6a,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x6a,0x00,0x24,0xcf,0x6a,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, vcc_lo, v2, vcc_lo :: v_dual_lshlrev_b32 v7, vcc_lo, v3 ; encoding: [0x6a,0x10,0x25,0xcf,0x6a,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x6a,0x10,0x25,0xcf,0x6a,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, vcc_lo, v2, vcc_lo :: v_dual_lshrrev_b32 v7, vcc_lo, v3 ; 
encoding: [0x6a,0x50,0x25,0xcf,0x6a,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x6a,0x50,0x25,0xcf,0x6a,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, vcc_lo, v2, vcc_lo :: v_dual_max_i32 v7, vcc_lo, v3 ; encoding: [0x6a,0x70,0x25,0xcf,0x6a,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x6a,0x70,0x25,0xcf,0x6a,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, vcc_lo, v2, vcc_lo :: v_dual_max_num_f32 v7, vcc_lo, v3 ; encoding: [0x6a,0xa0,0x24,0xcf,0x6a,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x6a,0xa0,0x24,0xcf,0x6a,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, vcc_lo, v2, vcc_lo :: v_dual_min_i32 v7, vcc_lo, v3 ; encoding: [0x6a,0x80,0x25,0xcf,0x6a,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x6a,0x80,0x25,0xcf,0x6a,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, vcc_lo, v2, vcc_lo :: v_dual_min_num_f32 v7, vcc_lo, v3 ; encoding: [0x6a,0xb0,0x24,0xcf,0x6a,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x6a,0xb0,0x24,0xcf,0x6a,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, vcc_lo, v2, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 ; encoding: [0x6a,0x70,0x24,0xcf,0x6a,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x6a,0x70,0x24,0xcf,0x6a,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, vcc_lo, v2, vcc_lo :: v_dual_mul_f32 v7, vcc_lo, v3 ; encoding: [0x6a,0x30,0x24,0xcf,0x6a,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x6a,0x30,0x24,0xcf,0x6a,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, vcc_lo, v2, vcc_lo :: v_dual_sub_f32 v7, vcc_lo, v3 ; encoding: [0x6a,0x50,0x24,0xcf,0x6a,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x6a,0x50,0x24,0xcf,0x6a,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, vcc_lo, v2, vcc_lo :: v_dual_sub_nc_u32 v7, vcc_lo, v3 ; encoding: [0x6a,0x40,0x25,0xcf,0x6a,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x6a,0x40,0x25,0xcf,0x6a,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, 
vcc_lo, v2, vcc_lo :: v_dual_subrev_f32 v7, vcc_lo, v3 ; encoding: [0x6a,0x60,0x24,0xcf,0x6a,0x00,0x02,0x6a,0xff,0x03,0x00,0x07] +0x6a,0x60,0x24,0xcf,0x6a,0x00,0x02,0x6a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_cndmask_b32 v255, vcc_lo, v255, vcc_lo :: v_dual_mov_b32 v7, vcc_lo ; encoding: [0x6a,0x80,0x24,0xcf,0x6a,0x00,0xff,0x6a,0xff,0x00,0x00,0x07] +0x6a,0x80,0x24,0xcf,0x6a,0x00,0xff,0x6a,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v0, -s1, v2, v3 :: v_dual_bitop2_b32 v5, v6, v7 ; encoding: [0x01,0x20,0x4d,0xcf,0x06,0x03,0x02,0x03,0x00,0x07,0x00,0x05] +0x01,0x20,0x4d,0xcf,0x06,0x03,0x02,0x03,0x00,0x07,0x00,0x05 + +# GFX1250: v_dual_fma_f32 v0, -v1, v2, v3 :: v_dual_fma_f32 v5, v6, v7, v8 ; encoding: [0x01,0x31,0x4d,0xcf,0x06,0x03,0x02,0x03,0x00,0x07,0x08,0x05] +0x01,0x31,0x4d,0xcf,0x06,0x03,0x02,0x03,0x00,0x07,0x08,0x05 + +# GFX1250: v_dual_fma_f32 v0, v1, -v2, v3 :: v_dual_fma_f32 v5, v6, v7, v8 ; encoding: [0x01,0x31,0x4d,0xcf,0x06,0x05,0x02,0x03,0x00,0x07,0x08,0x05] +0x01,0x31,0x4d,0xcf,0x06,0x05,0x02,0x03,0x00,0x07,0x08,0x05 + +# GFX1250: v_dual_fma_f32 v0, v1, v2, -v3 :: v_dual_fma_f32 v5, v6, v7, v8 ; encoding: [0x01,0x31,0x4d,0xcf,0x06,0x09,0x02,0x03,0x00,0x07,0x08,0x05] +0x01,0x31,0x4d,0xcf,0x06,0x09,0x02,0x03,0x00,0x07,0x08,0x05 + +# GFX1250: v_dual_fma_f32 v0, v1, v2, v3 :: v_dual_fma_f32 v5, -v6, v7, v8 ; encoding: [0x01,0x31,0x4d,0xcf,0x06,0x11,0x02,0x03,0x00,0x07,0x08,0x05] +0x01,0x31,0x4d,0xcf,0x06,0x11,0x02,0x03,0x00,0x07,0x08,0x05 + +# GFX1250: v_dual_fma_f32 v0, v1, v2, v3 :: v_dual_fma_f32 v5, v6, -v7, v8 ; encoding: [0x01,0x31,0x4d,0xcf,0x06,0x21,0x02,0x03,0x00,0x07,0x08,0x05] +0x01,0x31,0x4d,0xcf,0x06,0x21,0x02,0x03,0x00,0x07,0x08,0x05 + +# GFX1250: v_dual_fma_f32 v0, v1, v2, v3 :: v_dual_fma_f32 v5, v6, v7, -v8 ; encoding: [0x01,0x31,0x4d,0xcf,0x06,0x41,0x02,0x03,0x00,0x07,0x08,0x05] +0x01,0x31,0x4d,0xcf,0x06,0x41,0x02,0x03,0x00,0x07,0x08,0x05 + +# GFX1250: v_dual_fma_f32 v1, v4, v2, v10 :: v_dual_ashrrev_i32 v9, v1, v13 ; encoding: 
[0x04,0x61,0x4d,0xcf,0x01,0x01,0x02,0x0a,0x01,0x0d,0x00,0x09] +0x04,0x61,0x4d,0xcf,0x01,0x01,0x02,0x0a,0x01,0x0d,0x00,0x09 + +# GFX1250: v_dual_fma_f32 v1, v4, v2, v10 :: v_dual_bitop2_b32 v7, v1, v3 ; encoding: [0x04,0x21,0x4d,0xcf,0x01,0x01,0x02,0x0a,0x01,0x03,0x00,0x07] +0x04,0x21,0x4d,0xcf,0x01,0x01,0x02,0x0a,0x01,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v1, v4, v2, v10 :: v_dual_fma_f32 v7, v1, v3, v4 ; encoding: [0x04,0x31,0x4d,0xcf,0x01,0x01,0x02,0x0a,0x01,0x03,0x04,0x07] +0x04,0x31,0x4d,0xcf,0x01,0x01,0x02,0x0a,0x01,0x03,0x04,0x07 + +# GFX1250: v_dual_fma_f32 v1, v4, v2, v10 :: v_dual_lshrrev_b32 v9, v1, v13 ; encoding: [0x04,0x51,0x4d,0xcf,0x01,0x01,0x02,0x0a,0x01,0x0d,0x00,0x09] +0x04,0x51,0x4d,0xcf,0x01,0x01,0x02,0x0a,0x01,0x0d,0x00,0x09 + +# GFX1250: v_dual_fma_f32 v1, v4, v2, v10 :: v_dual_sub_nc_u32 v9, v1, v13 ; encoding: [0x04,0x41,0x4d,0xcf,0x01,0x01,0x02,0x0a,0x01,0x0d,0x00,0x09] +0x04,0x41,0x4d,0xcf,0x01,0x01,0x02,0x0a,0x01,0x0d,0x00,0x09 + +# GFX1250: v_dual_fma_f32 v254, v4, v2, v10 :: v_dual_add_f32 v7, v1, v3 ; encoding: [0x04,0x41,0x4c,0xcf,0x01,0x01,0x02,0x0a,0xfe,0x03,0x00,0x07] +0x04,0x41,0x4c,0xcf,0x01,0x01,0x02,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, -1, v4, v10 :: v_dual_add_f32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x4c,0xcf,0xfd,0x00,0x04,0x0a,0xff,0x05,0x00,0x07] +0xc1,0x40,0x4c,0xcf,0xfd,0x00,0x04,0x0a,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, -1, v4, v10 :: v_dual_add_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x4d,0xcf,0xfd,0x00,0x04,0x0a,0xff,0x05,0x00,0x07] +0xc1,0x00,0x4d,0xcf,0xfd,0x00,0x04,0x0a,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, -1, v4, v10 :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo ; encoding: [0xc1,0x90,0x4c,0xcf,0xfd,0x00,0x04,0x0a,0xff,0x05,0x6a,0x07] +0xc1,0x90,0x4c,0xcf,0xfd,0x00,0x04,0x0a,0xff,0x05,0x6a,0x07 + +# GFX1250: v_dual_fma_f32 v255, -1, v4, v10 :: v_dual_fmac_f32 v7, src_scc, v5 ; encoding: 
[0xc1,0x00,0x4c,0xcf,0xfd,0x00,0x04,0x0a,0xff,0x05,0x00,0x07] +0xc1,0x00,0x4c,0xcf,0xfd,0x00,0x04,0x0a,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, -1, v4, v10 :: v_dual_lshlrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x10,0x4d,0xcf,0xfd,0x00,0x04,0x0a,0xff,0x05,0x00,0x07] +0xc1,0x10,0x4d,0xcf,0xfd,0x00,0x04,0x0a,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, -1, v4, v10 :: v_dual_max_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xa0,0x4c,0xcf,0xfd,0x00,0x04,0x0a,0xff,0x05,0x00,0x07] +0xc1,0xa0,0x4c,0xcf,0xfd,0x00,0x04,0x0a,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, -1, v4, v10 :: v_dual_min_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xb0,0x4c,0xcf,0xfd,0x00,0x04,0x0a,0xff,0x05,0x00,0x07] +0xc1,0xb0,0x4c,0xcf,0xfd,0x00,0x04,0x0a,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, -1, v4, v10 :: v_dual_mov_b32 v7, src_scc ; encoding: [0xc1,0x80,0x4c,0xcf,0xfd,0x00,0x04,0x0a,0xff,0x00,0x00,0x07] +0xc1,0x80,0x4c,0xcf,0xfd,0x00,0x04,0x0a,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, -1, v4, v10 :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x4c,0xcf,0xfd,0x00,0x04,0x0a,0xff,0x05,0x00,0x07] +0xc1,0x70,0x4c,0xcf,0xfd,0x00,0x04,0x0a,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, -1, v4, v10 :: v_dual_mul_f32 v7, src_scc, v5 ; encoding: [0xc1,0x30,0x4c,0xcf,0xfd,0x00,0x04,0x0a,0xff,0x05,0x00,0x07] +0xc1,0x30,0x4c,0xcf,0xfd,0x00,0x04,0x0a,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, -1, v4, v10 :: v_dual_sub_f32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x4c,0xcf,0xfd,0x00,0x04,0x0a,0xff,0x05,0x00,0x07] +0xc1,0x50,0x4c,0xcf,0xfd,0x00,0x04,0x0a,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, -1, v4, v10 :: v_dual_subrev_f32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x4c,0xcf,0xfd,0x00,0x04,0x0a,0xff,0x05,0x00,0x07] +0xc1,0x60,0x4c,0xcf,0xfd,0x00,0x04,0x0a,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, 0.5, v3, v10 :: v_dual_add_f32 v7, 0.5, v2 ; encoding: 
[0xf0,0x40,0x4c,0xcf,0xf0,0x00,0x03,0x0a,0xff,0x02,0x00,0x07] +0xf0,0x40,0x4c,0xcf,0xf0,0x00,0x03,0x0a,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, 0.5, v3, v10 :: v_dual_add_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x4d,0xcf,0xf0,0x00,0x03,0x0a,0xff,0x02,0x00,0x07] +0xf0,0x00,0x4d,0xcf,0xf0,0x00,0x03,0x0a,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, 0.5, v3, v10 :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo ; encoding: [0xf0,0x90,0x4c,0xcf,0xf0,0x00,0x03,0x0a,0xff,0x02,0x6a,0x07] +0xf0,0x90,0x4c,0xcf,0xf0,0x00,0x03,0x0a,0xff,0x02,0x6a,0x07 + +# GFX1250: v_dual_fma_f32 v255, 0.5, v3, v10 :: v_dual_fmac_f32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x4c,0xcf,0xf0,0x00,0x03,0x0a,0xff,0x02,0x00,0x07] +0xf0,0x00,0x4c,0xcf,0xf0,0x00,0x03,0x0a,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, 0.5, v3, v10 :: v_dual_lshlrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x10,0x4d,0xcf,0xf0,0x00,0x03,0x0a,0xff,0x02,0x00,0x07] +0xf0,0x10,0x4d,0xcf,0xf0,0x00,0x03,0x0a,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, 0.5, v3, v10 :: v_dual_max_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xa0,0x4c,0xcf,0xf0,0x00,0x03,0x0a,0xff,0x02,0x00,0x07] +0xf0,0xa0,0x4c,0xcf,0xf0,0x00,0x03,0x0a,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, 0.5, v3, v10 :: v_dual_min_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xb0,0x4c,0xcf,0xf0,0x00,0x03,0x0a,0xff,0x02,0x00,0x07] +0xf0,0xb0,0x4c,0xcf,0xf0,0x00,0x03,0x0a,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, 0.5, v3, v10 :: v_dual_mov_b32 v7, 0.5 ; encoding: [0xf0,0x80,0x4c,0xcf,0xf0,0x00,0x03,0x0a,0xff,0x00,0x00,0x07] +0xf0,0x80,0x4c,0xcf,0xf0,0x00,0x03,0x0a,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, 0.5, v3, v10 :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x4c,0xcf,0xf0,0x00,0x03,0x0a,0xff,0x02,0x00,0x07] +0xf0,0x70,0x4c,0xcf,0xf0,0x00,0x03,0x0a,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, 0.5, v3, v10 :: v_dual_mul_f32 v7, 0.5, v2 ; encoding: 
[0xf0,0x30,0x4c,0xcf,0xf0,0x00,0x03,0x0a,0xff,0x02,0x00,0x07] +0xf0,0x30,0x4c,0xcf,0xf0,0x00,0x03,0x0a,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, 0.5, v3, v10 :: v_dual_sub_f32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x4c,0xcf,0xf0,0x00,0x03,0x0a,0xff,0x02,0x00,0x07] +0xf0,0x50,0x4c,0xcf,0xf0,0x00,0x03,0x0a,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, 0.5, v3, v10 :: v_dual_subrev_f32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x4c,0xcf,0xf0,0x00,0x03,0x0a,0xff,0x02,0x00,0x07] +0xf0,0x60,0x4c,0xcf,0xf0,0x00,0x03,0x0a,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, exec_hi, v2, v10 :: v_dual_add_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x40,0x4c,0xcf,0x6b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x7f,0x40,0x4c,0xcf,0x6b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, exec_hi, v2, v10 :: v_dual_add_nc_u32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x4d,0xcf,0x6b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x7f,0x00,0x4d,0xcf,0x6b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, exec_hi, v2, v10 :: v_dual_cndmask_b32 v7, exec_hi, v3, vcc_lo ; encoding: [0x7f,0x90,0x4c,0xcf,0x7f,0x00,0x02,0x0a,0xff,0x03,0x6a,0x07] +0x7f,0x90,0x4c,0xcf,0x7f,0x00,0x02,0x0a,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_fma_f32 v255, exec_hi, v2, v10 :: v_dual_fmac_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x4c,0xcf,0x6b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x7f,0x00,0x4c,0xcf,0x6b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, exec_hi, v2, v10 :: v_dual_lshlrev_b32 v7, vcc_hi, v3 ; encoding: [0x7f,0x10,0x4d,0xcf,0x6b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x7f,0x10,0x4d,0xcf,0x6b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, exec_hi, v2, v10 :: v_dual_max_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xa0,0x4c,0xcf,0x6b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x7f,0xa0,0x4c,0xcf,0x6b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, exec_hi, v2, v10 :: v_dual_min_num_f32 v7, vcc_hi, v3 ; encoding: 
[0x7f,0xb0,0x4c,0xcf,0x6b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x7f,0xb0,0x4c,0xcf,0x6b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, exec_hi, v2, v10 :: v_dual_mul_dx9_zero_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x70,0x4c,0xcf,0x6b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x7f,0x70,0x4c,0xcf,0x6b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, exec_hi, v2, v10 :: v_dual_mul_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x30,0x4c,0xcf,0x6b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x7f,0x30,0x4c,0xcf,0x6b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, exec_hi, v2, v10 :: v_dual_sub_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x50,0x4c,0xcf,0x6b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x7f,0x50,0x4c,0xcf,0x6b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, exec_hi, v2, v10 :: v_dual_subrev_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x60,0x4c,0xcf,0x6b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x7f,0x60,0x4c,0xcf,0x6b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, exec_hi, v255, v10 :: v_dual_mov_b32 v7, vcc_hi ; encoding: [0x7f,0x80,0x4c,0xcf,0x6b,0x00,0xff,0x0a,0xff,0x00,0x00,0x07] +0x7f,0x80,0x4c,0xcf,0x6b,0x00,0xff,0x0a,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, exec_lo, v2, v10 :: v_dual_add_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x40,0x4c,0xcf,0x7b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x7e,0x40,0x4c,0xcf,0x7b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, exec_lo, v2, v10 :: v_dual_add_nc_u32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x4d,0xcf,0x7b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x7e,0x00,0x4d,0xcf,0x7b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, exec_lo, v2, v10 :: v_dual_cndmask_b32 v7, exec_lo, v3, vcc_lo ; encoding: [0x7e,0x90,0x4c,0xcf,0x7e,0x00,0x02,0x0a,0xff,0x03,0x6a,0x07] +0x7e,0x90,0x4c,0xcf,0x7e,0x00,0x02,0x0a,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_fma_f32 v255, exec_lo, v2, v10 :: v_dual_fmac_f32 v7, ttmp15, v3 ; 
encoding: [0x7e,0x00,0x4c,0xcf,0x7b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x7e,0x00,0x4c,0xcf,0x7b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, exec_lo, v2, v10 :: v_dual_lshlrev_b32 v7, ttmp15, v3 ; encoding: [0x7e,0x10,0x4d,0xcf,0x7b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x7e,0x10,0x4d,0xcf,0x7b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, exec_lo, v2, v10 :: v_dual_max_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xa0,0x4c,0xcf,0x7b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x7e,0xa0,0x4c,0xcf,0x7b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, exec_lo, v2, v10 :: v_dual_min_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xb0,0x4c,0xcf,0x7b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x7e,0xb0,0x4c,0xcf,0x7b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, exec_lo, v2, v10 :: v_dual_mul_dx9_zero_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x70,0x4c,0xcf,0x7b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x7e,0x70,0x4c,0xcf,0x7b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, exec_lo, v2, v10 :: v_dual_mul_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x30,0x4c,0xcf,0x7b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x7e,0x30,0x4c,0xcf,0x7b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, exec_lo, v2, v10 :: v_dual_sub_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x50,0x4c,0xcf,0x7b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x7e,0x50,0x4c,0xcf,0x7b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, exec_lo, v2, v10 :: v_dual_subrev_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x60,0x4c,0xcf,0x7b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x7e,0x60,0x4c,0xcf,0x7b,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, exec_lo, v255, v10 :: v_dual_mov_b32 v7, ttmp15 ; encoding: [0x7e,0x80,0x4c,0xcf,0x7b,0x00,0xff,0x0a,0xff,0x00,0x00,0x07] +0x7e,0x80,0x4c,0xcf,0x7b,0x00,0xff,0x0a,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, m0, v2, v10 :: v_dual_add_f32 v7, m0, v3 ; 
encoding: [0x7d,0x40,0x4c,0xcf,0x7d,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x7d,0x40,0x4c,0xcf,0x7d,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, m0, v2, v10 :: v_dual_add_nc_u32 v7, m0, v3 ; encoding: [0x7d,0x00,0x4d,0xcf,0x7d,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x7d,0x00,0x4d,0xcf,0x7d,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, m0, v2, v10 :: v_dual_cndmask_b32 v7, m0, v3, vcc_lo ; encoding: [0x7d,0x90,0x4c,0xcf,0x7d,0x00,0x02,0x0a,0xff,0x03,0x6a,0x07] +0x7d,0x90,0x4c,0xcf,0x7d,0x00,0x02,0x0a,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_fma_f32 v255, m0, v2, v10 :: v_dual_fmac_f32 v7, m0, v3 ; encoding: [0x7d,0x00,0x4c,0xcf,0x7d,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x7d,0x00,0x4c,0xcf,0x7d,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, m0, v2, v10 :: v_dual_lshlrev_b32 v7, m0, v3 ; encoding: [0x7d,0x10,0x4d,0xcf,0x7d,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x7d,0x10,0x4d,0xcf,0x7d,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, m0, v2, v10 :: v_dual_max_num_f32 v7, m0, v3 ; encoding: [0x7d,0xa0,0x4c,0xcf,0x7d,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x7d,0xa0,0x4c,0xcf,0x7d,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, m0, v2, v10 :: v_dual_min_num_f32 v7, m0, v3 ; encoding: [0x7d,0xb0,0x4c,0xcf,0x7d,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x7d,0xb0,0x4c,0xcf,0x7d,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, m0, v2, v10 :: v_dual_mul_dx9_zero_f32 v7, m0, v3 ; encoding: [0x7d,0x70,0x4c,0xcf,0x7d,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x7d,0x70,0x4c,0xcf,0x7d,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, m0, v2, v10 :: v_dual_mul_f32 v7, m0, v3 ; encoding: [0x7d,0x30,0x4c,0xcf,0x7d,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x7d,0x30,0x4c,0xcf,0x7d,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, m0, v2, v10 :: v_dual_sub_f32 v7, m0, v3 ; encoding: 
[0x7d,0x50,0x4c,0xcf,0x7d,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x7d,0x50,0x4c,0xcf,0x7d,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, m0, v2, v10 :: v_dual_subrev_f32 v7, m0, v3 ; encoding: [0x7d,0x60,0x4c,0xcf,0x7d,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x7d,0x60,0x4c,0xcf,0x7d,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, m0, v255, v10 :: v_dual_mov_b32 v7, m0 ; encoding: [0x7d,0x80,0x4c,0xcf,0x7d,0x00,0xff,0x0a,0xff,0x00,0x00,0x07] +0x7d,0x80,0x4c,0xcf,0x7d,0x00,0xff,0x0a,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, s1, v2, v10 :: v_dual_add_f32 v7, s105, v3 ; encoding: [0x01,0x40,0x4c,0xcf,0x69,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x01,0x40,0x4c,0xcf,0x69,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, s1, v2, v10 :: v_dual_add_nc_u32 v7, s105, v3 ; encoding: [0x01,0x00,0x4d,0xcf,0x69,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x01,0x00,0x4d,0xcf,0x69,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, s1, v2, v10 :: v_dual_cndmask_b32 v7, s1, v3, vcc_lo ; encoding: [0x01,0x90,0x4c,0xcf,0x01,0x00,0x02,0x0a,0xff,0x03,0x6a,0x07] +0x01,0x90,0x4c,0xcf,0x01,0x00,0x02,0x0a,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_fma_f32 v255, s1, v2, v10 :: v_dual_fmac_f32 v7, s105, v3 ; encoding: [0x01,0x00,0x4c,0xcf,0x69,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x01,0x00,0x4c,0xcf,0x69,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, s1, v2, v10 :: v_dual_lshlrev_b32 v7, s105, v3 ; encoding: [0x01,0x10,0x4d,0xcf,0x69,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x01,0x10,0x4d,0xcf,0x69,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, s1, v2, v10 :: v_dual_max_num_f32 v7, s105, v3 ; encoding: [0x01,0xa0,0x4c,0xcf,0x69,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x01,0xa0,0x4c,0xcf,0x69,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, s1, v2, v10 :: v_dual_min_num_f32 v7, s105, v3 ; encoding: 
[0x01,0xb0,0x4c,0xcf,0x69,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x01,0xb0,0x4c,0xcf,0x69,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, s1, v2, v10 :: v_dual_mul_dx9_zero_f32 v7, s105, v3 ; encoding: [0x01,0x70,0x4c,0xcf,0x69,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x01,0x70,0x4c,0xcf,0x69,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, s1, v2, v10 :: v_dual_mul_f32 v7, s105, v3 ; encoding: [0x01,0x30,0x4c,0xcf,0x69,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x01,0x30,0x4c,0xcf,0x69,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, s1, v2, v10 :: v_dual_sub_f32 v7, s105, v3 ; encoding: [0x01,0x50,0x4c,0xcf,0x69,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x01,0x50,0x4c,0xcf,0x69,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, s1, v2, v10 :: v_dual_subrev_f32 v7, s105, v3 ; encoding: [0x01,0x60,0x4c,0xcf,0x69,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x01,0x60,0x4c,0xcf,0x69,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, s1, v255, v10 :: v_dual_mov_b32 v7, s105 ; encoding: [0x01,0x80,0x4c,0xcf,0x69,0x00,0xff,0x0a,0xff,0x00,0x00,0x07] +0x01,0x80,0x4c,0xcf,0x69,0x00,0xff,0x0a,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, s105, v2, v10 :: v_dual_add_f32 v7, s1, v3 ; encoding: [0x69,0x40,0x4c,0xcf,0x01,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x69,0x40,0x4c,0xcf,0x01,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, s105, v2, v10 :: v_dual_add_nc_u32 v7, s1, v3 ; encoding: [0x69,0x00,0x4d,0xcf,0x01,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x69,0x00,0x4d,0xcf,0x01,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, s105, v2, v10 :: v_dual_cndmask_b32 v7, s105, v3, vcc_lo ; encoding: [0x69,0x90,0x4c,0xcf,0x69,0x00,0x02,0x0a,0xff,0x03,0x6a,0x07] +0x69,0x90,0x4c,0xcf,0x69,0x00,0x02,0x0a,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_fma_f32 v255, s105, v2, v10 :: v_dual_fmac_f32 v7, s1, v3 ; encoding: 
[0x69,0x00,0x4c,0xcf,0x01,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x69,0x00,0x4c,0xcf,0x01,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, s105, v2, v10 :: v_dual_lshlrev_b32 v7, s1, v3 ; encoding: [0x69,0x10,0x4d,0xcf,0x01,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x69,0x10,0x4d,0xcf,0x01,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, s105, v2, v10 :: v_dual_max_num_f32 v7, s1, v3 ; encoding: [0x69,0xa0,0x4c,0xcf,0x01,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x69,0xa0,0x4c,0xcf,0x01,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, s105, v2, v10 :: v_dual_min_num_f32 v7, s1, v3 ; encoding: [0x69,0xb0,0x4c,0xcf,0x01,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x69,0xb0,0x4c,0xcf,0x01,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, s105, v2, v10 :: v_dual_mul_dx9_zero_f32 v7, s1, v3 ; encoding: [0x69,0x70,0x4c,0xcf,0x01,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x69,0x70,0x4c,0xcf,0x01,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, s105, v2, v10 :: v_dual_mul_f32 v7, s1, v3 ; encoding: [0x69,0x30,0x4c,0xcf,0x01,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x69,0x30,0x4c,0xcf,0x01,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, s105, v2, v10 :: v_dual_sub_f32 v7, s1, v3 ; encoding: [0x69,0x50,0x4c,0xcf,0x01,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x69,0x50,0x4c,0xcf,0x01,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, s105, v2, v10 :: v_dual_subrev_f32 v7, s1, v3 ; encoding: [0x69,0x60,0x4c,0xcf,0x01,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x69,0x60,0x4c,0xcf,0x01,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, s105, v255, v10 :: v_dual_mov_b32 v7, s1 ; encoding: [0x69,0x80,0x4c,0xcf,0x01,0x00,0xff,0x0a,0xff,0x00,0x00,0x07] +0x69,0x80,0x4c,0xcf,0x01,0x00,0xff,0x0a,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, src_scc, v2, v10 :: v_dual_add_f32 v7, -1, v3 ; encoding: 
[0xfd,0x40,0x4c,0xcf,0xc1,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0xfd,0x40,0x4c,0xcf,0xc1,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, src_scc, v2, v10 :: v_dual_add_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x00,0x4d,0xcf,0xc1,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0xfd,0x00,0x4d,0xcf,0xc1,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, src_scc, v2, v10 :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo ; encoding: [0xfd,0x90,0x4c,0xcf,0xc1,0x00,0x02,0x0a,0xff,0x03,0x6a,0x07] +0xfd,0x90,0x4c,0xcf,0xc1,0x00,0x02,0x0a,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_fma_f32 v255, src_scc, v2, v10 :: v_dual_fmac_f32 v7, -1, v3 ; encoding: [0xfd,0x00,0x4c,0xcf,0xc1,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0xfd,0x00,0x4c,0xcf,0xc1,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, src_scc, v2, v10 :: v_dual_lshlrev_b32 v7, -1, v3 ; encoding: [0xfd,0x10,0x4d,0xcf,0xc1,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0xfd,0x10,0x4d,0xcf,0xc1,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, src_scc, v2, v10 :: v_dual_max_num_f32 v7, -1, v3 ; encoding: [0xfd,0xa0,0x4c,0xcf,0xc1,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0xfd,0xa0,0x4c,0xcf,0xc1,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, src_scc, v2, v10 :: v_dual_min_num_f32 v7, -1, v3 ; encoding: [0xfd,0xb0,0x4c,0xcf,0xc1,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0xfd,0xb0,0x4c,0xcf,0xc1,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, src_scc, v2, v10 :: v_dual_mul_dx9_zero_f32 v7, -1, v3 ; encoding: [0xfd,0x70,0x4c,0xcf,0xc1,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0xfd,0x70,0x4c,0xcf,0xc1,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, src_scc, v2, v10 :: v_dual_mul_f32 v7, -1, v3 ; encoding: [0xfd,0x30,0x4c,0xcf,0xc1,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0xfd,0x30,0x4c,0xcf,0xc1,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, src_scc, v2, v10 :: v_dual_sub_f32 v7, -1, v3 ; encoding: 
[0xfd,0x50,0x4c,0xcf,0xc1,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0xfd,0x50,0x4c,0xcf,0xc1,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, src_scc, v2, v10 :: v_dual_subrev_f32 v7, -1, v3 ; encoding: [0xfd,0x60,0x4c,0xcf,0xc1,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0xfd,0x60,0x4c,0xcf,0xc1,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, src_scc, v255, v10 :: v_dual_mov_b32 v7, -1 ; encoding: [0xfd,0x80,0x4c,0xcf,0xc1,0x00,0xff,0x0a,0xff,0x00,0x00,0x07] +0xfd,0x80,0x4c,0xcf,0xc1,0x00,0xff,0x0a,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, ttmp15, v2, v10 :: v_dual_add_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x40,0x4c,0xcf,0x6a,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x7b,0x40,0x4c,0xcf,0x6a,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, ttmp15, v2, v10 :: v_dual_add_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x4d,0xcf,0x6a,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x7b,0x00,0x4d,0xcf,0x6a,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, ttmp15, v2, v10 :: v_dual_cndmask_b32 v7, ttmp15, v3, vcc_lo ; encoding: [0x7b,0x90,0x4c,0xcf,0x7b,0x00,0x02,0x0a,0xff,0x03,0x6a,0x07] +0x7b,0x90,0x4c,0xcf,0x7b,0x00,0x02,0x0a,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_fma_f32 v255, ttmp15, v2, v10 :: v_dual_fmac_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x4c,0xcf,0x6a,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x7b,0x00,0x4c,0xcf,0x6a,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, ttmp15, v2, v10 :: v_dual_lshlrev_b32 v7, vcc_lo, v3 ; encoding: [0x7b,0x10,0x4d,0xcf,0x6a,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x7b,0x10,0x4d,0xcf,0x6a,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, ttmp15, v2, v10 :: v_dual_max_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xa0,0x4c,0xcf,0x6a,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x7b,0xa0,0x4c,0xcf,0x6a,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, ttmp15, v2, v10 :: v_dual_min_num_f32 v7, vcc_lo, v3 ; encoding: 
[0x7b,0xb0,0x4c,0xcf,0x6a,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x7b,0xb0,0x4c,0xcf,0x6a,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, ttmp15, v2, v10 :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x70,0x4c,0xcf,0x6a,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x7b,0x70,0x4c,0xcf,0x6a,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, ttmp15, v2, v10 :: v_dual_mul_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x30,0x4c,0xcf,0x6a,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x7b,0x30,0x4c,0xcf,0x6a,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, ttmp15, v2, v10 :: v_dual_sub_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x50,0x4c,0xcf,0x6a,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x7b,0x50,0x4c,0xcf,0x6a,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, ttmp15, v2, v10 :: v_dual_subrev_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x60,0x4c,0xcf,0x6a,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x7b,0x60,0x4c,0xcf,0x6a,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, ttmp15, v255, v10 :: v_dual_mov_b32 v7, vcc_lo ; encoding: [0x7b,0x80,0x4c,0xcf,0x6a,0x00,0xff,0x0a,0xff,0x00,0x00,0x07] +0x7b,0x80,0x4c,0xcf,0x6a,0x00,0xff,0x0a,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v1, v2, v10 :: v_dual_add_f32 v7, v255, v3 ; encoding: [0x01,0x41,0x4c,0xcf,0xff,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +0x01,0x41,0x4c,0xcf,0xff,0x01,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v1, v2, v10 :: v_dual_add_nc_u32 v7, v255, v3 ; encoding: [0x01,0x01,0x4d,0xcf,0xff,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +0x01,0x01,0x4d,0xcf,0xff,0x01,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v1, v2, v10 :: v_dual_cndmask_b32 v7, v255, v3, vcc_lo ; encoding: [0x01,0x91,0x4c,0xcf,0xff,0x01,0x02,0x0a,0xff,0x03,0x6a,0x07] +0x01,0x91,0x4c,0xcf,0xff,0x01,0x02,0x0a,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_fma_f32 v255, v1, v2, v10 :: v_dual_fmac_f32 v7, v255, v3 ; encoding: 
[0x01,0x01,0x4c,0xcf,0xff,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +0x01,0x01,0x4c,0xcf,0xff,0x01,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v1, v2, v10 :: v_dual_lshlrev_b32 v7, v255, v3 ; encoding: [0x01,0x11,0x4d,0xcf,0xff,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +0x01,0x11,0x4d,0xcf,0xff,0x01,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v1, v2, v10 :: v_dual_max_num_f32 v7, v255, v3 ; encoding: [0x01,0xa1,0x4c,0xcf,0xff,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +0x01,0xa1,0x4c,0xcf,0xff,0x01,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v1, v2, v10 :: v_dual_min_num_f32 v7, v255, v3 ; encoding: [0x01,0xb1,0x4c,0xcf,0xff,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +0x01,0xb1,0x4c,0xcf,0xff,0x01,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v1, v2, v10 :: v_dual_mul_dx9_zero_f32 v7, v255, v3 ; encoding: [0x01,0x71,0x4c,0xcf,0xff,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +0x01,0x71,0x4c,0xcf,0xff,0x01,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v1, v2, v10 :: v_dual_mul_f32 v7, v255, v3 ; encoding: [0x01,0x31,0x4c,0xcf,0xff,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +0x01,0x31,0x4c,0xcf,0xff,0x01,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v1, v2, v10 :: v_dual_sub_f32 v7, v255, v3 ; encoding: [0x01,0x51,0x4c,0xcf,0xff,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +0x01,0x51,0x4c,0xcf,0xff,0x01,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v1, v2, v10 :: v_dual_subrev_f32 v7, v255, v3 ; encoding: [0x01,0x61,0x4c,0xcf,0xff,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +0x01,0x61,0x4c,0xcf,0xff,0x01,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v1, v255, v10 :: v_dual_mov_b32 v7, v255 ; encoding: [0x01,0x81,0x4c,0xcf,0xff,0x01,0xff,0x0a,0xff,0x00,0x00,0x07] +0x01,0x81,0x4c,0xcf,0xff,0x01,0xff,0x0a,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v2, v2, v10 :: v_dual_add_f32 v7, v3, v3 ; encoding: [0x02,0x41,0x4c,0xcf,0x03,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] 
+0x02,0x41,0x4c,0xcf,0x03,0x01,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v2, v2, v10 :: v_dual_add_nc_u32 v7, v3, v3 ; encoding: [0x02,0x01,0x4d,0xcf,0x03,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +0x02,0x01,0x4d,0xcf,0x03,0x01,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v2, v2, v10 :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo ; encoding: [0x02,0x91,0x4c,0xcf,0x03,0x01,0x02,0x0a,0xff,0x03,0x6a,0x07] +0x02,0x91,0x4c,0xcf,0x03,0x01,0x02,0x0a,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_fma_f32 v255, v2, v2, v10 :: v_dual_fmac_f32 v7, v3, v3 ; encoding: [0x02,0x01,0x4c,0xcf,0x03,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +0x02,0x01,0x4c,0xcf,0x03,0x01,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v2, v2, v10 :: v_dual_lshlrev_b32 v7, v3, v3 ; encoding: [0x02,0x11,0x4d,0xcf,0x03,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +0x02,0x11,0x4d,0xcf,0x03,0x01,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v2, v2, v10 :: v_dual_max_num_f32 v7, v3, v3 ; encoding: [0x02,0xa1,0x4c,0xcf,0x03,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +0x02,0xa1,0x4c,0xcf,0x03,0x01,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v2, v2, v10 :: v_dual_min_num_f32 v7, v3, v3 ; encoding: [0x02,0xb1,0x4c,0xcf,0x03,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +0x02,0xb1,0x4c,0xcf,0x03,0x01,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v2, v2, v10 :: v_dual_mul_dx9_zero_f32 v7, v3, v3 ; encoding: [0x02,0x71,0x4c,0xcf,0x03,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +0x02,0x71,0x4c,0xcf,0x03,0x01,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v2, v2, v10 :: v_dual_mul_f32 v7, v3, v3 ; encoding: [0x02,0x31,0x4c,0xcf,0x03,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +0x02,0x31,0x4c,0xcf,0x03,0x01,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v2, v2, v10 :: v_dual_sub_f32 v7, v3, v3 ; encoding: [0x02,0x51,0x4c,0xcf,0x03,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +0x02,0x51,0x4c,0xcf,0x03,0x01,0x02,0x0a,0xff,0x03,0x00,0x07 + 
+# GFX1250: v_dual_fma_f32 v255, v2, v2, v10 :: v_dual_subrev_f32 v7, v3, v3 ; encoding: [0x02,0x61,0x4c,0xcf,0x03,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +0x02,0x61,0x4c,0xcf,0x03,0x01,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v2, v255, v10 :: v_dual_mov_b32 v7, v3 ; encoding: [0x02,0x81,0x4c,0xcf,0x03,0x01,0xff,0x0a,0xff,0x00,0x00,0x07] +0x02,0x81,0x4c,0xcf,0x03,0x01,0xff,0x0a,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v255, v2, v10 :: v_dual_add_f32 v7, v2, v3 ; encoding: [0xff,0x41,0x4c,0xcf,0x02,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +0xff,0x41,0x4c,0xcf,0x02,0x01,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v255, v2, v10 :: v_dual_add_nc_u32 v7, v2, v3 ; encoding: [0xff,0x01,0x4d,0xcf,0x02,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +0xff,0x01,0x4d,0xcf,0x02,0x01,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v255, v2, v10 :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo ; encoding: [0xff,0x91,0x4c,0xcf,0x02,0x01,0x02,0x0a,0xff,0x03,0x6a,0x07] +0xff,0x91,0x4c,0xcf,0x02,0x01,0x02,0x0a,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_fma_f32 v255, v255, v2, v10 :: v_dual_fmac_f32 v7, v2, v3 ; encoding: [0xff,0x01,0x4c,0xcf,0x02,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +0xff,0x01,0x4c,0xcf,0x02,0x01,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v255, v2, v10 :: v_dual_lshlrev_b32 v7, v2, v3 ; encoding: [0xff,0x11,0x4d,0xcf,0x02,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +0xff,0x11,0x4d,0xcf,0x02,0x01,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v255, v2, v10 :: v_dual_max_num_f32 v7, v2, v3 ; encoding: [0xff,0xa1,0x4c,0xcf,0x02,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +0xff,0xa1,0x4c,0xcf,0x02,0x01,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v255, v2, v10 :: v_dual_min_num_f32 v7, v2, v3 ; encoding: [0xff,0xb1,0x4c,0xcf,0x02,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +0xff,0xb1,0x4c,0xcf,0x02,0x01,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v255, v2, v10 :: 
v_dual_mul_dx9_zero_f32 v7, v2, v3 ; encoding: [0xff,0x71,0x4c,0xcf,0x02,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +0xff,0x71,0x4c,0xcf,0x02,0x01,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v255, v2, v10 :: v_dual_mul_f32 v7, v2, v3 ; encoding: [0xff,0x31,0x4c,0xcf,0x02,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +0xff,0x31,0x4c,0xcf,0x02,0x01,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v255, v2, v10 :: v_dual_sub_f32 v7, v2, v3 ; encoding: [0xff,0x51,0x4c,0xcf,0x02,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +0xff,0x51,0x4c,0xcf,0x02,0x01,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v255, v2, v10 :: v_dual_subrev_f32 v7, v2, v3 ; encoding: [0xff,0x61,0x4c,0xcf,0x02,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +0xff,0x61,0x4c,0xcf,0x02,0x01,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v255, v255, v10 :: v_dual_mov_b32 v7, v2 ; encoding: [0xff,0x81,0x4c,0xcf,0x02,0x01,0xff,0x0a,0xff,0x00,0x00,0x07] +0xff,0x81,0x4c,0xcf,0x02,0x01,0xff,0x0a,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v3, v2, v10 :: v_dual_add_f32 v7, v4, v3 ; encoding: [0x03,0x41,0x4c,0xcf,0x04,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +0x03,0x41,0x4c,0xcf,0x04,0x01,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v3, v2, v10 :: v_dual_add_nc_u32 v7, v4, v3 ; encoding: [0x03,0x01,0x4d,0xcf,0x04,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +0x03,0x01,0x4d,0xcf,0x04,0x01,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v3, v2, v10 :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo ; encoding: [0x03,0x91,0x4c,0xcf,0x04,0x01,0x02,0x0a,0xff,0x03,0x6a,0x07] +0x03,0x91,0x4c,0xcf,0x04,0x01,0x02,0x0a,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_fma_f32 v255, v3, v2, v10 :: v_dual_fmac_f32 v7, v4, v3 ; encoding: [0x03,0x01,0x4c,0xcf,0x04,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +0x03,0x01,0x4c,0xcf,0x04,0x01,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v3, v2, v10 :: v_dual_lshlrev_b32 v7, v4, v3 ; encoding: 
[0x03,0x11,0x4d,0xcf,0x04,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +0x03,0x11,0x4d,0xcf,0x04,0x01,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v3, v2, v10 :: v_dual_max_num_f32 v7, v4, v3 ; encoding: [0x03,0xa1,0x4c,0xcf,0x04,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +0x03,0xa1,0x4c,0xcf,0x04,0x01,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v3, v2, v10 :: v_dual_min_num_f32 v7, v4, v3 ; encoding: [0x03,0xb1,0x4c,0xcf,0x04,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +0x03,0xb1,0x4c,0xcf,0x04,0x01,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v3, v2, v10 :: v_dual_mul_dx9_zero_f32 v7, v4, v3 ; encoding: [0x03,0x71,0x4c,0xcf,0x04,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +0x03,0x71,0x4c,0xcf,0x04,0x01,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v3, v2, v10 :: v_dual_mul_f32 v7, v4, v3 ; encoding: [0x03,0x31,0x4c,0xcf,0x04,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +0x03,0x31,0x4c,0xcf,0x04,0x01,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v3, v2, v10 :: v_dual_sub_f32 v7, v4, v3 ; encoding: [0x03,0x51,0x4c,0xcf,0x04,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +0x03,0x51,0x4c,0xcf,0x04,0x01,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v3, v2, v10 :: v_dual_subrev_f32 v7, v4, v3 ; encoding: [0x03,0x61,0x4c,0xcf,0x04,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +0x03,0x61,0x4c,0xcf,0x04,0x01,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v3, v255, v10 :: v_dual_mov_b32 v7, v4 ; encoding: [0x03,0x81,0x4c,0xcf,0x04,0x01,0xff,0x0a,0xff,0x00,0x00,0x07] +0x03,0x81,0x4c,0xcf,0x04,0x01,0xff,0x0a,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v4, v2, v10 :: v_dual_add_nc_u32 v7, v1, v3 ; encoding: [0x04,0x01,0x4d,0xcf,0x01,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +0x04,0x01,0x4d,0xcf,0x01,0x01,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v4, v2, v10 :: v_dual_cndmask_b32 v7, v1, v3, s96 ; encoding: [0x04,0x91,0x4c,0xcf,0x01,0x01,0x02,0x0a,0xff,0x03,0x60,0x07] 
+0x04,0x91,0x4c,0xcf,0x01,0x01,0x02,0x0a,0xff,0x03,0x60,0x07 + +# GFX1250: v_dual_fma_f32 v255, v4, v2, v10 :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo ; encoding: [0x04,0x91,0x4c,0xcf,0x01,0x01,0x02,0x0a,0xff,0x03,0x6a,0x07] +0x04,0x91,0x4c,0xcf,0x01,0x01,0x02,0x0a,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_fma_f32 v255, v4, v2, v10 :: v_dual_fmac_f32 v7, v1, v3 ; encoding: [0x04,0x01,0x4c,0xcf,0x01,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +0x04,0x01,0x4c,0xcf,0x01,0x01,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v4, v2, v10 :: v_dual_lshlrev_b32 v7, v1, v3 ; encoding: [0x04,0x11,0x4d,0xcf,0x01,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +0x04,0x11,0x4d,0xcf,0x01,0x01,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v4, v2, v10 :: v_dual_max_num_f32 v7, v1, v3 ; encoding: [0x04,0xa1,0x4c,0xcf,0x01,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +0x04,0xa1,0x4c,0xcf,0x01,0x01,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v4, v2, v10 :: v_dual_min_num_f32 v7, v1, v3 ; encoding: [0x04,0xb1,0x4c,0xcf,0x01,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +0x04,0xb1,0x4c,0xcf,0x01,0x01,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v4, v2, v10 :: v_dual_mul_dx9_zero_f32 v7, v1, v3 ; encoding: [0x04,0x71,0x4c,0xcf,0x01,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +0x04,0x71,0x4c,0xcf,0x01,0x01,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v4, v2, v10 :: v_dual_mul_f32 v7, v1, v3 ; encoding: [0x04,0x31,0x4c,0xcf,0x01,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +0x04,0x31,0x4c,0xcf,0x01,0x01,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v4, v2, v10 :: v_dual_sub_f32 v7, v1, v3 ; encoding: [0x04,0x51,0x4c,0xcf,0x01,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +0x04,0x51,0x4c,0xcf,0x01,0x01,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, v4, v2, v10 :: v_dual_subrev_f32 v7, v1, v3 ; encoding: [0x04,0x61,0x4c,0xcf,0x01,0x01,0x02,0x0a,0xff,0x03,0x00,0x07] +0x04,0x61,0x4c,0xcf,0x01,0x01,0x02,0x0a,0xff,0x03,0x00,0x07 + 
+# GFX1250: v_dual_fma_f32 v255, v4, v255, v10 :: v_dual_mov_b32 v7, v1 ; encoding: [0x04,0x81,0x4c,0xcf,0x01,0x01,0xff,0x0a,0xff,0x00,0x00,0x07] +0x04,0x81,0x4c,0xcf,0x01,0x01,0xff,0x0a,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, vcc_hi, v2, v10 :: v_dual_add_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x40,0x4c,0xcf,0x7e,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x6b,0x40,0x4c,0xcf,0x7e,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, vcc_hi, v2, v10 :: v_dual_add_nc_u32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x4d,0xcf,0x7e,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x6b,0x00,0x4d,0xcf,0x7e,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, vcc_hi, v2, v10 :: v_dual_cndmask_b32 v7, vcc_hi, v3, vcc_lo ; encoding: [0x6b,0x90,0x4c,0xcf,0x6b,0x00,0x02,0x0a,0xff,0x03,0x6a,0x07] +0x6b,0x90,0x4c,0xcf,0x6b,0x00,0x02,0x0a,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_fma_f32 v255, vcc_hi, v2, v10 :: v_dual_fmac_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x4c,0xcf,0x7e,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x6b,0x00,0x4c,0xcf,0x7e,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, vcc_hi, v2, v10 :: v_dual_lshlrev_b32 v7, exec_lo, v3 ; encoding: [0x6b,0x10,0x4d,0xcf,0x7e,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x6b,0x10,0x4d,0xcf,0x7e,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, vcc_hi, v2, v10 :: v_dual_max_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xa0,0x4c,0xcf,0x7e,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x6b,0xa0,0x4c,0xcf,0x7e,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, vcc_hi, v2, v10 :: v_dual_min_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xb0,0x4c,0xcf,0x7e,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x6b,0xb0,0x4c,0xcf,0x7e,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, vcc_hi, v2, v10 :: v_dual_mul_dx9_zero_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x70,0x4c,0xcf,0x7e,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] 
+0x6b,0x70,0x4c,0xcf,0x7e,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, vcc_hi, v2, v10 :: v_dual_mul_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x30,0x4c,0xcf,0x7e,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x6b,0x30,0x4c,0xcf,0x7e,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, vcc_hi, v2, v10 :: v_dual_sub_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x50,0x4c,0xcf,0x7e,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x6b,0x50,0x4c,0xcf,0x7e,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, vcc_hi, v2, v10 :: v_dual_subrev_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x60,0x4c,0xcf,0x7e,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x6b,0x60,0x4c,0xcf,0x7e,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, vcc_hi, v255, v10 :: v_dual_mov_b32 v7, exec_lo ; encoding: [0x6b,0x80,0x4c,0xcf,0x7e,0x00,0xff,0x0a,0xff,0x00,0x00,0x07] +0x6b,0x80,0x4c,0xcf,0x7e,0x00,0xff,0x0a,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, vcc_lo, v2, v10 :: v_dual_add_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x4c,0xcf,0x7f,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x6a,0x40,0x4c,0xcf,0x7f,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, vcc_lo, v2, v10 :: v_dual_add_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x4d,0xcf,0x7f,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x6a,0x00,0x4d,0xcf,0x7f,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, vcc_lo, v2, v10 :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo ; encoding: [0x6a,0x90,0x4c,0xcf,0x6a,0x00,0x02,0x0a,0xff,0x03,0x6a,0x07] +0x6a,0x90,0x4c,0xcf,0x6a,0x00,0x02,0x0a,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_fma_f32 v255, vcc_lo, v2, v10 :: v_dual_fmac_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x4c,0xcf,0x7f,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x6a,0x00,0x4c,0xcf,0x7f,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, vcc_lo, v2, v10 :: v_dual_lshlrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x10,0x4d,0xcf,0x7f,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] 
+0x6a,0x10,0x4d,0xcf,0x7f,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, vcc_lo, v2, v10 :: v_dual_max_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xa0,0x4c,0xcf,0x7f,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x6a,0xa0,0x4c,0xcf,0x7f,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, vcc_lo, v2, v10 :: v_dual_min_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xb0,0x4c,0xcf,0x7f,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x6a,0xb0,0x4c,0xcf,0x7f,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, vcc_lo, v2, v10 :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x4c,0xcf,0x7f,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x6a,0x70,0x4c,0xcf,0x7f,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, vcc_lo, v2, v10 :: v_dual_mul_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x30,0x4c,0xcf,0x7f,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x6a,0x30,0x4c,0xcf,0x7f,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, vcc_lo, v2, v10 :: v_dual_sub_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x4c,0xcf,0x7f,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x6a,0x50,0x4c,0xcf,0x7f,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, vcc_lo, v2, v10 :: v_dual_subrev_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x4c,0xcf,0x7f,0x00,0x02,0x0a,0xff,0x03,0x00,0x07] +0x6a,0x60,0x4c,0xcf,0x7f,0x00,0x02,0x0a,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f32 v255, vcc_lo, v255, v10 :: v_dual_mov_b32 v7, exec_hi ; encoding: [0x6a,0x80,0x4c,0xcf,0x7f,0x00,0xff,0x0a,0xff,0x00,0x00,0x07] +0x6a,0x80,0x4c,0xcf,0x7f,0x00,0xff,0x0a,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[0:1], -v[8:9], -v[4:5], -v[10:11] :: v_dual_add_nc_u32 v5, v6, v7 ; encoding: [0x08,0x01,0x81,0xcf,0x06,0x0f,0x04,0x0a,0x00,0x07,0x00,0x05] +0x08,0x01,0x81,0xcf,0x06,0x0f,0x04,0x0a,0x00,0x07,0x00,0x05 + +# GFX1250: v_dual_fma_f64 v[0:1], v[8:9], v[4:5], -v[10:11] :: v_dual_fma_f32 v5, v6, v7, -v8 ; encoding: 
[0x08,0x31,0x81,0xcf,0x06,0x49,0x04,0x0a,0x00,0x07,0x08,0x05] +0x08,0x31,0x81,0xcf,0x06,0x49,0x04,0x0a,0x00,0x07,0x08,0x05 + +# GFX1250: v_dual_fma_f64 v[252:253], v[6:7], v[4:5], v[10:11] :: v_dual_add_f32 v8, v1, v3 ; encoding: [0x06,0x41,0x80,0xcf,0x01,0x01,0x04,0x0a,0xfc,0x03,0x00,0x08] +0x06,0x41,0x80,0xcf,0x01,0x01,0x04,0x0a,0xfc,0x03,0x00,0x08 + +# GFX1250: v_dual_fma_f64 v[254:255], -1, v[6:7], v[10:11] :: v_dual_add_f32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x80,0xcf,0xfd,0x00,0x06,0x0a,0xfe,0x05,0x00,0x07] +0xc1,0x40,0x80,0xcf,0xfd,0x00,0x06,0x0a,0xfe,0x05,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], -1, v[6:7], v[10:11] :: v_dual_add_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x81,0xcf,0xfd,0x00,0x06,0x0a,0xfe,0x05,0x00,0x07] +0xc1,0x00,0x81,0xcf,0xfd,0x00,0x06,0x0a,0xfe,0x05,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], -1, v[6:7], v[10:11] :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo ; encoding: [0xc1,0x90,0x80,0xcf,0xfd,0x00,0x06,0x0a,0xfe,0x05,0x6a,0x07] +0xc1,0x90,0x80,0xcf,0xfd,0x00,0x06,0x0a,0xfe,0x05,0x6a,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], -1, v[6:7], v[10:11] :: v_dual_fmac_f32 v9, src_scc, v5 ; encoding: [0xc1,0x00,0x80,0xcf,0xfd,0x00,0x06,0x0a,0xfe,0x05,0x00,0x09] +0xc1,0x00,0x80,0xcf,0xfd,0x00,0x06,0x0a,0xfe,0x05,0x00,0x09 + +# GFX1250: v_dual_fma_f64 v[254:255], -1, v[6:7], v[10:11] :: v_dual_lshlrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x10,0x81,0xcf,0xfd,0x00,0x06,0x0a,0xfe,0x05,0x00,0x07] +0xc1,0x10,0x81,0xcf,0xfd,0x00,0x06,0x0a,0xfe,0x05,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], -1, v[6:7], v[10:11] :: v_dual_max_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xa0,0x80,0xcf,0xfd,0x00,0x06,0x0a,0xfe,0x05,0x00,0x07] +0xc1,0xa0,0x80,0xcf,0xfd,0x00,0x06,0x0a,0xfe,0x05,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], -1, v[6:7], v[10:11] :: v_dual_min_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xb0,0x80,0xcf,0xfd,0x00,0x06,0x0a,0xfe,0x05,0x00,0x07] +0xc1,0xb0,0x80,0xcf,0xfd,0x00,0x06,0x0a,0xfe,0x05,0x00,0x07 + 
+# GFX1250: v_dual_fma_f64 v[254:255], -1, v[6:7], v[10:11] :: v_dual_mov_b32 v7, src_scc ; encoding: [0xc1,0x80,0x80,0xcf,0xfd,0x00,0x06,0x0a,0xfe,0x00,0x00,0x07] +0xc1,0x80,0x80,0xcf,0xfd,0x00,0x06,0x0a,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], -1, v[6:7], v[10:11] :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x80,0xcf,0xfd,0x00,0x06,0x0a,0xfe,0x05,0x00,0x07] +0xc1,0x70,0x80,0xcf,0xfd,0x00,0x06,0x0a,0xfe,0x05,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], -1, v[6:7], v[10:11] :: v_dual_mul_f32 v7, src_scc, v5 ; encoding: [0xc1,0x30,0x80,0xcf,0xfd,0x00,0x06,0x0a,0xfe,0x05,0x00,0x07] +0xc1,0x30,0x80,0xcf,0xfd,0x00,0x06,0x0a,0xfe,0x05,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], -1, v[6:7], v[10:11] :: v_dual_sub_f32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x80,0xcf,0xfd,0x00,0x06,0x0a,0xfe,0x05,0x00,0x07] +0xc1,0x50,0x80,0xcf,0xfd,0x00,0x06,0x0a,0xfe,0x05,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], -1, v[6:7], v[10:11] :: v_dual_subrev_f32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x80,0xcf,0xfd,0x00,0x06,0x0a,0xfe,0x05,0x00,0x07] +0xc1,0x60,0x80,0xcf,0xfd,0x00,0x06,0x0a,0xfe,0x05,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], 0.5, v[8:9], v[10:11] :: v_dual_add_f32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x80,0xcf,0xf0,0x00,0x08,0x0a,0xfe,0x02,0x00,0x07] +0xf0,0x40,0x80,0xcf,0xf0,0x00,0x08,0x0a,0xfe,0x02,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], 0.5, v[8:9], v[10:11] :: v_dual_add_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x81,0xcf,0xf0,0x00,0x08,0x0a,0xfe,0x02,0x00,0x07] +0xf0,0x00,0x81,0xcf,0xf0,0x00,0x08,0x0a,0xfe,0x02,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], 0.5, v[8:9], v[10:11] :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo ; encoding: [0xf0,0x90,0x80,0xcf,0xf0,0x00,0x08,0x0a,0xfe,0x02,0x6a,0x07] +0xf0,0x90,0x80,0xcf,0xf0,0x00,0x08,0x0a,0xfe,0x02,0x6a,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], 0.5, v[8:9], v[10:11] :: v_dual_fmac_f32 v9, 0.5, v2 ; encoding: 
[0xf0,0x00,0x80,0xcf,0xf0,0x00,0x08,0x0a,0xfe,0x02,0x00,0x09] +0xf0,0x00,0x80,0xcf,0xf0,0x00,0x08,0x0a,0xfe,0x02,0x00,0x09 + +# GFX1250: v_dual_fma_f64 v[254:255], 0.5, v[8:9], v[10:11] :: v_dual_lshlrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x10,0x81,0xcf,0xf0,0x00,0x08,0x0a,0xfe,0x02,0x00,0x07] +0xf0,0x10,0x81,0xcf,0xf0,0x00,0x08,0x0a,0xfe,0x02,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], 0.5, v[8:9], v[10:11] :: v_dual_max_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xa0,0x80,0xcf,0xf0,0x00,0x08,0x0a,0xfe,0x02,0x00,0x07] +0xf0,0xa0,0x80,0xcf,0xf0,0x00,0x08,0x0a,0xfe,0x02,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], 0.5, v[8:9], v[10:11] :: v_dual_min_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xb0,0x80,0xcf,0xf0,0x00,0x08,0x0a,0xfe,0x02,0x00,0x07] +0xf0,0xb0,0x80,0xcf,0xf0,0x00,0x08,0x0a,0xfe,0x02,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], 0.5, v[8:9], v[10:11] :: v_dual_mov_b32 v7, 0.5 ; encoding: [0xf0,0x80,0x80,0xcf,0xf0,0x00,0x08,0x0a,0xfe,0x00,0x00,0x07] +0xf0,0x80,0x80,0xcf,0xf0,0x00,0x08,0x0a,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], 0.5, v[8:9], v[10:11] :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x80,0xcf,0xf0,0x00,0x08,0x0a,0xfe,0x02,0x00,0x07] +0xf0,0x70,0x80,0xcf,0xf0,0x00,0x08,0x0a,0xfe,0x02,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], 0.5, v[8:9], v[10:11] :: v_dual_mul_f32 v7, 0.5, v2 ; encoding: [0xf0,0x30,0x80,0xcf,0xf0,0x00,0x08,0x0a,0xfe,0x02,0x00,0x07] +0xf0,0x30,0x80,0xcf,0xf0,0x00,0x08,0x0a,0xfe,0x02,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], 0.5, v[8:9], v[10:11] :: v_dual_sub_f32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x80,0xcf,0xf0,0x00,0x08,0x0a,0xfe,0x02,0x00,0x07] +0xf0,0x50,0x80,0xcf,0xf0,0x00,0x08,0x0a,0xfe,0x02,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], 0.5, v[8:9], v[10:11] :: v_dual_subrev_f32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x80,0xcf,0xf0,0x00,0x08,0x0a,0xfe,0x02,0x00,0x07] +0xf0,0x60,0x80,0xcf,0xf0,0x00,0x08,0x0a,0xfe,0x02,0x00,0x07 + +# GFX1250: v_dual_fma_f64 
v[254:255], exec, v[254:255], v[10:11] :: v_dual_mov_b32 v7, ttmp15 ; encoding: [0x7e,0x80,0x80,0xcf,0x7b,0x00,0xfe,0x0a,0xfe,0x00,0x00,0x07] +0x7e,0x80,0x80,0xcf,0x7b,0x00,0xfe,0x0a,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], exec, v[4:5], v[10:11] :: v_dual_add_f32 v7, v15, v3 ; encoding: [0x7e,0x40,0x80,0xcf,0x0f,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x7e,0x40,0x80,0xcf,0x0f,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], exec, v[4:5], v[10:11] :: v_dual_add_nc_u32 v7, v15, v3 ; encoding: [0x7e,0x00,0x81,0xcf,0x0f,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x7e,0x00,0x81,0xcf,0x0f,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], exec, v[4:5], v[10:11] :: v_dual_cndmask_b32 v7, v10, v3, vcc_lo ; encoding: [0x7e,0x90,0x80,0xcf,0x0a,0x01,0x04,0x0a,0xfe,0x03,0x6a,0x07] +0x7e,0x90,0x80,0xcf,0x0a,0x01,0x04,0x0a,0xfe,0x03,0x6a,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], exec, v[4:5], v[10:11] :: v_dual_fmac_f32 v9, v15, v3 ; encoding: [0x7e,0x00,0x80,0xcf,0x0f,0x01,0x04,0x0a,0xfe,0x03,0x00,0x09] +0x7e,0x00,0x80,0xcf,0x0f,0x01,0x04,0x0a,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_fma_f64 v[254:255], exec, v[4:5], v[10:11] :: v_dual_lshlrev_b32 v7, v15, v3 ; encoding: [0x7e,0x10,0x81,0xcf,0x0f,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x7e,0x10,0x81,0xcf,0x0f,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], exec, v[4:5], v[10:11] :: v_dual_max_num_f32 v7, v15, v3 ; encoding: [0x7e,0xa0,0x80,0xcf,0x0f,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x7e,0xa0,0x80,0xcf,0x0f,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], exec, v[4:5], v[10:11] :: v_dual_min_num_f32 v7, v15, v3 ; encoding: [0x7e,0xb0,0x80,0xcf,0x0f,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x7e,0xb0,0x80,0xcf,0x0f,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], exec, v[4:5], v[10:11] :: v_dual_mul_dx9_zero_f32 v7, v15, v3 ; encoding: 
[0x7e,0x70,0x80,0xcf,0x0f,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x7e,0x70,0x80,0xcf,0x0f,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], exec, v[4:5], v[10:11] :: v_dual_mul_f32 v7, v15, v3 ; encoding: [0x7e,0x30,0x80,0xcf,0x0f,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x7e,0x30,0x80,0xcf,0x0f,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], exec, v[4:5], v[10:11] :: v_dual_sub_f32 v7, v15, v3 ; encoding: [0x7e,0x50,0x80,0xcf,0x0f,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x7e,0x50,0x80,0xcf,0x0f,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], exec, v[4:5], v[10:11] :: v_dual_subrev_f32 v7, v15, v3 ; encoding: [0x7e,0x60,0x80,0xcf,0x0f,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x7e,0x60,0x80,0xcf,0x0f,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], s[104:105], v[254:255], v[10:11] :: v_dual_mov_b32 v7, s1 ; encoding: [0x68,0x80,0x80,0xcf,0x01,0x00,0xfe,0x0a,0xfe,0x00,0x00,0x07] +0x68,0x80,0x80,0xcf,0x01,0x00,0xfe,0x0a,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], s[104:105], v[4:5], v[10:11] :: v_dual_add_f32 v7, v1, v3 ; encoding: [0x68,0x40,0x80,0xcf,0x01,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x68,0x40,0x80,0xcf,0x01,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], s[104:105], v[4:5], v[10:11] :: v_dual_add_nc_u32 v7, v1, v3 ; encoding: [0x68,0x00,0x81,0xcf,0x01,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x68,0x00,0x81,0xcf,0x01,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], s[104:105], v[4:5], v[10:11] :: v_dual_fmac_f32 v9, v1, v3 ; encoding: [0x68,0x00,0x80,0xcf,0x01,0x01,0x04,0x0a,0xfe,0x03,0x00,0x09] +0x68,0x00,0x80,0xcf,0x01,0x01,0x04,0x0a,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_fma_f64 v[254:255], s[104:105], v[4:5], v[10:11] :: v_dual_lshlrev_b32 v7, v1, v3 ; encoding: [0x68,0x10,0x81,0xcf,0x01,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x68,0x10,0x81,0xcf,0x01,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# 
GFX1250: v_dual_fma_f64 v[254:255], s[104:105], v[4:5], v[10:11] :: v_dual_max_num_f32 v7, v1, v3 ; encoding: [0x68,0xa0,0x80,0xcf,0x01,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x68,0xa0,0x80,0xcf,0x01,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], s[104:105], v[4:5], v[10:11] :: v_dual_min_num_f32 v7, v1, v3 ; encoding: [0x68,0xb0,0x80,0xcf,0x01,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x68,0xb0,0x80,0xcf,0x01,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], s[104:105], v[4:5], v[10:11] :: v_dual_mul_dx9_zero_f32 v7, v1, v3 ; encoding: [0x68,0x70,0x80,0xcf,0x01,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x68,0x70,0x80,0xcf,0x01,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], s[104:105], v[4:5], v[10:11] :: v_dual_mul_f32 v7, v1, v3 ; encoding: [0x68,0x30,0x80,0xcf,0x01,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x68,0x30,0x80,0xcf,0x01,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], s[104:105], v[4:5], v[10:11] :: v_dual_sub_f32 v7, v1, v3 ; encoding: [0x68,0x50,0x80,0xcf,0x01,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x68,0x50,0x80,0xcf,0x01,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], s[104:105], v[4:5], v[10:11] :: v_dual_subrev_f32 v7, v1, v3 ; encoding: [0x68,0x60,0x80,0xcf,0x01,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x68,0x60,0x80,0xcf,0x01,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], s[2:3], v[254:255], v[10:11] :: v_dual_mov_b32 v7, s105 ; encoding: [0x02,0x80,0x80,0xcf,0x69,0x00,0xfe,0x0a,0xfe,0x00,0x00,0x07] +0x02,0x80,0x80,0xcf,0x69,0x00,0xfe,0x0a,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], s[2:3], v[4:5], v[10:11] :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo ; encoding: [0x02,0x90,0x80,0xcf,0x01,0x01,0x04,0x0a,0xfe,0x03,0x6a,0x07] +0x02,0x90,0x80,0xcf,0x01,0x01,0x04,0x0a,0xfe,0x03,0x6a,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], src_scc, v[254:255], v[10:11] :: v_dual_mov_b32 v7, -1 ; 
encoding: [0xfd,0x80,0x80,0xcf,0xc1,0x00,0xfe,0x0a,0xfe,0x00,0x00,0x07] +0xfd,0x80,0x80,0xcf,0xc1,0x00,0xfe,0x0a,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], src_scc, v[4:5], v[10:11] :: v_dual_add_f32 v7, -1, v3 ; encoding: [0xfd,0x40,0x80,0xcf,0xc1,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +0xfd,0x40,0x80,0xcf,0xc1,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], src_scc, v[4:5], v[10:11] :: v_dual_add_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x00,0x81,0xcf,0xc1,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +0xfd,0x00,0x81,0xcf,0xc1,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], src_scc, v[4:5], v[10:11] :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo ; encoding: [0xfd,0x90,0x80,0xcf,0xc1,0x00,0x04,0x0a,0xfe,0x03,0x6a,0x07] +0xfd,0x90,0x80,0xcf,0xc1,0x00,0x04,0x0a,0xfe,0x03,0x6a,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], src_scc, v[4:5], v[10:11] :: v_dual_fmac_f32 v9, -1, v3 ; encoding: [0xfd,0x00,0x80,0xcf,0xc1,0x00,0x04,0x0a,0xfe,0x03,0x00,0x09] +0xfd,0x00,0x80,0xcf,0xc1,0x00,0x04,0x0a,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_fma_f64 v[254:255], src_scc, v[4:5], v[10:11] :: v_dual_lshlrev_b32 v7, -1, v3 ; encoding: [0xfd,0x10,0x81,0xcf,0xc1,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +0xfd,0x10,0x81,0xcf,0xc1,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], src_scc, v[4:5], v[10:11] :: v_dual_max_num_f32 v7, -1, v3 ; encoding: [0xfd,0xa0,0x80,0xcf,0xc1,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +0xfd,0xa0,0x80,0xcf,0xc1,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], src_scc, v[4:5], v[10:11] :: v_dual_min_num_f32 v7, -1, v3 ; encoding: [0xfd,0xb0,0x80,0xcf,0xc1,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +0xfd,0xb0,0x80,0xcf,0xc1,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], src_scc, v[4:5], v[10:11] :: v_dual_mul_dx9_zero_f32 v7, -1, v3 ; encoding: [0xfd,0x70,0x80,0xcf,0xc1,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] 
+0xfd,0x70,0x80,0xcf,0xc1,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], src_scc, v[4:5], v[10:11] :: v_dual_mul_f32 v7, -1, v3 ; encoding: [0xfd,0x30,0x80,0xcf,0xc1,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +0xfd,0x30,0x80,0xcf,0xc1,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], src_scc, v[4:5], v[10:11] :: v_dual_sub_f32 v7, -1, v3 ; encoding: [0xfd,0x50,0x80,0xcf,0xc1,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +0xfd,0x50,0x80,0xcf,0xc1,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], src_scc, v[4:5], v[10:11] :: v_dual_subrev_f32 v7, -1, v3 ; encoding: [0xfd,0x60,0x80,0xcf,0xc1,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +0xfd,0x60,0x80,0xcf,0xc1,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], ttmp[14:15], v[254:255], v[10:11] :: v_dual_mov_b32 v7, vcc_lo ; encoding: [0x7a,0x80,0x80,0xcf,0x6a,0x00,0xfe,0x0a,0xfe,0x00,0x00,0x07] +0x7a,0x80,0x80,0xcf,0x6a,0x00,0xfe,0x0a,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], ttmp[14:15], v[4:5], v[10:11] :: v_dual_add_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0x40,0x80,0xcf,0x6a,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x7a,0x40,0x80,0xcf,0x6a,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], ttmp[14:15], v[4:5], v[10:11] :: v_dual_add_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7a,0x00,0x81,0xcf,0x6a,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x7a,0x00,0x81,0xcf,0x6a,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], ttmp[14:15], v[4:5], v[10:11] :: v_dual_cndmask_b32 v7, v15, v3, vcc_lo ; encoding: [0x7a,0x90,0x80,0xcf,0x0f,0x01,0x04,0x0a,0xfe,0x03,0x6a,0x07] +0x7a,0x90,0x80,0xcf,0x0f,0x01,0x04,0x0a,0xfe,0x03,0x6a,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], ttmp[14:15], v[4:5], v[10:11] :: v_dual_fmac_f32 v9, vcc_lo, v3 ; encoding: [0x7a,0x00,0x80,0xcf,0x6a,0x00,0x04,0x0a,0xfe,0x03,0x00,0x09] +0x7a,0x00,0x80,0xcf,0x6a,0x00,0x04,0x0a,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_fma_f64 
v[254:255], ttmp[14:15], v[4:5], v[10:11] :: v_dual_lshlrev_b32 v7, vcc_lo, v3 ; encoding: [0x7a,0x10,0x81,0xcf,0x6a,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x7a,0x10,0x81,0xcf,0x6a,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], ttmp[14:15], v[4:5], v[10:11] :: v_dual_max_num_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0xa0,0x80,0xcf,0x6a,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x7a,0xa0,0x80,0xcf,0x6a,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], ttmp[14:15], v[4:5], v[10:11] :: v_dual_min_num_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0xb0,0x80,0xcf,0x6a,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x7a,0xb0,0x80,0xcf,0x6a,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], ttmp[14:15], v[4:5], v[10:11] :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0x70,0x80,0xcf,0x6a,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x7a,0x70,0x80,0xcf,0x6a,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], ttmp[14:15], v[4:5], v[10:11] :: v_dual_mul_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0x30,0x80,0xcf,0x6a,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x7a,0x30,0x80,0xcf,0x6a,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], ttmp[14:15], v[4:5], v[10:11] :: v_dual_sub_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0x50,0x80,0xcf,0x6a,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x7a,0x50,0x80,0xcf,0x6a,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], ttmp[14:15], v[4:5], v[10:11] :: v_dual_subrev_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0x60,0x80,0xcf,0x6a,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x7a,0x60,0x80,0xcf,0x6a,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[254:255], v[254:255], v[10:11] :: v_dual_mov_b32 v7, v4 ; encoding: [0xfe,0x81,0x80,0xcf,0x04,0x01,0xfe,0x0a,0xfe,0x00,0x00,0x07] +0xfe,0x81,0x80,0xcf,0x04,0x01,0xfe,0x0a,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[254:255], v[4:5], v[10:11] :: v_dual_add_f32 v7, v5, 
v3 ; encoding: [0xfe,0x41,0x80,0xcf,0x05,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0xfe,0x41,0x80,0xcf,0x05,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[254:255], v[4:5], v[10:11] :: v_dual_add_nc_u32 v7, v5, v3 ; encoding: [0xfe,0x01,0x81,0xcf,0x05,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0xfe,0x01,0x81,0xcf,0x05,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[254:255], v[4:5], v[10:11] :: v_dual_cndmask_b32 v7, v5, v3, vcc_lo ; encoding: [0xfe,0x91,0x80,0xcf,0x05,0x01,0x04,0x0a,0xfe,0x03,0x6a,0x07] +0xfe,0x91,0x80,0xcf,0x05,0x01,0x04,0x0a,0xfe,0x03,0x6a,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[254:255], v[4:5], v[10:11] :: v_dual_fmac_f32 v9, v5, v3 ; encoding: [0xfe,0x01,0x80,0xcf,0x05,0x01,0x04,0x0a,0xfe,0x03,0x00,0x09] +0xfe,0x01,0x80,0xcf,0x05,0x01,0x04,0x0a,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_fma_f64 v[254:255], v[254:255], v[4:5], v[10:11] :: v_dual_lshlrev_b32 v7, v5, v3 ; encoding: [0xfe,0x11,0x81,0xcf,0x05,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0xfe,0x11,0x81,0xcf,0x05,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[254:255], v[4:5], v[10:11] :: v_dual_max_num_f32 v7, v5, v3 ; encoding: [0xfe,0xa1,0x80,0xcf,0x05,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0xfe,0xa1,0x80,0xcf,0x05,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[254:255], v[4:5], v[10:11] :: v_dual_min_num_f32 v7, v5, v3 ; encoding: [0xfe,0xb1,0x80,0xcf,0x05,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0xfe,0xb1,0x80,0xcf,0x05,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[254:255], v[4:5], v[10:11] :: v_dual_mul_dx9_zero_f32 v7, v5, v3 ; encoding: [0xfe,0x71,0x80,0xcf,0x05,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0xfe,0x71,0x80,0xcf,0x05,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[254:255], v[4:5], v[10:11] :: v_dual_mul_f32 v7, v5, v3 ; encoding: [0xfe,0x31,0x80,0xcf,0x05,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] 
+0xfe,0x31,0x80,0xcf,0x05,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[254:255], v[4:5], v[10:11] :: v_dual_sub_f32 v7, v5, v3 ; encoding: [0xfe,0x51,0x80,0xcf,0x05,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0xfe,0x51,0x80,0xcf,0x05,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[254:255], v[4:5], v[10:11] :: v_dual_subrev_f32 v7, v5, v3 ; encoding: [0xfe,0x61,0x80,0xcf,0x05,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0xfe,0x61,0x80,0xcf,0x05,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[2:3], v[254:255], v[10:11] :: v_dual_mov_b32 v7, v253 ; encoding: [0x02,0x81,0x80,0xcf,0xfd,0x01,0xfe,0x0a,0xfe,0x00,0x00,0x07] +0x02,0x81,0x80,0xcf,0xfd,0x01,0xfe,0x0a,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[2:3], v[4:5], v[10:11] :: v_dual_add_f32 v7, v253, v3 ; encoding: [0x02,0x41,0x80,0xcf,0xfd,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x02,0x41,0x80,0xcf,0xfd,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[2:3], v[4:5], v[10:11] :: v_dual_add_nc_u32 v7, v253, v3 ; encoding: [0x02,0x01,0x81,0xcf,0xfd,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x02,0x01,0x81,0xcf,0xfd,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[2:3], v[4:5], v[10:11] :: v_dual_cndmask_b32 v7, v253, v3, vcc_lo ; encoding: [0x02,0x91,0x80,0xcf,0xfd,0x01,0x04,0x0a,0xfe,0x03,0x6a,0x07] +0x02,0x91,0x80,0xcf,0xfd,0x01,0x04,0x0a,0xfe,0x03,0x6a,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[2:3], v[4:5], v[10:11] :: v_dual_fmac_f32 v9, v253, v3 ; encoding: [0x02,0x01,0x80,0xcf,0xfd,0x01,0x04,0x0a,0xfe,0x03,0x00,0x09] +0x02,0x01,0x80,0xcf,0xfd,0x01,0x04,0x0a,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_fma_f64 v[254:255], v[2:3], v[4:5], v[10:11] :: v_dual_lshlrev_b32 v7, v253, v3 ; encoding: [0x02,0x11,0x81,0xcf,0xfd,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x02,0x11,0x81,0xcf,0xfd,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[2:3], 
v[4:5], v[10:11] :: v_dual_max_num_f32 v7, v253, v3 ; encoding: [0x02,0xa1,0x80,0xcf,0xfd,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x02,0xa1,0x80,0xcf,0xfd,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[2:3], v[4:5], v[10:11] :: v_dual_min_num_f32 v7, v253, v3 ; encoding: [0x02,0xb1,0x80,0xcf,0xfd,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x02,0xb1,0x80,0xcf,0xfd,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[2:3], v[4:5], v[10:11] :: v_dual_mul_dx9_zero_f32 v7, v253, v3 ; encoding: [0x02,0x71,0x80,0xcf,0xfd,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x02,0x71,0x80,0xcf,0xfd,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[2:3], v[4:5], v[10:11] :: v_dual_mul_f32 v7, v253, v3 ; encoding: [0x02,0x31,0x80,0xcf,0xfd,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x02,0x31,0x80,0xcf,0xfd,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[2:3], v[4:5], v[10:11] :: v_dual_sub_f32 v7, v253, v3 ; encoding: [0x02,0x51,0x80,0xcf,0xfd,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x02,0x51,0x80,0xcf,0xfd,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[2:3], v[4:5], v[10:11] :: v_dual_subrev_f32 v7, v253, v3 ; encoding: [0x02,0x61,0x80,0xcf,0xfd,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x02,0x61,0x80,0xcf,0xfd,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[4:5], v[254:255], v[10:11] :: v_dual_mov_b32 v7, v3 ; encoding: [0x04,0x81,0x80,0xcf,0x03,0x01,0xfe,0x0a,0xfe,0x00,0x00,0x07] +0x04,0x81,0x80,0xcf,0x03,0x01,0xfe,0x0a,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_add_f32 v7, v3, v3 ; encoding: [0x04,0x41,0x80,0xcf,0x03,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x04,0x41,0x80,0xcf,0x03,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_add_nc_u32 v7, v2, v3 ; encoding: [0x04,0x01,0x81,0xcf,0x02,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] 
+0x04,0x01,0x81,0xcf,0x02,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_add_nc_u32 v7, v3, v3 ; encoding: [0x04,0x01,0x81,0xcf,0x03,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x04,0x01,0x81,0xcf,0x03,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_cndmask_b32 v7, v2, v3, s96 ; encoding: [0x04,0x91,0x80,0xcf,0x02,0x01,0x04,0x0a,0xfe,0x03,0x60,0x07] +0x04,0x91,0x80,0xcf,0x02,0x01,0x04,0x0a,0xfe,0x03,0x60,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo ; encoding: [0x04,0x91,0x80,0xcf,0x02,0x01,0x04,0x0a,0xfe,0x03,0x6a,0x07] +0x04,0x91,0x80,0xcf,0x02,0x01,0x04,0x0a,0xfe,0x03,0x6a,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo ; encoding: [0x04,0x91,0x80,0xcf,0x03,0x01,0x04,0x0a,0xfe,0x03,0x6a,0x07] +0x04,0x91,0x80,0xcf,0x03,0x01,0x04,0x0a,0xfe,0x03,0x6a,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_fmac_f32 v9, v2, v3 ; encoding: [0x04,0x01,0x80,0xcf,0x02,0x01,0x04,0x0a,0xfe,0x03,0x00,0x09] +0x04,0x01,0x80,0xcf,0x02,0x01,0x04,0x0a,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_fmac_f32 v9, v3, v3 ; encoding: [0x04,0x01,0x80,0xcf,0x03,0x01,0x04,0x0a,0xfe,0x03,0x00,0x09] +0x04,0x01,0x80,0xcf,0x03,0x01,0x04,0x0a,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_lshlrev_b32 v7, v2, v3 ; encoding: [0x04,0x11,0x81,0xcf,0x02,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x04,0x11,0x81,0xcf,0x02,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_lshlrev_b32 v7, v3, v3 ; encoding: [0x04,0x11,0x81,0xcf,0x03,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x04,0x11,0x81,0xcf,0x03,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[4:5], 
v[4:5], v[10:11] :: v_dual_max_num_f32 v7, v2, v3 ; encoding: [0x04,0xa1,0x80,0xcf,0x02,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x04,0xa1,0x80,0xcf,0x02,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_max_num_f32 v7, v3, v3 ; encoding: [0x04,0xa1,0x80,0xcf,0x03,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x04,0xa1,0x80,0xcf,0x03,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_min_num_f32 v7, v2, v3 ; encoding: [0x04,0xb1,0x80,0xcf,0x02,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x04,0xb1,0x80,0xcf,0x02,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_min_num_f32 v7, v3, v3 ; encoding: [0x04,0xb1,0x80,0xcf,0x03,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x04,0xb1,0x80,0xcf,0x03,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_mul_dx9_zero_f32 v7, v2, v3 ; encoding: [0x04,0x71,0x80,0xcf,0x02,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x04,0x71,0x80,0xcf,0x02,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_mul_dx9_zero_f32 v7, v3, v3 ; encoding: [0x04,0x71,0x80,0xcf,0x03,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x04,0x71,0x80,0xcf,0x03,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_mul_f32 v7, v2, v3 ; encoding: [0x04,0x31,0x80,0xcf,0x02,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x04,0x31,0x80,0xcf,0x02,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_mul_f32 v7, v3, v3 ; encoding: [0x04,0x31,0x80,0xcf,0x03,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x04,0x31,0x80,0xcf,0x03,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_sub_f32 v7, v2, v3 ; encoding: [0x04,0x51,0x80,0xcf,0x02,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] 
+0x04,0x51,0x80,0xcf,0x02,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_sub_f32 v7, v3, v3 ; encoding: [0x04,0x51,0x80,0xcf,0x03,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x04,0x51,0x80,0xcf,0x03,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_subrev_f32 v7, v2, v3 ; encoding: [0x04,0x61,0x80,0xcf,0x02,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x04,0x61,0x80,0xcf,0x02,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[4:5], v[4:5], v[10:11] :: v_dual_subrev_f32 v7, v3, v3 ; encoding: [0x04,0x61,0x80,0xcf,0x03,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x04,0x61,0x80,0xcf,0x03,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[6:7], v[254:255], v[10:11] :: v_dual_mov_b32 v7, v4 ; encoding: [0x06,0x81,0x80,0xcf,0x04,0x01,0xfe,0x0a,0xfe,0x00,0x00,0x07] +0x06,0x81,0x80,0xcf,0x04,0x01,0xfe,0x0a,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[6:7], v[4:5], v[10:11] :: v_dual_add_f32 v7, v4, v3 ; encoding: [0x06,0x41,0x80,0xcf,0x04,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x06,0x41,0x80,0xcf,0x04,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[6:7], v[4:5], v[10:11] :: v_dual_add_nc_u32 v7, v4, v3 ; encoding: [0x06,0x01,0x81,0xcf,0x04,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x06,0x01,0x81,0xcf,0x04,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[6:7], v[4:5], v[10:11] :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo ; encoding: [0x06,0x91,0x80,0xcf,0x04,0x01,0x04,0x0a,0xfe,0x03,0x6a,0x07] +0x06,0x91,0x80,0xcf,0x04,0x01,0x04,0x0a,0xfe,0x03,0x6a,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[6:7], v[4:5], v[10:11] :: v_dual_fmac_f32 v9, v4, v3 ; encoding: [0x06,0x01,0x80,0xcf,0x04,0x01,0x04,0x0a,0xfe,0x03,0x00,0x09] +0x06,0x01,0x80,0xcf,0x04,0x01,0x04,0x0a,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_fma_f64 v[254:255], v[6:7], v[4:5], v[10:11] :: 
v_dual_lshlrev_b32 v7, v4, v3 ; encoding: [0x06,0x11,0x81,0xcf,0x04,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x06,0x11,0x81,0xcf,0x04,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[6:7], v[4:5], v[10:11] :: v_dual_max_num_f32 v7, v4, v3 ; encoding: [0x06,0xa1,0x80,0xcf,0x04,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x06,0xa1,0x80,0xcf,0x04,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[6:7], v[4:5], v[10:11] :: v_dual_min_num_f32 v7, v4, v3 ; encoding: [0x06,0xb1,0x80,0xcf,0x04,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x06,0xb1,0x80,0xcf,0x04,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[6:7], v[4:5], v[10:11] :: v_dual_mul_dx9_zero_f32 v7, v4, v3 ; encoding: [0x06,0x71,0x80,0xcf,0x04,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x06,0x71,0x80,0xcf,0x04,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[6:7], v[4:5], v[10:11] :: v_dual_mul_f32 v7, v4, v3 ; encoding: [0x06,0x31,0x80,0xcf,0x04,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x06,0x31,0x80,0xcf,0x04,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[6:7], v[4:5], v[10:11] :: v_dual_sub_f32 v7, v4, v3 ; encoding: [0x06,0x51,0x80,0xcf,0x04,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x06,0x51,0x80,0xcf,0x04,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], v[6:7], v[4:5], v[10:11] :: v_dual_subrev_f32 v7, v4, v3 ; encoding: [0x06,0x61,0x80,0xcf,0x04,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x06,0x61,0x80,0xcf,0x04,0x01,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], vcc, v[254:255], v[10:11] :: v_dual_mov_b32 v7, exec_hi ; encoding: [0x6a,0x80,0x80,0xcf,0x7f,0x00,0xfe,0x0a,0xfe,0x00,0x00,0x07] +0x6a,0x80,0x80,0xcf,0x7f,0x00,0xfe,0x0a,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], vcc, v[4:5], v[10:11] :: v_dual_add_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x80,0xcf,0x7f,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] 
+0x6a,0x40,0x80,0xcf,0x7f,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], vcc, v[4:5], v[10:11] :: v_dual_add_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x81,0xcf,0x7f,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x6a,0x00,0x81,0xcf,0x7f,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], vcc, v[4:5], v[10:11] :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo ; encoding: [0x6a,0x90,0x80,0xcf,0x6a,0x00,0x04,0x0a,0xfe,0x03,0x6a,0x07] +0x6a,0x90,0x80,0xcf,0x6a,0x00,0x04,0x0a,0xfe,0x03,0x6a,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], vcc, v[4:5], v[10:11] :: v_dual_fmac_f32 v9, exec_hi, v3 ; encoding: [0x6a,0x00,0x80,0xcf,0x7f,0x00,0x04,0x0a,0xfe,0x03,0x00,0x09] +0x6a,0x00,0x80,0xcf,0x7f,0x00,0x04,0x0a,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_fma_f64 v[254:255], vcc, v[4:5], v[10:11] :: v_dual_lshlrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x10,0x81,0xcf,0x7f,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x6a,0x10,0x81,0xcf,0x7f,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], vcc, v[4:5], v[10:11] :: v_dual_max_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xa0,0x80,0xcf,0x7f,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x6a,0xa0,0x80,0xcf,0x7f,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], vcc, v[4:5], v[10:11] :: v_dual_min_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xb0,0x80,0xcf,0x7f,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x6a,0xb0,0x80,0xcf,0x7f,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], vcc, v[4:5], v[10:11] :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x80,0xcf,0x7f,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x6a,0x70,0x80,0xcf,0x7f,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], vcc, v[4:5], v[10:11] :: v_dual_mul_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x30,0x80,0xcf,0x7f,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x6a,0x30,0x80,0xcf,0x7f,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], vcc, 
v[4:5], v[10:11] :: v_dual_sub_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x80,0xcf,0x7f,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x6a,0x50,0x80,0xcf,0x7f,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[254:255], vcc, v[4:5], v[10:11] :: v_dual_subrev_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x80,0xcf,0x7f,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07] +0x6a,0x60,0x80,0xcf,0x7f,0x00,0x04,0x0a,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_fma_f64 v[2:3], v[6:7], v[4:5], v[10:11] :: v_dual_ashrrev_i32 v9, v1, v14 ; encoding: [0x06,0x61,0x81,0xcf,0x01,0x01,0x04,0x0a,0x02,0x0e,0x00,0x09] +0x06,0x61,0x81,0xcf,0x01,0x01,0x04,0x0a,0x02,0x0e,0x00,0x09 + +# GFX1250: v_dual_fma_f64 v[2:3], v[6:7], v[4:5], v[10:11] :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0x91 ; encoding: [0x06,0x21,0x81,0xcf,0x01,0x01,0x04,0x0a,0x02,0x03,0x91,0x07] +0x06,0x21,0x81,0xcf,0x01,0x01,0x04,0x0a,0x02,0x03,0x91,0x07 + +# GFX1250: v_dual_fma_f64 v[2:3], v[6:7], v[4:5], v[10:11] :: v_dual_fma_f32 v9, v1, v14, v4 ; encoding: [0x06,0x31,0x81,0xcf,0x01,0x01,0x04,0x0a,0x02,0x0e,0x04,0x09] +0x06,0x31,0x81,0xcf,0x01,0x01,0x04,0x0a,0x02,0x0e,0x04,0x09 + +# GFX1250: v_dual_fma_f64 v[2:3], v[6:7], v[4:5], v[10:11] :: v_dual_lshrrev_b32 v9, v1, v14 ; encoding: [0x06,0x51,0x81,0xcf,0x01,0x01,0x04,0x0a,0x02,0x0e,0x00,0x09] +0x06,0x51,0x81,0xcf,0x01,0x01,0x04,0x0a,0x02,0x0e,0x00,0x09 + +# GFX1250: v_dual_fma_f64 v[2:3], v[6:7], v[4:5], v[10:11] :: v_dual_sub_nc_u32 v9, v1, v14 ; encoding: [0x06,0x41,0x81,0xcf,0x01,0x01,0x04,0x0a,0x02,0x0e,0x00,0x09] +0x06,0x41,0x81,0xcf,0x01,0x01,0x04,0x0a,0x02,0x0e,0x00,0x09 + +# GFX1250: v_dual_fmac_f32 v0, -v1, -v2 :: v_dual_ashrrev_i32 v5, v6, v7 ; encoding: [0x01,0x61,0x01,0xcf,0x06,0x07,0x02,0x00,0x00,0x07,0x00,0x05] +0x01,0x61,0x01,0xcf,0x06,0x07,0x02,0x00,0x00,0x07,0x00,0x05 + +# GFX1250: v_dual_fmac_f32 v0, v1, -v2 :: v_dual_fmac_f32 v5, -v6, v7 ; encoding: [0x01,0x01,0x00,0xcf,0x06,0x15,0x02,0x00,0x00,0x07,0x00,0x05] 
+0x01,0x01,0x00,0xcf,0x06,0x15,0x02,0x00,0x00,0x07,0x00,0x05 + +# GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_add_f32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x00,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x40,0x00,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_add_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x01,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x00,0x01,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_ashrrev_i32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x01,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x60,0x01,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo ; encoding: [0xc1,0x90,0x00,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x6a,0x07] +0xc1,0x90,0x00,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x6a,0x07 + +# GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_lshlrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x10,0x01,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x10,0x01,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_lshrrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x01,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x50,0x01,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_max_i32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x01,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x70,0x01,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_max_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xa0,0x00,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0xa0,0x00,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_min_i32 v7, src_scc, v5 ; encoding: [0xc1,0x80,0x01,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x80,0x01,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 
+ +# GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_min_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xb0,0x00,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0xb0,0x00,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_mov_b32 v7, src_scc ; encoding: [0xc1,0x80,0x00,0xcf,0xfd,0x00,0x04,0x00,0xff,0x00,0x00,0x07] +0xc1,0x80,0x00,0xcf,0xfd,0x00,0x04,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x00,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x70,0x00,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_mul_f32 v7, src_scc, v5 ; encoding: [0xc1,0x30,0x00,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x30,0x00,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_sub_f32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x00,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x50,0x00,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_sub_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x01,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x40,0x01,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, -1, v4 :: v_dual_subrev_f32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x00,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x60,0x00,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_add_f32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x00,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x40,0x00,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_add_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x01,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x00,0x01,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v7, 0.5, v2 ; 
encoding: [0xf0,0x60,0x01,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x60,0x01,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo ; encoding: [0xf0,0x90,0x00,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x6a,0x07] +0xf0,0x90,0x00,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x6a,0x07 + +# GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x10,0x01,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x10,0x01,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x01,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x50,0x01,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_max_i32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x01,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x70,0x01,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_max_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xa0,0x00,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0xa0,0x00,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_min_i32 v7, 0.5, v2 ; encoding: [0xf0,0x80,0x01,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x80,0x01,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_min_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xb0,0x00,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0xb0,0x00,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_mov_b32 v7, 0.5 ; encoding: [0xf0,0x80,0x00,0xcf,0xf0,0x00,0x03,0x00,0xff,0x00,0x00,0x07] +0xf0,0x80,0x00,0xcf,0xf0,0x00,0x03,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x00,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] 
+0xf0,0x70,0x00,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_mul_f32 v7, 0.5, v2 ; encoding: [0xf0,0x30,0x00,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x30,0x00,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_sub_f32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x00,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x50,0x00,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x01,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x40,0x01,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, 0.5, v3 :: v_dual_subrev_f32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x00,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x60,0x00,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_add_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x40,0x00,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x40,0x00,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x01,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x00,0x01,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v7, vcc_hi, v3 ; encoding: [0x7f,0x60,0x01,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x60,0x01,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v7, exec_hi, v3, vcc_lo ; encoding: [0x7f,0x90,0x00,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7f,0x90,0x00,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v7, vcc_hi, v3 ; encoding: [0x7f,0x10,0x01,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x7f,0x10,0x01,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v7, vcc_hi, v3 ; encoding: [0x7f,0x50,0x01,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x50,0x01,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_max_i32 v7, vcc_hi, v3 ; encoding: [0x7f,0x70,0x01,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x70,0x01,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_max_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xa0,0x00,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0xa0,0x00,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_min_i32 v7, vcc_hi, v3 ; encoding: [0x7f,0x80,0x01,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x80,0x01,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_min_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xb0,0x00,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0xb0,0x00,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x70,0x00,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x70,0x00,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_mul_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x30,0x00,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x30,0x00,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_sub_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x50,0x00,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x50,0x00,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v7, vcc_hi, v3 ; encoding: [0x7f,0x40,0x01,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x7f,0x40,0x01,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, exec_hi, v2 :: v_dual_subrev_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x60,0x00,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x60,0x00,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, exec_hi, v255 :: v_dual_mov_b32 v7, vcc_hi ; encoding: [0x7f,0x80,0x00,0xcf,0x6b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7f,0x80,0x00,0xcf,0x6b,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_add_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x40,0x00,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x40,0x00,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x01,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x00,0x01,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v7, ttmp15, v3 ; encoding: [0x7e,0x60,0x01,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x60,0x01,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v7, exec_lo, v3, vcc_lo ; encoding: [0x7e,0x90,0x00,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7e,0x90,0x00,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v7, ttmp15, v3 ; encoding: [0x7e,0x10,0x01,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x10,0x01,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_lshrrev_b32 v7, ttmp15, v3 ; encoding: [0x7e,0x50,0x01,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x50,0x01,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_max_i32 v7, ttmp15, v3 ; encoding: [0x7e,0x70,0x01,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x7e,0x70,0x01,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_max_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xa0,0x00,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0xa0,0x00,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_min_i32 v7, ttmp15, v3 ; encoding: [0x7e,0x80,0x01,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x80,0x01,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_min_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xb0,0x00,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0xb0,0x00,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x70,0x00,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x70,0x00,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_mul_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x30,0x00,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x30,0x00,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_sub_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x50,0x00,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x50,0x00,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_sub_nc_u32 v7, ttmp15, v3 ; encoding: [0x7e,0x40,0x01,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x40,0x01,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, exec_lo, v2 :: v_dual_subrev_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x60,0x00,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x60,0x00,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, exec_lo, v255 :: v_dual_mov_b32 v7, ttmp15 ; encoding: [0x7e,0x80,0x00,0xcf,0x7b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] 
+0x7e,0x80,0x00,0xcf,0x7b,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, m0, v2 :: v_dual_add_f32 v7, m0, v3 ; encoding: [0x7d,0x40,0x00,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x40,0x00,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, m0, v2 :: v_dual_add_nc_u32 v7, m0, v3 ; encoding: [0x7d,0x00,0x01,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x00,0x01,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, m0, v2 :: v_dual_ashrrev_i32 v7, m0, v3 ; encoding: [0x7d,0x60,0x01,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x60,0x01,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, m0, v2 :: v_dual_cndmask_b32 v7, m0, v3, vcc_lo ; encoding: [0x7d,0x90,0x00,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7d,0x90,0x00,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_fmac_f32 v255, m0, v2 :: v_dual_lshlrev_b32 v7, m0, v3 ; encoding: [0x7d,0x10,0x01,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x10,0x01,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, m0, v2 :: v_dual_lshrrev_b32 v7, m0, v3 ; encoding: [0x7d,0x50,0x01,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x50,0x01,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, m0, v2 :: v_dual_max_i32 v7, m0, v3 ; encoding: [0x7d,0x70,0x01,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x70,0x01,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, m0, v2 :: v_dual_max_num_f32 v7, m0, v3 ; encoding: [0x7d,0xa0,0x00,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0xa0,0x00,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, m0, v2 :: v_dual_min_i32 v7, m0, v3 ; encoding: [0x7d,0x80,0x01,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x80,0x01,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, m0, v2 
:: v_dual_min_num_f32 v7, m0, v3 ; encoding: [0x7d,0xb0,0x00,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0xb0,0x00,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v7, m0, v3 ; encoding: [0x7d,0x70,0x00,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x70,0x00,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, m0, v2 :: v_dual_mul_f32 v7, m0, v3 ; encoding: [0x7d,0x30,0x00,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x30,0x00,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, m0, v2 :: v_dual_sub_f32 v7, m0, v3 ; encoding: [0x7d,0x50,0x00,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x50,0x00,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, m0, v2 :: v_dual_sub_nc_u32 v7, m0, v3 ; encoding: [0x7d,0x40,0x01,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x40,0x01,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, m0, v2 :: v_dual_subrev_f32 v7, m0, v3 ; encoding: [0x7d,0x60,0x00,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x60,0x00,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, m0, v255 :: v_dual_mov_b32 v7, m0 ; encoding: [0x7d,0x80,0x00,0xcf,0x7d,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7d,0x80,0x00,0xcf,0x7d,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, s1, v2 :: v_dual_add_f32 v7, s105, v3 ; encoding: [0x01,0x40,0x00,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x40,0x00,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, s1, v2 :: v_dual_add_nc_u32 v7, s105, v3 ; encoding: [0x01,0x00,0x01,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x00,0x01,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, s1, v2 :: v_dual_ashrrev_i32 v7, s105, v3 ; encoding: [0x01,0x60,0x01,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x01,0x60,0x01,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, s1, v2 :: v_dual_cndmask_b32 v7, s1, v3, vcc_lo ; encoding: [0x01,0x90,0x00,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x01,0x90,0x00,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_fmac_f32 v255, s1, v2 :: v_dual_lshlrev_b32 v7, s105, v3 ; encoding: [0x01,0x10,0x01,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x10,0x01,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, s1, v2 :: v_dual_lshrrev_b32 v7, s105, v3 ; encoding: [0x01,0x50,0x01,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x50,0x01,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, s1, v2 :: v_dual_max_i32 v7, s105, v3 ; encoding: [0x01,0x70,0x01,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x70,0x01,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, s1, v2 :: v_dual_max_num_f32 v7, s105, v3 ; encoding: [0x01,0xa0,0x00,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xa0,0x00,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, s1, v2 :: v_dual_min_i32 v7, s105, v3 ; encoding: [0x01,0x80,0x01,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x80,0x01,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, s1, v2 :: v_dual_min_num_f32 v7, s105, v3 ; encoding: [0x01,0xb0,0x00,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xb0,0x00,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v7, s105, v3 ; encoding: [0x01,0x70,0x00,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x70,0x00,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, s1, v2 :: v_dual_mul_f32 v7, s105, v3 ; encoding: [0x01,0x30,0x00,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x30,0x00,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: 
v_dual_fmac_f32 v255, s1, v2 :: v_dual_sub_f32 v7, s105, v3 ; encoding: [0x01,0x50,0x00,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x50,0x00,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, s1, v2 :: v_dual_sub_nc_u32 v7, s105, v3 ; encoding: [0x01,0x40,0x01,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x40,0x01,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, s1, v2 :: v_dual_subrev_f32 v7, s105, v3 ; encoding: [0x01,0x60,0x00,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x60,0x00,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, s1, v255 :: v_dual_mov_b32 v7, s105 ; encoding: [0x01,0x80,0x00,0xcf,0x69,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x01,0x80,0x00,0xcf,0x69,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, s105, v2 :: v_dual_add_f32 v7, s1, v3 ; encoding: [0x69,0x40,0x00,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x40,0x00,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, s105, v2 :: v_dual_add_nc_u32 v7, s1, v3 ; encoding: [0x69,0x00,0x01,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x00,0x01,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, s105, v2 :: v_dual_ashrrev_i32 v7, s1, v3 ; encoding: [0x69,0x60,0x01,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x60,0x01,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, s105, v2 :: v_dual_cndmask_b32 v7, s105, v3, vcc_lo ; encoding: [0x69,0x90,0x00,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x69,0x90,0x00,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_fmac_f32 v255, s105, v2 :: v_dual_lshlrev_b32 v7, s1, v3 ; encoding: [0x69,0x10,0x01,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x10,0x01,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, s105, v2 :: v_dual_lshrrev_b32 v7, s1, v3 ; encoding: 
[0x69,0x50,0x01,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x50,0x01,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, s105, v2 :: v_dual_max_i32 v7, s1, v3 ; encoding: [0x69,0x70,0x01,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x70,0x01,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, s105, v2 :: v_dual_max_num_f32 v7, s1, v3 ; encoding: [0x69,0xa0,0x00,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0xa0,0x00,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, s105, v2 :: v_dual_min_i32 v7, s1, v3 ; encoding: [0x69,0x80,0x01,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x80,0x01,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, s105, v2 :: v_dual_min_num_f32 v7, s1, v3 ; encoding: [0x69,0xb0,0x00,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0xb0,0x00,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v7, s1, v3 ; encoding: [0x69,0x70,0x00,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x70,0x00,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, s105, v2 :: v_dual_mul_f32 v7, s1, v3 ; encoding: [0x69,0x30,0x00,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x30,0x00,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, s105, v2 :: v_dual_sub_f32 v7, s1, v3 ; encoding: [0x69,0x50,0x00,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x50,0x00,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, s105, v2 :: v_dual_sub_nc_u32 v7, s1, v3 ; encoding: [0x69,0x40,0x01,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x40,0x01,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, s105, v2 :: v_dual_subrev_f32 v7, s1, v3 ; encoding: [0x69,0x60,0x00,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x69,0x60,0x00,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, s105, v255 :: v_dual_mov_b32 v7, s1 ; encoding: [0x69,0x80,0x00,0xcf,0x01,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x69,0x80,0x00,0xcf,0x01,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_add_f32 v7, -1, v3 ; encoding: [0xfd,0x40,0x00,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x40,0x00,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_add_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x00,0x01,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x00,0x01,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v7, -1, v3 ; encoding: [0xfd,0x60,0x01,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x60,0x01,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo ; encoding: [0xfd,0x90,0x00,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0xfd,0x90,0x00,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v7, -1, v3 ; encoding: [0xfd,0x10,0x01,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x10,0x01,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v7, -1, v3 ; encoding: [0xfd,0x50,0x01,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x50,0x01,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_max_i32 v7, -1, v3 ; encoding: [0xfd,0x70,0x01,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x70,0x01,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_max_num_f32 v7, -1, v3 ; encoding: [0xfd,0xa0,0x00,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0xa0,0x00,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# 
GFX1250: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_min_i32 v7, -1, v3 ; encoding: [0xfd,0x80,0x01,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x80,0x01,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_min_num_f32 v7, -1, v3 ; encoding: [0xfd,0xb0,0x00,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0xb0,0x00,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v7, -1, v3 ; encoding: [0xfd,0x70,0x00,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x70,0x00,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_mul_f32 v7, -1, v3 ; encoding: [0xfd,0x30,0x00,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x30,0x00,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_sub_f32 v7, -1, v3 ; encoding: [0xfd,0x50,0x00,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x50,0x00,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x40,0x01,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x40,0x01,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, src_scc, v2 :: v_dual_subrev_f32 v7, -1, v3 ; encoding: [0xfd,0x60,0x00,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x60,0x00,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, src_scc, v255 :: v_dual_mov_b32 v7, -1 ; encoding: [0xfd,0x80,0x00,0xcf,0xc1,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0xfd,0x80,0x00,0xcf,0xc1,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_add_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x40,0x00,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x40,0x00,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v7, 
vcc_lo, v3 ; encoding: [0x7b,0x00,0x01,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x00,0x01,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v7, vcc_lo, v3 ; encoding: [0x7b,0x60,0x01,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x60,0x01,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v7, ttmp15, v3, vcc_lo ; encoding: [0x7b,0x90,0x00,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7b,0x90,0x00,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v7, vcc_lo, v3 ; encoding: [0x7b,0x10,0x01,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x10,0x01,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v7, vcc_lo, v3 ; encoding: [0x7b,0x50,0x01,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x50,0x01,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_max_i32 v7, vcc_lo, v3 ; encoding: [0x7b,0x70,0x01,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x70,0x01,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_max_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xa0,0x00,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0xa0,0x00,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_min_i32 v7, vcc_lo, v3 ; encoding: [0x7b,0x80,0x01,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x80,0x01,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_min_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xb0,0x00,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0xb0,0x00,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 ; encoding: 
[0x7b,0x70,0x00,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x70,0x00,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_mul_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x30,0x00,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x30,0x00,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_sub_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x50,0x00,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x50,0x00,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_sub_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7b,0x40,0x01,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x40,0x01,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, ttmp15, v2 :: v_dual_subrev_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x60,0x00,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x60,0x00,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, ttmp15, v255 :: v_dual_mov_b32 v7, vcc_lo ; encoding: [0x7b,0x80,0x00,0xcf,0x6a,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7b,0x80,0x00,0xcf,0x6a,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v1, v2 :: v_dual_add_f32 v7, v255, v3 ; encoding: [0x01,0x41,0x00,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x41,0x00,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v1, v2 :: v_dual_add_nc_u32 v7, v255, v3 ; encoding: [0x01,0x01,0x01,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x01,0x01,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v1, v2 :: v_dual_ashrrev_i32 v7, v255, v3 ; encoding: [0x01,0x61,0x01,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x61,0x01,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v1, v2 :: v_dual_cndmask_b32 v7, v255, v3, vcc_lo ; encoding: [0x01,0x91,0x00,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] 
+0x01,0x91,0x00,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v1, v2 :: v_dual_lshlrev_b32 v7, v255, v3 ; encoding: [0x01,0x11,0x01,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x11,0x01,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v1, v2 :: v_dual_lshrrev_b32 v7, v255, v3 ; encoding: [0x01,0x51,0x01,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x51,0x01,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v1, v2 :: v_dual_max_i32 v7, v255, v3 ; encoding: [0x01,0x71,0x01,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x71,0x01,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v1, v2 :: v_dual_max_num_f32 v7, v255, v3 ; encoding: [0x01,0xa1,0x00,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xa1,0x00,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v1, v2 :: v_dual_min_i32 v7, v255, v3 ; encoding: [0x01,0x81,0x01,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x81,0x01,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v1, v2 :: v_dual_min_num_f32 v7, v255, v3 ; encoding: [0x01,0xb1,0x00,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xb1,0x00,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v7, v255, v3 ; encoding: [0x01,0x71,0x00,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x71,0x00,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v1, v2 :: v_dual_mul_f32 v7, v255, v3 ; encoding: [0x01,0x31,0x00,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x31,0x00,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v1, v2 :: v_dual_sub_f32 v7, v255, v3 ; encoding: [0x01,0x51,0x00,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x51,0x00,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 
v255, v1, v2 :: v_dual_sub_nc_u32 v7, v255, v3 ; encoding: [0x01,0x41,0x01,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x41,0x01,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v1, v2 :: v_dual_subrev_f32 v7, v255, v3 ; encoding: [0x01,0x61,0x00,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x61,0x00,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v1, v255 :: v_dual_mov_b32 v7, v255 ; encoding: [0x01,0x81,0x00,0xcf,0xff,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0x01,0x81,0x00,0xcf,0xff,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v2, v2 :: v_dual_add_f32 v7, v3, v3 ; encoding: [0x02,0x41,0x00,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x41,0x00,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v2, v2 :: v_dual_add_nc_u32 v7, v3, v3 ; encoding: [0x02,0x01,0x01,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x01,0x01,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v2, v2 :: v_dual_ashrrev_i32 v7, v3, v3 ; encoding: [0x02,0x61,0x01,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x61,0x01,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v2, v2 :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo ; encoding: [0x02,0x91,0x00,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0x02,0x91,0x00,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v2, v2 :: v_dual_lshlrev_b32 v7, v3, v3 ; encoding: [0x02,0x11,0x01,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x11,0x01,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v2, v2 :: v_dual_lshrrev_b32 v7, v3, v3 ; encoding: [0x02,0x51,0x01,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x51,0x01,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v2, v2 :: v_dual_max_i32 v7, v3, v3 ; encoding: 
[0x02,0x71,0x01,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x71,0x01,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v2, v2 :: v_dual_max_num_f32 v7, v3, v3 ; encoding: [0x02,0xa1,0x00,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0xa1,0x00,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v2, v2 :: v_dual_min_i32 v7, v3, v3 ; encoding: [0x02,0x81,0x01,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x81,0x01,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v2, v2 :: v_dual_min_num_f32 v7, v3, v3 ; encoding: [0x02,0xb1,0x00,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0xb1,0x00,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v7, v3, v3 ; encoding: [0x02,0x71,0x00,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x71,0x00,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v2, v2 :: v_dual_mul_f32 v7, v3, v3 ; encoding: [0x02,0x31,0x00,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x31,0x00,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v2, v2 :: v_dual_sub_f32 v7, v3, v3 ; encoding: [0x02,0x51,0x00,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x51,0x00,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v2, v2 :: v_dual_sub_nc_u32 v7, v3, v3 ; encoding: [0x02,0x41,0x01,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x41,0x01,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v2, v2 :: v_dual_subrev_f32 v7, v3, v3 ; encoding: [0x02,0x61,0x00,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x61,0x00,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v2, v255 :: v_dual_mov_b32 v7, v3 ; encoding: [0x02,0x81,0x00,0xcf,0x03,0x01,0xff,0x00,0xff,0x00,0x00,0x07] 
+0x02,0x81,0x00,0xcf,0x03,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v255, v2 :: v_dual_add_f32 v7, v2, v3 ; encoding: [0xff,0x41,0x00,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x41,0x00,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v255, v2 :: v_dual_add_nc_u32 v7, v2, v3 ; encoding: [0xff,0x01,0x01,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x01,0x01,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v255, v2 :: v_dual_ashrrev_i32 v7, v2, v3 ; encoding: [0xff,0x61,0x01,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x61,0x01,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v255, v2 :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo ; encoding: [0xff,0x91,0x00,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0xff,0x91,0x00,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v255, v2 :: v_dual_lshlrev_b32 v7, v2, v3 ; encoding: [0xff,0x11,0x01,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x11,0x01,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v255, v2 :: v_dual_lshrrev_b32 v7, v2, v3 ; encoding: [0xff,0x51,0x01,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x51,0x01,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v255, v2 :: v_dual_max_i32 v7, v2, v3 ; encoding: [0xff,0x71,0x01,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x71,0x01,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v255, v2 :: v_dual_max_num_f32 v7, v2, v3 ; encoding: [0xff,0xa1,0x00,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0xa1,0x00,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v255, v2 :: v_dual_min_i32 v7, v2, v3 ; encoding: [0xff,0x81,0x01,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x81,0x01,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: 
v_dual_fmac_f32 v255, v255, v2 :: v_dual_min_num_f32 v7, v2, v3 ; encoding: [0xff,0xb1,0x00,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0xb1,0x00,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v7, v2, v3 ; encoding: [0xff,0x71,0x00,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x71,0x00,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v255, v2 :: v_dual_mul_f32 v7, v2, v3 ; encoding: [0xff,0x31,0x00,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x31,0x00,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v255, v2 :: v_dual_sub_f32 v7, v2, v3 ; encoding: [0xff,0x51,0x00,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x51,0x00,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v255, v2 :: v_dual_sub_nc_u32 v7, v2, v3 ; encoding: [0xff,0x41,0x01,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x41,0x01,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v255, v2 :: v_dual_subrev_f32 v7, v2, v3 ; encoding: [0xff,0x61,0x00,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x61,0x00,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v255, v255 :: v_dual_mov_b32 v7, v2 ; encoding: [0xff,0x81,0x00,0xcf,0x02,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0xff,0x81,0x00,0xcf,0x02,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v3, v2 :: v_dual_add_f32 v7, v4, v3 ; encoding: [0x03,0x41,0x00,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x41,0x00,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v3, v2 :: v_dual_add_nc_u32 v7, v4, v3 ; encoding: [0x03,0x01,0x01,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x01,0x01,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v3, v2 :: v_dual_ashrrev_i32 v7, v4, v3 ; encoding: 
[0x03,0x61,0x01,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x61,0x01,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v3, v2 :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo ; encoding: [0x03,0x91,0x00,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0x03,0x91,0x00,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v3, v2 :: v_dual_lshlrev_b32 v7, v4, v3 ; encoding: [0x03,0x11,0x01,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x11,0x01,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v3, v2 :: v_dual_lshrrev_b32 v7, v4, v3 ; encoding: [0x03,0x51,0x01,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x51,0x01,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v3, v2 :: v_dual_max_i32 v7, v4, v3 ; encoding: [0x03,0x71,0x01,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x71,0x01,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v3, v2 :: v_dual_max_num_f32 v7, v4, v3 ; encoding: [0x03,0xa1,0x00,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0xa1,0x00,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v3, v2 :: v_dual_min_i32 v7, v4, v3 ; encoding: [0x03,0x81,0x01,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x81,0x01,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v3, v2 :: v_dual_min_num_f32 v7, v4, v3 ; encoding: [0x03,0xb1,0x00,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0xb1,0x00,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v7, v4, v3 ; encoding: [0x03,0x71,0x00,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x71,0x00,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v3, v2 :: v_dual_mul_f32 v7, v4, v3 ; encoding: [0x03,0x31,0x00,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x03,0x31,0x00,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v3, v2 :: v_dual_sub_f32 v7, v4, v3 ; encoding: [0x03,0x51,0x00,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x51,0x00,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v3, v2 :: v_dual_sub_nc_u32 v7, v4, v3 ; encoding: [0x03,0x41,0x01,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x41,0x01,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v3, v2 :: v_dual_subrev_f32 v7, v4, v3 ; encoding: [0x03,0x61,0x00,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x61,0x00,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v3, v255 :: v_dual_mov_b32 v7, v4 ; encoding: [0x03,0x81,0x00,0xcf,0x04,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0x03,0x81,0x00,0xcf,0x04,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_add_f32 v7, v1, v3 ; encoding: [0x04,0x41,0x00,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x41,0x00,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_add_nc_u32 v7, v1, v3 ; encoding: [0x04,0x01,0x01,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x01,0x01,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_ashrrev_i32 v7, v1, v3 ; encoding: [0x04,0x61,0x01,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x61,0x01,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0x14 ; encoding: [0x04,0x21,0x01,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x14,0x07] +0x04,0x21,0x01,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x14,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, s96 ; encoding: [0x04,0x91,0x00,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x60,0x07] +0x04,0x91,0x00,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x60,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v4, 
v2 :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo ; encoding: [0x04,0x91,0x00,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0x04,0x91,0x00,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_fma_f32 v7, v1, v3, v4 ; encoding: [0x04,0x31,0x01,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x04,0x07] +0x04,0x31,0x01,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x04,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_lshlrev_b32 v7, v1, v3 ; encoding: [0x04,0x11,0x01,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x11,0x01,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_lshrrev_b32 v7, v1, v3 ; encoding: [0x04,0x51,0x01,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x51,0x01,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_max_i32 v7, v1, v3 ; encoding: [0x04,0x71,0x01,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x71,0x01,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_max_num_f32 v7, v1, v3 ; encoding: [0x04,0xa1,0x00,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0xa1,0x00,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_min_i32 v7, v1, v3 ; encoding: [0x04,0x81,0x01,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x81,0x01,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_min_num_f32 v7, v1, v3 ; encoding: [0x04,0xb1,0x00,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0xb1,0x00,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v7, v1, v3 ; encoding: [0x04,0x71,0x00,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x71,0x00,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_mul_f32 v7, v1, v3 ; encoding: 
[0x04,0x31,0x00,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x31,0x00,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_sub_f32 v7, v1, v3 ; encoding: [0x04,0x51,0x00,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x51,0x00,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_sub_nc_u32 v7, v1, v3 ; encoding: [0x04,0x41,0x01,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x41,0x01,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v4, v2 :: v_dual_subrev_f32 v7, v1, v3 ; encoding: [0x04,0x61,0x00,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x61,0x00,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, v4, v255 :: v_dual_mov_b32 v7, v1 ; encoding: [0x04,0x81,0x00,0xcf,0x01,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0x04,0x81,0x00,0xcf,0x01,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_add_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x40,0x00,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x40,0x00,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x01,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x00,0x01,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v7, exec_lo, v3 ; encoding: [0x6b,0x60,0x01,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x60,0x01,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v7, vcc_hi, v3, vcc_lo ; encoding: [0x6b,0x90,0x00,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x6b,0x90,0x00,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v7, exec_lo, v3 ; encoding: [0x6b,0x10,0x01,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x6b,0x10,0x01,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v7, exec_lo, v3 ; encoding: [0x6b,0x50,0x01,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x50,0x01,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_max_i32 v7, exec_lo, v3 ; encoding: [0x6b,0x70,0x01,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x70,0x01,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xa0,0x00,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0xa0,0x00,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_min_i32 v7, exec_lo, v3 ; encoding: [0x6b,0x80,0x01,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x80,0x01,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xb0,0x00,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0xb0,0x00,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x70,0x00,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x70,0x00,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_mul_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x30,0x00,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x30,0x00,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_sub_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x50,0x00,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x50,0x00,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v7, exec_lo, v3 ; encoding: [0x6b,0x40,0x01,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x6b,0x40,0x01,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x60,0x00,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x60,0x00,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, vcc_hi, v255 :: v_dual_mov_b32 v7, exec_lo ; encoding: [0x6b,0x80,0x00,0xcf,0x7e,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x6b,0x80,0x00,0xcf,0x7e,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_add_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x00,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x40,0x00,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x01,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x00,0x01,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x01,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x60,0x01,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo ; encoding: [0x6a,0x90,0x00,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x6a,0x90,0x00,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x10,0x01,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x10,0x01,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x01,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x50,0x01,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_max_i32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x01,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x6a,0x70,0x01,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xa0,0x00,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0xa0,0x00,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_min_i32 v7, exec_hi, v3 ; encoding: [0x6a,0x80,0x01,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x80,0x01,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xb0,0x00,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0xb0,0x00,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x00,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x70,0x00,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_mul_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x30,0x00,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x30,0x00,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_sub_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x00,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x50,0x00,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x01,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x40,0x01,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x00,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x60,0x00,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v255, vcc_lo, v255 :: v_dual_mov_b32 v7, exec_hi ; encoding: [0x6a,0x80,0x00,0xcf,0x7f,0x00,0xff,0x00,0xff,0x00,0x00,0x07] 
+0x6a,0x80,0x00,0xcf,0x7f,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_fmac_f32 v7, -1, v4 :: v_dual_fmac_f32 v9, src_scc, v5 ; encoding: [0xc1,0x00,0x00,0xcf,0xfd,0x00,0x04,0x00,0x07,0x05,0x00,0x09] +0xc1,0x00,0x00,0xcf,0xfd,0x00,0x04,0x00,0x07,0x05,0x00,0x09 + +# GFX1250: v_dual_fmac_f32 v7, 0.5, v3 :: v_dual_fmac_f32 v9, 0.5, v2 ; encoding: [0xf0,0x00,0x00,0xcf,0xf0,0x00,0x03,0x00,0x07,0x02,0x00,0x09] +0xf0,0x00,0x00,0xcf,0xf0,0x00,0x03,0x00,0x07,0x02,0x00,0x09 + +# GFX1250: v_dual_fmac_f32 v7, exec_hi, v2 :: v_dual_fmac_f32 v9, vcc_hi, v3 ; encoding: [0x7f,0x00,0x00,0xcf,0x6b,0x00,0x02,0x00,0x07,0x03,0x00,0x09] +0x7f,0x00,0x00,0xcf,0x6b,0x00,0x02,0x00,0x07,0x03,0x00,0x09 + +# GFX1250: v_dual_fmac_f32 v7, exec_lo, v2 :: v_dual_fmac_f32 v9, ttmp15, v3 ; encoding: [0x7e,0x00,0x00,0xcf,0x7b,0x00,0x02,0x00,0x07,0x03,0x00,0x09] +0x7e,0x00,0x00,0xcf,0x7b,0x00,0x02,0x00,0x07,0x03,0x00,0x09 + +# GFX1250: v_dual_fmac_f32 v7, m0, v2 :: v_dual_fmac_f32 v9, m0, v3 ; encoding: [0x7d,0x00,0x00,0xcf,0x7d,0x00,0x02,0x00,0x07,0x03,0x00,0x09] +0x7d,0x00,0x00,0xcf,0x7d,0x00,0x02,0x00,0x07,0x03,0x00,0x09 + +# GFX1250: v_dual_fmac_f32 v7, s1, v2 :: v_dual_fmac_f32 v9, s105, v3 ; encoding: [0x01,0x00,0x00,0xcf,0x69,0x00,0x02,0x00,0x07,0x03,0x00,0x09] +0x01,0x00,0x00,0xcf,0x69,0x00,0x02,0x00,0x07,0x03,0x00,0x09 + +# GFX1250: v_dual_fmac_f32 v7, s105, v2 :: v_dual_fmac_f32 v9, s1, v3 ; encoding: [0x69,0x00,0x00,0xcf,0x01,0x00,0x02,0x00,0x07,0x03,0x00,0x09] +0x69,0x00,0x00,0xcf,0x01,0x00,0x02,0x00,0x07,0x03,0x00,0x09 + +# GFX1250: v_dual_fmac_f32 v7, src_scc, v2 :: v_dual_fmac_f32 v9, -1, v3 ; encoding: [0xfd,0x00,0x00,0xcf,0xc1,0x00,0x02,0x00,0x07,0x03,0x00,0x09] +0xfd,0x00,0x00,0xcf,0xc1,0x00,0x02,0x00,0x07,0x03,0x00,0x09 + +# GFX1250: v_dual_fmac_f32 v7, ttmp15, v2 :: v_dual_fmac_f32 v9, vcc_lo, v3 ; encoding: [0x7b,0x00,0x00,0xcf,0x6a,0x00,0x02,0x00,0x07,0x03,0x00,0x09] +0x7b,0x00,0x00,0xcf,0x6a,0x00,0x02,0x00,0x07,0x03,0x00,0x09 + +# GFX1250: v_dual_fmac_f32 v7, v1, v2 
:: v_dual_fmac_f32 v9, v255, v3 ; encoding: [0x01,0x01,0x00,0xcf,0xff,0x01,0x02,0x00,0x07,0x03,0x00,0x09] +0x01,0x01,0x00,0xcf,0xff,0x01,0x02,0x00,0x07,0x03,0x00,0x09 + +# GFX1250: v_dual_fmac_f32 v7, v2, v2 :: v_dual_fmac_f32 v9, v3, v3 ; encoding: [0x02,0x01,0x00,0xcf,0x03,0x01,0x02,0x00,0x07,0x03,0x00,0x09] +0x02,0x01,0x00,0xcf,0x03,0x01,0x02,0x00,0x07,0x03,0x00,0x09 + +# GFX1250: v_dual_fmac_f32 v7, v255, v2 :: v_dual_fmac_f32 v9, v2, v3 ; encoding: [0xff,0x01,0x00,0xcf,0x02,0x01,0x02,0x00,0x07,0x03,0x00,0x09] +0xff,0x01,0x00,0xcf,0x02,0x01,0x02,0x00,0x07,0x03,0x00,0x09 + +# GFX1250: v_dual_fmac_f32 v7, v3, v2 :: v_dual_fmac_f32 v9, v4, v3 ; encoding: [0x03,0x01,0x00,0xcf,0x04,0x01,0x02,0x00,0x07,0x03,0x00,0x09] +0x03,0x01,0x00,0xcf,0x04,0x01,0x02,0x00,0x07,0x03,0x00,0x09 + +# GFX1250: v_dual_fmac_f32 v7, v4, v2 :: v_dual_fmac_f32 v9, v1, v3 ; encoding: [0x04,0x01,0x00,0xcf,0x01,0x01,0x02,0x00,0x07,0x03,0x00,0x09] +0x04,0x01,0x00,0xcf,0x01,0x01,0x02,0x00,0x07,0x03,0x00,0x09 + +# GFX1250: v_dual_fmac_f32 v7, vcc_hi, v2 :: v_dual_fmac_f32 v9, exec_lo, v3 ; encoding: [0x6b,0x00,0x00,0xcf,0x7e,0x00,0x02,0x00,0x07,0x03,0x00,0x09] +0x6b,0x00,0x00,0xcf,0x7e,0x00,0x02,0x00,0x07,0x03,0x00,0x09 + +# GFX1250: v_dual_fmac_f32 v7, vcc_lo, v2 :: v_dual_fmac_f32 v9, exec_hi, v3 ; encoding: [0x6a,0x00,0x00,0xcf,0x7f,0x00,0x02,0x00,0x07,0x03,0x00,0x09] +0x6a,0x00,0x00,0xcf,0x7f,0x00,0x02,0x00,0x07,0x03,0x00,0x09 + +# GFX1250: v_dual_lshlrev_b32 v0, v1, v2 :: v_dual_min_num_f32 v5, -s6, -v7 ; encoding: [0x01,0xb1,0x44,0xcf,0x06,0x30,0x02,0x00,0x00,0x07,0x00,0x05] +0x01,0xb1,0x44,0xcf,0x06,0x30,0x02,0x00,0x00,0x07,0x00,0x05 + +# GFX1250: v_dual_lshlrev_b32 v254, v4, v2 :: v_dual_add_f32 v7, v1, v3 ; encoding: [0x04,0x41,0x44,0xcf,0x01,0x01,0x02,0x00,0xfe,0x03,0x00,0x07] +0x04,0x41,0x44,0xcf,0x01,0x01,0x02,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v254, v4, v2 :: v_dual_ashrrev_i32 v9, v1, v13 ; encoding: 
[0x04,0x61,0x45,0xcf,0x01,0x01,0x02,0x00,0xfe,0x0d,0x00,0x09] +0x04,0x61,0x45,0xcf,0x01,0x01,0x02,0x00,0xfe,0x0d,0x00,0x09 + +# GFX1250: v_dual_lshlrev_b32 v254, v4, v2 :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0x85 ; encoding: [0x04,0x21,0x45,0xcf,0x01,0x01,0x02,0x00,0xfe,0x03,0x85,0x07] +0x04,0x21,0x45,0xcf,0x01,0x01,0x02,0x00,0xfe,0x03,0x85,0x07 + +# GFX1250: v_dual_lshlrev_b32 v254, v4, v2 :: v_dual_fma_f32 v7, v1, v3, v4 ; encoding: [0x04,0x31,0x45,0xcf,0x01,0x01,0x02,0x00,0xfe,0x03,0x04,0x07] +0x04,0x31,0x45,0xcf,0x01,0x01,0x02,0x00,0xfe,0x03,0x04,0x07 + +# GFX1250: v_dual_lshlrev_b32 v254, v4, v2 :: v_dual_lshrrev_b32 v9, v1, v13 ; encoding: [0x04,0x51,0x45,0xcf,0x01,0x01,0x02,0x00,0xfe,0x0d,0x00,0x09] +0x04,0x51,0x45,0xcf,0x01,0x01,0x02,0x00,0xfe,0x0d,0x00,0x09 + +# GFX1250: v_dual_lshlrev_b32 v254, v4, v2 :: v_dual_sub_nc_u32 v9, v1, v13 ; encoding: [0x04,0x41,0x45,0xcf,0x01,0x01,0x02,0x00,0xfe,0x0d,0x00,0x09] +0x04,0x41,0x45,0xcf,0x01,0x01,0x02,0x00,0xfe,0x0d,0x00,0x09 + +# GFX1250: v_dual_lshlrev_b32 v255, -1, v4 :: v_dual_add_f32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x44,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x40,0x44,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, -1, v4 :: v_dual_add_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x45,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x00,0x45,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, -1, v4 :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo ; encoding: [0xc1,0x90,0x44,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x6a,0x07] +0xc1,0x90,0x44,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x6a,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, -1, v4 :: v_dual_fmac_f32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x44,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x00,0x44,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, -1, v4 :: v_dual_lshlrev_b32 v7, src_scc, v5 ; encoding: 
[0xc1,0x10,0x45,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x10,0x45,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, -1, v4 :: v_dual_max_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xa0,0x44,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0xa0,0x44,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, -1, v4 :: v_dual_min_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xb0,0x44,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0xb0,0x44,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, -1, v4 :: v_dual_mov_b32 v7, src_scc ; encoding: [0xc1,0x80,0x44,0xcf,0xfd,0x00,0x04,0x00,0xff,0x00,0x00,0x07] +0xc1,0x80,0x44,0xcf,0xfd,0x00,0x04,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x44,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x70,0x44,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, -1, v4 :: v_dual_mul_f32 v7, src_scc, v5 ; encoding: [0xc1,0x30,0x44,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x30,0x44,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, -1, v4 :: v_dual_sub_f32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x44,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x50,0x44,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, -1, v4 :: v_dual_subrev_f32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x44,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x60,0x44,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, 0.5, v3 :: v_dual_add_f32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x44,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x40,0x44,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, 0.5, v3 :: v_dual_add_nc_u32 v7, 0.5, v2 ; encoding: 
[0xf0,0x00,0x45,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x00,0x45,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, 0.5, v3 :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo ; encoding: [0xf0,0x90,0x44,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x6a,0x07] +0xf0,0x90,0x44,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x6a,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, 0.5, v3 :: v_dual_fmac_f32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x44,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x00,0x44,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x10,0x45,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x10,0x45,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, 0.5, v3 :: v_dual_max_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xa0,0x44,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0xa0,0x44,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, 0.5, v3 :: v_dual_min_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xb0,0x44,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0xb0,0x44,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, 0.5, v3 :: v_dual_mov_b32 v7, 0.5 ; encoding: [0xf0,0x80,0x44,0xcf,0xf0,0x00,0x03,0x00,0xff,0x00,0x00,0x07] +0xf0,0x80,0x44,0xcf,0xf0,0x00,0x03,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x44,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x70,0x44,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, 0.5, v3 :: v_dual_mul_f32 v7, 0.5, v2 ; encoding: [0xf0,0x30,0x44,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x30,0x44,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, 0.5, v3 :: v_dual_sub_f32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x44,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] 
+0xf0,0x50,0x44,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, 0.5, v3 :: v_dual_subrev_f32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x44,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x60,0x44,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, exec_hi, v2 :: v_dual_add_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x40,0x44,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x40,0x44,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x45,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x00,0x45,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v7, exec_hi, v3, vcc_lo ; encoding: [0x7f,0x90,0x44,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7f,0x90,0x44,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, exec_hi, v2 :: v_dual_fmac_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x44,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x00,0x44,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v7, vcc_hi, v3 ; encoding: [0x7f,0x10,0x45,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x10,0x45,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, exec_hi, v2 :: v_dual_max_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xa0,0x44,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0xa0,0x44,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, exec_hi, v2 :: v_dual_min_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xb0,0x44,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0xb0,0x44,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_hi, v3 ; encoding: 
[0x7f,0x70,0x44,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x70,0x44,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, exec_hi, v2 :: v_dual_mul_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x30,0x44,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x30,0x44,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, exec_hi, v2 :: v_dual_sub_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x50,0x44,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x50,0x44,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, exec_hi, v2 :: v_dual_subrev_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x60,0x44,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x60,0x44,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, exec_hi, v255 :: v_dual_mov_b32 v7, vcc_hi ; encoding: [0x7f,0x80,0x44,0xcf,0x6b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7f,0x80,0x44,0xcf,0x6b,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, exec_lo, v2 :: v_dual_add_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x40,0x44,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x40,0x44,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x45,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x00,0x45,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v7, exec_lo, v3, vcc_lo ; encoding: [0x7e,0x90,0x44,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7e,0x90,0x44,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, exec_lo, v2 :: v_dual_fmac_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x44,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x00,0x44,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v7, ttmp15, v3 ; encoding: 
[0x7e,0x10,0x45,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x10,0x45,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, exec_lo, v2 :: v_dual_max_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xa0,0x44,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0xa0,0x44,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, exec_lo, v2 :: v_dual_min_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xb0,0x44,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0xb0,0x44,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x70,0x44,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x70,0x44,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, exec_lo, v2 :: v_dual_mul_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x30,0x44,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x30,0x44,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, exec_lo, v2 :: v_dual_sub_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x50,0x44,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x50,0x44,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, exec_lo, v2 :: v_dual_subrev_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x60,0x44,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x60,0x44,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, exec_lo, v255 :: v_dual_mov_b32 v7, ttmp15 ; encoding: [0x7e,0x80,0x44,0xcf,0x7b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7e,0x80,0x44,0xcf,0x7b,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, m0, v2 :: v_dual_add_f32 v7, m0, v3 ; encoding: [0x7d,0x40,0x44,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x40,0x44,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, m0, v2 :: v_dual_add_nc_u32 v7, m0, v3 ; encoding: 
[0x7d,0x00,0x45,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x00,0x45,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, m0, v2 :: v_dual_cndmask_b32 v7, m0, v3, vcc_lo ; encoding: [0x7d,0x90,0x44,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7d,0x90,0x44,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, m0, v2 :: v_dual_fmac_f32 v7, m0, v3 ; encoding: [0x7d,0x00,0x44,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x00,0x44,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, m0, v2 :: v_dual_lshlrev_b32 v7, m0, v3 ; encoding: [0x7d,0x10,0x45,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x10,0x45,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, m0, v2 :: v_dual_max_num_f32 v7, m0, v3 ; encoding: [0x7d,0xa0,0x44,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0xa0,0x44,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, m0, v2 :: v_dual_min_num_f32 v7, m0, v3 ; encoding: [0x7d,0xb0,0x44,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0xb0,0x44,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v7, m0, v3 ; encoding: [0x7d,0x70,0x44,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x70,0x44,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, m0, v2 :: v_dual_mul_f32 v7, m0, v3 ; encoding: [0x7d,0x30,0x44,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x30,0x44,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, m0, v2 :: v_dual_sub_f32 v7, m0, v3 ; encoding: [0x7d,0x50,0x44,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x50,0x44,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, m0, v2 :: v_dual_subrev_f32 v7, m0, v3 ; encoding: [0x7d,0x60,0x44,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x7d,0x60,0x44,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, m0, v255 :: v_dual_mov_b32 v7, m0 ; encoding: [0x7d,0x80,0x44,0xcf,0x7d,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7d,0x80,0x44,0xcf,0x7d,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, s1, v2 :: v_dual_add_f32 v7, s105, v3 ; encoding: [0x01,0x40,0x44,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x40,0x44,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, s1, v2 :: v_dual_add_nc_u32 v7, s105, v3 ; encoding: [0x01,0x00,0x45,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x00,0x45,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, s1, v2 :: v_dual_cndmask_b32 v7, s1, v3, vcc_lo ; encoding: [0x01,0x90,0x44,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x01,0x90,0x44,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, s1, v2 :: v_dual_fmac_f32 v7, s105, v3 ; encoding: [0x01,0x00,0x44,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x00,0x44,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, s1, v2 :: v_dual_lshlrev_b32 v7, s105, v3 ; encoding: [0x01,0x10,0x45,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x10,0x45,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, s1, v2 :: v_dual_max_num_f32 v7, s105, v3 ; encoding: [0x01,0xa0,0x44,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xa0,0x44,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, s1, v2 :: v_dual_min_num_f32 v7, s105, v3 ; encoding: [0x01,0xb0,0x44,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xb0,0x44,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v7, s105, v3 ; encoding: [0x01,0x70,0x44,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x70,0x44,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 
+ +# GFX1250: v_dual_lshlrev_b32 v255, s1, v2 :: v_dual_mul_f32 v7, s105, v3 ; encoding: [0x01,0x30,0x44,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x30,0x44,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, s1, v2 :: v_dual_sub_f32 v7, s105, v3 ; encoding: [0x01,0x50,0x44,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x50,0x44,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, s1, v2 :: v_dual_subrev_f32 v7, s105, v3 ; encoding: [0x01,0x60,0x44,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x60,0x44,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, s1, v255 :: v_dual_mov_b32 v7, s105 ; encoding: [0x01,0x80,0x44,0xcf,0x69,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x01,0x80,0x44,0xcf,0x69,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, s105, v2 :: v_dual_add_f32 v7, s1, v3 ; encoding: [0x69,0x40,0x44,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x40,0x44,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, s105, v2 :: v_dual_add_nc_u32 v7, s1, v3 ; encoding: [0x69,0x00,0x45,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x00,0x45,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, s105, v2 :: v_dual_cndmask_b32 v7, s105, v3, vcc_lo ; encoding: [0x69,0x90,0x44,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x69,0x90,0x44,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, s105, v2 :: v_dual_fmac_f32 v7, s1, v3 ; encoding: [0x69,0x00,0x44,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x00,0x44,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, s105, v2 :: v_dual_lshlrev_b32 v7, s1, v3 ; encoding: [0x69,0x10,0x45,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x10,0x45,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, s105, v2 :: 
v_dual_max_num_f32 v7, s1, v3 ; encoding: [0x69,0xa0,0x44,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0xa0,0x44,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, s105, v2 :: v_dual_min_num_f32 v7, s1, v3 ; encoding: [0x69,0xb0,0x44,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0xb0,0x44,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v7, s1, v3 ; encoding: [0x69,0x70,0x44,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x70,0x44,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, s105, v2 :: v_dual_mul_f32 v7, s1, v3 ; encoding: [0x69,0x30,0x44,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x30,0x44,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, s105, v2 :: v_dual_sub_f32 v7, s1, v3 ; encoding: [0x69,0x50,0x44,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x50,0x44,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, s105, v2 :: v_dual_subrev_f32 v7, s1, v3 ; encoding: [0x69,0x60,0x44,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x60,0x44,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, s105, v255 :: v_dual_mov_b32 v7, s1 ; encoding: [0x69,0x80,0x44,0xcf,0x01,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x69,0x80,0x44,0xcf,0x01,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, src_scc, v2 :: v_dual_add_f32 v7, -1, v3 ; encoding: [0xfd,0x40,0x44,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x40,0x44,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, src_scc, v2 :: v_dual_add_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x00,0x45,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x00,0x45,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, src_scc, v2 :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo ; encoding: 
[0xfd,0x90,0x44,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0xfd,0x90,0x44,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, src_scc, v2 :: v_dual_fmac_f32 v7, -1, v3 ; encoding: [0xfd,0x00,0x44,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x00,0x44,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v7, -1, v3 ; encoding: [0xfd,0x10,0x45,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x10,0x45,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, src_scc, v2 :: v_dual_max_num_f32 v7, -1, v3 ; encoding: [0xfd,0xa0,0x44,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0xa0,0x44,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, src_scc, v2 :: v_dual_min_num_f32 v7, -1, v3 ; encoding: [0xfd,0xb0,0x44,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0xb0,0x44,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v7, -1, v3 ; encoding: [0xfd,0x70,0x44,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x70,0x44,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, src_scc, v2 :: v_dual_mul_f32 v7, -1, v3 ; encoding: [0xfd,0x30,0x44,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x30,0x44,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, src_scc, v2 :: v_dual_sub_f32 v7, -1, v3 ; encoding: [0xfd,0x50,0x44,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x50,0x44,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, src_scc, v2 :: v_dual_subrev_f32 v7, -1, v3 ; encoding: [0xfd,0x60,0x44,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x60,0x44,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, src_scc, v255 :: v_dual_mov_b32 v7, -1 ; encoding: 
[0xfd,0x80,0x44,0xcf,0xc1,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0xfd,0x80,0x44,0xcf,0xc1,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, ttmp15, v2 :: v_dual_add_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x40,0x44,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x40,0x44,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x45,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x00,0x45,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v7, ttmp15, v3, vcc_lo ; encoding: [0x7b,0x90,0x44,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7b,0x90,0x44,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, ttmp15, v2 :: v_dual_fmac_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x44,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x00,0x44,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v7, vcc_lo, v3 ; encoding: [0x7b,0x10,0x45,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x10,0x45,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, ttmp15, v2 :: v_dual_max_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xa0,0x44,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0xa0,0x44,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, ttmp15, v2 :: v_dual_min_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xb0,0x44,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0xb0,0x44,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x70,0x44,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x70,0x44,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, ttmp15, v2 :: v_dual_mul_f32 v7, vcc_lo, v3 ; encoding: 
[0x7b,0x30,0x44,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x30,0x44,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, ttmp15, v2 :: v_dual_sub_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x50,0x44,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x50,0x44,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, ttmp15, v2 :: v_dual_subrev_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x60,0x44,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x60,0x44,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, ttmp15, v255 :: v_dual_mov_b32 v7, vcc_lo ; encoding: [0x7b,0x80,0x44,0xcf,0x6a,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7b,0x80,0x44,0xcf,0x6a,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v1, v2 :: v_dual_add_f32 v7, v255, v3 ; encoding: [0x01,0x41,0x44,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x41,0x44,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v1, v2 :: v_dual_add_nc_u32 v7, v255, v3 ; encoding: [0x01,0x01,0x45,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x01,0x45,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v1, v2 :: v_dual_cndmask_b32 v7, v255, v3, vcc_lo ; encoding: [0x01,0x91,0x44,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0x01,0x91,0x44,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v1, v2 :: v_dual_fmac_f32 v7, v255, v3 ; encoding: [0x01,0x01,0x44,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x01,0x44,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v1, v2 :: v_dual_lshlrev_b32 v7, v255, v3 ; encoding: [0x01,0x11,0x45,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x11,0x45,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v1, v2 :: v_dual_max_num_f32 v7, v255, v3 ; encoding: 
[0x01,0xa1,0x44,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xa1,0x44,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v1, v2 :: v_dual_min_num_f32 v7, v255, v3 ; encoding: [0x01,0xb1,0x44,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xb1,0x44,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v7, v255, v3 ; encoding: [0x01,0x71,0x44,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x71,0x44,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v1, v2 :: v_dual_mul_f32 v7, v255, v3 ; encoding: [0x01,0x31,0x44,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x31,0x44,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v1, v2 :: v_dual_sub_f32 v7, v255, v3 ; encoding: [0x01,0x51,0x44,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x51,0x44,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v1, v2 :: v_dual_subrev_f32 v7, v255, v3 ; encoding: [0x01,0x61,0x44,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x61,0x44,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v1, v255 :: v_dual_mov_b32 v7, v255 ; encoding: [0x01,0x81,0x44,0xcf,0xff,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0x01,0x81,0x44,0xcf,0xff,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v2, v2 :: v_dual_add_f32 v7, v3, v3 ; encoding: [0x02,0x41,0x44,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x41,0x44,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v2, v2 :: v_dual_add_nc_u32 v7, v3, v3 ; encoding: [0x02,0x01,0x45,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x01,0x45,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v2, v2 :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo ; encoding: [0x02,0x91,0x44,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] 
+0x02,0x91,0x44,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v2, v2 :: v_dual_fmac_f32 v7, v3, v3 ; encoding: [0x02,0x01,0x44,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x01,0x44,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v2, v2 :: v_dual_lshlrev_b32 v7, v3, v3 ; encoding: [0x02,0x11,0x45,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x11,0x45,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v2, v2 :: v_dual_max_num_f32 v7, v3, v3 ; encoding: [0x02,0xa1,0x44,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0xa1,0x44,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v2, v2 :: v_dual_min_num_f32 v7, v3, v3 ; encoding: [0x02,0xb1,0x44,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0xb1,0x44,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v7, v3, v3 ; encoding: [0x02,0x71,0x44,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x71,0x44,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v2, v2 :: v_dual_mul_f32 v7, v3, v3 ; encoding: [0x02,0x31,0x44,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x31,0x44,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v2, v2 :: v_dual_sub_f32 v7, v3, v3 ; encoding: [0x02,0x51,0x44,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x51,0x44,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v2, v2 :: v_dual_subrev_f32 v7, v3, v3 ; encoding: [0x02,0x61,0x44,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x61,0x44,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v2, v255 :: v_dual_mov_b32 v7, v3 ; encoding: [0x02,0x81,0x44,0xcf,0x03,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0x02,0x81,0x44,0xcf,0x03,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: 
v_dual_lshlrev_b32 v255, v255, v2 :: v_dual_add_f32 v7, v2, v3 ; encoding: [0xff,0x41,0x44,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x41,0x44,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v255, v2 :: v_dual_add_nc_u32 v7, v2, v3 ; encoding: [0xff,0x01,0x45,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x01,0x45,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v255, v2 :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo ; encoding: [0xff,0x91,0x44,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0xff,0x91,0x44,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v255, v2 :: v_dual_fmac_f32 v7, v2, v3 ; encoding: [0xff,0x01,0x44,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x01,0x44,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v255, v2 :: v_dual_lshlrev_b32 v7, v2, v3 ; encoding: [0xff,0x11,0x45,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x11,0x45,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v255, v2 :: v_dual_max_num_f32 v7, v2, v3 ; encoding: [0xff,0xa1,0x44,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0xa1,0x44,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v255, v2 :: v_dual_min_num_f32 v7, v2, v3 ; encoding: [0xff,0xb1,0x44,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0xb1,0x44,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v7, v2, v3 ; encoding: [0xff,0x71,0x44,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x71,0x44,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v255, v2 :: v_dual_mul_f32 v7, v2, v3 ; encoding: [0xff,0x31,0x44,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x31,0x44,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v255, v2 :: v_dual_sub_f32 
v7, v2, v3 ; encoding: [0xff,0x51,0x44,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x51,0x44,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v255, v2 :: v_dual_subrev_f32 v7, v2, v3 ; encoding: [0xff,0x61,0x44,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x61,0x44,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v255, v255 :: v_dual_mov_b32 v7, v2 ; encoding: [0xff,0x81,0x44,0xcf,0x02,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0xff,0x81,0x44,0xcf,0x02,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v3, v2 :: v_dual_add_f32 v7, v4, v3 ; encoding: [0x03,0x41,0x44,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x41,0x44,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v3, v2 :: v_dual_add_nc_u32 v7, v4, v3 ; encoding: [0x03,0x01,0x45,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x01,0x45,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v3, v2 :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo ; encoding: [0x03,0x91,0x44,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0x03,0x91,0x44,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v3, v2 :: v_dual_fmac_f32 v7, v4, v3 ; encoding: [0x03,0x01,0x44,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x01,0x44,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v3, v2 :: v_dual_lshlrev_b32 v7, v4, v3 ; encoding: [0x03,0x11,0x45,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x11,0x45,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v3, v2 :: v_dual_max_num_f32 v7, v4, v3 ; encoding: [0x03,0xa1,0x44,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0xa1,0x44,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v3, v2 :: v_dual_min_num_f32 v7, v4, v3 ; encoding: 
[0x03,0xb1,0x44,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0xb1,0x44,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v7, v4, v3 ; encoding: [0x03,0x71,0x44,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x71,0x44,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v3, v2 :: v_dual_mul_f32 v7, v4, v3 ; encoding: [0x03,0x31,0x44,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x31,0x44,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v3, v2 :: v_dual_sub_f32 v7, v4, v3 ; encoding: [0x03,0x51,0x44,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x51,0x44,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v3, v2 :: v_dual_subrev_f32 v7, v4, v3 ; encoding: [0x03,0x61,0x44,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x61,0x44,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v3, v255 :: v_dual_mov_b32 v7, v4 ; encoding: [0x03,0x81,0x44,0xcf,0x04,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0x03,0x81,0x44,0xcf,0x04,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v4, v2 :: v_dual_add_nc_u32 v7, v1, v3 ; encoding: [0x04,0x01,0x45,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x01,0x45,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, s96 ; encoding: [0x04,0x91,0x44,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x60,0x07] +0x04,0x91,0x44,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x60,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo ; encoding: [0x04,0x91,0x44,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0x04,0x91,0x44,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v4, v2 :: v_dual_fmac_f32 v7, v1, v3 ; encoding: [0x04,0x01,0x44,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x04,0x01,0x44,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v4, v2 :: v_dual_lshlrev_b32 v7, v1, v3 ; encoding: [0x04,0x11,0x45,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x11,0x45,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v4, v2 :: v_dual_max_num_f32 v7, v1, v3 ; encoding: [0x04,0xa1,0x44,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0xa1,0x44,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v4, v2 :: v_dual_min_num_f32 v7, v1, v3 ; encoding: [0x04,0xb1,0x44,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0xb1,0x44,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v7, v1, v3 ; encoding: [0x04,0x71,0x44,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x71,0x44,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v4, v2 :: v_dual_mul_f32 v7, v1, v3 ; encoding: [0x04,0x31,0x44,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x31,0x44,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v4, v2 :: v_dual_sub_f32 v7, v1, v3 ; encoding: [0x04,0x51,0x44,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x51,0x44,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v4, v2 :: v_dual_subrev_f32 v7, v1, v3 ; encoding: [0x04,0x61,0x44,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x61,0x44,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, v4, v255 :: v_dual_mov_b32 v7, v1 ; encoding: [0x04,0x81,0x44,0xcf,0x01,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0x04,0x81,0x44,0xcf,0x01,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, vcc_hi, v2 :: v_dual_add_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x40,0x44,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x40,0x44,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: 
v_dual_lshlrev_b32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x45,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x00,0x45,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v7, vcc_hi, v3, vcc_lo ; encoding: [0x6b,0x90,0x44,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x6b,0x90,0x44,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x44,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x00,0x44,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v7, exec_lo, v3 ; encoding: [0x6b,0x10,0x45,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x10,0x45,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xa0,0x44,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0xa0,0x44,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xb0,0x44,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0xb0,0x44,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x70,0x44,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x70,0x44,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, vcc_hi, v2 :: v_dual_mul_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x30,0x44,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x30,0x44,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, vcc_hi, v2 :: v_dual_sub_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x50,0x44,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x50,0x44,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# 
GFX1250: v_dual_lshlrev_b32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x60,0x44,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x60,0x44,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, vcc_hi, v255 :: v_dual_mov_b32 v7, exec_lo ; encoding: [0x6b,0x80,0x44,0xcf,0x7e,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x6b,0x80,0x44,0xcf,0x7e,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, vcc_lo, v2 :: v_dual_add_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x44,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x40,0x44,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x45,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x00,0x45,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo ; encoding: [0x6a,0x90,0x44,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x6a,0x90,0x44,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x44,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x00,0x44,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x10,0x45,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x10,0x45,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xa0,0x44,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0xa0,0x44,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xb0,0x44,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0xb0,0x44,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# 
GFX1250: v_dual_lshlrev_b32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x44,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x70,0x44,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, vcc_lo, v2 :: v_dual_mul_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x30,0x44,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x30,0x44,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, vcc_lo, v2 :: v_dual_sub_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x44,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x50,0x44,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x44,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x60,0x44,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshlrev_b32 v255, vcc_lo, v255 :: v_dual_mov_b32 v7, exec_hi ; encoding: [0x6a,0x80,0x44,0xcf,0x7f,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x6a,0x80,0x44,0xcf,0x7f,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v1, v4, v2 :: v_dual_ashrrev_i32 v9, v1, v13 ; encoding: [0x04,0x61,0x55,0xcf,0x01,0x01,0x02,0x00,0x01,0x0d,0x00,0x09] +0x04,0x61,0x55,0xcf,0x01,0x01,0x02,0x00,0x01,0x0d,0x00,0x09 + +# GFX1250: v_dual_lshrrev_b32 v1, v4, v2 :: v_dual_lshrrev_b32 v9, v1, v13 ; encoding: [0x04,0x51,0x55,0xcf,0x01,0x01,0x02,0x00,0x01,0x0d,0x00,0x09] +0x04,0x51,0x55,0xcf,0x01,0x01,0x02,0x00,0x01,0x0d,0x00,0x09 + +# GFX1250: v_dual_lshrrev_b32 v1, v4, v2 :: v_dual_sub_nc_u32 v9, v1, v13 ; encoding: [0x04,0x41,0x55,0xcf,0x01,0x01,0x02,0x00,0x01,0x0d,0x00,0x09] +0x04,0x41,0x55,0xcf,0x01,0x01,0x02,0x00,0x01,0x0d,0x00,0x09 + +# GFX1250: v_dual_lshrrev_b32 v254, v4, v2 :: v_dual_add_f32 v7, v1, v3 ; encoding: [0x04,0x41,0x54,0xcf,0x01,0x01,0x02,0x00,0xfe,0x03,0x00,0x07] +0x04,0x41,0x54,0xcf,0x01,0x01,0x02,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v254, v4, v2 :: 
v_dual_bitop2_b32 v7, v1, v3 bitop3:0x88 ; encoding: [0x04,0x21,0x55,0xcf,0x01,0x01,0x02,0x00,0xfe,0x03,0x88,0x07] +0x04,0x21,0x55,0xcf,0x01,0x01,0x02,0x00,0xfe,0x03,0x88,0x07 + +# GFX1250: v_dual_lshrrev_b32 v254, v4, v2 :: v_dual_fma_f32 v7, v1, v3, v4 ; encoding: [0x04,0x31,0x55,0xcf,0x01,0x01,0x02,0x00,0xfe,0x03,0x04,0x07] +0x04,0x31,0x55,0xcf,0x01,0x01,0x02,0x00,0xfe,0x03,0x04,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, -1, v4 :: v_dual_add_f32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x54,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x40,0x54,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, -1, v4 :: v_dual_add_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x55,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x00,0x55,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, -1, v4 :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo ; encoding: [0xc1,0x90,0x54,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x6a,0x07] +0xc1,0x90,0x54,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x6a,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, -1, v4 :: v_dual_fmac_f32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x54,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x00,0x54,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, -1, v4 :: v_dual_lshlrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x10,0x55,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x10,0x55,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, -1, v4 :: v_dual_max_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xa0,0x54,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0xa0,0x54,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, -1, v4 :: v_dual_min_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xb0,0x54,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0xb0,0x54,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, -1, v4 :: v_dual_mov_b32 v7, src_scc 
; encoding: [0xc1,0x80,0x54,0xcf,0xfd,0x00,0x04,0x00,0xff,0x00,0x00,0x07] +0xc1,0x80,0x54,0xcf,0xfd,0x00,0x04,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x54,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x70,0x54,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, -1, v4 :: v_dual_mul_f32 v7, src_scc, v5 ; encoding: [0xc1,0x30,0x54,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x30,0x54,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, -1, v4 :: v_dual_sub_f32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x54,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x50,0x54,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, -1, v4 :: v_dual_subrev_f32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x54,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x60,0x54,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, 0.5, v3 :: v_dual_add_f32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x54,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x40,0x54,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, 0.5, v3 :: v_dual_add_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x55,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x00,0x55,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, 0.5, v3 :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo ; encoding: [0xf0,0x90,0x54,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x6a,0x07] +0xf0,0x90,0x54,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x6a,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, 0.5, v3 :: v_dual_fmac_f32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x54,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x00,0x54,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v7, 0.5, v2 ; encoding: 
[0xf0,0x10,0x55,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x10,0x55,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, 0.5, v3 :: v_dual_max_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xa0,0x54,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0xa0,0x54,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, 0.5, v3 :: v_dual_min_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xb0,0x54,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0xb0,0x54,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, 0.5, v3 :: v_dual_mov_b32 v7, 0.5 ; encoding: [0xf0,0x80,0x54,0xcf,0xf0,0x00,0x03,0x00,0xff,0x00,0x00,0x07] +0xf0,0x80,0x54,0xcf,0xf0,0x00,0x03,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x54,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x70,0x54,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, 0.5, v3 :: v_dual_mul_f32 v7, 0.5, v2 ; encoding: [0xf0,0x30,0x54,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x30,0x54,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, 0.5, v3 :: v_dual_sub_f32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x54,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x50,0x54,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, 0.5, v3 :: v_dual_subrev_f32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x54,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x60,0x54,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, exec_hi, v2 :: v_dual_add_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x40,0x54,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x40,0x54,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v7, vcc_hi, v3 ; encoding: 
[0x7f,0x00,0x55,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x00,0x55,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v7, exec_hi, v3, vcc_lo ; encoding: [0x7f,0x90,0x54,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7f,0x90,0x54,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, exec_hi, v2 :: v_dual_fmac_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x54,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x00,0x54,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v7, vcc_hi, v3 ; encoding: [0x7f,0x10,0x55,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x10,0x55,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, exec_hi, v2 :: v_dual_max_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xa0,0x54,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0xa0,0x54,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, exec_hi, v2 :: v_dual_min_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xb0,0x54,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0xb0,0x54,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x70,0x54,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x70,0x54,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, exec_hi, v2 :: v_dual_mul_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x30,0x54,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x30,0x54,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, exec_hi, v2 :: v_dual_sub_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x50,0x54,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x50,0x54,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, exec_hi, v2 :: v_dual_subrev_f32 v7, vcc_hi, v3 ; 
encoding: [0x7f,0x60,0x54,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x60,0x54,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, exec_hi, v255 :: v_dual_mov_b32 v7, vcc_hi ; encoding: [0x7f,0x80,0x54,0xcf,0x6b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7f,0x80,0x54,0xcf,0x6b,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, exec_lo, v2 :: v_dual_add_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x40,0x54,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x40,0x54,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x55,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x00,0x55,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v7, exec_lo, v3, vcc_lo ; encoding: [0x7e,0x90,0x54,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7e,0x90,0x54,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, exec_lo, v2 :: v_dual_fmac_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x54,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x00,0x54,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v7, ttmp15, v3 ; encoding: [0x7e,0x10,0x55,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x10,0x55,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, exec_lo, v2 :: v_dual_max_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xa0,0x54,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0xa0,0x54,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, exec_lo, v2 :: v_dual_min_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xb0,0x54,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0xb0,0x54,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, 
ttmp15, v3 ; encoding: [0x7e,0x70,0x54,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x70,0x54,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, exec_lo, v2 :: v_dual_mul_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x30,0x54,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x30,0x54,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, exec_lo, v2 :: v_dual_sub_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x50,0x54,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x50,0x54,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, exec_lo, v2 :: v_dual_subrev_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x60,0x54,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x60,0x54,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, exec_lo, v255 :: v_dual_mov_b32 v7, ttmp15 ; encoding: [0x7e,0x80,0x54,0xcf,0x7b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7e,0x80,0x54,0xcf,0x7b,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, m0, v2 :: v_dual_add_f32 v7, m0, v3 ; encoding: [0x7d,0x40,0x54,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x40,0x54,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, m0, v2 :: v_dual_add_nc_u32 v7, m0, v3 ; encoding: [0x7d,0x00,0x55,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x00,0x55,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, m0, v2 :: v_dual_cndmask_b32 v7, m0, v3, vcc_lo ; encoding: [0x7d,0x90,0x54,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7d,0x90,0x54,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, m0, v2 :: v_dual_fmac_f32 v7, m0, v3 ; encoding: [0x7d,0x00,0x54,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x00,0x54,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, m0, v2 :: v_dual_lshlrev_b32 v7, m0, v3 ; encoding: 
[0x7d,0x10,0x55,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x10,0x55,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, m0, v2 :: v_dual_max_num_f32 v7, m0, v3 ; encoding: [0x7d,0xa0,0x54,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0xa0,0x54,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, m0, v2 :: v_dual_min_num_f32 v7, m0, v3 ; encoding: [0x7d,0xb0,0x54,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0xb0,0x54,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v7, m0, v3 ; encoding: [0x7d,0x70,0x54,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x70,0x54,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, m0, v2 :: v_dual_mul_f32 v7, m0, v3 ; encoding: [0x7d,0x30,0x54,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x30,0x54,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, m0, v2 :: v_dual_sub_f32 v7, m0, v3 ; encoding: [0x7d,0x50,0x54,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x50,0x54,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, m0, v2 :: v_dual_subrev_f32 v7, m0, v3 ; encoding: [0x7d,0x60,0x54,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x60,0x54,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, m0, v255 :: v_dual_mov_b32 v7, m0 ; encoding: [0x7d,0x80,0x54,0xcf,0x7d,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7d,0x80,0x54,0xcf,0x7d,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, s1, v2 :: v_dual_add_f32 v7, s105, v3 ; encoding: [0x01,0x40,0x54,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x40,0x54,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, s1, v2 :: v_dual_add_nc_u32 v7, s105, v3 ; encoding: [0x01,0x00,0x55,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x01,0x00,0x55,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, s1, v2 :: v_dual_cndmask_b32 v7, s1, v3, vcc_lo ; encoding: [0x01,0x90,0x54,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x01,0x90,0x54,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, s1, v2 :: v_dual_fmac_f32 v7, s105, v3 ; encoding: [0x01,0x00,0x54,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x00,0x54,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, s1, v2 :: v_dual_lshlrev_b32 v7, s105, v3 ; encoding: [0x01,0x10,0x55,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x10,0x55,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, s1, v2 :: v_dual_max_num_f32 v7, s105, v3 ; encoding: [0x01,0xa0,0x54,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xa0,0x54,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, s1, v2 :: v_dual_min_num_f32 v7, s105, v3 ; encoding: [0x01,0xb0,0x54,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xb0,0x54,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v7, s105, v3 ; encoding: [0x01,0x70,0x54,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x70,0x54,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, s1, v2 :: v_dual_mul_f32 v7, s105, v3 ; encoding: [0x01,0x30,0x54,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x30,0x54,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, s1, v2 :: v_dual_sub_f32 v7, s105, v3 ; encoding: [0x01,0x50,0x54,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x50,0x54,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, s1, v2 :: v_dual_subrev_f32 v7, s105, v3 ; encoding: [0x01,0x60,0x54,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x01,0x60,0x54,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, s1, v255 :: v_dual_mov_b32 v7, s105 ; encoding: [0x01,0x80,0x54,0xcf,0x69,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x01,0x80,0x54,0xcf,0x69,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, s105, v2 :: v_dual_add_f32 v7, s1, v3 ; encoding: [0x69,0x40,0x54,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x40,0x54,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, s105, v2 :: v_dual_add_nc_u32 v7, s1, v3 ; encoding: [0x69,0x00,0x55,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x00,0x55,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, s105, v2 :: v_dual_cndmask_b32 v7, s105, v3, vcc_lo ; encoding: [0x69,0x90,0x54,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x69,0x90,0x54,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, s105, v2 :: v_dual_fmac_f32 v7, s1, v3 ; encoding: [0x69,0x00,0x54,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x00,0x54,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, s105, v2 :: v_dual_lshlrev_b32 v7, s1, v3 ; encoding: [0x69,0x10,0x55,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x10,0x55,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, s105, v2 :: v_dual_max_num_f32 v7, s1, v3 ; encoding: [0x69,0xa0,0x54,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0xa0,0x54,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, s105, v2 :: v_dual_min_num_f32 v7, s1, v3 ; encoding: [0x69,0xb0,0x54,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0xb0,0x54,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v7, s1, v3 ; encoding: [0x69,0x70,0x54,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x69,0x70,0x54,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, s105, v2 :: v_dual_mul_f32 v7, s1, v3 ; encoding: [0x69,0x30,0x54,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x30,0x54,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, s105, v2 :: v_dual_sub_f32 v7, s1, v3 ; encoding: [0x69,0x50,0x54,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x50,0x54,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, s105, v2 :: v_dual_subrev_f32 v7, s1, v3 ; encoding: [0x69,0x60,0x54,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x60,0x54,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, s105, v255 :: v_dual_mov_b32 v7, s1 ; encoding: [0x69,0x80,0x54,0xcf,0x01,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x69,0x80,0x54,0xcf,0x01,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, src_scc, v2 :: v_dual_add_f32 v7, -1, v3 ; encoding: [0xfd,0x40,0x54,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x40,0x54,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, src_scc, v2 :: v_dual_add_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x00,0x55,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x00,0x55,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, src_scc, v2 :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo ; encoding: [0xfd,0x90,0x54,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0xfd,0x90,0x54,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, src_scc, v2 :: v_dual_fmac_f32 v7, -1, v3 ; encoding: [0xfd,0x00,0x54,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x00,0x54,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v7, -1, v3 ; encoding: [0xfd,0x10,0x55,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0xfd,0x10,0x55,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, src_scc, v2 :: v_dual_max_num_f32 v7, -1, v3 ; encoding: [0xfd,0xa0,0x54,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0xa0,0x54,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, src_scc, v2 :: v_dual_min_num_f32 v7, -1, v3 ; encoding: [0xfd,0xb0,0x54,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0xb0,0x54,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v7, -1, v3 ; encoding: [0xfd,0x70,0x54,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x70,0x54,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, src_scc, v2 :: v_dual_mul_f32 v7, -1, v3 ; encoding: [0xfd,0x30,0x54,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x30,0x54,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, src_scc, v2 :: v_dual_sub_f32 v7, -1, v3 ; encoding: [0xfd,0x50,0x54,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x50,0x54,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, src_scc, v2 :: v_dual_subrev_f32 v7, -1, v3 ; encoding: [0xfd,0x60,0x54,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x60,0x54,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, src_scc, v255 :: v_dual_mov_b32 v7, -1 ; encoding: [0xfd,0x80,0x54,0xcf,0xc1,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0xfd,0x80,0x54,0xcf,0xc1,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, ttmp15, v2 :: v_dual_add_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x40,0x54,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x40,0x54,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x55,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x7b,0x00,0x55,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v7, ttmp15, v3, vcc_lo ; encoding: [0x7b,0x90,0x54,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7b,0x90,0x54,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, ttmp15, v2 :: v_dual_fmac_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x54,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x00,0x54,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v7, vcc_lo, v3 ; encoding: [0x7b,0x10,0x55,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x10,0x55,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, ttmp15, v2 :: v_dual_max_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xa0,0x54,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0xa0,0x54,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, ttmp15, v2 :: v_dual_min_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xb0,0x54,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0xb0,0x54,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x70,0x54,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x70,0x54,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, ttmp15, v2 :: v_dual_mul_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x30,0x54,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x30,0x54,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, ttmp15, v2 :: v_dual_sub_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x50,0x54,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x50,0x54,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, ttmp15, v2 :: v_dual_subrev_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x60,0x54,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x7b,0x60,0x54,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, ttmp15, v255 :: v_dual_mov_b32 v7, vcc_lo ; encoding: [0x7b,0x80,0x54,0xcf,0x6a,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7b,0x80,0x54,0xcf,0x6a,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v1, v2 :: v_dual_add_f32 v7, v255, v3 ; encoding: [0x01,0x41,0x54,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x41,0x54,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v1, v2 :: v_dual_add_nc_u32 v7, v255, v3 ; encoding: [0x01,0x01,0x55,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x01,0x55,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v1, v2 :: v_dual_cndmask_b32 v7, v255, v3, vcc_lo ; encoding: [0x01,0x91,0x54,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0x01,0x91,0x54,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v1, v2 :: v_dual_fmac_f32 v7, v255, v3 ; encoding: [0x01,0x01,0x54,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x01,0x54,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v1, v2 :: v_dual_lshlrev_b32 v7, v255, v3 ; encoding: [0x01,0x11,0x55,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x11,0x55,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v1, v2 :: v_dual_max_num_f32 v7, v255, v3 ; encoding: [0x01,0xa1,0x54,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xa1,0x54,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v1, v2 :: v_dual_min_num_f32 v7, v255, v3 ; encoding: [0x01,0xb1,0x54,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xb1,0x54,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v7, v255, v3 ; encoding: [0x01,0x71,0x54,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x01,0x71,0x54,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v1, v2 :: v_dual_mul_f32 v7, v255, v3 ; encoding: [0x01,0x31,0x54,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x31,0x54,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v1, v2 :: v_dual_sub_f32 v7, v255, v3 ; encoding: [0x01,0x51,0x54,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x51,0x54,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v1, v2 :: v_dual_subrev_f32 v7, v255, v3 ; encoding: [0x01,0x61,0x54,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x61,0x54,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v1, v255 :: v_dual_mov_b32 v7, v255 ; encoding: [0x01,0x81,0x54,0xcf,0xff,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0x01,0x81,0x54,0xcf,0xff,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v2, v2 :: v_dual_add_f32 v7, v3, v3 ; encoding: [0x02,0x41,0x54,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x41,0x54,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v2, v2 :: v_dual_add_nc_u32 v7, v3, v3 ; encoding: [0x02,0x01,0x55,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x01,0x55,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v2, v2 :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo ; encoding: [0x02,0x91,0x54,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0x02,0x91,0x54,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v2, v2 :: v_dual_fmac_f32 v7, v3, v3 ; encoding: [0x02,0x01,0x54,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x01,0x54,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v2, v2 :: v_dual_lshlrev_b32 v7, v3, v3 ; encoding: [0x02,0x11,0x55,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x11,0x55,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: 
v_dual_lshrrev_b32 v255, v2, v2 :: v_dual_max_num_f32 v7, v3, v3 ; encoding: [0x02,0xa1,0x54,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0xa1,0x54,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v2, v2 :: v_dual_min_num_f32 v7, v3, v3 ; encoding: [0x02,0xb1,0x54,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0xb1,0x54,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v7, v3, v3 ; encoding: [0x02,0x71,0x54,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x71,0x54,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v2, v2 :: v_dual_mul_f32 v7, v3, v3 ; encoding: [0x02,0x31,0x54,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x31,0x54,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v2, v2 :: v_dual_sub_f32 v7, v3, v3 ; encoding: [0x02,0x51,0x54,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x51,0x54,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v2, v2 :: v_dual_subrev_f32 v7, v3, v3 ; encoding: [0x02,0x61,0x54,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x61,0x54,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v2, v255 :: v_dual_mov_b32 v7, v3 ; encoding: [0x02,0x81,0x54,0xcf,0x03,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0x02,0x81,0x54,0xcf,0x03,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v255, v2 :: v_dual_add_f32 v7, v2, v3 ; encoding: [0xff,0x41,0x54,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x41,0x54,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v255, v2 :: v_dual_add_nc_u32 v7, v2, v3 ; encoding: [0xff,0x01,0x55,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x01,0x55,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v255, v2 :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo ; 
encoding: [0xff,0x91,0x54,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0xff,0x91,0x54,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v255, v2 :: v_dual_fmac_f32 v7, v2, v3 ; encoding: [0xff,0x01,0x54,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x01,0x54,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v255, v2 :: v_dual_lshlrev_b32 v7, v2, v3 ; encoding: [0xff,0x11,0x55,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x11,0x55,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v255, v2 :: v_dual_max_num_f32 v7, v2, v3 ; encoding: [0xff,0xa1,0x54,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0xa1,0x54,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v255, v2 :: v_dual_min_num_f32 v7, v2, v3 ; encoding: [0xff,0xb1,0x54,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0xb1,0x54,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v7, v2, v3 ; encoding: [0xff,0x71,0x54,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x71,0x54,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v255, v2 :: v_dual_mul_f32 v7, v2, v3 ; encoding: [0xff,0x31,0x54,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x31,0x54,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v255, v2 :: v_dual_sub_f32 v7, v2, v3 ; encoding: [0xff,0x51,0x54,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x51,0x54,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v255, v2 :: v_dual_subrev_f32 v7, v2, v3 ; encoding: [0xff,0x61,0x54,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x61,0x54,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v255, v255 :: v_dual_mov_b32 v7, v2 ; encoding: 
[0xff,0x81,0x54,0xcf,0x02,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0xff,0x81,0x54,0xcf,0x02,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v3, v2 :: v_dual_add_f32 v7, v4, v3 ; encoding: [0x03,0x41,0x54,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x41,0x54,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v3, v2 :: v_dual_add_nc_u32 v7, v4, v3 ; encoding: [0x03,0x01,0x55,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x01,0x55,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v3, v2 :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo ; encoding: [0x03,0x91,0x54,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0x03,0x91,0x54,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v3, v2 :: v_dual_fmac_f32 v7, v4, v3 ; encoding: [0x03,0x01,0x54,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x01,0x54,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v3, v2 :: v_dual_lshlrev_b32 v7, v4, v3 ; encoding: [0x03,0x11,0x55,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x11,0x55,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v3, v2 :: v_dual_max_num_f32 v7, v4, v3 ; encoding: [0x03,0xa1,0x54,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0xa1,0x54,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v3, v2 :: v_dual_min_num_f32 v7, v4, v3 ; encoding: [0x03,0xb1,0x54,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0xb1,0x54,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v7, v4, v3 ; encoding: [0x03,0x71,0x54,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x71,0x54,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v3, v2 :: v_dual_mul_f32 v7, v4, v3 ; encoding: [0x03,0x31,0x54,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x03,0x31,0x54,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v3, v2 :: v_dual_sub_f32 v7, v4, v3 ; encoding: [0x03,0x51,0x54,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x51,0x54,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v3, v2 :: v_dual_subrev_f32 v7, v4, v3 ; encoding: [0x03,0x61,0x54,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x61,0x54,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v3, v255 :: v_dual_mov_b32 v7, v4 ; encoding: [0x03,0x81,0x54,0xcf,0x04,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0x03,0x81,0x54,0xcf,0x04,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v4, v2 :: v_dual_add_nc_u32 v7, v1, v3 ; encoding: [0x04,0x01,0x55,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x01,0x55,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, s96 ; encoding: [0x04,0x91,0x54,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x60,0x07] +0x04,0x91,0x54,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x60,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo ; encoding: [0x04,0x91,0x54,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0x04,0x91,0x54,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v4, v2 :: v_dual_fmac_f32 v7, v1, v3 ; encoding: [0x04,0x01,0x54,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x01,0x54,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v4, v2 :: v_dual_lshlrev_b32 v7, v1, v3 ; encoding: [0x04,0x11,0x55,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x11,0x55,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v4, v2 :: v_dual_max_num_f32 v7, v1, v3 ; encoding: [0x04,0xa1,0x54,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0xa1,0x54,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: 
v_dual_lshrrev_b32 v255, v4, v2 :: v_dual_min_num_f32 v7, v1, v3 ; encoding: [0x04,0xb1,0x54,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0xb1,0x54,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v7, v1, v3 ; encoding: [0x04,0x71,0x54,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x71,0x54,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v4, v2 :: v_dual_mul_f32 v7, v1, v3 ; encoding: [0x04,0x31,0x54,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x31,0x54,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v4, v2 :: v_dual_sub_f32 v7, v1, v3 ; encoding: [0x04,0x51,0x54,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x51,0x54,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v4, v2 :: v_dual_subrev_f32 v7, v1, v3 ; encoding: [0x04,0x61,0x54,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x61,0x54,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, v4, v255 :: v_dual_mov_b32 v7, v1 ; encoding: [0x04,0x81,0x54,0xcf,0x01,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0x04,0x81,0x54,0xcf,0x01,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, vcc_hi, v2 :: v_dual_add_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x40,0x54,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x40,0x54,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x55,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x00,0x55,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v7, vcc_hi, v3, vcc_lo ; encoding: [0x6b,0x90,0x54,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x6b,0x90,0x54,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, vcc_hi, v2 :: v_dual_fmac_f32 
v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x54,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x00,0x54,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v7, exec_lo, v3 ; encoding: [0x6b,0x10,0x55,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x10,0x55,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xa0,0x54,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0xa0,0x54,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xb0,0x54,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0xb0,0x54,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x70,0x54,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x70,0x54,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, vcc_hi, v2 :: v_dual_mul_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x30,0x54,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x30,0x54,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, vcc_hi, v2 :: v_dual_sub_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x50,0x54,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x50,0x54,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x60,0x54,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x60,0x54,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, vcc_hi, v255 :: v_dual_mov_b32 v7, exec_lo ; encoding: [0x6b,0x80,0x54,0xcf,0x7e,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x6b,0x80,0x54,0xcf,0x7e,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, vcc_lo, v2 :: v_dual_add_f32 v7, 
exec_hi, v3 ; encoding: [0x6a,0x40,0x54,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x40,0x54,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x55,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x00,0x55,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo ; encoding: [0x6a,0x90,0x54,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x6a,0x90,0x54,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x54,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x00,0x54,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x10,0x55,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x10,0x55,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xa0,0x54,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0xa0,0x54,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xb0,0x54,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0xb0,0x54,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x54,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x70,0x54,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, vcc_lo, v2 :: v_dual_mul_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x30,0x54,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x30,0x54,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, vcc_lo, v2 :: 
v_dual_sub_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x54,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x50,0x54,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x54,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x60,0x54,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_lshrrev_b32 v255, vcc_lo, v255 :: v_dual_mov_b32 v7, exec_hi ; encoding: [0x6a,0x80,0x54,0xcf,0x7f,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x6a,0x80,0x54,0xcf,0x7f,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v0, -v1, v2 :: v_dual_min_num_f32 v5, v6, v7 ; encoding: [0x01,0xb1,0x28,0xcf,0x06,0x03,0x02,0x00,0x00,0x07,0x00,0x05] +0x01,0xb1,0x28,0xcf,0x06,0x03,0x02,0x00,0x00,0x07,0x00,0x05 + +# GFX1250: v_dual_max_num_f32 v0, -v1, v2 :: v_dual_mul_dx9_zero_f32 v5, -v6, -v7 ; encoding: [0x01,0x71,0x28,0xcf,0x06,0x33,0x02,0x00,0x00,0x07,0x00,0x05] +0x01,0x71,0x28,0xcf,0x06,0x33,0x02,0x00,0x00,0x07,0x00,0x05 + +# GFX1250: v_dual_max_num_f32 v0, v1, -v2 :: v_dual_cndmask_b32 v5, v6, v7, s96 ; encoding: [0x01,0x91,0x28,0xcf,0x06,0x05,0x02,0x00,0x00,0x07,0x60,0x05] +0x01,0x91,0x28,0xcf,0x06,0x05,0x02,0x00,0x00,0x07,0x60,0x05 + +# GFX1250: v_dual_max_num_f32 v0, v1, -v2 :: v_dual_cndmask_b32 v5, v6, v7, vcc_lo ; encoding: [0x01,0x91,0x28,0xcf,0x06,0x05,0x02,0x00,0x00,0x07,0x6a,0x05] +0x01,0x91,0x28,0xcf,0x06,0x05,0x02,0x00,0x00,0x07,0x6a,0x05 + +# GFX1250: v_dual_max_num_f32 v0, v1, -v2 :: v_dual_mov_b32 v5, v6 ; encoding: [0x01,0x81,0x28,0xcf,0x06,0x05,0x02,0x00,0x00,0x00,0x00,0x05] +0x01,0x81,0x28,0xcf,0x06,0x05,0x02,0x00,0x00,0x00,0x00,0x05 + +# GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_add_f32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x28,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x40,0x28,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_add_nc_u32 v7, src_scc, v5 ; encoding: 
[0xc1,0x00,0x29,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x00,0x29,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_ashrrev_i32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x29,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x60,0x29,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo ; encoding: [0xc1,0x90,0x28,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x6a,0x07] +0xc1,0x90,0x28,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x6a,0x07 + +# GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_fmac_f32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x28,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x00,0x28,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_lshlrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x10,0x29,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x10,0x29,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_lshrrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x29,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x50,0x29,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_max_i32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x29,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x70,0x29,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_max_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xa0,0x28,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0xa0,0x28,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_min_i32 v7, src_scc, v5 ; encoding: [0xc1,0x80,0x29,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x80,0x29,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_min_num_f32 v7, src_scc, v5 ; encoding: 
[0xc1,0xb0,0x28,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0xb0,0x28,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_mov_b32 v7, src_scc ; encoding: [0xc1,0x80,0x28,0xcf,0xfd,0x00,0x04,0x00,0xff,0x00,0x00,0x07] +0xc1,0x80,0x28,0xcf,0xfd,0x00,0x04,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x28,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x70,0x28,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_mul_f32 v7, src_scc, v5 ; encoding: [0xc1,0x30,0x28,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x30,0x28,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_sub_f32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x28,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x50,0x28,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_sub_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x29,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x40,0x29,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, -1, v4 :: v_dual_subrev_f32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x28,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x60,0x28,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_add_f32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x28,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x40,0x28,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_add_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x29,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x00,0x29,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v7, 0.5, v2 ; encoding: 
[0xf0,0x60,0x29,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x60,0x29,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo ; encoding: [0xf0,0x90,0x28,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x6a,0x07] +0xf0,0x90,0x28,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x6a,0x07 + +# GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_fmac_f32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x28,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x00,0x28,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x10,0x29,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x10,0x29,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x29,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x50,0x29,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_max_i32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x29,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x70,0x29,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_max_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xa0,0x28,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0xa0,0x28,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_min_i32 v7, 0.5, v2 ; encoding: [0xf0,0x80,0x29,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x80,0x29,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_min_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xb0,0x28,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0xb0,0x28,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_mov_b32 v7, 0.5 ; encoding: [0xf0,0x80,0x28,0xcf,0xf0,0x00,0x03,0x00,0xff,0x00,0x00,0x07] 
+0xf0,0x80,0x28,0xcf,0xf0,0x00,0x03,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x28,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x70,0x28,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_mul_f32 v7, 0.5, v2 ; encoding: [0xf0,0x30,0x28,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x30,0x28,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_sub_f32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x28,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x50,0x28,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x29,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x40,0x29,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, 0.5, v3 :: v_dual_subrev_f32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x28,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x60,0x28,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_add_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x40,0x28,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x40,0x28,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x29,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x00,0x29,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v7, vcc_hi, v3 ; encoding: [0x7f,0x60,0x29,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x60,0x29,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v7, exec_hi, v3, vcc_lo ; encoding: [0x7f,0x90,0x28,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] 
+0x7f,0x90,0x28,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_fmac_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x28,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x00,0x28,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v7, vcc_hi, v3 ; encoding: [0x7f,0x10,0x29,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x10,0x29,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v7, vcc_hi, v3 ; encoding: [0x7f,0x50,0x29,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x50,0x29,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_max_i32 v7, vcc_hi, v3 ; encoding: [0x7f,0x70,0x29,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x70,0x29,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_max_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xa0,0x28,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0xa0,0x28,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_min_i32 v7, vcc_hi, v3 ; encoding: [0x7f,0x80,0x29,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x80,0x29,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_min_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xb0,0x28,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0xb0,0x28,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x70,0x28,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x70,0x28,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_mul_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x30,0x28,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x7f,0x30,0x28,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_sub_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x50,0x28,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x50,0x28,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v7, vcc_hi, v3 ; encoding: [0x7f,0x40,0x29,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x40,0x29,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, exec_hi, v2 :: v_dual_subrev_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x60,0x28,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x60,0x28,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, exec_hi, v255 :: v_dual_mov_b32 v7, vcc_hi ; encoding: [0x7f,0x80,0x28,0xcf,0x6b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7f,0x80,0x28,0xcf,0x6b,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_add_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x40,0x28,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x40,0x28,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x29,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x00,0x29,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v7, ttmp15, v3 ; encoding: [0x7e,0x60,0x29,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x60,0x29,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v7, exec_lo, v3, vcc_lo ; encoding: [0x7e,0x90,0x28,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7e,0x90,0x28,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_fmac_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x28,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x7e,0x00,0x28,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v7, ttmp15, v3 ; encoding: [0x7e,0x10,0x29,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x10,0x29,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_lshrrev_b32 v7, ttmp15, v3 ; encoding: [0x7e,0x50,0x29,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x50,0x29,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_max_i32 v7, ttmp15, v3 ; encoding: [0x7e,0x70,0x29,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x70,0x29,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_max_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xa0,0x28,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0xa0,0x28,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_min_i32 v7, ttmp15, v3 ; encoding: [0x7e,0x80,0x29,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x80,0x29,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_min_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xb0,0x28,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0xb0,0x28,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x70,0x28,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x70,0x28,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_mul_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x30,0x28,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x30,0x28,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_sub_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x50,0x28,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x7e,0x50,0x28,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_sub_nc_u32 v7, ttmp15, v3 ; encoding: [0x7e,0x40,0x29,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x40,0x29,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, exec_lo, v2 :: v_dual_subrev_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x60,0x28,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x60,0x28,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, exec_lo, v255 :: v_dual_mov_b32 v7, ttmp15 ; encoding: [0x7e,0x80,0x28,0xcf,0x7b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7e,0x80,0x28,0xcf,0x7b,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, m0, v2 :: v_dual_add_f32 v7, m0, v3 ; encoding: [0x7d,0x40,0x28,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x40,0x28,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, m0, v2 :: v_dual_add_nc_u32 v7, m0, v3 ; encoding: [0x7d,0x00,0x29,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x00,0x29,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, m0, v2 :: v_dual_ashrrev_i32 v7, m0, v3 ; encoding: [0x7d,0x60,0x29,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x60,0x29,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, m0, v2 :: v_dual_cndmask_b32 v7, m0, v3, vcc_lo ; encoding: [0x7d,0x90,0x28,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7d,0x90,0x28,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_max_num_f32 v255, m0, v2 :: v_dual_fmac_f32 v7, m0, v3 ; encoding: [0x7d,0x00,0x28,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x00,0x28,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, m0, v2 :: v_dual_lshlrev_b32 v7, m0, v3 ; encoding: [0x7d,0x10,0x29,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x7d,0x10,0x29,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, m0, v2 :: v_dual_lshrrev_b32 v7, m0, v3 ; encoding: [0x7d,0x50,0x29,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x50,0x29,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, m0, v2 :: v_dual_max_i32 v7, m0, v3 ; encoding: [0x7d,0x70,0x29,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x70,0x29,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, m0, v2 :: v_dual_max_num_f32 v7, m0, v3 ; encoding: [0x7d,0xa0,0x28,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0xa0,0x28,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, m0, v2 :: v_dual_min_i32 v7, m0, v3 ; encoding: [0x7d,0x80,0x29,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x80,0x29,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, m0, v2 :: v_dual_min_num_f32 v7, m0, v3 ; encoding: [0x7d,0xb0,0x28,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0xb0,0x28,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v7, m0, v3 ; encoding: [0x7d,0x70,0x28,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x70,0x28,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, m0, v2 :: v_dual_mul_f32 v7, m0, v3 ; encoding: [0x7d,0x30,0x28,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x30,0x28,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, m0, v2 :: v_dual_sub_f32 v7, m0, v3 ; encoding: [0x7d,0x50,0x28,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x50,0x28,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, m0, v2 :: v_dual_sub_nc_u32 v7, m0, v3 ; encoding: [0x7d,0x40,0x29,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x40,0x29,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: 
v_dual_max_num_f32 v255, m0, v2 :: v_dual_subrev_f32 v7, m0, v3 ; encoding: [0x7d,0x60,0x28,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x60,0x28,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, m0, v255 :: v_dual_mov_b32 v7, m0 ; encoding: [0x7d,0x80,0x28,0xcf,0x7d,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7d,0x80,0x28,0xcf,0x7d,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, s1, v2 :: v_dual_add_f32 v7, s105, v3 ; encoding: [0x01,0x40,0x28,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x40,0x28,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, s1, v2 :: v_dual_add_nc_u32 v7, s105, v3 ; encoding: [0x01,0x00,0x29,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x00,0x29,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, s1, v2 :: v_dual_ashrrev_i32 v7, s105, v3 ; encoding: [0x01,0x60,0x29,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x60,0x29,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, s1, v2 :: v_dual_cndmask_b32 v7, s1, v3, vcc_lo ; encoding: [0x01,0x90,0x28,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x01,0x90,0x28,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_max_num_f32 v255, s1, v2 :: v_dual_fmac_f32 v7, s105, v3 ; encoding: [0x01,0x00,0x28,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x00,0x28,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, s1, v2 :: v_dual_lshlrev_b32 v7, s105, v3 ; encoding: [0x01,0x10,0x29,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x10,0x29,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, s1, v2 :: v_dual_lshrrev_b32 v7, s105, v3 ; encoding: [0x01,0x50,0x29,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x50,0x29,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, s1, v2 :: v_dual_max_i32 v7, s105, v3 ; 
encoding: [0x01,0x70,0x29,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x70,0x29,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, s1, v2 :: v_dual_max_num_f32 v7, s105, v3 ; encoding: [0x01,0xa0,0x28,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xa0,0x28,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, s1, v2 :: v_dual_min_i32 v7, s105, v3 ; encoding: [0x01,0x80,0x29,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x80,0x29,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, s1, v2 :: v_dual_min_num_f32 v7, s105, v3 ; encoding: [0x01,0xb0,0x28,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xb0,0x28,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v7, s105, v3 ; encoding: [0x01,0x70,0x28,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x70,0x28,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, s1, v2 :: v_dual_mul_f32 v7, s105, v3 ; encoding: [0x01,0x30,0x28,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x30,0x28,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, s1, v2 :: v_dual_sub_f32 v7, s105, v3 ; encoding: [0x01,0x50,0x28,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x50,0x28,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, s1, v2 :: v_dual_sub_nc_u32 v7, s105, v3 ; encoding: [0x01,0x40,0x29,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x40,0x29,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, s1, v2 :: v_dual_subrev_f32 v7, s105, v3 ; encoding: [0x01,0x60,0x28,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x60,0x28,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, s1, v255 :: v_dual_mov_b32 v7, s105 ; encoding: 
[0x01,0x80,0x28,0xcf,0x69,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x01,0x80,0x28,0xcf,0x69,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, s105, v2 :: v_dual_add_f32 v7, s1, v3 ; encoding: [0x69,0x40,0x28,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x40,0x28,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, s105, v2 :: v_dual_add_nc_u32 v7, s1, v3 ; encoding: [0x69,0x00,0x29,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x00,0x29,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, s105, v2 :: v_dual_ashrrev_i32 v7, s1, v3 ; encoding: [0x69,0x60,0x29,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x60,0x29,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, s105, v2 :: v_dual_cndmask_b32 v7, s105, v3, vcc_lo ; encoding: [0x69,0x90,0x28,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x69,0x90,0x28,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_max_num_f32 v255, s105, v2 :: v_dual_fmac_f32 v7, s1, v3 ; encoding: [0x69,0x00,0x28,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x00,0x28,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, s105, v2 :: v_dual_lshlrev_b32 v7, s1, v3 ; encoding: [0x69,0x10,0x29,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x10,0x29,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, s105, v2 :: v_dual_lshrrev_b32 v7, s1, v3 ; encoding: [0x69,0x50,0x29,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x50,0x29,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, s105, v2 :: v_dual_max_i32 v7, s1, v3 ; encoding: [0x69,0x70,0x29,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x70,0x29,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, s105, v2 :: v_dual_max_num_f32 v7, s1, v3 ; encoding: 
[0x69,0xa0,0x28,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0xa0,0x28,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, s105, v2 :: v_dual_min_i32 v7, s1, v3 ; encoding: [0x69,0x80,0x29,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x80,0x29,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, s105, v2 :: v_dual_min_num_f32 v7, s1, v3 ; encoding: [0x69,0xb0,0x28,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0xb0,0x28,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v7, s1, v3 ; encoding: [0x69,0x70,0x28,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x70,0x28,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, s105, v2 :: v_dual_mul_f32 v7, s1, v3 ; encoding: [0x69,0x30,0x28,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x30,0x28,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, s105, v2 :: v_dual_sub_f32 v7, s1, v3 ; encoding: [0x69,0x50,0x28,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x50,0x28,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, s105, v2 :: v_dual_sub_nc_u32 v7, s1, v3 ; encoding: [0x69,0x40,0x29,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x40,0x29,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, s105, v2 :: v_dual_subrev_f32 v7, s1, v3 ; encoding: [0x69,0x60,0x28,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x60,0x28,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, s105, v255 :: v_dual_mov_b32 v7, s1 ; encoding: [0x69,0x80,0x28,0xcf,0x01,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x69,0x80,0x28,0xcf,0x01,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_add_f32 v7, -1, v3 ; encoding: [0xfd,0x40,0x28,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0xfd,0x40,0x28,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_add_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x00,0x29,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x00,0x29,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v7, -1, v3 ; encoding: [0xfd,0x60,0x29,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x60,0x29,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo ; encoding: [0xfd,0x90,0x28,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0xfd,0x90,0x28,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_fmac_f32 v7, -1, v3 ; encoding: [0xfd,0x00,0x28,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x00,0x28,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v7, -1, v3 ; encoding: [0xfd,0x10,0x29,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x10,0x29,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v7, -1, v3 ; encoding: [0xfd,0x50,0x29,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x50,0x29,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_max_i32 v7, -1, v3 ; encoding: [0xfd,0x70,0x29,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x70,0x29,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_max_num_f32 v7, -1, v3 ; encoding: [0xfd,0xa0,0x28,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0xa0,0x28,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_min_i32 v7, -1, v3 ; encoding: [0xfd,0x80,0x29,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0xfd,0x80,0x29,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_min_num_f32 v7, -1, v3 ; encoding: [0xfd,0xb0,0x28,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0xb0,0x28,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v7, -1, v3 ; encoding: [0xfd,0x70,0x28,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x70,0x28,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_mul_f32 v7, -1, v3 ; encoding: [0xfd,0x30,0x28,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x30,0x28,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_sub_f32 v7, -1, v3 ; encoding: [0xfd,0x50,0x28,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x50,0x28,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x40,0x29,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x40,0x29,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, src_scc, v2 :: v_dual_subrev_f32 v7, -1, v3 ; encoding: [0xfd,0x60,0x28,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x60,0x28,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, src_scc, v255 :: v_dual_mov_b32 v7, -1 ; encoding: [0xfd,0x80,0x28,0xcf,0xc1,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0xfd,0x80,0x28,0xcf,0xc1,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_add_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x40,0x28,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x40,0x28,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x29,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x7b,0x00,0x29,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v7, vcc_lo, v3 ; encoding: [0x7b,0x60,0x29,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x60,0x29,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v7, ttmp15, v3, vcc_lo ; encoding: [0x7b,0x90,0x28,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7b,0x90,0x28,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_fmac_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x28,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x00,0x28,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v7, vcc_lo, v3 ; encoding: [0x7b,0x10,0x29,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x10,0x29,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v7, vcc_lo, v3 ; encoding: [0x7b,0x50,0x29,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x50,0x29,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_max_i32 v7, vcc_lo, v3 ; encoding: [0x7b,0x70,0x29,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x70,0x29,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_max_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xa0,0x28,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0xa0,0x28,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_min_i32 v7, vcc_lo, v3 ; encoding: [0x7b,0x80,0x29,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x80,0x29,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_min_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xb0,0x28,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x7b,0xb0,0x28,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x70,0x28,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x70,0x28,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_mul_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x30,0x28,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x30,0x28,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_sub_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x50,0x28,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x50,0x28,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_sub_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7b,0x40,0x29,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x40,0x29,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, ttmp15, v2 :: v_dual_subrev_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x60,0x28,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x60,0x28,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, ttmp15, v255 :: v_dual_mov_b32 v7, vcc_lo ; encoding: [0x7b,0x80,0x28,0xcf,0x6a,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7b,0x80,0x28,0xcf,0x6a,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v1, v2 :: v_dual_add_f32 v7, v255, v3 ; encoding: [0x01,0x41,0x28,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x41,0x28,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v1, v2 :: v_dual_add_nc_u32 v7, v255, v3 ; encoding: [0x01,0x01,0x29,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x01,0x29,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v1, v2 :: v_dual_ashrrev_i32 v7, v255, v3 ; encoding: [0x01,0x61,0x29,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x01,0x61,0x29,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v1, v2 :: v_dual_cndmask_b32 v7, v255, v3, vcc_lo ; encoding: [0x01,0x91,0x28,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0x01,0x91,0x28,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v1, v2 :: v_dual_fmac_f32 v7, v255, v3 ; encoding: [0x01,0x01,0x28,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x01,0x28,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v1, v2 :: v_dual_lshlrev_b32 v7, v255, v3 ; encoding: [0x01,0x11,0x29,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x11,0x29,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v1, v2 :: v_dual_lshrrev_b32 v7, v255, v3 ; encoding: [0x01,0x51,0x29,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x51,0x29,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v1, v2 :: v_dual_max_i32 v7, v255, v3 ; encoding: [0x01,0x71,0x29,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x71,0x29,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v1, v2 :: v_dual_max_num_f32 v7, v255, v3 ; encoding: [0x01,0xa1,0x28,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xa1,0x28,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v1, v2 :: v_dual_min_i32 v7, v255, v3 ; encoding: [0x01,0x81,0x29,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x81,0x29,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v1, v2 :: v_dual_min_num_f32 v7, v255, v3 ; encoding: [0x01,0xb1,0x28,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xb1,0x28,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v7, v255, v3 ; encoding: [0x01,0x71,0x28,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x01,0x71,0x28,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v1, v2 :: v_dual_mul_f32 v7, v255, v3 ; encoding: [0x01,0x31,0x28,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x31,0x28,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v1, v2 :: v_dual_sub_f32 v7, v255, v3 ; encoding: [0x01,0x51,0x28,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x51,0x28,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v1, v2 :: v_dual_sub_nc_u32 v7, v255, v3 ; encoding: [0x01,0x41,0x29,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x41,0x29,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v1, v2 :: v_dual_subrev_f32 v7, v255, v3 ; encoding: [0x01,0x61,0x28,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x61,0x28,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v1, v255 :: v_dual_mov_b32 v7, v255 ; encoding: [0x01,0x81,0x28,0xcf,0xff,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0x01,0x81,0x28,0xcf,0xff,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v2, v2 :: v_dual_add_f32 v7, v3, v3 ; encoding: [0x02,0x41,0x28,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x41,0x28,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v2, v2 :: v_dual_add_nc_u32 v7, v3, v3 ; encoding: [0x02,0x01,0x29,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x01,0x29,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v2, v2 :: v_dual_ashrrev_i32 v7, v3, v3 ; encoding: [0x02,0x61,0x29,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x61,0x29,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v2, v2 :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo ; encoding: [0x02,0x91,0x28,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0x02,0x91,0x28,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: 
v_dual_max_num_f32 v255, v2, v2 :: v_dual_fmac_f32 v7, v3, v3 ; encoding: [0x02,0x01,0x28,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x01,0x28,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v2, v2 :: v_dual_lshlrev_b32 v7, v3, v3 ; encoding: [0x02,0x11,0x29,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x11,0x29,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v2, v2 :: v_dual_lshrrev_b32 v7, v3, v3 ; encoding: [0x02,0x51,0x29,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x51,0x29,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v2, v2 :: v_dual_max_i32 v7, v3, v3 ; encoding: [0x02,0x71,0x29,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x71,0x29,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v2, v2 :: v_dual_max_num_f32 v7, v3, v3 ; encoding: [0x02,0xa1,0x28,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0xa1,0x28,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v2, v2 :: v_dual_min_i32 v7, v3, v3 ; encoding: [0x02,0x81,0x29,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x81,0x29,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v2, v2 :: v_dual_min_num_f32 v7, v3, v3 ; encoding: [0x02,0xb1,0x28,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0xb1,0x28,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v7, v3, v3 ; encoding: [0x02,0x71,0x28,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x71,0x28,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v2, v2 :: v_dual_mul_f32 v7, v3, v3 ; encoding: [0x02,0x31,0x28,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x31,0x28,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v2, v2 :: v_dual_sub_f32 v7, v3, v3 ; encoding: 
[0x02,0x51,0x28,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x51,0x28,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v2, v2 :: v_dual_sub_nc_u32 v7, v3, v3 ; encoding: [0x02,0x41,0x29,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x41,0x29,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v2, v2 :: v_dual_subrev_f32 v7, v3, v3 ; encoding: [0x02,0x61,0x28,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x61,0x28,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v2, v255 :: v_dual_mov_b32 v7, v3 ; encoding: [0x02,0x81,0x28,0xcf,0x03,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0x02,0x81,0x28,0xcf,0x03,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v255, v2 :: v_dual_add_f32 v7, v2, v3 ; encoding: [0xff,0x41,0x28,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x41,0x28,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v255, v2 :: v_dual_add_nc_u32 v7, v2, v3 ; encoding: [0xff,0x01,0x29,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x01,0x29,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v255, v2 :: v_dual_ashrrev_i32 v7, v2, v3 ; encoding: [0xff,0x61,0x29,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x61,0x29,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v255, v2 :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo ; encoding: [0xff,0x91,0x28,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0xff,0x91,0x28,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v255, v2 :: v_dual_fmac_f32 v7, v2, v3 ; encoding: [0xff,0x01,0x28,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x01,0x28,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v255, v2 :: v_dual_lshlrev_b32 v7, v2, v3 ; encoding: [0xff,0x11,0x29,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] 
+0xff,0x11,0x29,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v255, v2 :: v_dual_lshrrev_b32 v7, v2, v3 ; encoding: [0xff,0x51,0x29,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x51,0x29,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v255, v2 :: v_dual_max_i32 v7, v2, v3 ; encoding: [0xff,0x71,0x29,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x71,0x29,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v255, v2 :: v_dual_max_num_f32 v7, v2, v3 ; encoding: [0xff,0xa1,0x28,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0xa1,0x28,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v255, v2 :: v_dual_min_i32 v7, v2, v3 ; encoding: [0xff,0x81,0x29,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x81,0x29,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v255, v2 :: v_dual_min_num_f32 v7, v2, v3 ; encoding: [0xff,0xb1,0x28,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0xb1,0x28,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v7, v2, v3 ; encoding: [0xff,0x71,0x28,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x71,0x28,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v255, v2 :: v_dual_mul_f32 v7, v2, v3 ; encoding: [0xff,0x31,0x28,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x31,0x28,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v255, v2 :: v_dual_sub_f32 v7, v2, v3 ; encoding: [0xff,0x51,0x28,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x51,0x28,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v255, v2 :: v_dual_sub_nc_u32 v7, v2, v3 ; encoding: [0xff,0x41,0x29,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x41,0x29,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# 
GFX1250: v_dual_max_num_f32 v255, v255, v2 :: v_dual_subrev_f32 v7, v2, v3 ; encoding: [0xff,0x61,0x28,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x61,0x28,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v255, v255 :: v_dual_mov_b32 v7, v2 ; encoding: [0xff,0x81,0x28,0xcf,0x02,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0xff,0x81,0x28,0xcf,0x02,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v3, v2 :: v_dual_add_f32 v7, v4, v3 ; encoding: [0x03,0x41,0x28,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x41,0x28,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v3, v2 :: v_dual_add_nc_u32 v7, v4, v3 ; encoding: [0x03,0x01,0x29,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x01,0x29,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v3, v2 :: v_dual_ashrrev_i32 v7, v4, v3 ; encoding: [0x03,0x61,0x29,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x61,0x29,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v3, v2 :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo ; encoding: [0x03,0x91,0x28,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0x03,0x91,0x28,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v3, v2 :: v_dual_fmac_f32 v7, v4, v3 ; encoding: [0x03,0x01,0x28,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x01,0x28,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v3, v2 :: v_dual_lshlrev_b32 v7, v4, v3 ; encoding: [0x03,0x11,0x29,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x11,0x29,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v3, v2 :: v_dual_lshrrev_b32 v7, v4, v3 ; encoding: [0x03,0x51,0x29,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x51,0x29,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v3, v2 :: v_dual_max_i32 v7, v4, v3 ; 
encoding: [0x03,0x71,0x29,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x71,0x29,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v3, v2 :: v_dual_max_num_f32 v7, v4, v3 ; encoding: [0x03,0xa1,0x28,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0xa1,0x28,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v3, v2 :: v_dual_min_i32 v7, v4, v3 ; encoding: [0x03,0x81,0x29,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x81,0x29,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v3, v2 :: v_dual_min_num_f32 v7, v4, v3 ; encoding: [0x03,0xb1,0x28,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0xb1,0x28,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v7, v4, v3 ; encoding: [0x03,0x71,0x28,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x71,0x28,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v3, v2 :: v_dual_mul_f32 v7, v4, v3 ; encoding: [0x03,0x31,0x28,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x31,0x28,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v3, v2 :: v_dual_sub_f32 v7, v4, v3 ; encoding: [0x03,0x51,0x28,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x51,0x28,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v3, v2 :: v_dual_sub_nc_u32 v7, v4, v3 ; encoding: [0x03,0x41,0x29,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x41,0x29,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v3, v2 :: v_dual_subrev_f32 v7, v4, v3 ; encoding: [0x03,0x61,0x28,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x61,0x28,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v3, v255 :: v_dual_mov_b32 v7, v4 ; encoding: [0x03,0x81,0x28,0xcf,0x04,0x01,0xff,0x00,0xff,0x00,0x00,0x07] 
+0x03,0x81,0x28,0xcf,0x04,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_add_f32 v7, v1, v3 ; encoding: [0x04,0x41,0x28,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x41,0x28,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_add_nc_u32 v7, v1, v3 ; encoding: [0x04,0x01,0x29,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x01,0x29,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_ashrrev_i32 v7, v1, v3 ; encoding: [0x04,0x61,0x29,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x61,0x29,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0x6e ; encoding: [0x04,0x21,0x29,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x6e,0x07] +0x04,0x21,0x29,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x6e,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, s96 ; encoding: [0x04,0x91,0x28,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x60,0x07] +0x04,0x91,0x28,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x60,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo ; encoding: [0x04,0x91,0x28,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0x04,0x91,0x28,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_fma_f32 v7, v1, v3, v4 ; encoding: [0x04,0x31,0x29,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x04,0x07] +0x04,0x31,0x29,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x04,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_fmac_f32 v7, v1, v3 ; encoding: [0x04,0x01,0x28,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x01,0x28,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_lshlrev_b32 v7, v1, v3 ; encoding: [0x04,0x11,0x29,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x04,0x11,0x29,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_lshrrev_b32 v7, v1, v3 ; encoding: [0x04,0x51,0x29,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x51,0x29,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_max_i32 v7, v1, v3 ; encoding: [0x04,0x71,0x29,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x71,0x29,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_max_num_f32 v7, v1, v3 ; encoding: [0x04,0xa1,0x28,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0xa1,0x28,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_min_i32 v7, v1, v3 ; encoding: [0x04,0x81,0x29,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x81,0x29,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_min_num_f32 v7, v1, v3 ; encoding: [0x04,0xb1,0x28,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0xb1,0x28,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v7, v1, v3 ; encoding: [0x04,0x71,0x28,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x71,0x28,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_mul_f32 v7, v1, v3 ; encoding: [0x04,0x31,0x28,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x31,0x28,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_sub_f32 v7, v1, v3 ; encoding: [0x04,0x51,0x28,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x51,0x28,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v4, v2 :: v_dual_sub_nc_u32 v7, v1, v3 ; encoding: [0x04,0x41,0x29,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x41,0x29,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: 
v_dual_max_num_f32 v255, v4, v2 :: v_dual_subrev_f32 v7, v1, v3 ; encoding: [0x04,0x61,0x28,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x61,0x28,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, v4, v255 :: v_dual_mov_b32 v7, v1 ; encoding: [0x04,0x81,0x28,0xcf,0x01,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0x04,0x81,0x28,0xcf,0x01,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_add_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x40,0x28,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x40,0x28,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x29,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x00,0x29,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v7, exec_lo, v3 ; encoding: [0x6b,0x60,0x29,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x60,0x29,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v7, vcc_hi, v3, vcc_lo ; encoding: [0x6b,0x90,0x28,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x6b,0x90,0x28,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x28,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x00,0x28,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v7, exec_lo, v3 ; encoding: [0x6b,0x10,0x29,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x10,0x29,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v7, exec_lo, v3 ; encoding: [0x6b,0x50,0x29,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x50,0x29,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 
v255, vcc_hi, v2 :: v_dual_max_i32 v7, exec_lo, v3 ; encoding: [0x6b,0x70,0x29,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x70,0x29,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xa0,0x28,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0xa0,0x28,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_min_i32 v7, exec_lo, v3 ; encoding: [0x6b,0x80,0x29,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x80,0x29,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xb0,0x28,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0xb0,0x28,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x70,0x28,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x70,0x28,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_mul_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x30,0x28,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x30,0x28,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_sub_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x50,0x28,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x50,0x28,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v7, exec_lo, v3 ; encoding: [0x6b,0x40,0x29,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x40,0x29,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x60,0x28,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x60,0x28,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, 
vcc_hi, v255 :: v_dual_mov_b32 v7, exec_lo ; encoding: [0x6b,0x80,0x28,0xcf,0x7e,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x6b,0x80,0x28,0xcf,0x7e,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_add_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x28,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x40,0x28,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x29,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x00,0x29,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x29,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x60,0x29,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo ; encoding: [0x6a,0x90,0x28,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x6a,0x90,0x28,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x28,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x00,0x28,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x10,0x29,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x10,0x29,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x29,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x50,0x29,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_max_i32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x29,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x70,0x29,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, 
vcc_lo, v2 :: v_dual_max_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xa0,0x28,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0xa0,0x28,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_min_i32 v7, exec_hi, v3 ; encoding: [0x6a,0x80,0x29,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x80,0x29,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xb0,0x28,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0xb0,0x28,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x28,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x70,0x28,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_mul_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x30,0x28,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x30,0x28,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_sub_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x28,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x50,0x28,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x29,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x40,0x29,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x28,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x60,0x28,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f32 v255, vcc_lo, v255 :: v_dual_mov_b32 v7, exec_hi ; encoding: [0x6a,0x80,0x28,0xcf,0x7f,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x6a,0x80,0x28,0xcf,0x7f,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[0:1], 
-v[8:9], -v[4:5] :: v_dual_mul_f32 v5, -v6, v7 ; encoding: [0x08,0x31,0x8c,0xcf,0x06,0x17,0x04,0x00,0x00,0x07,0x00,0x05] +0x08,0x31,0x8c,0xcf,0x06,0x17,0x04,0x00,0x00,0x07,0x00,0x05 + +# GFX1250: v_dual_max_num_f64 v[0:1], v[8:9], -v[4:5] :: v_dual_sub_nc_u32 v5, v6, v7 ; encoding: [0x08,0x41,0x8d,0xcf,0x06,0x05,0x04,0x00,0x00,0x07,0x00,0x05] +0x08,0x41,0x8d,0xcf,0x06,0x05,0x04,0x00,0x00,0x07,0x00,0x05 + +# GFX1250: v_dual_max_num_f64 v[252:253], v[6:7], v[4:5] :: v_dual_add_f32 v8, v1, v3 ; encoding: [0x06,0x41,0x8c,0xcf,0x01,0x01,0x04,0x00,0xfc,0x03,0x00,0x08] +0x06,0x41,0x8c,0xcf,0x01,0x01,0x04,0x00,0xfc,0x03,0x00,0x08 + +# GFX1250: v_dual_max_num_f64 v[254:255], -1, v[6:7] :: v_dual_add_f32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x8c,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +0xc1,0x40,0x8c,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], -1, v[6:7] :: v_dual_add_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x8d,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +0xc1,0x00,0x8d,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], -1, v[6:7] :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo ; encoding: [0xc1,0x90,0x8c,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x6a,0x07] +0xc1,0x90,0x8c,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x6a,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], -1, v[6:7] :: v_dual_fmac_f32 v9, src_scc, v5 ; encoding: [0xc1,0x00,0x8c,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x09] +0xc1,0x00,0x8c,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x09 + +# GFX1250: v_dual_max_num_f64 v[254:255], -1, v[6:7] :: v_dual_lshlrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x10,0x8d,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +0xc1,0x10,0x8d,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], -1, v[6:7] :: v_dual_max_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xa0,0x8c,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] 
+0xc1,0xa0,0x8c,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], -1, v[6:7] :: v_dual_min_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xb0,0x8c,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +0xc1,0xb0,0x8c,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], -1, v[6:7] :: v_dual_mov_b32 v7, src_scc ; encoding: [0xc1,0x80,0x8c,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x00,0x00,0x07] +0xc1,0x80,0x8c,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], -1, v[6:7] :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x8c,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +0xc1,0x70,0x8c,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], -1, v[6:7] :: v_dual_mul_f32 v7, src_scc, v5 ; encoding: [0xc1,0x30,0x8c,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +0xc1,0x30,0x8c,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], -1, v[6:7] :: v_dual_sub_f32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x8c,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +0xc1,0x50,0x8c,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], -1, v[6:7] :: v_dual_subrev_f32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x8c,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +0xc1,0x60,0x8c,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_add_f32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x8c,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +0xf0,0x40,0x8c,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_add_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x8d,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +0xf0,0x00,0x8d,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo ; encoding: 
[0xf0,0x90,0x8c,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x6a,0x07] +0xf0,0x90,0x8c,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x6a,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_fmac_f32 v9, 0.5, v2 ; encoding: [0xf0,0x00,0x8c,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x09] +0xf0,0x00,0x8c,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x09 + +# GFX1250: v_dual_max_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_lshlrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x10,0x8d,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +0xf0,0x10,0x8d,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_max_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xa0,0x8c,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +0xf0,0xa0,0x8c,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_min_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xb0,0x8c,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +0xf0,0xb0,0x8c,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_mov_b32 v7, 0.5 ; encoding: [0xf0,0x80,0x8c,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x00,0x00,0x07] +0xf0,0x80,0x8c,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x8c,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +0xf0,0x70,0x8c,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_mul_f32 v7, 0.5, v2 ; encoding: [0xf0,0x30,0x8c,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +0xf0,0x30,0x8c,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_sub_f32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x8c,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +0xf0,0x50,0x8c,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_subrev_f32 v7, 
0.5, v2 ; encoding: [0xf0,0x60,0x8c,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +0xf0,0x60,0x8c,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], exec, v[254:255] :: v_dual_mov_b32 v7, ttmp15 ; encoding: [0x7e,0x80,0x8c,0xcf,0x7b,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07] +0x7e,0x80,0x8c,0xcf,0x7b,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], exec, v[4:5] :: v_dual_add_f32 v7, v15, v3 ; encoding: [0x7e,0x40,0x8c,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7e,0x40,0x8c,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], exec, v[4:5] :: v_dual_add_nc_u32 v7, v15, v3 ; encoding: [0x7e,0x00,0x8d,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7e,0x00,0x8d,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], exec, v[4:5] :: v_dual_cndmask_b32 v7, v10, v3, vcc_lo ; encoding: [0x7e,0x90,0x8c,0xcf,0x0a,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +0x7e,0x90,0x8c,0xcf,0x0a,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], exec, v[4:5] :: v_dual_fmac_f32 v9, v15, v3 ; encoding: [0x7e,0x00,0x8c,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +0x7e,0x00,0x8c,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_max_num_f64 v[254:255], exec, v[4:5] :: v_dual_lshlrev_b32 v7, v15, v3 ; encoding: [0x7e,0x10,0x8d,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7e,0x10,0x8d,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], exec, v[4:5] :: v_dual_max_num_f32 v7, v15, v3 ; encoding: [0x7e,0xa0,0x8c,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7e,0xa0,0x8c,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], exec, v[4:5] :: v_dual_min_num_f32 v7, v15, v3 ; encoding: [0x7e,0xb0,0x8c,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7e,0xb0,0x8c,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 
v[254:255], exec, v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v15, v3 ; encoding: [0x7e,0x70,0x8c,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7e,0x70,0x8c,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], exec, v[4:5] :: v_dual_mul_f32 v7, v15, v3 ; encoding: [0x7e,0x30,0x8c,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7e,0x30,0x8c,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], exec, v[4:5] :: v_dual_sub_f32 v7, v15, v3 ; encoding: [0x7e,0x50,0x8c,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7e,0x50,0x8c,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], exec, v[4:5] :: v_dual_subrev_f32 v7, v15, v3 ; encoding: [0x7e,0x60,0x8c,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7e,0x60,0x8c,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], s[104:105], v[254:255] :: v_dual_mov_b32 v7, s1 ; encoding: [0x68,0x80,0x8c,0xcf,0x01,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07] +0x68,0x80,0x8c,0xcf,0x01,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_add_f32 v7, v1, v3 ; encoding: [0x68,0x40,0x8c,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x68,0x40,0x8c,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_add_nc_u32 v7, v1, v3 ; encoding: [0x68,0x00,0x8d,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x68,0x00,0x8d,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_fmac_f32 v9, v1, v3 ; encoding: [0x68,0x00,0x8c,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +0x68,0x00,0x8c,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_max_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_lshlrev_b32 v7, v1, v3 ; encoding: [0x68,0x10,0x8d,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] 
+0x68,0x10,0x8d,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_max_num_f32 v7, v1, v3 ; encoding: [0x68,0xa0,0x8c,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x68,0xa0,0x8c,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_min_num_f32 v7, v1, v3 ; encoding: [0x68,0xb0,0x8c,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x68,0xb0,0x8c,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v1, v3 ; encoding: [0x68,0x70,0x8c,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x68,0x70,0x8c,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_mul_f32 v7, v1, v3 ; encoding: [0x68,0x30,0x8c,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x68,0x30,0x8c,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_sub_f32 v7, v1, v3 ; encoding: [0x68,0x50,0x8c,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x68,0x50,0x8c,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_subrev_f32 v7, v1, v3 ; encoding: [0x68,0x60,0x8c,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x68,0x60,0x8c,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], s[2:3], v[254:255] :: v_dual_mov_b32 v7, s105 ; encoding: [0x02,0x80,0x8c,0xcf,0x69,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07] +0x02,0x80,0x8c,0xcf,0x69,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], s[2:3], v[4:5] :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo ; encoding: [0x02,0x90,0x8c,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +0x02,0x90,0x8c,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], src_scc, v[254:255] :: v_dual_mov_b32 v7, 
-1 ; encoding: [0xfd,0x80,0x8c,0xcf,0xc1,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07] +0xfd,0x80,0x8c,0xcf,0xc1,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_add_f32 v7, -1, v3 ; encoding: [0xfd,0x40,0x8c,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfd,0x40,0x8c,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_add_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x00,0x8d,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfd,0x00,0x8d,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo ; encoding: [0xfd,0x90,0x8c,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x6a,0x07] +0xfd,0x90,0x8c,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x6a,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_fmac_f32 v9, -1, v3 ; encoding: [0xfd,0x00,0x8c,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x09] +0xfd,0x00,0x8c,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_max_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_lshlrev_b32 v7, -1, v3 ; encoding: [0xfd,0x10,0x8d,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfd,0x10,0x8d,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_max_num_f32 v7, -1, v3 ; encoding: [0xfd,0xa0,0x8c,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfd,0xa0,0x8c,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_min_num_f32 v7, -1, v3 ; encoding: [0xfd,0xb0,0x8c,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfd,0xb0,0x8c,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_mul_dx9_zero_f32 v7, -1, v3 ; encoding: [0xfd,0x70,0x8c,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfd,0x70,0x8c,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: 
v_dual_max_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_mul_f32 v7, -1, v3 ; encoding: [0xfd,0x30,0x8c,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfd,0x30,0x8c,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_sub_f32 v7, -1, v3 ; encoding: [0xfd,0x50,0x8c,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfd,0x50,0x8c,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_subrev_f32 v7, -1, v3 ; encoding: [0xfd,0x60,0x8c,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfd,0x60,0x8c,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], ttmp[14:15], v[254:255] :: v_dual_mov_b32 v7, vcc_lo ; encoding: [0x7a,0x80,0x8c,0xcf,0x6a,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07] +0x7a,0x80,0x8c,0xcf,0x6a,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_add_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0x40,0x8c,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7a,0x40,0x8c,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_add_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7a,0x00,0x8d,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7a,0x00,0x8d,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_cndmask_b32 v7, v15, v3, vcc_lo ; encoding: [0x7a,0x90,0x8c,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +0x7a,0x90,0x8c,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_fmac_f32 v9, vcc_lo, v3 ; encoding: [0x7a,0x00,0x8c,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x09] +0x7a,0x00,0x8c,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_max_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_lshlrev_b32 v7, vcc_lo, v3 ; encoding: 
[0x7a,0x10,0x8d,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7a,0x10,0x8d,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_max_num_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0xa0,0x8c,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7a,0xa0,0x8c,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_min_num_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0xb0,0x8c,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7a,0xb0,0x8c,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0x70,0x8c,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7a,0x70,0x8c,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_mul_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0x30,0x8c,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7a,0x30,0x8c,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_sub_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0x50,0x8c,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7a,0x50,0x8c,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_subrev_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0x60,0x8c,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7a,0x60,0x8c,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[254:255], v[254:255] :: v_dual_mov_b32 v7, v4 ; encoding: [0xfe,0x81,0x8c,0xcf,0x04,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07] +0xfe,0x81,0x8c,0xcf,0x04,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_add_f32 v7, v5, v3 ; encoding: [0xfe,0x41,0x8c,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfe,0x41,0x8c,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + 
+# GFX1250: v_dual_max_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_add_nc_u32 v7, v5, v3 ; encoding: [0xfe,0x01,0x8d,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfe,0x01,0x8d,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_cndmask_b32 v7, v5, v3, vcc_lo ; encoding: [0xfe,0x91,0x8c,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +0xfe,0x91,0x8c,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_fmac_f32 v9, v5, v3 ; encoding: [0xfe,0x01,0x8c,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +0xfe,0x01,0x8c,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_lshlrev_b32 v7, v5, v3 ; encoding: [0xfe,0x11,0x8d,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfe,0x11,0x8d,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_max_num_f32 v7, v5, v3 ; encoding: [0xfe,0xa1,0x8c,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfe,0xa1,0x8c,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_min_num_f32 v7, v5, v3 ; encoding: [0xfe,0xb1,0x8c,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfe,0xb1,0x8c,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v5, v3 ; encoding: [0xfe,0x71,0x8c,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfe,0x71,0x8c,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_mul_f32 v7, v5, v3 ; encoding: [0xfe,0x31,0x8c,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfe,0x31,0x8c,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_sub_f32 v7, v5, v3 ; encoding: 
[0xfe,0x51,0x8c,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfe,0x51,0x8c,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_subrev_f32 v7, v5, v3 ; encoding: [0xfe,0x61,0x8c,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfe,0x61,0x8c,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[2:3], v[254:255] :: v_dual_mov_b32 v7, v253 ; encoding: [0x02,0x81,0x8c,0xcf,0xfd,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07] +0x02,0x81,0x8c,0xcf,0xfd,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_add_f32 v7, v253, v3 ; encoding: [0x02,0x41,0x8c,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x02,0x41,0x8c,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_add_nc_u32 v7, v253, v3 ; encoding: [0x02,0x01,0x8d,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x02,0x01,0x8d,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_cndmask_b32 v7, v253, v3, vcc_lo ; encoding: [0x02,0x91,0x8c,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +0x02,0x91,0x8c,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_fmac_f32 v9, v253, v3 ; encoding: [0x02,0x01,0x8c,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +0x02,0x01,0x8c,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_lshlrev_b32 v7, v253, v3 ; encoding: [0x02,0x11,0x8d,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x02,0x11,0x8d,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_max_num_f32 v7, v253, v3 ; encoding: [0x02,0xa1,0x8c,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x02,0xa1,0x8c,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 
v[254:255], v[2:3], v[4:5] :: v_dual_min_num_f32 v7, v253, v3 ; encoding: [0x02,0xb1,0x8c,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x02,0xb1,0x8c,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v253, v3 ; encoding: [0x02,0x71,0x8c,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x02,0x71,0x8c,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_mul_f32 v7, v253, v3 ; encoding: [0x02,0x31,0x8c,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x02,0x31,0x8c,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_sub_f32 v7, v253, v3 ; encoding: [0x02,0x51,0x8c,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x02,0x51,0x8c,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_subrev_f32 v7, v253, v3 ; encoding: [0x02,0x61,0x8c,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x02,0x61,0x8c,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[4:5], v[254:255] :: v_dual_mov_b32 v7, v3 ; encoding: [0x04,0x81,0x8c,0xcf,0x03,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07] +0x04,0x81,0x8c,0xcf,0x03,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_add_f32 v7, v3, v3 ; encoding: [0x04,0x41,0x8c,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x41,0x8c,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_add_nc_u32 v7, v2, v3 ; encoding: [0x04,0x01,0x8d,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x01,0x8d,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_add_nc_u32 v7, v3, v3 ; encoding: [0x04,0x01,0x8d,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] 
+0x04,0x01,0x8d,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_cndmask_b32 v7, v2, v3, s96 ; encoding: [0x04,0x91,0x8c,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x60,0x07] +0x04,0x91,0x8c,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x60,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo ; encoding: [0x04,0x91,0x8c,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +0x04,0x91,0x8c,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo ; encoding: [0x04,0x91,0x8c,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +0x04,0x91,0x8c,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_fmac_f32 v9, v2, v3 ; encoding: [0x04,0x01,0x8c,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +0x04,0x01,0x8c,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_fmac_f32 v9, v3, v3 ; encoding: [0x04,0x01,0x8c,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +0x04,0x01,0x8c,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_lshlrev_b32 v7, v2, v3 ; encoding: [0x04,0x11,0x8d,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x11,0x8d,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_lshlrev_b32 v7, v3, v3 ; encoding: [0x04,0x11,0x8d,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x11,0x8d,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_max_num_f32 v7, v2, v3 ; encoding: [0x04,0xa1,0x8c,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0xa1,0x8c,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_max_num_f32 v7, v3, v3 ; 
encoding: [0x04,0xa1,0x8c,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0xa1,0x8c,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_min_num_f32 v7, v2, v3 ; encoding: [0x04,0xb1,0x8c,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0xb1,0x8c,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_min_num_f32 v7, v3, v3 ; encoding: [0x04,0xb1,0x8c,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0xb1,0x8c,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v2, v3 ; encoding: [0x04,0x71,0x8c,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x71,0x8c,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v3, v3 ; encoding: [0x04,0x71,0x8c,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x71,0x8c,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_mul_f32 v7, v2, v3 ; encoding: [0x04,0x31,0x8c,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x31,0x8c,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_mul_f32 v7, v3, v3 ; encoding: [0x04,0x31,0x8c,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x31,0x8c,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_sub_f32 v7, v2, v3 ; encoding: [0x04,0x51,0x8c,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x51,0x8c,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_sub_f32 v7, v3, v3 ; encoding: [0x04,0x51,0x8c,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x51,0x8c,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], 
v[4:5], v[4:5] :: v_dual_subrev_f32 v7, v2, v3 ; encoding: [0x04,0x61,0x8c,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x61,0x8c,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_subrev_f32 v7, v3, v3 ; encoding: [0x04,0x61,0x8c,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x61,0x8c,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[6:7], v[254:255] :: v_dual_mov_b32 v7, v4 ; encoding: [0x06,0x81,0x8c,0xcf,0x04,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07] +0x06,0x81,0x8c,0xcf,0x04,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_add_f32 v7, v4, v3 ; encoding: [0x06,0x41,0x8c,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x06,0x41,0x8c,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_add_nc_u32 v7, v4, v3 ; encoding: [0x06,0x01,0x8d,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x06,0x01,0x8d,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo ; encoding: [0x06,0x91,0x8c,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +0x06,0x91,0x8c,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_fmac_f32 v9, v4, v3 ; encoding: [0x06,0x01,0x8c,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +0x06,0x01,0x8c,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_lshlrev_b32 v7, v4, v3 ; encoding: [0x06,0x11,0x8d,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x06,0x11,0x8d,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_max_num_f32 v7, v4, v3 ; encoding: [0x06,0xa1,0x8c,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] 
+0x06,0xa1,0x8c,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_min_num_f32 v7, v4, v3 ; encoding: [0x06,0xb1,0x8c,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x06,0xb1,0x8c,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v4, v3 ; encoding: [0x06,0x71,0x8c,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x06,0x71,0x8c,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_mul_f32 v7, v4, v3 ; encoding: [0x06,0x31,0x8c,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x06,0x31,0x8c,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_sub_f32 v7, v4, v3 ; encoding: [0x06,0x51,0x8c,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x06,0x51,0x8c,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_subrev_f32 v7, v4, v3 ; encoding: [0x06,0x61,0x8c,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x06,0x61,0x8c,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], vcc, v[254:255] :: v_dual_mov_b32 v7, exec_hi ; encoding: [0x6a,0x80,0x8c,0xcf,0x7f,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07] +0x6a,0x80,0x8c,0xcf,0x7f,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], vcc, v[4:5] :: v_dual_add_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x8c,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x6a,0x40,0x8c,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], vcc, v[4:5] :: v_dual_add_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x8d,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x6a,0x00,0x8d,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], vcc, v[4:5] :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo ; encoding: 
[0x6a,0x90,0x8c,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x6a,0x07] +0x6a,0x90,0x8c,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x6a,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], vcc, v[4:5] :: v_dual_fmac_f32 v9, exec_hi, v3 ; encoding: [0x6a,0x00,0x8c,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x09] +0x6a,0x00,0x8c,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_max_num_f64 v[254:255], vcc, v[4:5] :: v_dual_lshlrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x10,0x8d,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x6a,0x10,0x8d,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], vcc, v[4:5] :: v_dual_max_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xa0,0x8c,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x6a,0xa0,0x8c,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], vcc, v[4:5] :: v_dual_min_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xb0,0x8c,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x6a,0xb0,0x8c,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], vcc, v[4:5] :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x8c,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x6a,0x70,0x8c,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], vcc, v[4:5] :: v_dual_mul_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x30,0x8c,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x6a,0x30,0x8c,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], vcc, v[4:5] :: v_dual_sub_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x8c,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x6a,0x50,0x8c,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[254:255], vcc, v[4:5] :: v_dual_subrev_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x8c,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x6a,0x60,0x8c,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_max_num_f64 v[2:3], 
v[6:7], v[4:5] :: v_dual_ashrrev_i32 v9, v1, v14 ; encoding: [0x06,0x61,0x8d,0xcf,0x01,0x01,0x04,0x00,0x02,0x0e,0x00,0x09] +0x06,0x61,0x8d,0xcf,0x01,0x01,0x04,0x00,0x02,0x0e,0x00,0x09 + +# GFX1250: v_dual_max_num_f64 v[2:3], v[6:7], v[4:5] :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0x94 ; encoding: [0x06,0x21,0x8d,0xcf,0x01,0x01,0x04,0x00,0x02,0x03,0x94,0x07] +0x06,0x21,0x8d,0xcf,0x01,0x01,0x04,0x00,0x02,0x03,0x94,0x07 + +# GFX1250: v_dual_max_num_f64 v[2:3], v[6:7], v[4:5] :: v_dual_fma_f32 v9, v1, v14, v4 ; encoding: [0x06,0x31,0x8d,0xcf,0x01,0x01,0x04,0x00,0x02,0x0e,0x04,0x09] +0x06,0x31,0x8d,0xcf,0x01,0x01,0x04,0x00,0x02,0x0e,0x04,0x09 + +# GFX1250: v_dual_max_num_f64 v[2:3], v[6:7], v[4:5] :: v_dual_lshrrev_b32 v9, v1, v14 ; encoding: [0x06,0x51,0x8d,0xcf,0x01,0x01,0x04,0x00,0x02,0x0e,0x00,0x09] +0x06,0x51,0x8d,0xcf,0x01,0x01,0x04,0x00,0x02,0x0e,0x00,0x09 + +# GFX1250: v_dual_max_num_f64 v[2:3], v[6:7], v[4:5] :: v_dual_sub_nc_u32 v9, v1, v14 ; encoding: [0x06,0x41,0x8d,0xcf,0x01,0x01,0x04,0x00,0x02,0x0e,0x00,0x09] +0x06,0x41,0x8d,0xcf,0x01,0x01,0x04,0x00,0x02,0x0e,0x00,0x09 + +# GFX1250: v_dual_min_num_f32 v0, -v1, v2 :: v_dual_sub_f32 v5, -v6, -v7 ; encoding: [0x01,0x51,0x2c,0xcf,0x06,0x33,0x02,0x00,0x00,0x07,0x00,0x05] +0x01,0x51,0x2c,0xcf,0x06,0x33,0x02,0x00,0x00,0x07,0x00,0x05 + +# GFX1250: v_dual_min_num_f32 v0, v1, -v2 :: v_dual_add_nc_u32 v5, v6, v7 ; encoding: [0x01,0x01,0x2d,0xcf,0x06,0x05,0x02,0x00,0x00,0x07,0x00,0x05] +0x01,0x01,0x2d,0xcf,0x06,0x05,0x02,0x00,0x00,0x07,0x00,0x05 + +# GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_add_f32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x2c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x40,0x2c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_add_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x2d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x00,0x2d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, -1, v4 :: 
v_dual_ashrrev_i32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x2d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x60,0x2d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo ; encoding: [0xc1,0x90,0x2c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x6a,0x07] +0xc1,0x90,0x2c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x6a,0x07 + +# GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_fmac_f32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x2c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x00,0x2c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_lshlrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x10,0x2d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x10,0x2d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_lshrrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x2d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x50,0x2d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_max_i32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x2d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x70,0x2d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_max_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xa0,0x2c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0xa0,0x2c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_min_i32 v7, src_scc, v5 ; encoding: [0xc1,0x80,0x2d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x80,0x2d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_min_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xb0,0x2c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0xb0,0x2c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_mov_b32 v7, src_scc ; 
encoding: [0xc1,0x80,0x2c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x00,0x00,0x07] +0xc1,0x80,0x2c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x2c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x70,0x2c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_mul_f32 v7, src_scc, v5 ; encoding: [0xc1,0x30,0x2c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x30,0x2c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_sub_f32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x2c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x50,0x2c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_sub_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x2d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x40,0x2d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, -1, v4 :: v_dual_subrev_f32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x2c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x60,0x2c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_add_f32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x2c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x40,0x2c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_add_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x2d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x00,0x2d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x2d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x60,0x2d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo ; encoding: 
[0xf0,0x90,0x2c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x6a,0x07] +0xf0,0x90,0x2c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x6a,0x07 + +# GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_fmac_f32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x2c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x00,0x2c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x10,0x2d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x10,0x2d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x2d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x50,0x2d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_max_i32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x2d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x70,0x2d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_max_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xa0,0x2c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0xa0,0x2c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_min_i32 v7, 0.5, v2 ; encoding: [0xf0,0x80,0x2d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x80,0x2d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_min_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xb0,0x2c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0xb0,0x2c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_mov_b32 v7, 0.5 ; encoding: [0xf0,0x80,0x2c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x00,0x00,0x07] +0xf0,0x80,0x2c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x2c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] 
+0xf0,0x70,0x2c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_mul_f32 v7, 0.5, v2 ; encoding: [0xf0,0x30,0x2c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x30,0x2c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_sub_f32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x2c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x50,0x2c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x2d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x40,0x2d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, 0.5, v3 :: v_dual_subrev_f32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x2c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x60,0x2c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_add_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x40,0x2c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x40,0x2c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x2d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x00,0x2d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v7, vcc_hi, v3 ; encoding: [0x7f,0x60,0x2d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x60,0x2d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v7, exec_hi, v3, vcc_lo ; encoding: [0x7f,0x90,0x2c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7f,0x90,0x2c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_fmac_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x2c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x7f,0x00,0x2c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v7, vcc_hi, v3 ; encoding: [0x7f,0x10,0x2d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x10,0x2d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v7, vcc_hi, v3 ; encoding: [0x7f,0x50,0x2d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x50,0x2d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_max_i32 v7, vcc_hi, v3 ; encoding: [0x7f,0x70,0x2d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x70,0x2d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_max_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xa0,0x2c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0xa0,0x2c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_min_i32 v7, vcc_hi, v3 ; encoding: [0x7f,0x80,0x2d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x80,0x2d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_min_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xb0,0x2c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0xb0,0x2c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x70,0x2c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x70,0x2c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_mul_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x30,0x2c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x30,0x2c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_sub_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x50,0x2c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x7f,0x50,0x2c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v7, vcc_hi, v3 ; encoding: [0x7f,0x40,0x2d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x40,0x2d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, exec_hi, v2 :: v_dual_subrev_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x60,0x2c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x60,0x2c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, exec_hi, v255 :: v_dual_mov_b32 v7, vcc_hi ; encoding: [0x7f,0x80,0x2c,0xcf,0x6b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7f,0x80,0x2c,0xcf,0x6b,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_add_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x40,0x2c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x40,0x2c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x2d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x00,0x2d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v7, ttmp15, v3 ; encoding: [0x7e,0x60,0x2d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x60,0x2d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v7, exec_lo, v3, vcc_lo ; encoding: [0x7e,0x90,0x2c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7e,0x90,0x2c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_fmac_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x2c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x00,0x2c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v7, ttmp15, v3 ; encoding: [0x7e,0x10,0x2d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x7e,0x10,0x2d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_lshrrev_b32 v7, ttmp15, v3 ; encoding: [0x7e,0x50,0x2d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x50,0x2d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_max_i32 v7, ttmp15, v3 ; encoding: [0x7e,0x70,0x2d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x70,0x2d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_max_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xa0,0x2c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0xa0,0x2c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_min_i32 v7, ttmp15, v3 ; encoding: [0x7e,0x80,0x2d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x80,0x2d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_min_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xb0,0x2c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0xb0,0x2c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x70,0x2c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x70,0x2c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_mul_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x30,0x2c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x30,0x2c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_sub_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x50,0x2c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x50,0x2c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_sub_nc_u32 v7, ttmp15, v3 ; encoding: [0x7e,0x40,0x2d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x7e,0x40,0x2d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, exec_lo, v2 :: v_dual_subrev_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x60,0x2c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x60,0x2c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, exec_lo, v255 :: v_dual_mov_b32 v7, ttmp15 ; encoding: [0x7e,0x80,0x2c,0xcf,0x7b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7e,0x80,0x2c,0xcf,0x7b,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, m0, v2 :: v_dual_add_f32 v7, m0, v3 ; encoding: [0x7d,0x40,0x2c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x40,0x2c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, m0, v2 :: v_dual_add_nc_u32 v7, m0, v3 ; encoding: [0x7d,0x00,0x2d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x00,0x2d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, m0, v2 :: v_dual_ashrrev_i32 v7, m0, v3 ; encoding: [0x7d,0x60,0x2d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x60,0x2d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, m0, v2 :: v_dual_cndmask_b32 v7, m0, v3, vcc_lo ; encoding: [0x7d,0x90,0x2c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7d,0x90,0x2c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_min_num_f32 v255, m0, v2 :: v_dual_fmac_f32 v7, m0, v3 ; encoding: [0x7d,0x00,0x2c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x00,0x2c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, m0, v2 :: v_dual_lshlrev_b32 v7, m0, v3 ; encoding: [0x7d,0x10,0x2d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x10,0x2d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, m0, v2 :: v_dual_lshrrev_b32 v7, m0, v3 ; encoding: [0x7d,0x50,0x2d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x50,0x2d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + 
+# GFX1250: v_dual_min_num_f32 v255, m0, v2 :: v_dual_max_i32 v7, m0, v3 ; encoding: [0x7d,0x70,0x2d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x70,0x2d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, m0, v2 :: v_dual_max_num_f32 v7, m0, v3 ; encoding: [0x7d,0xa0,0x2c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0xa0,0x2c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, m0, v2 :: v_dual_min_i32 v7, m0, v3 ; encoding: [0x7d,0x80,0x2d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x80,0x2d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, m0, v2 :: v_dual_min_num_f32 v7, m0, v3 ; encoding: [0x7d,0xb0,0x2c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0xb0,0x2c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v7, m0, v3 ; encoding: [0x7d,0x70,0x2c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x70,0x2c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, m0, v2 :: v_dual_mul_f32 v7, m0, v3 ; encoding: [0x7d,0x30,0x2c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x30,0x2c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, m0, v2 :: v_dual_sub_f32 v7, m0, v3 ; encoding: [0x7d,0x50,0x2c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x50,0x2c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, m0, v2 :: v_dual_sub_nc_u32 v7, m0, v3 ; encoding: [0x7d,0x40,0x2d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x40,0x2d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, m0, v2 :: v_dual_subrev_f32 v7, m0, v3 ; encoding: [0x7d,0x60,0x2c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x60,0x2c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, m0, v255 :: v_dual_mov_b32 v7, m0 ; encoding: 
[0x7d,0x80,0x2c,0xcf,0x7d,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7d,0x80,0x2c,0xcf,0x7d,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, s1, v2 :: v_dual_add_f32 v7, s105, v3 ; encoding: [0x01,0x40,0x2c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x40,0x2c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, s1, v2 :: v_dual_add_nc_u32 v7, s105, v3 ; encoding: [0x01,0x00,0x2d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x00,0x2d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, s1, v2 :: v_dual_ashrrev_i32 v7, s105, v3 ; encoding: [0x01,0x60,0x2d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x60,0x2d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, s1, v2 :: v_dual_cndmask_b32 v7, s1, v3, vcc_lo ; encoding: [0x01,0x90,0x2c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x01,0x90,0x2c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_min_num_f32 v255, s1, v2 :: v_dual_fmac_f32 v7, s105, v3 ; encoding: [0x01,0x00,0x2c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x00,0x2c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, s1, v2 :: v_dual_lshlrev_b32 v7, s105, v3 ; encoding: [0x01,0x10,0x2d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x10,0x2d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, s1, v2 :: v_dual_lshrrev_b32 v7, s105, v3 ; encoding: [0x01,0x50,0x2d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x50,0x2d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, s1, v2 :: v_dual_max_i32 v7, s105, v3 ; encoding: [0x01,0x70,0x2d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x70,0x2d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, s1, v2 :: v_dual_max_num_f32 v7, s105, v3 ; encoding: [0x01,0xa0,0x2c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x01,0xa0,0x2c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, s1, v2 :: v_dual_min_i32 v7, s105, v3 ; encoding: [0x01,0x80,0x2d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x80,0x2d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, s1, v2 :: v_dual_min_num_f32 v7, s105, v3 ; encoding: [0x01,0xb0,0x2c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xb0,0x2c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v7, s105, v3 ; encoding: [0x01,0x70,0x2c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x70,0x2c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, s1, v2 :: v_dual_mul_f32 v7, s105, v3 ; encoding: [0x01,0x30,0x2c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x30,0x2c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, s1, v2 :: v_dual_sub_f32 v7, s105, v3 ; encoding: [0x01,0x50,0x2c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x50,0x2c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, s1, v2 :: v_dual_sub_nc_u32 v7, s105, v3 ; encoding: [0x01,0x40,0x2d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x40,0x2d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, s1, v2 :: v_dual_subrev_f32 v7, s105, v3 ; encoding: [0x01,0x60,0x2c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x60,0x2c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, s1, v255 :: v_dual_mov_b32 v7, s105 ; encoding: [0x01,0x80,0x2c,0xcf,0x69,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x01,0x80,0x2c,0xcf,0x69,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, s105, v2 :: v_dual_add_f32 v7, s1, v3 ; encoding: [0x69,0x40,0x2c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x40,0x2c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: 
v_dual_min_num_f32 v255, s105, v2 :: v_dual_add_nc_u32 v7, s1, v3 ; encoding: [0x69,0x00,0x2d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x00,0x2d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, s105, v2 :: v_dual_ashrrev_i32 v7, s1, v3 ; encoding: [0x69,0x60,0x2d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x60,0x2d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, s105, v2 :: v_dual_cndmask_b32 v7, s105, v3, vcc_lo ; encoding: [0x69,0x90,0x2c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x69,0x90,0x2c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_min_num_f32 v255, s105, v2 :: v_dual_fmac_f32 v7, s1, v3 ; encoding: [0x69,0x00,0x2c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x00,0x2c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, s105, v2 :: v_dual_lshlrev_b32 v7, s1, v3 ; encoding: [0x69,0x10,0x2d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x10,0x2d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, s105, v2 :: v_dual_lshrrev_b32 v7, s1, v3 ; encoding: [0x69,0x50,0x2d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x50,0x2d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, s105, v2 :: v_dual_max_i32 v7, s1, v3 ; encoding: [0x69,0x70,0x2d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x70,0x2d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, s105, v2 :: v_dual_max_num_f32 v7, s1, v3 ; encoding: [0x69,0xa0,0x2c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0xa0,0x2c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, s105, v2 :: v_dual_min_i32 v7, s1, v3 ; encoding: [0x69,0x80,0x2d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x80,0x2d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, s105, v2 :: v_dual_min_num_f32 
v7, s1, v3 ; encoding: [0x69,0xb0,0x2c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0xb0,0x2c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v7, s1, v3 ; encoding: [0x69,0x70,0x2c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x70,0x2c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, s105, v2 :: v_dual_mul_f32 v7, s1, v3 ; encoding: [0x69,0x30,0x2c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x30,0x2c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, s105, v2 :: v_dual_sub_f32 v7, s1, v3 ; encoding: [0x69,0x50,0x2c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x50,0x2c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, s105, v2 :: v_dual_sub_nc_u32 v7, s1, v3 ; encoding: [0x69,0x40,0x2d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x40,0x2d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, s105, v2 :: v_dual_subrev_f32 v7, s1, v3 ; encoding: [0x69,0x60,0x2c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x60,0x2c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, s105, v255 :: v_dual_mov_b32 v7, s1 ; encoding: [0x69,0x80,0x2c,0xcf,0x01,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x69,0x80,0x2c,0xcf,0x01,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_add_f32 v7, -1, v3 ; encoding: [0xfd,0x40,0x2c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x40,0x2c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_add_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x00,0x2d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x00,0x2d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v7, -1, v3 ; encoding: 
[0xfd,0x60,0x2d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x60,0x2d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo ; encoding: [0xfd,0x90,0x2c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0xfd,0x90,0x2c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_fmac_f32 v7, -1, v3 ; encoding: [0xfd,0x00,0x2c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x00,0x2c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v7, -1, v3 ; encoding: [0xfd,0x10,0x2d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x10,0x2d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v7, -1, v3 ; encoding: [0xfd,0x50,0x2d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x50,0x2d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_max_i32 v7, -1, v3 ; encoding: [0xfd,0x70,0x2d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x70,0x2d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_max_num_f32 v7, -1, v3 ; encoding: [0xfd,0xa0,0x2c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0xa0,0x2c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_min_i32 v7, -1, v3 ; encoding: [0xfd,0x80,0x2d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x80,0x2d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_min_num_f32 v7, -1, v3 ; encoding: [0xfd,0xb0,0x2c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0xb0,0x2c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v7, -1, v3 ; encoding: 
[0xfd,0x70,0x2c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x70,0x2c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_mul_f32 v7, -1, v3 ; encoding: [0xfd,0x30,0x2c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x30,0x2c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_sub_f32 v7, -1, v3 ; encoding: [0xfd,0x50,0x2c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x50,0x2c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x40,0x2d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x40,0x2d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, src_scc, v2 :: v_dual_subrev_f32 v7, -1, v3 ; encoding: [0xfd,0x60,0x2c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x60,0x2c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, src_scc, v255 :: v_dual_mov_b32 v7, -1 ; encoding: [0xfd,0x80,0x2c,0xcf,0xc1,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0xfd,0x80,0x2c,0xcf,0xc1,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_add_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x40,0x2c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x40,0x2c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x2d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x00,0x2d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v7, vcc_lo, v3 ; encoding: [0x7b,0x60,0x2d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x60,0x2d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v7, ttmp15, v3, vcc_lo ; encoding: 
[0x7b,0x90,0x2c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7b,0x90,0x2c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_fmac_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x2c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x00,0x2c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v7, vcc_lo, v3 ; encoding: [0x7b,0x10,0x2d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x10,0x2d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v7, vcc_lo, v3 ; encoding: [0x7b,0x50,0x2d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x50,0x2d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_max_i32 v7, vcc_lo, v3 ; encoding: [0x7b,0x70,0x2d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x70,0x2d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_max_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xa0,0x2c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0xa0,0x2c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_min_i32 v7, vcc_lo, v3 ; encoding: [0x7b,0x80,0x2d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x80,0x2d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_min_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xb0,0x2c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0xb0,0x2c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x70,0x2c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x70,0x2c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_mul_f32 v7, vcc_lo, v3 ; encoding: 
[0x7b,0x30,0x2c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x30,0x2c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_sub_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x50,0x2c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x50,0x2c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_sub_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7b,0x40,0x2d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x40,0x2d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, ttmp15, v2 :: v_dual_subrev_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x60,0x2c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x60,0x2c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, ttmp15, v255 :: v_dual_mov_b32 v7, vcc_lo ; encoding: [0x7b,0x80,0x2c,0xcf,0x6a,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7b,0x80,0x2c,0xcf,0x6a,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v1, v2 :: v_dual_add_f32 v7, v255, v3 ; encoding: [0x01,0x41,0x2c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x41,0x2c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v1, v2 :: v_dual_add_nc_u32 v7, v255, v3 ; encoding: [0x01,0x01,0x2d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x01,0x2d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v1, v2 :: v_dual_ashrrev_i32 v7, v255, v3 ; encoding: [0x01,0x61,0x2d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x61,0x2d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v1, v2 :: v_dual_cndmask_b32 v7, v255, v3, vcc_lo ; encoding: [0x01,0x91,0x2c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0x01,0x91,0x2c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v1, v2 :: v_dual_fmac_f32 v7, v255, v3 ; encoding: 
[0x01,0x01,0x2c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x01,0x2c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v1, v2 :: v_dual_lshlrev_b32 v7, v255, v3 ; encoding: [0x01,0x11,0x2d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x11,0x2d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v1, v2 :: v_dual_lshrrev_b32 v7, v255, v3 ; encoding: [0x01,0x51,0x2d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x51,0x2d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v1, v2 :: v_dual_max_i32 v7, v255, v3 ; encoding: [0x01,0x71,0x2d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x71,0x2d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v1, v2 :: v_dual_max_num_f32 v7, v255, v3 ; encoding: [0x01,0xa1,0x2c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xa1,0x2c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v1, v2 :: v_dual_min_i32 v7, v255, v3 ; encoding: [0x01,0x81,0x2d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x81,0x2d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v1, v2 :: v_dual_min_num_f32 v7, v255, v3 ; encoding: [0x01,0xb1,0x2c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xb1,0x2c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v7, v255, v3 ; encoding: [0x01,0x71,0x2c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x71,0x2c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v1, v2 :: v_dual_mul_f32 v7, v255, v3 ; encoding: [0x01,0x31,0x2c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x31,0x2c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v1, v2 :: v_dual_sub_f32 v7, v255, v3 ; encoding: [0x01,0x51,0x2c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x01,0x51,0x2c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v1, v2 :: v_dual_sub_nc_u32 v7, v255, v3 ; encoding: [0x01,0x41,0x2d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x41,0x2d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v1, v2 :: v_dual_subrev_f32 v7, v255, v3 ; encoding: [0x01,0x61,0x2c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x61,0x2c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v1, v255 :: v_dual_mov_b32 v7, v255 ; encoding: [0x01,0x81,0x2c,0xcf,0xff,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0x01,0x81,0x2c,0xcf,0xff,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v2, v2 :: v_dual_add_f32 v7, v3, v3 ; encoding: [0x02,0x41,0x2c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x41,0x2c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v2, v2 :: v_dual_add_nc_u32 v7, v3, v3 ; encoding: [0x02,0x01,0x2d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x01,0x2d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v2, v2 :: v_dual_ashrrev_i32 v7, v3, v3 ; encoding: [0x02,0x61,0x2d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x61,0x2d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v2, v2 :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo ; encoding: [0x02,0x91,0x2c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0x02,0x91,0x2c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v2, v2 :: v_dual_fmac_f32 v7, v3, v3 ; encoding: [0x02,0x01,0x2c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x01,0x2c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v2, v2 :: v_dual_lshlrev_b32 v7, v3, v3 ; encoding: [0x02,0x11,0x2d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x11,0x2d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: 
v_dual_min_num_f32 v255, v2, v2 :: v_dual_lshrrev_b32 v7, v3, v3 ; encoding: [0x02,0x51,0x2d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x51,0x2d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v2, v2 :: v_dual_max_i32 v7, v3, v3 ; encoding: [0x02,0x71,0x2d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x71,0x2d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v2, v2 :: v_dual_max_num_f32 v7, v3, v3 ; encoding: [0x02,0xa1,0x2c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0xa1,0x2c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v2, v2 :: v_dual_min_i32 v7, v3, v3 ; encoding: [0x02,0x81,0x2d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x81,0x2d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v2, v2 :: v_dual_min_num_f32 v7, v3, v3 ; encoding: [0x02,0xb1,0x2c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0xb1,0x2c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v7, v3, v3 ; encoding: [0x02,0x71,0x2c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x71,0x2c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v2, v2 :: v_dual_mul_f32 v7, v3, v3 ; encoding: [0x02,0x31,0x2c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x31,0x2c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v2, v2 :: v_dual_sub_f32 v7, v3, v3 ; encoding: [0x02,0x51,0x2c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x51,0x2c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v2, v2 :: v_dual_sub_nc_u32 v7, v3, v3 ; encoding: [0x02,0x41,0x2d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x41,0x2d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v2, v2 :: v_dual_subrev_f32 v7, v3, v3 ; encoding: 
[0x02,0x61,0x2c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x61,0x2c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v2, v255 :: v_dual_mov_b32 v7, v3 ; encoding: [0x02,0x81,0x2c,0xcf,0x03,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0x02,0x81,0x2c,0xcf,0x03,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v255, v2 :: v_dual_add_f32 v7, v2, v3 ; encoding: [0xff,0x41,0x2c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x41,0x2c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v255, v2 :: v_dual_add_nc_u32 v7, v2, v3 ; encoding: [0xff,0x01,0x2d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x01,0x2d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v255, v2 :: v_dual_ashrrev_i32 v7, v2, v3 ; encoding: [0xff,0x61,0x2d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x61,0x2d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v255, v2 :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo ; encoding: [0xff,0x91,0x2c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0xff,0x91,0x2c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v255, v2 :: v_dual_fmac_f32 v7, v2, v3 ; encoding: [0xff,0x01,0x2c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x01,0x2c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v255, v2 :: v_dual_lshlrev_b32 v7, v2, v3 ; encoding: [0xff,0x11,0x2d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x11,0x2d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v255, v2 :: v_dual_lshrrev_b32 v7, v2, v3 ; encoding: [0xff,0x51,0x2d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x51,0x2d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v255, v2 :: v_dual_max_i32 v7, v2, v3 ; encoding: [0xff,0x71,0x2d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] 
+0xff,0x71,0x2d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v255, v2 :: v_dual_max_num_f32 v7, v2, v3 ; encoding: [0xff,0xa1,0x2c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0xa1,0x2c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v255, v2 :: v_dual_min_i32 v7, v2, v3 ; encoding: [0xff,0x81,0x2d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x81,0x2d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v255, v2 :: v_dual_min_num_f32 v7, v2, v3 ; encoding: [0xff,0xb1,0x2c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0xb1,0x2c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v7, v2, v3 ; encoding: [0xff,0x71,0x2c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x71,0x2c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v255, v2 :: v_dual_mul_f32 v7, v2, v3 ; encoding: [0xff,0x31,0x2c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x31,0x2c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v255, v2 :: v_dual_sub_f32 v7, v2, v3 ; encoding: [0xff,0x51,0x2c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x51,0x2c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v255, v2 :: v_dual_sub_nc_u32 v7, v2, v3 ; encoding: [0xff,0x41,0x2d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x41,0x2d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v255, v2 :: v_dual_subrev_f32 v7, v2, v3 ; encoding: [0xff,0x61,0x2c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x61,0x2c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v255, v255 :: v_dual_mov_b32 v7, v2 ; encoding: [0xff,0x81,0x2c,0xcf,0x02,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0xff,0x81,0x2c,0xcf,0x02,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# 
GFX1250: v_dual_min_num_f32 v255, v3, v2 :: v_dual_add_f32 v7, v4, v3 ; encoding: [0x03,0x41,0x2c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x41,0x2c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v3, v2 :: v_dual_add_nc_u32 v7, v4, v3 ; encoding: [0x03,0x01,0x2d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x01,0x2d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v3, v2 :: v_dual_ashrrev_i32 v7, v4, v3 ; encoding: [0x03,0x61,0x2d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x61,0x2d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v3, v2 :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo ; encoding: [0x03,0x91,0x2c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0x03,0x91,0x2c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v3, v2 :: v_dual_fmac_f32 v7, v4, v3 ; encoding: [0x03,0x01,0x2c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x01,0x2c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v3, v2 :: v_dual_lshlrev_b32 v7, v4, v3 ; encoding: [0x03,0x11,0x2d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x11,0x2d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v3, v2 :: v_dual_lshrrev_b32 v7, v4, v3 ; encoding: [0x03,0x51,0x2d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x51,0x2d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v3, v2 :: v_dual_max_i32 v7, v4, v3 ; encoding: [0x03,0x71,0x2d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x71,0x2d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v3, v2 :: v_dual_max_num_f32 v7, v4, v3 ; encoding: [0x03,0xa1,0x2c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0xa1,0x2c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v3, v2 :: v_dual_min_i32 v7, v4, v3 ; 
encoding: [0x03,0x81,0x2d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x81,0x2d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v3, v2 :: v_dual_min_num_f32 v7, v4, v3 ; encoding: [0x03,0xb1,0x2c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0xb1,0x2c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v7, v4, v3 ; encoding: [0x03,0x71,0x2c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x71,0x2c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v3, v2 :: v_dual_mul_f32 v7, v4, v3 ; encoding: [0x03,0x31,0x2c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x31,0x2c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v3, v2 :: v_dual_sub_f32 v7, v4, v3 ; encoding: [0x03,0x51,0x2c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x51,0x2c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v3, v2 :: v_dual_sub_nc_u32 v7, v4, v3 ; encoding: [0x03,0x41,0x2d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x41,0x2d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v3, v2 :: v_dual_subrev_f32 v7, v4, v3 ; encoding: [0x03,0x61,0x2c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x61,0x2c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v3, v255 :: v_dual_mov_b32 v7, v4 ; encoding: [0x03,0x81,0x2c,0xcf,0x04,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0x03,0x81,0x2c,0xcf,0x04,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_add_f32 v7, v1, v3 ; encoding: [0x04,0x41,0x2c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x41,0x2c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_add_nc_u32 v7, v1, v3 ; encoding: [0x04,0x01,0x2d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x04,0x01,0x2d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_ashrrev_i32 v7, v1, v3 ; encoding: [0x04,0x61,0x2d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x61,0x2d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0xff ; encoding: [0x04,0x21,0x2d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0xff,0x07] +0x04,0x21,0x2d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0xff,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, s96 ; encoding: [0x04,0x91,0x2c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x60,0x07] +0x04,0x91,0x2c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x60,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo ; encoding: [0x04,0x91,0x2c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0x04,0x91,0x2c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_fma_f32 v7, v1, v3, v4 ; encoding: [0x04,0x31,0x2d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x04,0x07] +0x04,0x31,0x2d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x04,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_fmac_f32 v7, v1, v3 ; encoding: [0x04,0x01,0x2c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x01,0x2c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_lshlrev_b32 v7, v1, v3 ; encoding: [0x04,0x11,0x2d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x11,0x2d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_lshrrev_b32 v7, v1, v3 ; encoding: [0x04,0x51,0x2d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x51,0x2d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_max_i32 v7, v1, v3 ; encoding: [0x04,0x71,0x2d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x04,0x71,0x2d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_max_num_f32 v7, v1, v3 ; encoding: [0x04,0xa1,0x2c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0xa1,0x2c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_min_i32 v7, v1, v3 ; encoding: [0x04,0x81,0x2d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x81,0x2d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_min_num_f32 v7, v1, v3 ; encoding: [0x04,0xb1,0x2c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0xb1,0x2c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v7, v1, v3 ; encoding: [0x04,0x71,0x2c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x71,0x2c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_mul_f32 v7, v1, v3 ; encoding: [0x04,0x31,0x2c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x31,0x2c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_sub_f32 v7, v1, v3 ; encoding: [0x04,0x51,0x2c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x51,0x2c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_sub_nc_u32 v7, v1, v3 ; encoding: [0x04,0x41,0x2d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x41,0x2d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v4, v2 :: v_dual_subrev_f32 v7, v1, v3 ; encoding: [0x04,0x61,0x2c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x61,0x2c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, v4, v255 :: v_dual_mov_b32 v7, v1 ; encoding: [0x04,0x81,0x2c,0xcf,0x01,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0x04,0x81,0x2c,0xcf,0x01,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: 
v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_add_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x40,0x2c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x40,0x2c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x2d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x00,0x2d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v7, exec_lo, v3 ; encoding: [0x6b,0x60,0x2d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x60,0x2d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v7, vcc_hi, v3, vcc_lo ; encoding: [0x6b,0x90,0x2c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x6b,0x90,0x2c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x2c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x00,0x2c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v7, exec_lo, v3 ; encoding: [0x6b,0x10,0x2d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x10,0x2d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v7, exec_lo, v3 ; encoding: [0x6b,0x50,0x2d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x50,0x2d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_max_i32 v7, exec_lo, v3 ; encoding: [0x6b,0x70,0x2d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x70,0x2d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xa0,0x2c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0xa0,0x2c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# 
GFX1250: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_min_i32 v7, exec_lo, v3 ; encoding: [0x6b,0x80,0x2d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x80,0x2d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xb0,0x2c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0xb0,0x2c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x70,0x2c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x70,0x2c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_mul_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x30,0x2c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x30,0x2c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_sub_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x50,0x2c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x50,0x2c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v7, exec_lo, v3 ; encoding: [0x6b,0x40,0x2d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x40,0x2d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x60,0x2c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x60,0x2c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, vcc_hi, v255 :: v_dual_mov_b32 v7, exec_lo ; encoding: [0x6b,0x80,0x2c,0xcf,0x7e,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x6b,0x80,0x2c,0xcf,0x7e,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_add_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x2c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x40,0x2c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: 
v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x2d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x00,0x2d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x2d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x60,0x2d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo ; encoding: [0x6a,0x90,0x2c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x6a,0x90,0x2c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x2c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x00,0x2c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x10,0x2d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x10,0x2d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x2d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x50,0x2d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_max_i32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x2d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x70,0x2d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xa0,0x2c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0xa0,0x2c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_min_i32 v7, exec_hi, v3 ; encoding: [0x6a,0x80,0x2d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x80,0x2d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# 
GFX1250: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xb0,0x2c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0xb0,0x2c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x2c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x70,0x2c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_mul_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x30,0x2c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x30,0x2c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_sub_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x2c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x50,0x2c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x2d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x40,0x2d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x2c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x60,0x2c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f32 v255, vcc_lo, v255 :: v_dual_mov_b32 v7, exec_hi ; encoding: [0x6a,0x80,0x2c,0xcf,0x7f,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x6a,0x80,0x2c,0xcf,0x7f,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[0:1], -s[8:9], v[4:5] :: v_dual_ashrrev_i32 v5, v6, v7 ; encoding: [0x08,0x60,0x91,0xcf,0x06,0x03,0x04,0x00,0x00,0x07,0x00,0x05] +0x08,0x60,0x91,0xcf,0x06,0x03,0x04,0x00,0x00,0x07,0x00,0x05 + +# GFX1250: v_dual_min_num_f64 v[0:1], -v[8:9], -v[4:5] :: v_dual_subrev_f32 v5, v6, v7 ; encoding: [0x08,0x61,0x90,0xcf,0x06,0x07,0x04,0x00,0x00,0x07,0x00,0x05] +0x08,0x61,0x90,0xcf,0x06,0x07,0x04,0x00,0x00,0x07,0x00,0x05 + +# 
GFX1250: v_dual_min_num_f64 v[252:253], v[6:7], v[4:5] :: v_dual_add_f32 v8, v1, v3 ; encoding: [0x06,0x41,0x90,0xcf,0x01,0x01,0x04,0x00,0xfc,0x03,0x00,0x08] +0x06,0x41,0x90,0xcf,0x01,0x01,0x04,0x00,0xfc,0x03,0x00,0x08 + +# GFX1250: v_dual_min_num_f64 v[254:255], -1, v[6:7] :: v_dual_add_f32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x90,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +0xc1,0x40,0x90,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], -1, v[6:7] :: v_dual_add_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x91,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +0xc1,0x00,0x91,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], -1, v[6:7] :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo ; encoding: [0xc1,0x90,0x90,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x6a,0x07] +0xc1,0x90,0x90,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x6a,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], -1, v[6:7] :: v_dual_fmac_f32 v9, src_scc, v5 ; encoding: [0xc1,0x00,0x90,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x09] +0xc1,0x00,0x90,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x09 + +# GFX1250: v_dual_min_num_f64 v[254:255], -1, v[6:7] :: v_dual_lshlrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x10,0x91,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +0xc1,0x10,0x91,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], -1, v[6:7] :: v_dual_max_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xa0,0x90,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +0xc1,0xa0,0x90,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], -1, v[6:7] :: v_dual_min_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xb0,0x90,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +0xc1,0xb0,0x90,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], -1, v[6:7] :: v_dual_mov_b32 v7, src_scc ; encoding: [0xc1,0x80,0x90,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x00,0x00,0x07] 
+0xc1,0x80,0x90,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], -1, v[6:7] :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x90,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +0xc1,0x70,0x90,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], -1, v[6:7] :: v_dual_mul_f32 v7, src_scc, v5 ; encoding: [0xc1,0x30,0x90,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +0xc1,0x30,0x90,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], -1, v[6:7] :: v_dual_sub_f32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x90,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +0xc1,0x50,0x90,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], -1, v[6:7] :: v_dual_subrev_f32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x90,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +0xc1,0x60,0x90,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_add_f32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x90,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +0xf0,0x40,0x90,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_add_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x91,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +0xf0,0x00,0x91,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo ; encoding: [0xf0,0x90,0x90,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x6a,0x07] +0xf0,0x90,0x90,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x6a,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_fmac_f32 v9, 0.5, v2 ; encoding: [0xf0,0x00,0x90,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x09] +0xf0,0x00,0x90,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x09 + +# GFX1250: v_dual_min_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_lshlrev_b32 v7, 0.5, v2 ; encoding: 
[0xf0,0x10,0x91,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +0xf0,0x10,0x91,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_max_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xa0,0x90,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +0xf0,0xa0,0x90,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_min_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xb0,0x90,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +0xf0,0xb0,0x90,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_mov_b32 v7, 0.5 ; encoding: [0xf0,0x80,0x90,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x00,0x00,0x07] +0xf0,0x80,0x90,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x90,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +0xf0,0x70,0x90,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_mul_f32 v7, 0.5, v2 ; encoding: [0xf0,0x30,0x90,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +0xf0,0x30,0x90,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_sub_f32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x90,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +0xf0,0x50,0x90,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], 0.5, v[8:9] :: v_dual_subrev_f32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x90,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +0xf0,0x60,0x90,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], exec, v[254:255] :: v_dual_mov_b32 v7, ttmp15 ; encoding: [0x7e,0x80,0x90,0xcf,0x7b,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07] +0x7e,0x80,0x90,0xcf,0x7b,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], exec, v[4:5] :: v_dual_add_f32 v7, 
v15, v3 ; encoding: [0x7e,0x40,0x90,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7e,0x40,0x90,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], exec, v[4:5] :: v_dual_add_nc_u32 v7, v15, v3 ; encoding: [0x7e,0x00,0x91,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7e,0x00,0x91,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], exec, v[4:5] :: v_dual_cndmask_b32 v7, v10, v3, vcc_lo ; encoding: [0x7e,0x90,0x90,0xcf,0x0a,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +0x7e,0x90,0x90,0xcf,0x0a,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], exec, v[4:5] :: v_dual_fmac_f32 v9, v15, v3 ; encoding: [0x7e,0x00,0x90,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +0x7e,0x00,0x90,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_min_num_f64 v[254:255], exec, v[4:5] :: v_dual_lshlrev_b32 v7, v15, v3 ; encoding: [0x7e,0x10,0x91,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7e,0x10,0x91,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], exec, v[4:5] :: v_dual_max_num_f32 v7, v15, v3 ; encoding: [0x7e,0xa0,0x90,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7e,0xa0,0x90,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], exec, v[4:5] :: v_dual_min_num_f32 v7, v15, v3 ; encoding: [0x7e,0xb0,0x90,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7e,0xb0,0x90,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], exec, v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v15, v3 ; encoding: [0x7e,0x70,0x90,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7e,0x70,0x90,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], exec, v[4:5] :: v_dual_mul_f32 v7, v15, v3 ; encoding: [0x7e,0x30,0x90,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7e,0x30,0x90,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 
v[254:255], exec, v[4:5] :: v_dual_sub_f32 v7, v15, v3 ; encoding: [0x7e,0x50,0x90,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7e,0x50,0x90,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], exec, v[4:5] :: v_dual_subrev_f32 v7, v15, v3 ; encoding: [0x7e,0x60,0x90,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7e,0x60,0x90,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], s[104:105], v[254:255] :: v_dual_mov_b32 v7, s1 ; encoding: [0x68,0x80,0x90,0xcf,0x01,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07] +0x68,0x80,0x90,0xcf,0x01,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_add_f32 v7, v1, v3 ; encoding: [0x68,0x40,0x90,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x68,0x40,0x90,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_add_nc_u32 v7, v1, v3 ; encoding: [0x68,0x00,0x91,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x68,0x00,0x91,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_fmac_f32 v9, v1, v3 ; encoding: [0x68,0x00,0x90,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +0x68,0x00,0x90,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_min_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_lshlrev_b32 v7, v1, v3 ; encoding: [0x68,0x10,0x91,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x68,0x10,0x91,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_max_num_f32 v7, v1, v3 ; encoding: [0x68,0xa0,0x90,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x68,0xa0,0x90,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_min_num_f32 v7, v1, v3 ; encoding: [0x68,0xb0,0x90,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] 
+0x68,0xb0,0x90,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v1, v3 ; encoding: [0x68,0x70,0x90,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x68,0x70,0x90,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_mul_f32 v7, v1, v3 ; encoding: [0x68,0x30,0x90,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x68,0x30,0x90,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_sub_f32 v7, v1, v3 ; encoding: [0x68,0x50,0x90,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x68,0x50,0x90,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], s[104:105], v[4:5] :: v_dual_subrev_f32 v7, v1, v3 ; encoding: [0x68,0x60,0x90,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x68,0x60,0x90,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], s[2:3], v[254:255] :: v_dual_mov_b32 v7, s105 ; encoding: [0x02,0x80,0x90,0xcf,0x69,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07] +0x02,0x80,0x90,0xcf,0x69,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], s[2:3], v[4:5] :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo ; encoding: [0x02,0x90,0x90,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +0x02,0x90,0x90,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], src_scc, v[254:255] :: v_dual_mov_b32 v7, -1 ; encoding: [0xfd,0x80,0x90,0xcf,0xc1,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07] +0xfd,0x80,0x90,0xcf,0xc1,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_add_f32 v7, -1, v3 ; encoding: [0xfd,0x40,0x90,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfd,0x40,0x90,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_add_nc_u32 v7, -1, v3 ; 
encoding: [0xfd,0x00,0x91,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfd,0x00,0x91,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo ; encoding: [0xfd,0x90,0x90,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x6a,0x07] +0xfd,0x90,0x90,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x6a,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_fmac_f32 v9, -1, v3 ; encoding: [0xfd,0x00,0x90,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x09] +0xfd,0x00,0x90,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_min_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_lshlrev_b32 v7, -1, v3 ; encoding: [0xfd,0x10,0x91,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfd,0x10,0x91,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_max_num_f32 v7, -1, v3 ; encoding: [0xfd,0xa0,0x90,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfd,0xa0,0x90,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_min_num_f32 v7, -1, v3 ; encoding: [0xfd,0xb0,0x90,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfd,0xb0,0x90,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_mul_dx9_zero_f32 v7, -1, v3 ; encoding: [0xfd,0x70,0x90,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfd,0x70,0x90,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_mul_f32 v7, -1, v3 ; encoding: [0xfd,0x30,0x90,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfd,0x30,0x90,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], src_scc, v[4:5] :: v_dual_sub_f32 v7, -1, v3 ; encoding: [0xfd,0x50,0x90,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfd,0x50,0x90,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 
v[254:255], src_scc, v[4:5] :: v_dual_subrev_f32 v7, -1, v3 ; encoding: [0xfd,0x60,0x90,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfd,0x60,0x90,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], ttmp[14:15], v[254:255] :: v_dual_mov_b32 v7, vcc_lo ; encoding: [0x7a,0x80,0x90,0xcf,0x6a,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07] +0x7a,0x80,0x90,0xcf,0x6a,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_add_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0x40,0x90,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7a,0x40,0x90,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_add_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7a,0x00,0x91,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7a,0x00,0x91,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_cndmask_b32 v7, v15, v3, vcc_lo ; encoding: [0x7a,0x90,0x90,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +0x7a,0x90,0x90,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_fmac_f32 v9, vcc_lo, v3 ; encoding: [0x7a,0x00,0x90,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x09] +0x7a,0x00,0x90,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_min_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_lshlrev_b32 v7, vcc_lo, v3 ; encoding: [0x7a,0x10,0x91,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7a,0x10,0x91,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_max_num_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0xa0,0x90,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7a,0xa0,0x90,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_min_num_f32 v7, vcc_lo, v3 ; encoding: 
[0x7a,0xb0,0x90,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7a,0xb0,0x90,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0x70,0x90,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7a,0x70,0x90,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_mul_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0x30,0x90,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7a,0x30,0x90,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_sub_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0x50,0x90,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7a,0x50,0x90,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_subrev_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0x60,0x90,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7a,0x60,0x90,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[254:255], v[254:255] :: v_dual_mov_b32 v7, v4 ; encoding: [0xfe,0x81,0x90,0xcf,0x04,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07] +0xfe,0x81,0x90,0xcf,0x04,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_add_f32 v7, v5, v3 ; encoding: [0xfe,0x41,0x90,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfe,0x41,0x90,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_add_nc_u32 v7, v5, v3 ; encoding: [0xfe,0x01,0x91,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfe,0x01,0x91,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_cndmask_b32 v7, v5, v3, vcc_lo ; encoding: [0xfe,0x91,0x90,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +0xfe,0x91,0x90,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07 + +# 
GFX1250: v_dual_min_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_fmac_f32 v9, v5, v3 ; encoding: [0xfe,0x01,0x90,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +0xfe,0x01,0x90,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_lshlrev_b32 v7, v5, v3 ; encoding: [0xfe,0x11,0x91,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfe,0x11,0x91,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_max_num_f32 v7, v5, v3 ; encoding: [0xfe,0xa1,0x90,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfe,0xa1,0x90,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_min_num_f32 v7, v5, v3 ; encoding: [0xfe,0xb1,0x90,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfe,0xb1,0x90,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v5, v3 ; encoding: [0xfe,0x71,0x90,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfe,0x71,0x90,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_mul_f32 v7, v5, v3 ; encoding: [0xfe,0x31,0x90,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfe,0x31,0x90,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_sub_f32 v7, v5, v3 ; encoding: [0xfe,0x51,0x90,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfe,0x51,0x90,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[254:255], v[4:5] :: v_dual_subrev_f32 v7, v5, v3 ; encoding: [0xfe,0x61,0x90,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfe,0x61,0x90,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[2:3], v[254:255] :: v_dual_mov_b32 v7, v253 ; encoding: 
[0x02,0x81,0x90,0xcf,0xfd,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07] +0x02,0x81,0x90,0xcf,0xfd,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_add_f32 v7, v253, v3 ; encoding: [0x02,0x41,0x90,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x02,0x41,0x90,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_add_nc_u32 v7, v253, v3 ; encoding: [0x02,0x01,0x91,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x02,0x01,0x91,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_cndmask_b32 v7, v253, v3, vcc_lo ; encoding: [0x02,0x91,0x90,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +0x02,0x91,0x90,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_fmac_f32 v9, v253, v3 ; encoding: [0x02,0x01,0x90,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +0x02,0x01,0x90,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_lshlrev_b32 v7, v253, v3 ; encoding: [0x02,0x11,0x91,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x02,0x11,0x91,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_max_num_f32 v7, v253, v3 ; encoding: [0x02,0xa1,0x90,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x02,0xa1,0x90,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_min_num_f32 v7, v253, v3 ; encoding: [0x02,0xb1,0x90,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x02,0xb1,0x90,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v253, v3 ; encoding: [0x02,0x71,0x90,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x02,0x71,0x90,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: 
v_dual_min_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_mul_f32 v7, v253, v3 ; encoding: [0x02,0x31,0x90,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x02,0x31,0x90,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_sub_f32 v7, v253, v3 ; encoding: [0x02,0x51,0x90,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x02,0x51,0x90,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[2:3], v[4:5] :: v_dual_subrev_f32 v7, v253, v3 ; encoding: [0x02,0x61,0x90,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x02,0x61,0x90,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[4:5], v[254:255] :: v_dual_mov_b32 v7, v3 ; encoding: [0x04,0x81,0x90,0xcf,0x03,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07] +0x04,0x81,0x90,0xcf,0x03,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_add_f32 v7, v3, v3 ; encoding: [0x04,0x41,0x90,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x41,0x90,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_add_nc_u32 v7, v2, v3 ; encoding: [0x04,0x01,0x91,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x01,0x91,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_add_nc_u32 v7, v3, v3 ; encoding: [0x04,0x01,0x91,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x01,0x91,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_cndmask_b32 v7, v2, v3, s96 ; encoding: [0x04,0x91,0x90,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x60,0x07] +0x04,0x91,0x90,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x60,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo ; encoding: [0x04,0x91,0x90,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] 
+0x04,0x91,0x90,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo ; encoding: [0x04,0x91,0x90,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +0x04,0x91,0x90,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_fmac_f32 v9, v2, v3 ; encoding: [0x04,0x01,0x90,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +0x04,0x01,0x90,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_fmac_f32 v9, v3, v3 ; encoding: [0x04,0x01,0x90,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +0x04,0x01,0x90,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_lshlrev_b32 v7, v2, v3 ; encoding: [0x04,0x11,0x91,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x11,0x91,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_lshlrev_b32 v7, v3, v3 ; encoding: [0x04,0x11,0x91,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x11,0x91,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_max_num_f32 v7, v2, v3 ; encoding: [0x04,0xa1,0x90,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0xa1,0x90,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_max_num_f32 v7, v3, v3 ; encoding: [0x04,0xa1,0x90,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0xa1,0x90,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_min_num_f32 v7, v2, v3 ; encoding: [0x04,0xb1,0x90,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0xb1,0x90,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_min_num_f32 v7, v3, v3 ; encoding: 
[0x04,0xb1,0x90,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0xb1,0x90,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v2, v3 ; encoding: [0x04,0x71,0x90,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x71,0x90,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v3, v3 ; encoding: [0x04,0x71,0x90,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x71,0x90,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_mul_f32 v7, v2, v3 ; encoding: [0x04,0x31,0x90,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x31,0x90,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_mul_f32 v7, v3, v3 ; encoding: [0x04,0x31,0x90,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x31,0x90,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_sub_f32 v7, v2, v3 ; encoding: [0x04,0x51,0x90,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x51,0x90,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_sub_f32 v7, v3, v3 ; encoding: [0x04,0x51,0x90,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x51,0x90,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_subrev_f32 v7, v2, v3 ; encoding: [0x04,0x61,0x90,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x61,0x90,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[4:5], v[4:5] :: v_dual_subrev_f32 v7, v3, v3 ; encoding: [0x04,0x61,0x90,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x61,0x90,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[6:7], 
v[254:255] :: v_dual_mov_b32 v7, v4 ; encoding: [0x06,0x81,0x90,0xcf,0x04,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07] +0x06,0x81,0x90,0xcf,0x04,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_add_f32 v7, v4, v3 ; encoding: [0x06,0x41,0x90,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x06,0x41,0x90,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_add_nc_u32 v7, v4, v3 ; encoding: [0x06,0x01,0x91,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x06,0x01,0x91,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo ; encoding: [0x06,0x91,0x90,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +0x06,0x91,0x90,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_fmac_f32 v9, v4, v3 ; encoding: [0x06,0x01,0x90,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +0x06,0x01,0x90,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_lshlrev_b32 v7, v4, v3 ; encoding: [0x06,0x11,0x91,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x06,0x11,0x91,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_max_num_f32 v7, v4, v3 ; encoding: [0x06,0xa1,0x90,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x06,0xa1,0x90,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_min_num_f32 v7, v4, v3 ; encoding: [0x06,0xb1,0x90,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x06,0xb1,0x90,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v4, v3 ; encoding: [0x06,0x71,0x90,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x06,0x71,0x90,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 
+ +# GFX1250: v_dual_min_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_mul_f32 v7, v4, v3 ; encoding: [0x06,0x31,0x90,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x06,0x31,0x90,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_sub_f32 v7, v4, v3 ; encoding: [0x06,0x51,0x90,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x06,0x51,0x90,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], v[6:7], v[4:5] :: v_dual_subrev_f32 v7, v4, v3 ; encoding: [0x06,0x61,0x90,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x06,0x61,0x90,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], vcc, v[254:255] :: v_dual_mov_b32 v7, exec_hi ; encoding: [0x6a,0x80,0x90,0xcf,0x7f,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07] +0x6a,0x80,0x90,0xcf,0x7f,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], vcc, v[4:5] :: v_dual_add_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x90,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x6a,0x40,0x90,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], vcc, v[4:5] :: v_dual_add_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x91,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x6a,0x00,0x91,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], vcc, v[4:5] :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo ; encoding: [0x6a,0x90,0x90,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x6a,0x07] +0x6a,0x90,0x90,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x6a,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], vcc, v[4:5] :: v_dual_fmac_f32 v9, exec_hi, v3 ; encoding: [0x6a,0x00,0x90,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x09] +0x6a,0x00,0x90,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_min_num_f64 v[254:255], vcc, v[4:5] :: v_dual_lshlrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x10,0x91,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] 
+0x6a,0x10,0x91,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], vcc, v[4:5] :: v_dual_max_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xa0,0x90,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x6a,0xa0,0x90,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], vcc, v[4:5] :: v_dual_min_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xb0,0x90,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x6a,0xb0,0x90,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], vcc, v[4:5] :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x90,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x6a,0x70,0x90,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], vcc, v[4:5] :: v_dual_mul_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x30,0x90,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x6a,0x30,0x90,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], vcc, v[4:5] :: v_dual_sub_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x90,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x6a,0x50,0x90,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[254:255], vcc, v[4:5] :: v_dual_subrev_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x90,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x6a,0x60,0x90,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_min_num_f64 v[2:3], v[6:7], v[4:5] :: v_dual_ashrrev_i32 v9, v1, v14 ; encoding: [0x06,0x61,0x91,0xcf,0x01,0x01,0x04,0x00,0x02,0x0e,0x00,0x09] +0x06,0x61,0x91,0xcf,0x01,0x01,0x04,0x00,0x02,0x0e,0x00,0x09 + +# GFX1250: v_dual_min_num_f64 v[2:3], v[6:7], v[4:5] :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0x95 ; encoding: [0x06,0x21,0x91,0xcf,0x01,0x01,0x04,0x00,0x02,0x03,0x95,0x07] +0x06,0x21,0x91,0xcf,0x01,0x01,0x04,0x00,0x02,0x03,0x95,0x07 + +# GFX1250: v_dual_min_num_f64 v[2:3], v[6:7], v[4:5] :: v_dual_fma_f32 v9, v1, v14, v4 ; encoding: 
[0x06,0x31,0x91,0xcf,0x01,0x01,0x04,0x00,0x02,0x0e,0x04,0x09] +0x06,0x31,0x91,0xcf,0x01,0x01,0x04,0x00,0x02,0x0e,0x04,0x09 + +# GFX1250: v_dual_min_num_f64 v[2:3], v[6:7], v[4:5] :: v_dual_lshrrev_b32 v9, v1, v14 ; encoding: [0x06,0x51,0x91,0xcf,0x01,0x01,0x04,0x00,0x02,0x0e,0x00,0x09] +0x06,0x51,0x91,0xcf,0x01,0x01,0x04,0x00,0x02,0x0e,0x00,0x09 + +# GFX1250: v_dual_min_num_f64 v[2:3], v[6:7], v[4:5] :: v_dual_sub_nc_u32 v9, v1, v14 ; encoding: [0x06,0x41,0x91,0xcf,0x01,0x01,0x04,0x00,0x02,0x0e,0x00,0x09] +0x06,0x41,0x91,0xcf,0x01,0x01,0x04,0x00,0x02,0x0e,0x00,0x09 + +# GFX1250: v_dual_mov_b32 v0, v1 :: v_dual_max_num_f32 v5, -s6, -v7 ; encoding: [0x01,0xa1,0x20,0xcf,0x06,0x30,0x00,0x00,0x00,0x07,0x00,0x05] +0x01,0xa1,0x20,0xcf,0x06,0x30,0x00,0x00,0x00,0x07,0x00,0x05 + +# GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_add_f32 v7, src_scc, v4 ; encoding: [0xc1,0x40,0x20,0xcf,0xfd,0x00,0x00,0x00,0xff,0x04,0x00,0x07] +0xc1,0x40,0x20,0xcf,0xfd,0x00,0x00,0x00,0xff,0x04,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_add_nc_u32 v7, src_scc, v4 ; encoding: [0xc1,0x00,0x21,0xcf,0xfd,0x00,0x00,0x00,0xff,0x04,0x00,0x07] +0xc1,0x00,0x21,0xcf,0xfd,0x00,0x00,0x00,0xff,0x04,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_ashrrev_i32 v7, src_scc, v4 ; encoding: [0xc1,0x60,0x21,0xcf,0xfd,0x00,0x00,0x00,0xff,0x04,0x00,0x07] +0xc1,0x60,0x21,0xcf,0xfd,0x00,0x00,0x00,0xff,0x04,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_cndmask_b32 v7, src_scc, v4, vcc_lo ; encoding: [0xc1,0x90,0x20,0xcf,0xfd,0x00,0x00,0x00,0xff,0x04,0x6a,0x07] +0xc1,0x90,0x20,0xcf,0xfd,0x00,0x00,0x00,0xff,0x04,0x6a,0x07 + +# GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_fmac_f32 v7, src_scc, v4 ; encoding: [0xc1,0x00,0x20,0xcf,0xfd,0x00,0x00,0x00,0xff,0x04,0x00,0x07] +0xc1,0x00,0x20,0xcf,0xfd,0x00,0x00,0x00,0xff,0x04,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_lshlrev_b32 v7, src_scc, v4 ; encoding: [0xc1,0x10,0x21,0xcf,0xfd,0x00,0x00,0x00,0xff,0x04,0x00,0x07] 
+0xc1,0x10,0x21,0xcf,0xfd,0x00,0x00,0x00,0xff,0x04,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_lshrrev_b32 v7, src_scc, v4 ; encoding: [0xc1,0x50,0x21,0xcf,0xfd,0x00,0x00,0x00,0xff,0x04,0x00,0x07] +0xc1,0x50,0x21,0xcf,0xfd,0x00,0x00,0x00,0xff,0x04,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_max_i32 v7, src_scc, v4 ; encoding: [0xc1,0x70,0x21,0xcf,0xfd,0x00,0x00,0x00,0xff,0x04,0x00,0x07] +0xc1,0x70,0x21,0xcf,0xfd,0x00,0x00,0x00,0xff,0x04,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_max_num_f32 v7, src_scc, v4 ; encoding: [0xc1,0xa0,0x20,0xcf,0xfd,0x00,0x00,0x00,0xff,0x04,0x00,0x07] +0xc1,0xa0,0x20,0xcf,0xfd,0x00,0x00,0x00,0xff,0x04,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_min_i32 v7, src_scc, v4 ; encoding: [0xc1,0x80,0x21,0xcf,0xfd,0x00,0x00,0x00,0xff,0x04,0x00,0x07] +0xc1,0x80,0x21,0xcf,0xfd,0x00,0x00,0x00,0xff,0x04,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_min_num_f32 v7, src_scc, v4 ; encoding: [0xc1,0xb0,0x20,0xcf,0xfd,0x00,0x00,0x00,0xff,0x04,0x00,0x07] +0xc1,0xb0,0x20,0xcf,0xfd,0x00,0x00,0x00,0xff,0x04,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_mov_b32 v7, src_scc ; encoding: [0xc1,0x80,0x20,0xcf,0xfd,0x00,0x00,0x00,0xff,0x00,0x00,0x07] +0xc1,0x80,0x20,0xcf,0xfd,0x00,0x00,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_mul_dx9_zero_f32 v7, src_scc, v4 ; encoding: [0xc1,0x70,0x20,0xcf,0xfd,0x00,0x00,0x00,0xff,0x04,0x00,0x07] +0xc1,0x70,0x20,0xcf,0xfd,0x00,0x00,0x00,0xff,0x04,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_mul_f32 v7, src_scc, v4 ; encoding: [0xc1,0x30,0x20,0xcf,0xfd,0x00,0x00,0x00,0xff,0x04,0x00,0x07] +0xc1,0x30,0x20,0xcf,0xfd,0x00,0x00,0x00,0xff,0x04,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_sub_f32 v7, src_scc, v4 ; encoding: [0xc1,0x50,0x20,0xcf,0xfd,0x00,0x00,0x00,0xff,0x04,0x00,0x07] +0xc1,0x50,0x20,0xcf,0xfd,0x00,0x00,0x00,0xff,0x04,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_sub_nc_u32 
v7, src_scc, v4 ; encoding: [0xc1,0x40,0x21,0xcf,0xfd,0x00,0x00,0x00,0xff,0x04,0x00,0x07] +0xc1,0x40,0x21,0xcf,0xfd,0x00,0x00,0x00,0xff,0x04,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, -1 :: v_dual_subrev_f32 v7, src_scc, v4 ; encoding: [0xc1,0x60,0x20,0xcf,0xfd,0x00,0x00,0x00,0xff,0x04,0x00,0x07] +0xc1,0x60,0x20,0xcf,0xfd,0x00,0x00,0x00,0xff,0x04,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_add_f32 v7, 0.5, v3 ; encoding: [0xf0,0x40,0x20,0xcf,0xf0,0x00,0x00,0x00,0xff,0x03,0x00,0x07] +0xf0,0x40,0x20,0xcf,0xf0,0x00,0x00,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_add_nc_u32 v7, 0.5, v3 ; encoding: [0xf0,0x00,0x21,0xcf,0xf0,0x00,0x00,0x00,0xff,0x03,0x00,0x07] +0xf0,0x00,0x21,0xcf,0xf0,0x00,0x00,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_ashrrev_i32 v7, 0.5, v3 ; encoding: [0xf0,0x60,0x21,0xcf,0xf0,0x00,0x00,0x00,0xff,0x03,0x00,0x07] +0xf0,0x60,0x21,0xcf,0xf0,0x00,0x00,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_cndmask_b32 v7, 0.5, v3, vcc_lo ; encoding: [0xf0,0x90,0x20,0xcf,0xf0,0x00,0x00,0x00,0xff,0x03,0x6a,0x07] +0xf0,0x90,0x20,0xcf,0xf0,0x00,0x00,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_fmac_f32 v7, 0.5, v3 ; encoding: [0xf0,0x00,0x20,0xcf,0xf0,0x00,0x00,0x00,0xff,0x03,0x00,0x07] +0xf0,0x00,0x20,0xcf,0xf0,0x00,0x00,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_lshlrev_b32 v7, 0.5, v3 ; encoding: [0xf0,0x10,0x21,0xcf,0xf0,0x00,0x00,0x00,0xff,0x03,0x00,0x07] +0xf0,0x10,0x21,0xcf,0xf0,0x00,0x00,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_lshrrev_b32 v7, 0.5, v3 ; encoding: [0xf0,0x50,0x21,0xcf,0xf0,0x00,0x00,0x00,0xff,0x03,0x00,0x07] +0xf0,0x50,0x21,0xcf,0xf0,0x00,0x00,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_max_i32 v7, 0.5, v3 ; encoding: [0xf0,0x70,0x21,0xcf,0xf0,0x00,0x00,0x00,0xff,0x03,0x00,0x07] 
+0xf0,0x70,0x21,0xcf,0xf0,0x00,0x00,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_max_num_f32 v7, 0.5, v3 ; encoding: [0xf0,0xa0,0x20,0xcf,0xf0,0x00,0x00,0x00,0xff,0x03,0x00,0x07] +0xf0,0xa0,0x20,0xcf,0xf0,0x00,0x00,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_min_i32 v7, 0.5, v3 ; encoding: [0xf0,0x80,0x21,0xcf,0xf0,0x00,0x00,0x00,0xff,0x03,0x00,0x07] +0xf0,0x80,0x21,0xcf,0xf0,0x00,0x00,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_min_num_f32 v7, 0.5, v3 ; encoding: [0xf0,0xb0,0x20,0xcf,0xf0,0x00,0x00,0x00,0xff,0x03,0x00,0x07] +0xf0,0xb0,0x20,0xcf,0xf0,0x00,0x00,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_mov_b32 v7, 0.5 ; encoding: [0xf0,0x80,0x20,0xcf,0xf0,0x00,0x00,0x00,0xff,0x00,0x00,0x07] +0xf0,0x80,0x20,0xcf,0xf0,0x00,0x00,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_mul_dx9_zero_f32 v7, 0.5, v3 ; encoding: [0xf0,0x70,0x20,0xcf,0xf0,0x00,0x00,0x00,0xff,0x03,0x00,0x07] +0xf0,0x70,0x20,0xcf,0xf0,0x00,0x00,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_mul_f32 v7, 0.5, v3 ; encoding: [0xf0,0x30,0x20,0xcf,0xf0,0x00,0x00,0x00,0xff,0x03,0x00,0x07] +0xf0,0x30,0x20,0xcf,0xf0,0x00,0x00,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_sub_f32 v7, 0.5, v3 ; encoding: [0xf0,0x50,0x20,0xcf,0xf0,0x00,0x00,0x00,0xff,0x03,0x00,0x07] +0xf0,0x50,0x20,0xcf,0xf0,0x00,0x00,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_sub_nc_u32 v7, 0.5, v3 ; encoding: [0xf0,0x40,0x21,0xcf,0xf0,0x00,0x00,0x00,0xff,0x03,0x00,0x07] +0xf0,0x40,0x21,0xcf,0xf0,0x00,0x00,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, 0.5 :: v_dual_subrev_f32 v7, 0.5, v3 ; encoding: [0xf0,0x60,0x20,0xcf,0xf0,0x00,0x00,0x00,0xff,0x03,0x00,0x07] +0xf0,0x60,0x20,0xcf,0xf0,0x00,0x00,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_add_f32 v7, vcc_hi, v255 ; 
encoding: [0x7f,0x40,0x20,0xcf,0x6b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7f,0x40,0x20,0xcf,0x6b,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_add_nc_u32 v7, vcc_hi, v255 ; encoding: [0x7f,0x00,0x21,0xcf,0x6b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7f,0x00,0x21,0xcf,0x6b,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_ashrrev_i32 v7, vcc_hi, v255 ; encoding: [0x7f,0x60,0x21,0xcf,0x6b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7f,0x60,0x21,0xcf,0x6b,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_cndmask_b32 v7, exec_hi, v255, vcc_lo ; encoding: [0x7f,0x90,0x20,0xcf,0x7f,0x00,0x00,0x00,0xff,0xff,0x6a,0x07] +0x7f,0x90,0x20,0xcf,0x7f,0x00,0x00,0x00,0xff,0xff,0x6a,0x07 + +# GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_fmac_f32 v7, vcc_hi, v255 ; encoding: [0x7f,0x00,0x20,0xcf,0x6b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7f,0x00,0x20,0xcf,0x6b,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_lshlrev_b32 v7, vcc_hi, v255 ; encoding: [0x7f,0x10,0x21,0xcf,0x6b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7f,0x10,0x21,0xcf,0x6b,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_lshrrev_b32 v7, vcc_hi, v255 ; encoding: [0x7f,0x50,0x21,0xcf,0x6b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7f,0x50,0x21,0xcf,0x6b,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_max_i32 v7, vcc_hi, v255 ; encoding: [0x7f,0x70,0x21,0xcf,0x6b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7f,0x70,0x21,0xcf,0x6b,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_max_num_f32 v7, vcc_hi, v255 ; encoding: [0x7f,0xa0,0x20,0xcf,0x6b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7f,0xa0,0x20,0xcf,0x6b,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_min_i32 v7, vcc_hi, v255 ; encoding: 
[0x7f,0x80,0x21,0xcf,0x6b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7f,0x80,0x21,0xcf,0x6b,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_min_num_f32 v7, vcc_hi, v255 ; encoding: [0x7f,0xb0,0x20,0xcf,0x6b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7f,0xb0,0x20,0xcf,0x6b,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_mov_b32 v7, vcc_hi ; encoding: [0x7f,0x80,0x20,0xcf,0x6b,0x00,0x00,0x00,0xff,0x00,0x00,0x07] +0x7f,0x80,0x20,0xcf,0x6b,0x00,0x00,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_mul_dx9_zero_f32 v7, vcc_hi, v255 ; encoding: [0x7f,0x70,0x20,0xcf,0x6b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7f,0x70,0x20,0xcf,0x6b,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_mul_f32 v7, vcc_hi, v255 ; encoding: [0x7f,0x30,0x20,0xcf,0x6b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7f,0x30,0x20,0xcf,0x6b,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_sub_f32 v7, vcc_hi, v255 ; encoding: [0x7f,0x50,0x20,0xcf,0x6b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7f,0x50,0x20,0xcf,0x6b,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_sub_nc_u32 v7, vcc_hi, v255 ; encoding: [0x7f,0x40,0x21,0xcf,0x6b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7f,0x40,0x21,0xcf,0x6b,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, exec_hi :: v_dual_subrev_f32 v7, vcc_hi, v255 ; encoding: [0x7f,0x60,0x20,0xcf,0x6b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7f,0x60,0x20,0xcf,0x6b,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_add_f32 v7, ttmp15, v255 ; encoding: [0x7e,0x40,0x20,0xcf,0x7b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7e,0x40,0x20,0xcf,0x7b,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_add_nc_u32 v7, ttmp15, v255 ; encoding: [0x7e,0x00,0x21,0xcf,0x7b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] 
+0x7e,0x00,0x21,0xcf,0x7b,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_ashrrev_i32 v7, ttmp15, v255 ; encoding: [0x7e,0x60,0x21,0xcf,0x7b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7e,0x60,0x21,0xcf,0x7b,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_cndmask_b32 v7, exec_lo, v255, vcc_lo ; encoding: [0x7e,0x90,0x20,0xcf,0x7e,0x00,0x00,0x00,0xff,0xff,0x6a,0x07] +0x7e,0x90,0x20,0xcf,0x7e,0x00,0x00,0x00,0xff,0xff,0x6a,0x07 + +# GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_fmac_f32 v7, ttmp15, v255 ; encoding: [0x7e,0x00,0x20,0xcf,0x7b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7e,0x00,0x20,0xcf,0x7b,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_lshlrev_b32 v7, ttmp15, v255 ; encoding: [0x7e,0x10,0x21,0xcf,0x7b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7e,0x10,0x21,0xcf,0x7b,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_lshrrev_b32 v7, ttmp15, v255 ; encoding: [0x7e,0x50,0x21,0xcf,0x7b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7e,0x50,0x21,0xcf,0x7b,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_max_i32 v7, ttmp15, v255 ; encoding: [0x7e,0x70,0x21,0xcf,0x7b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7e,0x70,0x21,0xcf,0x7b,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_max_num_f32 v7, ttmp15, v255 ; encoding: [0x7e,0xa0,0x20,0xcf,0x7b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7e,0xa0,0x20,0xcf,0x7b,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_min_i32 v7, ttmp15, v255 ; encoding: [0x7e,0x80,0x21,0xcf,0x7b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7e,0x80,0x21,0xcf,0x7b,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_min_num_f32 v7, ttmp15, v255 ; encoding: [0x7e,0xb0,0x20,0xcf,0x7b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] 
+0x7e,0xb0,0x20,0xcf,0x7b,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_mov_b32 v7, ttmp15 ; encoding: [0x7e,0x80,0x20,0xcf,0x7b,0x00,0x00,0x00,0xff,0x00,0x00,0x07] +0x7e,0x80,0x20,0xcf,0x7b,0x00,0x00,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_mul_dx9_zero_f32 v7, ttmp15, v255 ; encoding: [0x7e,0x70,0x20,0xcf,0x7b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7e,0x70,0x20,0xcf,0x7b,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_mul_f32 v7, ttmp15, v255 ; encoding: [0x7e,0x30,0x20,0xcf,0x7b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7e,0x30,0x20,0xcf,0x7b,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_sub_f32 v7, ttmp15, v255 ; encoding: [0x7e,0x50,0x20,0xcf,0x7b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7e,0x50,0x20,0xcf,0x7b,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_sub_nc_u32 v7, ttmp15, v255 ; encoding: [0x7e,0x40,0x21,0xcf,0x7b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7e,0x40,0x21,0xcf,0x7b,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, exec_lo :: v_dual_subrev_f32 v7, ttmp15, v255 ; encoding: [0x7e,0x60,0x20,0xcf,0x7b,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7e,0x60,0x20,0xcf,0x7b,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_add_f32 v7, m0, v255 ; encoding: [0x7d,0x40,0x20,0xcf,0x7d,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7d,0x40,0x20,0xcf,0x7d,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_add_nc_u32 v7, m0, v255 ; encoding: [0x7d,0x00,0x21,0xcf,0x7d,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7d,0x00,0x21,0xcf,0x7d,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_ashrrev_i32 v7, m0, v255 ; encoding: [0x7d,0x60,0x21,0xcf,0x7d,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7d,0x60,0x21,0xcf,0x7d,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 
v255, m0 :: v_dual_cndmask_b32 v7, m0, v255, vcc_lo ; encoding: [0x7d,0x90,0x20,0xcf,0x7d,0x00,0x00,0x00,0xff,0xff,0x6a,0x07] +0x7d,0x90,0x20,0xcf,0x7d,0x00,0x00,0x00,0xff,0xff,0x6a,0x07 + +# GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_fmac_f32 v7, m0, v255 ; encoding: [0x7d,0x00,0x20,0xcf,0x7d,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7d,0x00,0x20,0xcf,0x7d,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_lshlrev_b32 v7, m0, v255 ; encoding: [0x7d,0x10,0x21,0xcf,0x7d,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7d,0x10,0x21,0xcf,0x7d,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_lshrrev_b32 v7, m0, v255 ; encoding: [0x7d,0x50,0x21,0xcf,0x7d,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7d,0x50,0x21,0xcf,0x7d,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_max_i32 v7, m0, v255 ; encoding: [0x7d,0x70,0x21,0xcf,0x7d,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7d,0x70,0x21,0xcf,0x7d,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_max_num_f32 v7, m0, v255 ; encoding: [0x7d,0xa0,0x20,0xcf,0x7d,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7d,0xa0,0x20,0xcf,0x7d,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_min_i32 v7, m0, v255 ; encoding: [0x7d,0x80,0x21,0xcf,0x7d,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7d,0x80,0x21,0xcf,0x7d,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_min_num_f32 v7, m0, v255 ; encoding: [0x7d,0xb0,0x20,0xcf,0x7d,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7d,0xb0,0x20,0xcf,0x7d,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_mov_b32 v7, m0 ; encoding: [0x7d,0x80,0x20,0xcf,0x7d,0x00,0x00,0x00,0xff,0x00,0x00,0x07] +0x7d,0x80,0x20,0xcf,0x7d,0x00,0x00,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_mul_dx9_zero_f32 v7, m0, v255 ; encoding: [0x7d,0x70,0x20,0xcf,0x7d,0x00,0x00,0x00,0xff,0xff,0x00,0x07] 
+0x7d,0x70,0x20,0xcf,0x7d,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_mul_f32 v7, m0, v255 ; encoding: [0x7d,0x30,0x20,0xcf,0x7d,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7d,0x30,0x20,0xcf,0x7d,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_sub_f32 v7, m0, v255 ; encoding: [0x7d,0x50,0x20,0xcf,0x7d,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7d,0x50,0x20,0xcf,0x7d,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_sub_nc_u32 v7, m0, v255 ; encoding: [0x7d,0x40,0x21,0xcf,0x7d,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7d,0x40,0x21,0xcf,0x7d,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, m0 :: v_dual_subrev_f32 v7, m0, v255 ; encoding: [0x7d,0x60,0x20,0xcf,0x7d,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7d,0x60,0x20,0xcf,0x7d,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_add_f32 v7, s105, v255 ; encoding: [0x01,0x40,0x20,0xcf,0x69,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x01,0x40,0x20,0xcf,0x69,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_add_nc_u32 v7, s105, v255 ; encoding: [0x01,0x00,0x21,0xcf,0x69,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x01,0x00,0x21,0xcf,0x69,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_ashrrev_i32 v7, s105, v255 ; encoding: [0x01,0x60,0x21,0xcf,0x69,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x01,0x60,0x21,0xcf,0x69,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_cndmask_b32 v7, s1, v255, vcc_lo ; encoding: [0x01,0x90,0x20,0xcf,0x01,0x00,0x00,0x00,0xff,0xff,0x6a,0x07] +0x01,0x90,0x20,0xcf,0x01,0x00,0x00,0x00,0xff,0xff,0x6a,0x07 + +# GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_fmac_f32 v7, s105, v255 ; encoding: [0x01,0x00,0x20,0xcf,0x69,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x01,0x00,0x20,0xcf,0x69,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_lshlrev_b32 v7, 
s105, v255 ; encoding: [0x01,0x10,0x21,0xcf,0x69,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x01,0x10,0x21,0xcf,0x69,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_lshrrev_b32 v7, s105, v255 ; encoding: [0x01,0x50,0x21,0xcf,0x69,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x01,0x50,0x21,0xcf,0x69,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_max_i32 v7, s105, v255 ; encoding: [0x01,0x70,0x21,0xcf,0x69,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x01,0x70,0x21,0xcf,0x69,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_max_num_f32 v7, s105, v255 ; encoding: [0x01,0xa0,0x20,0xcf,0x69,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x01,0xa0,0x20,0xcf,0x69,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_min_i32 v7, s105, v255 ; encoding: [0x01,0x80,0x21,0xcf,0x69,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x01,0x80,0x21,0xcf,0x69,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_min_num_f32 v7, s105, v255 ; encoding: [0x01,0xb0,0x20,0xcf,0x69,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x01,0xb0,0x20,0xcf,0x69,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_mov_b32 v7, s105 ; encoding: [0x01,0x80,0x20,0xcf,0x69,0x00,0x00,0x00,0xff,0x00,0x00,0x07] +0x01,0x80,0x20,0xcf,0x69,0x00,0x00,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_mul_dx9_zero_f32 v7, s105, v255 ; encoding: [0x01,0x70,0x20,0xcf,0x69,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x01,0x70,0x20,0xcf,0x69,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_mul_f32 v7, s105, v255 ; encoding: [0x01,0x30,0x20,0xcf,0x69,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x01,0x30,0x20,0xcf,0x69,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_sub_f32 v7, s105, v255 ; encoding: [0x01,0x50,0x20,0xcf,0x69,0x00,0x00,0x00,0xff,0xff,0x00,0x07] 
+0x01,0x50,0x20,0xcf,0x69,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_sub_nc_u32 v7, s105, v255 ; encoding: [0x01,0x40,0x21,0xcf,0x69,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x01,0x40,0x21,0xcf,0x69,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, s1 :: v_dual_subrev_f32 v7, s105, v255 ; encoding: [0x01,0x60,0x20,0xcf,0x69,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x01,0x60,0x20,0xcf,0x69,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_add_f32 v7, s1, v255 ; encoding: [0x69,0x40,0x20,0xcf,0x01,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x69,0x40,0x20,0xcf,0x01,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_add_nc_u32 v7, s1, v255 ; encoding: [0x69,0x00,0x21,0xcf,0x01,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x69,0x00,0x21,0xcf,0x01,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_ashrrev_i32 v7, s1, v255 ; encoding: [0x69,0x60,0x21,0xcf,0x01,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x69,0x60,0x21,0xcf,0x01,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_cndmask_b32 v7, s105, v255, vcc_lo ; encoding: [0x69,0x90,0x20,0xcf,0x69,0x00,0x00,0x00,0xff,0xff,0x6a,0x07] +0x69,0x90,0x20,0xcf,0x69,0x00,0x00,0x00,0xff,0xff,0x6a,0x07 + +# GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_fmac_f32 v7, s1, v255 ; encoding: [0x69,0x00,0x20,0xcf,0x01,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x69,0x00,0x20,0xcf,0x01,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_lshlrev_b32 v7, s1, v255 ; encoding: [0x69,0x10,0x21,0xcf,0x01,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x69,0x10,0x21,0xcf,0x01,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_lshrrev_b32 v7, s1, v255 ; encoding: [0x69,0x50,0x21,0xcf,0x01,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x69,0x50,0x21,0xcf,0x01,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, s105 :: 
v_dual_max_i32 v7, s1, v255 ; encoding: [0x69,0x70,0x21,0xcf,0x01,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x69,0x70,0x21,0xcf,0x01,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_max_num_f32 v7, s1, v255 ; encoding: [0x69,0xa0,0x20,0xcf,0x01,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x69,0xa0,0x20,0xcf,0x01,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_min_i32 v7, s1, v255 ; encoding: [0x69,0x80,0x21,0xcf,0x01,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x69,0x80,0x21,0xcf,0x01,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_min_num_f32 v7, s1, v255 ; encoding: [0x69,0xb0,0x20,0xcf,0x01,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x69,0xb0,0x20,0xcf,0x01,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_mov_b32 v7, s1 ; encoding: [0x69,0x80,0x20,0xcf,0x01,0x00,0x00,0x00,0xff,0x00,0x00,0x07] +0x69,0x80,0x20,0xcf,0x01,0x00,0x00,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_mul_dx9_zero_f32 v7, s1, v255 ; encoding: [0x69,0x70,0x20,0xcf,0x01,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x69,0x70,0x20,0xcf,0x01,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_mul_f32 v7, s1, v255 ; encoding: [0x69,0x30,0x20,0xcf,0x01,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x69,0x30,0x20,0xcf,0x01,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_sub_f32 v7, s1, v255 ; encoding: [0x69,0x50,0x20,0xcf,0x01,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x69,0x50,0x20,0xcf,0x01,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_sub_nc_u32 v7, s1, v255 ; encoding: [0x69,0x40,0x21,0xcf,0x01,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x69,0x40,0x21,0xcf,0x01,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, s105 :: v_dual_subrev_f32 v7, s1, v255 ; encoding: [0x69,0x60,0x20,0xcf,0x01,0x00,0x00,0x00,0xff,0xff,0x00,0x07] 
+0x69,0x60,0x20,0xcf,0x01,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_add_f32 v7, -1, v255 ; encoding: [0xfd,0x40,0x20,0xcf,0xc1,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0xfd,0x40,0x20,0xcf,0xc1,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_add_nc_u32 v7, -1, v255 ; encoding: [0xfd,0x00,0x21,0xcf,0xc1,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0xfd,0x00,0x21,0xcf,0xc1,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_ashrrev_i32 v7, -1, v255 ; encoding: [0xfd,0x60,0x21,0xcf,0xc1,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0xfd,0x60,0x21,0xcf,0xc1,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_cndmask_b32 v7, -1, v255, vcc_lo ; encoding: [0xfd,0x90,0x20,0xcf,0xc1,0x00,0x00,0x00,0xff,0xff,0x6a,0x07] +0xfd,0x90,0x20,0xcf,0xc1,0x00,0x00,0x00,0xff,0xff,0x6a,0x07 + +# GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_fmac_f32 v7, -1, v255 ; encoding: [0xfd,0x00,0x20,0xcf,0xc1,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0xfd,0x00,0x20,0xcf,0xc1,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_lshlrev_b32 v7, -1, v255 ; encoding: [0xfd,0x10,0x21,0xcf,0xc1,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0xfd,0x10,0x21,0xcf,0xc1,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_lshrrev_b32 v7, -1, v255 ; encoding: [0xfd,0x50,0x21,0xcf,0xc1,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0xfd,0x50,0x21,0xcf,0xc1,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_max_i32 v7, -1, v255 ; encoding: [0xfd,0x70,0x21,0xcf,0xc1,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0xfd,0x70,0x21,0xcf,0xc1,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_max_num_f32 v7, -1, v255 ; encoding: [0xfd,0xa0,0x20,0xcf,0xc1,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0xfd,0xa0,0x20,0xcf,0xc1,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: 
v_dual_mov_b32 v255, src_scc :: v_dual_min_i32 v7, -1, v255 ; encoding: [0xfd,0x80,0x21,0xcf,0xc1,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0xfd,0x80,0x21,0xcf,0xc1,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_min_num_f32 v7, -1, v255 ; encoding: [0xfd,0xb0,0x20,0xcf,0xc1,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0xfd,0xb0,0x20,0xcf,0xc1,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_mov_b32 v7, -1 ; encoding: [0xfd,0x80,0x20,0xcf,0xc1,0x00,0x00,0x00,0xff,0x00,0x00,0x07] +0xfd,0x80,0x20,0xcf,0xc1,0x00,0x00,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_mul_dx9_zero_f32 v7, -1, v255 ; encoding: [0xfd,0x70,0x20,0xcf,0xc1,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0xfd,0x70,0x20,0xcf,0xc1,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_mul_f32 v7, -1, v255 ; encoding: [0xfd,0x30,0x20,0xcf,0xc1,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0xfd,0x30,0x20,0xcf,0xc1,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_sub_f32 v7, -1, v255 ; encoding: [0xfd,0x50,0x20,0xcf,0xc1,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0xfd,0x50,0x20,0xcf,0xc1,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_sub_nc_u32 v7, -1, v255 ; encoding: [0xfd,0x40,0x21,0xcf,0xc1,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0xfd,0x40,0x21,0xcf,0xc1,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, src_scc :: v_dual_subrev_f32 v7, -1, v255 ; encoding: [0xfd,0x60,0x20,0xcf,0xc1,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0xfd,0x60,0x20,0xcf,0xc1,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_add_f32 v7, vcc_lo, v255 ; encoding: [0x7b,0x40,0x20,0xcf,0x6a,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7b,0x40,0x20,0xcf,0x6a,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_add_nc_u32 v7, vcc_lo, v255 ; encoding: 
[0x7b,0x00,0x21,0xcf,0x6a,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7b,0x00,0x21,0xcf,0x6a,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_ashrrev_i32 v7, vcc_lo, v255 ; encoding: [0x7b,0x60,0x21,0xcf,0x6a,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7b,0x60,0x21,0xcf,0x6a,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_cndmask_b32 v7, ttmp15, v255, vcc_lo ; encoding: [0x7b,0x90,0x20,0xcf,0x7b,0x00,0x00,0x00,0xff,0xff,0x6a,0x07] +0x7b,0x90,0x20,0xcf,0x7b,0x00,0x00,0x00,0xff,0xff,0x6a,0x07 + +# GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_fmac_f32 v7, vcc_lo, v255 ; encoding: [0x7b,0x00,0x20,0xcf,0x6a,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7b,0x00,0x20,0xcf,0x6a,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_lshlrev_b32 v7, vcc_lo, v255 ; encoding: [0x7b,0x10,0x21,0xcf,0x6a,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7b,0x10,0x21,0xcf,0x6a,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_lshrrev_b32 v7, vcc_lo, v255 ; encoding: [0x7b,0x50,0x21,0xcf,0x6a,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7b,0x50,0x21,0xcf,0x6a,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_max_i32 v7, vcc_lo, v255 ; encoding: [0x7b,0x70,0x21,0xcf,0x6a,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7b,0x70,0x21,0xcf,0x6a,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_max_num_f32 v7, vcc_lo, v255 ; encoding: [0x7b,0xa0,0x20,0xcf,0x6a,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7b,0xa0,0x20,0xcf,0x6a,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_min_i32 v7, vcc_lo, v255 ; encoding: [0x7b,0x80,0x21,0xcf,0x6a,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7b,0x80,0x21,0xcf,0x6a,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_min_num_f32 v7, vcc_lo, v255 ; encoding: 
[0x7b,0xb0,0x20,0xcf,0x6a,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7b,0xb0,0x20,0xcf,0x6a,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_mov_b32 v7, vcc_lo ; encoding: [0x7b,0x80,0x20,0xcf,0x6a,0x00,0x00,0x00,0xff,0x00,0x00,0x07] +0x7b,0x80,0x20,0xcf,0x6a,0x00,0x00,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v255 ; encoding: [0x7b,0x70,0x20,0xcf,0x6a,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7b,0x70,0x20,0xcf,0x6a,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_mul_f32 v7, vcc_lo, v255 ; encoding: [0x7b,0x30,0x20,0xcf,0x6a,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7b,0x30,0x20,0xcf,0x6a,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_sub_f32 v7, vcc_lo, v255 ; encoding: [0x7b,0x50,0x20,0xcf,0x6a,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7b,0x50,0x20,0xcf,0x6a,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_sub_nc_u32 v7, vcc_lo, v255 ; encoding: [0x7b,0x40,0x21,0xcf,0x6a,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7b,0x40,0x21,0xcf,0x6a,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, ttmp15 :: v_dual_subrev_f32 v7, vcc_lo, v255 ; encoding: [0x7b,0x60,0x20,0xcf,0x6a,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x7b,0x60,0x20,0xcf,0x6a,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_add_f32 v7, v255, v255 ; encoding: [0x01,0x41,0x20,0xcf,0xff,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x01,0x41,0x20,0xcf,0xff,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_add_nc_u32 v7, v255, v255 ; encoding: [0x01,0x01,0x21,0xcf,0xff,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x01,0x01,0x21,0xcf,0xff,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_ashrrev_i32 v7, v255, v255 ; encoding: [0x01,0x61,0x21,0xcf,0xff,0x01,0x00,0x00,0xff,0xff,0x00,0x07] 
+0x01,0x61,0x21,0xcf,0xff,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_cndmask_b32 v7, v255, v255, vcc_lo ; encoding: [0x01,0x91,0x20,0xcf,0xff,0x01,0x00,0x00,0xff,0xff,0x6a,0x07] +0x01,0x91,0x20,0xcf,0xff,0x01,0x00,0x00,0xff,0xff,0x6a,0x07 + +# GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_fmac_f32 v7, v255, v255 ; encoding: [0x01,0x01,0x20,0xcf,0xff,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x01,0x01,0x20,0xcf,0xff,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_lshlrev_b32 v7, v255, v255 ; encoding: [0x01,0x11,0x21,0xcf,0xff,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x01,0x11,0x21,0xcf,0xff,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_lshrrev_b32 v7, v255, v255 ; encoding: [0x01,0x51,0x21,0xcf,0xff,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x01,0x51,0x21,0xcf,0xff,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_max_i32 v7, v255, v255 ; encoding: [0x01,0x71,0x21,0xcf,0xff,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x01,0x71,0x21,0xcf,0xff,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_max_num_f32 v7, v255, v255 ; encoding: [0x01,0xa1,0x20,0xcf,0xff,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x01,0xa1,0x20,0xcf,0xff,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_min_i32 v7, v255, v255 ; encoding: [0x01,0x81,0x21,0xcf,0xff,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x01,0x81,0x21,0xcf,0xff,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_min_num_f32 v7, v255, v255 ; encoding: [0x01,0xb1,0x20,0xcf,0xff,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x01,0xb1,0x20,0xcf,0xff,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_mov_b32 v7, v255 ; encoding: [0x01,0x81,0x20,0xcf,0xff,0x01,0x00,0x00,0xff,0x00,0x00,0x07] +0x01,0x81,0x20,0xcf,0xff,0x01,0x00,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v1 :: 
v_dual_mul_dx9_zero_f32 v7, v255, v255 ; encoding: [0x01,0x71,0x20,0xcf,0xff,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x01,0x71,0x20,0xcf,0xff,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_mul_f32 v7, v255, v255 ; encoding: [0x01,0x31,0x20,0xcf,0xff,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x01,0x31,0x20,0xcf,0xff,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_sub_f32 v7, v255, v255 ; encoding: [0x01,0x51,0x20,0xcf,0xff,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x01,0x51,0x20,0xcf,0xff,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_sub_nc_u32 v7, v255, v255 ; encoding: [0x01,0x41,0x21,0xcf,0xff,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x01,0x41,0x21,0xcf,0xff,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v1 :: v_dual_subrev_f32 v7, v255, v255 ; encoding: [0x01,0x61,0x20,0xcf,0xff,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x01,0x61,0x20,0xcf,0xff,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_add_f32 v7, v3, v255 ; encoding: [0x02,0x41,0x20,0xcf,0x03,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x02,0x41,0x20,0xcf,0x03,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_add_nc_u32 v7, v3, v255 ; encoding: [0x02,0x01,0x21,0xcf,0x03,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x02,0x01,0x21,0xcf,0x03,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_ashrrev_i32 v7, v3, v255 ; encoding: [0x02,0x61,0x21,0xcf,0x03,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x02,0x61,0x21,0xcf,0x03,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_cndmask_b32 v7, v3, v255, vcc_lo ; encoding: [0x02,0x91,0x20,0xcf,0x03,0x01,0x00,0x00,0xff,0xff,0x6a,0x07] +0x02,0x91,0x20,0xcf,0x03,0x01,0x00,0x00,0xff,0xff,0x6a,0x07 + +# GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_fmac_f32 v7, v3, v255 ; encoding: [0x02,0x01,0x20,0xcf,0x03,0x01,0x00,0x00,0xff,0xff,0x00,0x07] 
+0x02,0x01,0x20,0xcf,0x03,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_lshlrev_b32 v7, v3, v255 ; encoding: [0x02,0x11,0x21,0xcf,0x03,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x02,0x11,0x21,0xcf,0x03,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_lshrrev_b32 v7, v3, v255 ; encoding: [0x02,0x51,0x21,0xcf,0x03,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x02,0x51,0x21,0xcf,0x03,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_max_i32 v7, v3, v255 ; encoding: [0x02,0x71,0x21,0xcf,0x03,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x02,0x71,0x21,0xcf,0x03,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_max_num_f32 v7, v3, v255 ; encoding: [0x02,0xa1,0x20,0xcf,0x03,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x02,0xa1,0x20,0xcf,0x03,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_min_i32 v7, v3, v255 ; encoding: [0x02,0x81,0x21,0xcf,0x03,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x02,0x81,0x21,0xcf,0x03,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_min_num_f32 v7, v3, v255 ; encoding: [0x02,0xb1,0x20,0xcf,0x03,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x02,0xb1,0x20,0xcf,0x03,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_mov_b32 v7, v3 ; encoding: [0x02,0x81,0x20,0xcf,0x03,0x01,0x00,0x00,0xff,0x00,0x00,0x07] +0x02,0x81,0x20,0xcf,0x03,0x01,0x00,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_mul_dx9_zero_f32 v7, v3, v255 ; encoding: [0x02,0x71,0x20,0xcf,0x03,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x02,0x71,0x20,0xcf,0x03,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_mul_f32 v7, v3, v255 ; encoding: [0x02,0x31,0x20,0xcf,0x03,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x02,0x31,0x20,0xcf,0x03,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_sub_f32 v7, v3, v255 ; encoding: 
[0x02,0x51,0x20,0xcf,0x03,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x02,0x51,0x20,0xcf,0x03,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_sub_nc_u32 v7, v3, v255 ; encoding: [0x02,0x41,0x21,0xcf,0x03,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x02,0x41,0x21,0xcf,0x03,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v2 :: v_dual_subrev_f32 v7, v3, v255 ; encoding: [0x02,0x61,0x20,0xcf,0x03,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x02,0x61,0x20,0xcf,0x03,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_add_f32 v7, v2, v255 ; encoding: [0xff,0x41,0x20,0xcf,0x02,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0xff,0x41,0x20,0xcf,0x02,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_add_nc_u32 v7, v2, v255 ; encoding: [0xff,0x01,0x21,0xcf,0x02,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0xff,0x01,0x21,0xcf,0x02,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_ashrrev_i32 v7, v2, v255 ; encoding: [0xff,0x61,0x21,0xcf,0x02,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0xff,0x61,0x21,0xcf,0x02,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_cndmask_b32 v7, v2, v255, vcc_lo ; encoding: [0xff,0x91,0x20,0xcf,0x02,0x01,0x00,0x00,0xff,0xff,0x6a,0x07] +0xff,0x91,0x20,0xcf,0x02,0x01,0x00,0x00,0xff,0xff,0x6a,0x07 + +# GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_fmac_f32 v7, v2, v255 ; encoding: [0xff,0x01,0x20,0xcf,0x02,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0xff,0x01,0x20,0xcf,0x02,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_lshlrev_b32 v7, v2, v255 ; encoding: [0xff,0x11,0x21,0xcf,0x02,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0xff,0x11,0x21,0xcf,0x02,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_lshrrev_b32 v7, v2, v255 ; encoding: [0xff,0x51,0x21,0xcf,0x02,0x01,0x00,0x00,0xff,0xff,0x00,0x07] 
+0xff,0x51,0x21,0xcf,0x02,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_max_i32 v7, v2, v255 ; encoding: [0xff,0x71,0x21,0xcf,0x02,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0xff,0x71,0x21,0xcf,0x02,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_max_num_f32 v7, v2, v255 ; encoding: [0xff,0xa1,0x20,0xcf,0x02,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0xff,0xa1,0x20,0xcf,0x02,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_min_i32 v7, v2, v255 ; encoding: [0xff,0x81,0x21,0xcf,0x02,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0xff,0x81,0x21,0xcf,0x02,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_min_num_f32 v7, v2, v255 ; encoding: [0xff,0xb1,0x20,0xcf,0x02,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0xff,0xb1,0x20,0xcf,0x02,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_mov_b32 v7, v2 ; encoding: [0xff,0x81,0x20,0xcf,0x02,0x01,0x00,0x00,0xff,0x00,0x00,0x07] +0xff,0x81,0x20,0xcf,0x02,0x01,0x00,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_mul_dx9_zero_f32 v7, v2, v255 ; encoding: [0xff,0x71,0x20,0xcf,0x02,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0xff,0x71,0x20,0xcf,0x02,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_mul_f32 v7, v2, v255 ; encoding: [0xff,0x31,0x20,0xcf,0x02,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0xff,0x31,0x20,0xcf,0x02,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_sub_f32 v7, v2, v255 ; encoding: [0xff,0x51,0x20,0xcf,0x02,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0xff,0x51,0x20,0xcf,0x02,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_sub_nc_u32 v7, v2, v255 ; encoding: [0xff,0x41,0x21,0xcf,0x02,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0xff,0x41,0x21,0xcf,0x02,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v255 :: v_dual_subrev_f32 v7, v2, 
v255 ; encoding: [0xff,0x61,0x20,0xcf,0x02,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0xff,0x61,0x20,0xcf,0x02,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_add_f32 v7, v4, v255 ; encoding: [0x03,0x41,0x20,0xcf,0x04,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x03,0x41,0x20,0xcf,0x04,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_add_nc_u32 v7, v4, v255 ; encoding: [0x03,0x01,0x21,0xcf,0x04,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x03,0x01,0x21,0xcf,0x04,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_ashrrev_i32 v7, v4, v255 ; encoding: [0x03,0x61,0x21,0xcf,0x04,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x03,0x61,0x21,0xcf,0x04,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_cndmask_b32 v7, v4, v255, vcc_lo ; encoding: [0x03,0x91,0x20,0xcf,0x04,0x01,0x00,0x00,0xff,0xff,0x6a,0x07] +0x03,0x91,0x20,0xcf,0x04,0x01,0x00,0x00,0xff,0xff,0x6a,0x07 + +# GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_fmac_f32 v7, v4, v255 ; encoding: [0x03,0x01,0x20,0xcf,0x04,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x03,0x01,0x20,0xcf,0x04,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_lshlrev_b32 v7, v4, v255 ; encoding: [0x03,0x11,0x21,0xcf,0x04,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x03,0x11,0x21,0xcf,0x04,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_lshrrev_b32 v7, v4, v255 ; encoding: [0x03,0x51,0x21,0xcf,0x04,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x03,0x51,0x21,0xcf,0x04,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_max_i32 v7, v4, v255 ; encoding: [0x03,0x71,0x21,0xcf,0x04,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x03,0x71,0x21,0xcf,0x04,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_max_num_f32 v7, v4, v255 ; encoding: [0x03,0xa1,0x20,0xcf,0x04,0x01,0x00,0x00,0xff,0xff,0x00,0x07] 
+0x03,0xa1,0x20,0xcf,0x04,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_min_i32 v7, v4, v255 ; encoding: [0x03,0x81,0x21,0xcf,0x04,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x03,0x81,0x21,0xcf,0x04,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_min_num_f32 v7, v4, v255 ; encoding: [0x03,0xb1,0x20,0xcf,0x04,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x03,0xb1,0x20,0xcf,0x04,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_mov_b32 v7, v4 ; encoding: [0x03,0x81,0x20,0xcf,0x04,0x01,0x00,0x00,0xff,0x00,0x00,0x07] +0x03,0x81,0x20,0xcf,0x04,0x01,0x00,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_mul_dx9_zero_f32 v7, v4, v255 ; encoding: [0x03,0x71,0x20,0xcf,0x04,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x03,0x71,0x20,0xcf,0x04,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_mul_f32 v7, v4, v255 ; encoding: [0x03,0x31,0x20,0xcf,0x04,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x03,0x31,0x20,0xcf,0x04,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_sub_f32 v7, v4, v255 ; encoding: [0x03,0x51,0x20,0xcf,0x04,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x03,0x51,0x20,0xcf,0x04,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_sub_nc_u32 v7, v4, v255 ; encoding: [0x03,0x41,0x21,0xcf,0x04,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x03,0x41,0x21,0xcf,0x04,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v3 :: v_dual_subrev_f32 v7, v4, v255 ; encoding: [0x03,0x61,0x20,0xcf,0x04,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x03,0x61,0x20,0xcf,0x04,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_add_f32 v7, v1, v255 ; encoding: [0x04,0x41,0x20,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x04,0x41,0x20,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_add_nc_u32 v7, v1, v255 ; encoding: 
[0x04,0x01,0x21,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x04,0x01,0x21,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_ashrrev_i32 v7, v1, v255 ; encoding: [0x04,0x61,0x21,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x04,0x61,0x21,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0xfe ; encoding: [0x04,0x21,0x21,0xcf,0x01,0x01,0x00,0x00,0xff,0x03,0xfe,0x07] +0x04,0x21,0x21,0xcf,0x01,0x01,0x00,0x00,0xff,0x03,0xfe,0x07 + +# GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_cndmask_b32 v7, v1, v255, s96 ; encoding: [0x04,0x91,0x20,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x60,0x07] +0x04,0x91,0x20,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x60,0x07 + +# GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_cndmask_b32 v7, v1, v255, vcc_lo ; encoding: [0x04,0x91,0x20,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x6a,0x07] +0x04,0x91,0x20,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x6a,0x07 + +# GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_fma_f32 v7, v1, v3, v4 ; encoding: [0x04,0x31,0x21,0xcf,0x01,0x01,0x00,0x00,0xff,0x03,0x04,0x07] +0x04,0x31,0x21,0xcf,0x01,0x01,0x00,0x00,0xff,0x03,0x04,0x07 + +# GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_fmac_f32 v7, v1, v255 ; encoding: [0x04,0x01,0x20,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x04,0x01,0x20,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_lshlrev_b32 v7, v1, v255 ; encoding: [0x04,0x11,0x21,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x04,0x11,0x21,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_lshrrev_b32 v7, v1, v255 ; encoding: [0x04,0x51,0x21,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x04,0x51,0x21,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_max_i32 v7, v1, v255 ; encoding: [0x04,0x71,0x21,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x00,0x07] 
+0x04,0x71,0x21,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_max_num_f32 v7, v1, v255 ; encoding: [0x04,0xa1,0x20,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x04,0xa1,0x20,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_min_i32 v7, v1, v255 ; encoding: [0x04,0x81,0x21,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x04,0x81,0x21,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_min_num_f32 v7, v1, v255 ; encoding: [0x04,0xb1,0x20,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x04,0xb1,0x20,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_mov_b32 v7, v1 ; encoding: [0x04,0x81,0x20,0xcf,0x01,0x01,0x00,0x00,0xff,0x00,0x00,0x07] +0x04,0x81,0x20,0xcf,0x01,0x01,0x00,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_mul_dx9_zero_f32 v7, v1, v255 ; encoding: [0x04,0x71,0x20,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x04,0x71,0x20,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_mul_f32 v7, v1, v255 ; encoding: [0x04,0x31,0x20,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x04,0x31,0x20,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_sub_f32 v7, v1, v255 ; encoding: [0x04,0x51,0x20,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x04,0x51,0x20,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_sub_nc_u32 v7, v1, v255 ; encoding: [0x04,0x41,0x21,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x04,0x41,0x21,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, v4 :: v_dual_subrev_f32 v7, v1, v255 ; encoding: [0x04,0x61,0x20,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x00,0x07] +0x04,0x61,0x20,0xcf,0x01,0x01,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_add_f32 v7, exec_lo, v255 ; 
encoding: [0x6b,0x40,0x20,0xcf,0x7e,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x6b,0x40,0x20,0xcf,0x7e,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_add_nc_u32 v7, exec_lo, v255 ; encoding: [0x6b,0x00,0x21,0xcf,0x7e,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x6b,0x00,0x21,0xcf,0x7e,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_ashrrev_i32 v7, exec_lo, v255 ; encoding: [0x6b,0x60,0x21,0xcf,0x7e,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x6b,0x60,0x21,0xcf,0x7e,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_cndmask_b32 v7, vcc_hi, v255, vcc_lo ; encoding: [0x6b,0x90,0x20,0xcf,0x6b,0x00,0x00,0x00,0xff,0xff,0x6a,0x07] +0x6b,0x90,0x20,0xcf,0x6b,0x00,0x00,0x00,0xff,0xff,0x6a,0x07 + +# GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_fmac_f32 v7, exec_lo, v255 ; encoding: [0x6b,0x00,0x20,0xcf,0x7e,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x6b,0x00,0x20,0xcf,0x7e,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_lshlrev_b32 v7, exec_lo, v255 ; encoding: [0x6b,0x10,0x21,0xcf,0x7e,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x6b,0x10,0x21,0xcf,0x7e,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_lshrrev_b32 v7, exec_lo, v255 ; encoding: [0x6b,0x50,0x21,0xcf,0x7e,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x6b,0x50,0x21,0xcf,0x7e,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_max_i32 v7, exec_lo, v255 ; encoding: [0x6b,0x70,0x21,0xcf,0x7e,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x6b,0x70,0x21,0xcf,0x7e,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_max_num_f32 v7, exec_lo, v255 ; encoding: [0x6b,0xa0,0x20,0xcf,0x7e,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x6b,0xa0,0x20,0xcf,0x7e,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_min_i32 v7, exec_lo, v255 ; encoding: 
[0x6b,0x80,0x21,0xcf,0x7e,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x6b,0x80,0x21,0xcf,0x7e,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_min_num_f32 v7, exec_lo, v255 ; encoding: [0x6b,0xb0,0x20,0xcf,0x7e,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x6b,0xb0,0x20,0xcf,0x7e,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_mov_b32 v7, exec_lo ; encoding: [0x6b,0x80,0x20,0xcf,0x7e,0x00,0x00,0x00,0xff,0x00,0x00,0x07] +0x6b,0x80,0x20,0xcf,0x7e,0x00,0x00,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_mul_dx9_zero_f32 v7, exec_lo, v255 ; encoding: [0x6b,0x70,0x20,0xcf,0x7e,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x6b,0x70,0x20,0xcf,0x7e,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_mul_f32 v7, exec_lo, v255 ; encoding: [0x6b,0x30,0x20,0xcf,0x7e,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x6b,0x30,0x20,0xcf,0x7e,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_sub_f32 v7, exec_lo, v255 ; encoding: [0x6b,0x50,0x20,0xcf,0x7e,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x6b,0x50,0x20,0xcf,0x7e,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_sub_nc_u32 v7, exec_lo, v255 ; encoding: [0x6b,0x40,0x21,0xcf,0x7e,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x6b,0x40,0x21,0xcf,0x7e,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, vcc_hi :: v_dual_subrev_f32 v7, exec_lo, v255 ; encoding: [0x6b,0x60,0x20,0xcf,0x7e,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x6b,0x60,0x20,0xcf,0x7e,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_add_f32 v7, exec_hi, v255 ; encoding: [0x6a,0x40,0x20,0xcf,0x7f,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x6a,0x40,0x20,0xcf,0x7f,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_add_nc_u32 v7, exec_hi, v255 ; encoding: [0x6a,0x00,0x21,0xcf,0x7f,0x00,0x00,0x00,0xff,0xff,0x00,0x07] 
+0x6a,0x00,0x21,0xcf,0x7f,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_ashrrev_i32 v7, exec_hi, v255 ; encoding: [0x6a,0x60,0x21,0xcf,0x7f,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x6a,0x60,0x21,0xcf,0x7f,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_cndmask_b32 v7, vcc_lo, v255, vcc_lo ; encoding: [0x6a,0x90,0x20,0xcf,0x6a,0x00,0x00,0x00,0xff,0xff,0x6a,0x07] +0x6a,0x90,0x20,0xcf,0x6a,0x00,0x00,0x00,0xff,0xff,0x6a,0x07 + +# GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_fmac_f32 v7, exec_hi, v255 ; encoding: [0x6a,0x00,0x20,0xcf,0x7f,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x6a,0x00,0x20,0xcf,0x7f,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_lshlrev_b32 v7, exec_hi, v255 ; encoding: [0x6a,0x10,0x21,0xcf,0x7f,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x6a,0x10,0x21,0xcf,0x7f,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_lshrrev_b32 v7, exec_hi, v255 ; encoding: [0x6a,0x50,0x21,0xcf,0x7f,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x6a,0x50,0x21,0xcf,0x7f,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_max_i32 v7, exec_hi, v255 ; encoding: [0x6a,0x70,0x21,0xcf,0x7f,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x6a,0x70,0x21,0xcf,0x7f,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_max_num_f32 v7, exec_hi, v255 ; encoding: [0x6a,0xa0,0x20,0xcf,0x7f,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x6a,0xa0,0x20,0xcf,0x7f,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_min_i32 v7, exec_hi, v255 ; encoding: [0x6a,0x80,0x21,0xcf,0x7f,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x6a,0x80,0x21,0xcf,0x7f,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_min_num_f32 v7, exec_hi, v255 ; encoding: [0x6a,0xb0,0x20,0xcf,0x7f,0x00,0x00,0x00,0xff,0xff,0x00,0x07] 
+0x6a,0xb0,0x20,0xcf,0x7f,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_mov_b32 v7, exec_hi ; encoding: [0x6a,0x80,0x20,0xcf,0x7f,0x00,0x00,0x00,0xff,0x00,0x00,0x07] +0x6a,0x80,0x20,0xcf,0x7f,0x00,0x00,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v255 ; encoding: [0x6a,0x70,0x20,0xcf,0x7f,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x6a,0x70,0x20,0xcf,0x7f,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_mul_f32 v7, exec_hi, v255 ; encoding: [0x6a,0x30,0x20,0xcf,0x7f,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x6a,0x30,0x20,0xcf,0x7f,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_sub_f32 v7, exec_hi, v255 ; encoding: [0x6a,0x50,0x20,0xcf,0x7f,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x6a,0x50,0x20,0xcf,0x7f,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_sub_nc_u32 v7, exec_hi, v255 ; encoding: [0x6a,0x40,0x21,0xcf,0x7f,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x6a,0x40,0x21,0xcf,0x7f,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v255, vcc_lo :: v_dual_subrev_f32 v7, exec_hi, v255 ; encoding: [0x6a,0x60,0x20,0xcf,0x7f,0x00,0x00,0x00,0xff,0xff,0x00,0x07] +0x6a,0x60,0x20,0xcf,0x7f,0x00,0x00,0x00,0xff,0xff,0x00,0x07 + +# GFX1250: v_dual_mov_b32 v25, v8 :: v_dual_mov_b32 v13, v16 ; encoding: [0x08,0x81,0x20,0xcf,0x10,0x01,0x00,0x00,0x19,0x00,0x00,0x0d] +0x08,0x81,0x20,0xcf,0x10,0x01,0x00,0x00,0x19,0x00,0x00,0x0d + +# GFX1250: v_dual_mul_dx9_zero_f32 v0, -v1, v2 :: v_dual_fma_f32 v5, -s6, -v7, -v8 ; encoding: [0x01,0x31,0x1d,0xcf,0x06,0x72,0x02,0x00,0x00,0x07,0x08,0x05] +0x01,0x31,0x1d,0xcf,0x06,0x72,0x02,0x00,0x00,0x07,0x08,0x05 + +# GFX1250: v_dual_mul_dx9_zero_f32 v0, -v1, v2 :: v_dual_lshlrev_b32 v5, v6, v7 ; encoding: [0x01,0x11,0x1d,0xcf,0x06,0x03,0x02,0x00,0x00,0x07,0x00,0x05] +0x01,0x11,0x1d,0xcf,0x06,0x03,0x02,0x00,0x00,0x07,0x00,0x05 + +# GFX1250: 
v_dual_mul_dx9_zero_f32 v0, v1, -v2 :: v_dual_bitop2_b32 v5, v6, v7 bitop3:1 ; encoding: [0x01,0x21,0x1d,0xcf,0x06,0x05,0x02,0x00,0x00,0x07,0x01,0x05] +0x01,0x21,0x1d,0xcf,0x06,0x05,0x02,0x00,0x00,0x07,0x01,0x05 + +# GFX1250: v_dual_mul_dx9_zero_f32 v0, v1, -v2 :: v_dual_mul_f32 v5, v6, -v7 ; encoding: [0x01,0x31,0x1c,0xcf,0x06,0x25,0x02,0x00,0x00,0x07,0x00,0x05] +0x01,0x31,0x1c,0xcf,0x06,0x25,0x02,0x00,0x00,0x07,0x00,0x05 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_add_f32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x1c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x40,0x1c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_add_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x1d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x00,0x1d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_ashrrev_i32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x1d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x60,0x1d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo ; encoding: [0xc1,0x90,0x1c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x6a,0x07] +0xc1,0x90,0x1c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x6a,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_fmac_f32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x1c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x00,0x1c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_lshlrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x10,0x1d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x10,0x1d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_lshrrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x1d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x50,0x1d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# 
GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_max_i32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x1d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x70,0x1d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_max_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xa0,0x1c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0xa0,0x1c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_min_i32 v7, src_scc, v5 ; encoding: [0xc1,0x80,0x1d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x80,0x1d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_min_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xb0,0x1c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0xb0,0x1c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_mov_b32 v7, src_scc ; encoding: [0xc1,0x80,0x1c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x00,0x00,0x07] +0xc1,0x80,0x1c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x1c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x70,0x1c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_mul_f32 v7, src_scc, v5 ; encoding: [0xc1,0x30,0x1c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x30,0x1c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_sub_f32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x1c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x50,0x1c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_sub_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x1d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x40,0x1d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# 
GFX1250: v_dual_mul_dx9_zero_f32 v255, -1, v4 :: v_dual_subrev_f32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x1c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x60,0x1c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_add_f32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x1c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x40,0x1c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_add_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x1d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x00,0x1d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x1d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x60,0x1d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo ; encoding: [0xf0,0x90,0x1c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x6a,0x07] +0xf0,0x90,0x1c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x6a,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_fmac_f32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x1c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x00,0x1c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x10,0x1d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x10,0x1d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x1d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x50,0x1d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_max_i32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x1d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x70,0x1d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: 
v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_max_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xa0,0x1c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0xa0,0x1c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_min_i32 v7, 0.5, v2 ; encoding: [0xf0,0x80,0x1d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x80,0x1d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_min_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xb0,0x1c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0xb0,0x1c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_mov_b32 v7, 0.5 ; encoding: [0xf0,0x80,0x1c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x00,0x00,0x07] +0xf0,0x80,0x1c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x1c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x70,0x1c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_mul_f32 v7, 0.5, v2 ; encoding: [0xf0,0x30,0x1c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x30,0x1c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_sub_f32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x1c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x50,0x1c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x1d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x40,0x1d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 0.5, v3 :: v_dual_subrev_f32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x1c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x60,0x1c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 
v255, exec_hi, v2 :: v_dual_add_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x40,0x1c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x40,0x1c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x1d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x00,0x1d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v7, vcc_hi, v3 ; encoding: [0x7f,0x60,0x1d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x60,0x1d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v7, exec_hi, v3, vcc_lo ; encoding: [0x7f,0x90,0x1c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7f,0x90,0x1c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_fmac_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x1c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x00,0x1c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v7, vcc_hi, v3 ; encoding: [0x7f,0x10,0x1d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x10,0x1d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v7, vcc_hi, v3 ; encoding: [0x7f,0x50,0x1d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x50,0x1d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_max_i32 v7, vcc_hi, v3 ; encoding: [0x7f,0x70,0x1d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x70,0x1d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_max_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xa0,0x1c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x7f,0xa0,0x1c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_min_i32 v7, vcc_hi, v3 ; encoding: [0x7f,0x80,0x1d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x80,0x1d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_min_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xb0,0x1c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0xb0,0x1c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x70,0x1c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x70,0x1c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_mul_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x30,0x1c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x30,0x1c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_sub_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x50,0x1c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x50,0x1c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v7, vcc_hi, v3 ; encoding: [0x7f,0x40,0x1d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x40,0x1d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v2 :: v_dual_subrev_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x60,0x1c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x60,0x1c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_hi, v255 :: v_dual_mov_b32 v7, vcc_hi ; encoding: [0x7f,0x80,0x1c,0xcf,0x6b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7f,0x80,0x1c,0xcf,0x6b,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_add_f32 v7, ttmp15, v3 ; encoding: 
[0x7e,0x40,0x1c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x40,0x1c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x1d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x00,0x1d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v7, ttmp15, v3 ; encoding: [0x7e,0x60,0x1d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x60,0x1d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v7, exec_lo, v3, vcc_lo ; encoding: [0x7e,0x90,0x1c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7e,0x90,0x1c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_fmac_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x1c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x00,0x1c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v7, ttmp15, v3 ; encoding: [0x7e,0x10,0x1d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x10,0x1d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_lshrrev_b32 v7, ttmp15, v3 ; encoding: [0x7e,0x50,0x1d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x50,0x1d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_max_i32 v7, ttmp15, v3 ; encoding: [0x7e,0x70,0x1d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x70,0x1d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_max_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xa0,0x1c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0xa0,0x1c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 
exec_lo, v2 :: v_dual_min_i32 v7, ttmp15, v3 ; encoding: [0x7e,0x80,0x1d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x80,0x1d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_min_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xb0,0x1c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0xb0,0x1c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x70,0x1c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x70,0x1c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_mul_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x30,0x1c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x30,0x1c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_sub_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x50,0x1c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x50,0x1c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_sub_nc_u32 v7, ttmp15, v3 ; encoding: [0x7e,0x40,0x1d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x40,0x1d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v2 :: v_dual_subrev_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x60,0x1c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x60,0x1c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, exec_lo, v255 :: v_dual_mov_b32 v7, ttmp15 ; encoding: [0x7e,0x80,0x1c,0xcf,0x7b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7e,0x80,0x1c,0xcf,0x7b,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_add_f32 v7, m0, v3 ; encoding: [0x7d,0x40,0x1c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x40,0x1c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: 
v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_add_nc_u32 v7, m0, v3 ; encoding: [0x7d,0x00,0x1d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x00,0x1d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_ashrrev_i32 v7, m0, v3 ; encoding: [0x7d,0x60,0x1d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x60,0x1d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_cndmask_b32 v7, m0, v3, vcc_lo ; encoding: [0x7d,0x90,0x1c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7d,0x90,0x1c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_fmac_f32 v7, m0, v3 ; encoding: [0x7d,0x00,0x1c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x00,0x1c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_lshlrev_b32 v7, m0, v3 ; encoding: [0x7d,0x10,0x1d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x10,0x1d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_lshrrev_b32 v7, m0, v3 ; encoding: [0x7d,0x50,0x1d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x50,0x1d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_max_i32 v7, m0, v3 ; encoding: [0x7d,0x70,0x1d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x70,0x1d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_max_num_f32 v7, m0, v3 ; encoding: [0x7d,0xa0,0x1c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0xa0,0x1c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_min_i32 v7, m0, v3 ; encoding: [0x7d,0x80,0x1d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x80,0x1d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 
m0, v2 :: v_dual_min_num_f32 v7, m0, v3 ; encoding: [0x7d,0xb0,0x1c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0xb0,0x1c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v7, m0, v3 ; encoding: [0x7d,0x70,0x1c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x70,0x1c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_mul_f32 v7, m0, v3 ; encoding: [0x7d,0x30,0x1c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x30,0x1c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_sub_f32 v7, m0, v3 ; encoding: [0x7d,0x50,0x1c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x50,0x1c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_sub_nc_u32 v7, m0, v3 ; encoding: [0x7d,0x40,0x1d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x40,0x1d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v2 :: v_dual_subrev_f32 v7, m0, v3 ; encoding: [0x7d,0x60,0x1c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x60,0x1c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, m0, v255 :: v_dual_mov_b32 v7, m0 ; encoding: [0x7d,0x80,0x1c,0xcf,0x7d,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7d,0x80,0x1c,0xcf,0x7d,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_add_f32 v7, s105, v3 ; encoding: [0x01,0x40,0x1c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x40,0x1c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_add_nc_u32 v7, s105, v3 ; encoding: [0x01,0x00,0x1d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x00,0x1d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_ashrrev_i32 v7, s105, 
v3 ; encoding: [0x01,0x60,0x1d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x60,0x1d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_cndmask_b32 v7, s1, v3, vcc_lo ; encoding: [0x01,0x90,0x1c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x01,0x90,0x1c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_fmac_f32 v7, s105, v3 ; encoding: [0x01,0x00,0x1c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x00,0x1c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_lshlrev_b32 v7, s105, v3 ; encoding: [0x01,0x10,0x1d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x10,0x1d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_lshrrev_b32 v7, s105, v3 ; encoding: [0x01,0x50,0x1d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x50,0x1d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_max_i32 v7, s105, v3 ; encoding: [0x01,0x70,0x1d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x70,0x1d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_max_num_f32 v7, s105, v3 ; encoding: [0x01,0xa0,0x1c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xa0,0x1c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_min_i32 v7, s105, v3 ; encoding: [0x01,0x80,0x1d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x80,0x1d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_min_num_f32 v7, s105, v3 ; encoding: [0x01,0xb0,0x1c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xb0,0x1c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v7, s105, v3 ; 
encoding: [0x01,0x70,0x1c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x70,0x1c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_mul_f32 v7, s105, v3 ; encoding: [0x01,0x30,0x1c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x30,0x1c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_sub_f32 v7, s105, v3 ; encoding: [0x01,0x50,0x1c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x50,0x1c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_sub_nc_u32 v7, s105, v3 ; encoding: [0x01,0x40,0x1d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x40,0x1d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v2 :: v_dual_subrev_f32 v7, s105, v3 ; encoding: [0x01,0x60,0x1c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x60,0x1c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s1, v255 :: v_dual_mov_b32 v7, s105 ; encoding: [0x01,0x80,0x1c,0xcf,0x69,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x01,0x80,0x1c,0xcf,0x69,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_add_f32 v7, s1, v3 ; encoding: [0x69,0x40,0x1c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x40,0x1c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_add_nc_u32 v7, s1, v3 ; encoding: [0x69,0x00,0x1d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x00,0x1d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_ashrrev_i32 v7, s1, v3 ; encoding: [0x69,0x60,0x1d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x60,0x1d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_cndmask_b32 v7, s105, v3, vcc_lo ; encoding: 
[0x69,0x90,0x1c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x69,0x90,0x1c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_fmac_f32 v7, s1, v3 ; encoding: [0x69,0x00,0x1c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x00,0x1c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_lshlrev_b32 v7, s1, v3 ; encoding: [0x69,0x10,0x1d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x10,0x1d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_lshrrev_b32 v7, s1, v3 ; encoding: [0x69,0x50,0x1d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x50,0x1d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_max_i32 v7, s1, v3 ; encoding: [0x69,0x70,0x1d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x70,0x1d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_max_num_f32 v7, s1, v3 ; encoding: [0x69,0xa0,0x1c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0xa0,0x1c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_min_i32 v7, s1, v3 ; encoding: [0x69,0x80,0x1d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x80,0x1d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_min_num_f32 v7, s1, v3 ; encoding: [0x69,0xb0,0x1c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0xb0,0x1c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v7, s1, v3 ; encoding: [0x69,0x70,0x1c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x70,0x1c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_mul_f32 v7, s1, v3 ; encoding: 
[0x69,0x30,0x1c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x30,0x1c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_sub_f32 v7, s1, v3 ; encoding: [0x69,0x50,0x1c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x50,0x1c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_sub_nc_u32 v7, s1, v3 ; encoding: [0x69,0x40,0x1d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x40,0x1d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v2 :: v_dual_subrev_f32 v7, s1, v3 ; encoding: [0x69,0x60,0x1c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x60,0x1c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, s105, v255 :: v_dual_mov_b32 v7, s1 ; encoding: [0x69,0x80,0x1c,0xcf,0x01,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x69,0x80,0x1c,0xcf,0x01,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_add_f32 v7, -1, v3 ; encoding: [0xfd,0x40,0x1c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x40,0x1c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_add_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x00,0x1d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x00,0x1d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v7, -1, v3 ; encoding: [0xfd,0x60,0x1d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x60,0x1d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo ; encoding: [0xfd,0x90,0x1c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0xfd,0x90,0x1c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_fmac_f32 v7, -1, v3 ; encoding: 
[0xfd,0x00,0x1c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x00,0x1c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v7, -1, v3 ; encoding: [0xfd,0x10,0x1d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x10,0x1d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v7, -1, v3 ; encoding: [0xfd,0x50,0x1d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x50,0x1d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_max_i32 v7, -1, v3 ; encoding: [0xfd,0x70,0x1d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x70,0x1d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_max_num_f32 v7, -1, v3 ; encoding: [0xfd,0xa0,0x1c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0xa0,0x1c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_min_i32 v7, -1, v3 ; encoding: [0xfd,0x80,0x1d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x80,0x1d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_min_num_f32 v7, -1, v3 ; encoding: [0xfd,0xb0,0x1c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0xb0,0x1c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v7, -1, v3 ; encoding: [0xfd,0x70,0x1c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x70,0x1c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_mul_f32 v7, -1, v3 ; encoding: [0xfd,0x30,0x1c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x30,0x1c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_sub_f32 v7, -1, v3 ; 
encoding: [0xfd,0x50,0x1c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x50,0x1c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x40,0x1d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x40,0x1d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v2 :: v_dual_subrev_f32 v7, -1, v3 ; encoding: [0xfd,0x60,0x1c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x60,0x1c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, src_scc, v255 :: v_dual_mov_b32 v7, -1 ; encoding: [0xfd,0x80,0x1c,0xcf,0xc1,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0xfd,0x80,0x1c,0xcf,0xc1,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_add_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x40,0x1c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x40,0x1c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x1d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x00,0x1d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v7, vcc_lo, v3 ; encoding: [0x7b,0x60,0x1d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x60,0x1d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v7, ttmp15, v3, vcc_lo ; encoding: [0x7b,0x90,0x1c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7b,0x90,0x1c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_fmac_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x1c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x00,0x1c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: 
v_dual_lshlrev_b32 v7, vcc_lo, v3 ; encoding: [0x7b,0x10,0x1d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x10,0x1d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v7, vcc_lo, v3 ; encoding: [0x7b,0x50,0x1d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x50,0x1d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_max_i32 v7, vcc_lo, v3 ; encoding: [0x7b,0x70,0x1d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x70,0x1d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_max_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xa0,0x1c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0xa0,0x1c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_min_i32 v7, vcc_lo, v3 ; encoding: [0x7b,0x80,0x1d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x80,0x1d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_min_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xb0,0x1c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0xb0,0x1c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x70,0x1c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x70,0x1c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_mul_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x30,0x1c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x30,0x1c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_sub_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x50,0x1c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x50,0x1c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: 
v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_sub_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7b,0x40,0x1d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x40,0x1d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v2 :: v_dual_subrev_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x60,0x1c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x60,0x1c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, ttmp15, v255 :: v_dual_mov_b32 v7, vcc_lo ; encoding: [0x7b,0x80,0x1c,0xcf,0x6a,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7b,0x80,0x1c,0xcf,0x6a,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_add_f32 v7, v255, v3 ; encoding: [0x01,0x41,0x1c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x41,0x1c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_add_nc_u32 v7, v255, v3 ; encoding: [0x01,0x01,0x1d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x01,0x1d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_ashrrev_i32 v7, v255, v3 ; encoding: [0x01,0x61,0x1d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x61,0x1d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_cndmask_b32 v7, v255, v3, vcc_lo ; encoding: [0x01,0x91,0x1c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0x01,0x91,0x1c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_fmac_f32 v7, v255, v3 ; encoding: [0x01,0x01,0x1c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x01,0x1c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_lshlrev_b32 v7, v255, v3 ; encoding: [0x01,0x11,0x1d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x11,0x1d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: 
v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_lshrrev_b32 v7, v255, v3 ; encoding: [0x01,0x51,0x1d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x51,0x1d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_max_i32 v7, v255, v3 ; encoding: [0x01,0x71,0x1d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x71,0x1d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_max_num_f32 v7, v255, v3 ; encoding: [0x01,0xa1,0x1c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xa1,0x1c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_min_i32 v7, v255, v3 ; encoding: [0x01,0x81,0x1d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x81,0x1d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_min_num_f32 v7, v255, v3 ; encoding: [0x01,0xb1,0x1c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xb1,0x1c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v7, v255, v3 ; encoding: [0x01,0x71,0x1c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x71,0x1c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_mul_f32 v7, v255, v3 ; encoding: [0x01,0x31,0x1c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x31,0x1c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_sub_f32 v7, v255, v3 ; encoding: [0x01,0x51,0x1c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x51,0x1c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_sub_nc_u32 v7, v255, v3 ; encoding: [0x01,0x41,0x1d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x41,0x1d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: 
v_dual_mul_dx9_zero_f32 v255, v1, v2 :: v_dual_subrev_f32 v7, v255, v3 ; encoding: [0x01,0x61,0x1c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x61,0x1c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v1, v255 :: v_dual_mov_b32 v7, v255 ; encoding: [0x01,0x81,0x1c,0xcf,0xff,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0x01,0x81,0x1c,0xcf,0xff,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_add_f32 v7, v3, v3 ; encoding: [0x02,0x41,0x1c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x41,0x1c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_add_nc_u32 v7, v3, v3 ; encoding: [0x02,0x01,0x1d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x01,0x1d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_ashrrev_i32 v7, v3, v3 ; encoding: [0x02,0x61,0x1d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x61,0x1d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo ; encoding: [0x02,0x91,0x1c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0x02,0x91,0x1c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_fmac_f32 v7, v3, v3 ; encoding: [0x02,0x01,0x1c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x01,0x1c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_lshlrev_b32 v7, v3, v3 ; encoding: [0x02,0x11,0x1d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x11,0x1d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_lshrrev_b32 v7, v3, v3 ; encoding: [0x02,0x51,0x1d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x51,0x1d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, 
v2, v2 :: v_dual_max_i32 v7, v3, v3 ; encoding: [0x02,0x71,0x1d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x71,0x1d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_max_num_f32 v7, v3, v3 ; encoding: [0x02,0xa1,0x1c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0xa1,0x1c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_min_i32 v7, v3, v3 ; encoding: [0x02,0x81,0x1d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x81,0x1d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_min_num_f32 v7, v3, v3 ; encoding: [0x02,0xb1,0x1c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0xb1,0x1c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v7, v3, v3 ; encoding: [0x02,0x71,0x1c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x71,0x1c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_mul_f32 v7, v3, v3 ; encoding: [0x02,0x31,0x1c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x31,0x1c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_sub_f32 v7, v3, v3 ; encoding: [0x02,0x51,0x1c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x51,0x1c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_sub_nc_u32 v7, v3, v3 ; encoding: [0x02,0x41,0x1d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x41,0x1d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v2 :: v_dual_subrev_f32 v7, v3, v3 ; encoding: [0x02,0x61,0x1c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x61,0x1c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v2, v255 :: v_dual_mov_b32 v7, v3 ; 
encoding: [0x02,0x81,0x1c,0xcf,0x03,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0x02,0x81,0x1c,0xcf,0x03,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_add_f32 v7, v2, v3 ; encoding: [0xff,0x41,0x1c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x41,0x1c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_add_nc_u32 v7, v2, v3 ; encoding: [0xff,0x01,0x1d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x01,0x1d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_ashrrev_i32 v7, v2, v3 ; encoding: [0xff,0x61,0x1d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x61,0x1d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo ; encoding: [0xff,0x91,0x1c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0xff,0x91,0x1c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_fmac_f32 v7, v2, v3 ; encoding: [0xff,0x01,0x1c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x01,0x1c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_lshlrev_b32 v7, v2, v3 ; encoding: [0xff,0x11,0x1d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x11,0x1d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_lshrrev_b32 v7, v2, v3 ; encoding: [0xff,0x51,0x1d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x51,0x1d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_max_i32 v7, v2, v3 ; encoding: [0xff,0x71,0x1d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x71,0x1d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_max_num_f32 v7, v2, v3 ; encoding: 
[0xff,0xa1,0x1c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0xa1,0x1c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_min_i32 v7, v2, v3 ; encoding: [0xff,0x81,0x1d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x81,0x1d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_min_num_f32 v7, v2, v3 ; encoding: [0xff,0xb1,0x1c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0xb1,0x1c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v7, v2, v3 ; encoding: [0xff,0x71,0x1c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x71,0x1c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_mul_f32 v7, v2, v3 ; encoding: [0xff,0x31,0x1c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x31,0x1c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_sub_f32 v7, v2, v3 ; encoding: [0xff,0x51,0x1c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x51,0x1c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_sub_nc_u32 v7, v2, v3 ; encoding: [0xff,0x41,0x1d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x41,0x1d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v2 :: v_dual_subrev_f32 v7, v2, v3 ; encoding: [0xff,0x61,0x1c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x61,0x1c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v255, v255 :: v_dual_mov_b32 v7, v2 ; encoding: [0xff,0x81,0x1c,0xcf,0x02,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0xff,0x81,0x1c,0xcf,0x02,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_add_f32 v7, v4, v3 ; encoding: 
[0x03,0x41,0x1c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x41,0x1c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_add_nc_u32 v7, v4, v3 ; encoding: [0x03,0x01,0x1d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x01,0x1d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_ashrrev_i32 v7, v4, v3 ; encoding: [0x03,0x61,0x1d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x61,0x1d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo ; encoding: [0x03,0x91,0x1c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0x03,0x91,0x1c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_fmac_f32 v7, v4, v3 ; encoding: [0x03,0x01,0x1c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x01,0x1c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_lshlrev_b32 v7, v4, v3 ; encoding: [0x03,0x11,0x1d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x11,0x1d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_lshrrev_b32 v7, v4, v3 ; encoding: [0x03,0x51,0x1d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x51,0x1d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_max_i32 v7, v4, v3 ; encoding: [0x03,0x71,0x1d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x71,0x1d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_max_num_f32 v7, v4, v3 ; encoding: [0x03,0xa1,0x1c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0xa1,0x1c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_min_i32 v7, v4, v3 ; encoding: 
[0x03,0x81,0x1d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x81,0x1d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_min_num_f32 v7, v4, v3 ; encoding: [0x03,0xb1,0x1c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0xb1,0x1c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v7, v4, v3 ; encoding: [0x03,0x71,0x1c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x71,0x1c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_mul_f32 v7, v4, v3 ; encoding: [0x03,0x31,0x1c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x31,0x1c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_sub_f32 v7, v4, v3 ; encoding: [0x03,0x51,0x1c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x51,0x1c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_sub_nc_u32 v7, v4, v3 ; encoding: [0x03,0x41,0x1d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x41,0x1d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v2 :: v_dual_subrev_f32 v7, v4, v3 ; encoding: [0x03,0x61,0x1c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x61,0x1c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v3, v255 :: v_dual_mov_b32 v7, v4 ; encoding: [0x03,0x81,0x1c,0xcf,0x04,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0x03,0x81,0x1c,0xcf,0x04,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_add_f32 v7, v1, v3 ; encoding: [0x04,0x41,0x1c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x41,0x1c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_add_nc_u32 v7, v1, v3 ; encoding: 
[0x04,0x01,0x1d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x01,0x1d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_ashrrev_i32 v7, v1, v3 ; encoding: [0x04,0x61,0x1d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x61,0x1d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0x11 ; encoding: [0x04,0x21,0x1d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x11,0x07] +0x04,0x21,0x1d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x11,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, s96 ; encoding: [0x04,0x91,0x1c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x60,0x07] +0x04,0x91,0x1c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x60,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo ; encoding: [0x04,0x91,0x1c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0x04,0x91,0x1c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_fma_f32 v7, v1, v3, v4 ; encoding: [0x04,0x31,0x1d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x04,0x07] +0x04,0x31,0x1d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x04,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_fmac_f32 v7, v1, v3 ; encoding: [0x04,0x01,0x1c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x01,0x1c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_lshlrev_b32 v7, v1, v3 ; encoding: [0x04,0x11,0x1d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x11,0x1d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_lshrrev_b32 v7, v1, v3 ; encoding: [0x04,0x51,0x1d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x51,0x1d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_max_i32 v7, v1, v3 ; encoding: 
[0x04,0x71,0x1d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x71,0x1d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_max_num_f32 v7, v1, v3 ; encoding: [0x04,0xa1,0x1c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0xa1,0x1c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_min_i32 v7, v1, v3 ; encoding: [0x04,0x81,0x1d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x81,0x1d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_min_num_f32 v7, v1, v3 ; encoding: [0x04,0xb1,0x1c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0xb1,0x1c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v7, v1, v3 ; encoding: [0x04,0x71,0x1c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x71,0x1c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_mul_f32 v7, v1, v3 ; encoding: [0x04,0x31,0x1c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x31,0x1c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_sub_f32 v7, v1, v3 ; encoding: [0x04,0x51,0x1c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x51,0x1c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_sub_nc_u32 v7, v1, v3 ; encoding: [0x04,0x41,0x1d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x41,0x1d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v2 :: v_dual_subrev_f32 v7, v1, v3 ; encoding: [0x04,0x61,0x1c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x61,0x1c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, v4, v255 :: v_dual_mov_b32 v7, v1 ; encoding: 
[0x04,0x81,0x1c,0xcf,0x01,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0x04,0x81,0x1c,0xcf,0x01,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_add_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x40,0x1c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x40,0x1c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x1d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x00,0x1d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v7, exec_lo, v3 ; encoding: [0x6b,0x60,0x1d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x60,0x1d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v7, vcc_hi, v3, vcc_lo ; encoding: [0x6b,0x90,0x1c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x6b,0x90,0x1c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x1c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x00,0x1c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v7, exec_lo, v3 ; encoding: [0x6b,0x10,0x1d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x10,0x1d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v7, exec_lo, v3 ; encoding: [0x6b,0x50,0x1d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x50,0x1d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_max_i32 v7, exec_lo, v3 ; encoding: [0x6b,0x70,0x1d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x70,0x1d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: 
v_dual_max_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xa0,0x1c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0xa0,0x1c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_min_i32 v7, exec_lo, v3 ; encoding: [0x6b,0x80,0x1d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x80,0x1d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xb0,0x1c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0xb0,0x1c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x70,0x1c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x70,0x1c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_mul_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x30,0x1c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x30,0x1c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_sub_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x50,0x1c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x50,0x1c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v7, exec_lo, v3 ; encoding: [0x6b,0x40,0x1d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x40,0x1d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x60,0x1c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x60,0x1c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_hi, v255 :: v_dual_mov_b32 v7, exec_lo ; encoding: [0x6b,0x80,0x1c,0xcf,0x7e,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x6b,0x80,0x1c,0xcf,0x7e,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: 
v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_add_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x1c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x40,0x1c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x1d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x00,0x1d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x1d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x60,0x1d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo ; encoding: [0x6a,0x90,0x1c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x6a,0x90,0x1c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x1c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x00,0x1c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x10,0x1d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x10,0x1d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x1d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x50,0x1d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_max_i32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x1d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x70,0x1d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xa0,0x1c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x6a,0xa0,0x1c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_min_i32 v7, exec_hi, v3 ; encoding: [0x6a,0x80,0x1d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x80,0x1d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xb0,0x1c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0xb0,0x1c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x1c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x70,0x1c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_mul_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x30,0x1c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x30,0x1c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_sub_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x1c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x50,0x1c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x1d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x40,0x1d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x1c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x60,0x1c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_dx9_zero_f32 v255, vcc_lo, v255 :: v_dual_mov_b32 v7, exec_hi ; encoding: [0x6a,0x80,0x1c,0xcf,0x7f,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x6a,0x80,0x1c,0xcf,0x7f,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v0, -v1, -v2 :: v_dual_bitop2_b32 v5, v6, v7 bitop3:0x64 ; encoding: 
[0x01,0x21,0x0d,0xcf,0x06,0x07,0x02,0x00,0x00,0x07,0x64,0x05] +0x01,0x21,0x0d,0xcf,0x06,0x07,0x02,0x00,0x00,0x07,0x64,0x05 + +# GFX1250: v_dual_mul_f32 v0, -v1, -v2 :: v_dual_lshrrev_b32 v5, v6, v7 ; encoding: [0x01,0x51,0x0d,0xcf,0x06,0x07,0x02,0x00,0x00,0x07,0x00,0x05] +0x01,0x51,0x0d,0xcf,0x06,0x07,0x02,0x00,0x00,0x07,0x00,0x05 + +# GFX1250: v_dual_mul_f32 v0, v1, v2 :: v_dual_fmac_f32 v5, -v6, -v7 ; encoding: [0x01,0x01,0x0c,0xcf,0x06,0x31,0x02,0x00,0x00,0x07,0x00,0x05] +0x01,0x01,0x0c,0xcf,0x06,0x31,0x02,0x00,0x00,0x07,0x00,0x05 + +# GFX1250: v_dual_mul_f32 v0, v1, v2 :: v_dual_mul_dx9_zero_f32 v5, -v6, -v7 ; encoding: [0x01,0x71,0x0c,0xcf,0x06,0x31,0x02,0x00,0x00,0x07,0x00,0x05] +0x01,0x71,0x0c,0xcf,0x06,0x31,0x02,0x00,0x00,0x07,0x00,0x05 + +# GFX1250: v_dual_mul_f32 v255, -1, v4 :: v_dual_add_f32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x0c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x40,0x0c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, -1, v4 :: v_dual_add_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x0d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x00,0x0d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, -1, v4 :: v_dual_ashrrev_i32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x0d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x60,0x0d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, -1, v4 :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo ; encoding: [0xc1,0x90,0x0c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x6a,0x07] +0xc1,0x90,0x0c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x6a,0x07 + +# GFX1250: v_dual_mul_f32 v255, -1, v4 :: v_dual_fmac_f32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x0c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x00,0x0c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, -1, v4 :: v_dual_lshlrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x10,0x0d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] 
+0xc1,0x10,0x0d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, -1, v4 :: v_dual_lshrrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x0d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x50,0x0d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, -1, v4 :: v_dual_max_i32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x0d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x70,0x0d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, -1, v4 :: v_dual_max_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xa0,0x0c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0xa0,0x0c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, -1, v4 :: v_dual_min_i32 v7, src_scc, v5 ; encoding: [0xc1,0x80,0x0d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x80,0x0d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, -1, v4 :: v_dual_min_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xb0,0x0c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0xb0,0x0c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, -1, v4 :: v_dual_mov_b32 v7, src_scc ; encoding: [0xc1,0x80,0x0c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x00,0x00,0x07] +0xc1,0x80,0x0c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x0c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x70,0x0c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, -1, v4 :: v_dual_mul_f32 v7, src_scc, v5 ; encoding: [0xc1,0x30,0x0c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x30,0x0c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, -1, v4 :: v_dual_sub_f32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x0c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x50,0x0c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: 
v_dual_mul_f32 v255, -1, v4 :: v_dual_sub_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x0d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x40,0x0d,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, -1, v4 :: v_dual_subrev_f32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x0c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x60,0x0c,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_add_f32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x0c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x40,0x0c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_add_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x0d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x00,0x0d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x0d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x60,0x0d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo ; encoding: [0xf0,0x90,0x0c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x6a,0x07] +0xf0,0x90,0x0c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x6a,0x07 + +# GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_fmac_f32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x0c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x00,0x0c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x10,0x0d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x10,0x0d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x0d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x50,0x0d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_max_i32 v7, 0.5, v2 ; encoding: 
[0xf0,0x70,0x0d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x70,0x0d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_max_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xa0,0x0c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0xa0,0x0c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_min_i32 v7, 0.5, v2 ; encoding: [0xf0,0x80,0x0d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x80,0x0d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_min_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xb0,0x0c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0xb0,0x0c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_mov_b32 v7, 0.5 ; encoding: [0xf0,0x80,0x0c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x00,0x00,0x07] +0xf0,0x80,0x0c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x0c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x70,0x0c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_mul_f32 v7, 0.5, v2 ; encoding: [0xf0,0x30,0x0c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x30,0x0c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_sub_f32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x0c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x50,0x0c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x0d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x40,0x0d,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, 0.5, v3 :: v_dual_subrev_f32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x0c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] 
+0xf0,0x60,0x0c,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_add_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x40,0x0c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x40,0x0c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x0d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x00,0x0d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v7, vcc_hi, v3 ; encoding: [0x7f,0x60,0x0d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x60,0x0d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v7, exec_hi, v3, vcc_lo ; encoding: [0x7f,0x90,0x0c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7f,0x90,0x0c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_fmac_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x0c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x00,0x0c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v7, vcc_hi, v3 ; encoding: [0x7f,0x10,0x0d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x10,0x0d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v7, vcc_hi, v3 ; encoding: [0x7f,0x50,0x0d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x50,0x0d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_max_i32 v7, vcc_hi, v3 ; encoding: [0x7f,0x70,0x0d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x70,0x0d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_max_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xa0,0x0c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x7f,0xa0,0x0c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_min_i32 v7, vcc_hi, v3 ; encoding: [0x7f,0x80,0x0d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x80,0x0d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_min_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xb0,0x0c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0xb0,0x0c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x70,0x0c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x70,0x0c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_mul_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x30,0x0c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x30,0x0c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_sub_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x50,0x0c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x50,0x0c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v7, vcc_hi, v3 ; encoding: [0x7f,0x40,0x0d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x40,0x0d,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, exec_hi, v2 :: v_dual_subrev_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x60,0x0c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x60,0x0c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, exec_hi, v255 :: v_dual_mov_b32 v7, vcc_hi ; encoding: [0x7f,0x80,0x0c,0xcf,0x6b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7f,0x80,0x0c,0xcf,0x6b,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_add_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x40,0x0c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x7e,0x40,0x0c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x0d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x00,0x0d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v7, ttmp15, v3 ; encoding: [0x7e,0x60,0x0d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x60,0x0d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v7, exec_lo, v3, vcc_lo ; encoding: [0x7e,0x90,0x0c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7e,0x90,0x0c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_fmac_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x0c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x00,0x0c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v7, ttmp15, v3 ; encoding: [0x7e,0x10,0x0d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x10,0x0d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_lshrrev_b32 v7, ttmp15, v3 ; encoding: [0x7e,0x50,0x0d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x50,0x0d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_max_i32 v7, ttmp15, v3 ; encoding: [0x7e,0x70,0x0d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x70,0x0d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_max_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xa0,0x0c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0xa0,0x0c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_min_i32 v7, ttmp15, v3 ; encoding: [0x7e,0x80,0x0d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x7e,0x80,0x0d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_min_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xb0,0x0c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0xb0,0x0c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x70,0x0c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x70,0x0c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_mul_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x30,0x0c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x30,0x0c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_sub_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x50,0x0c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x50,0x0c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_sub_nc_u32 v7, ttmp15, v3 ; encoding: [0x7e,0x40,0x0d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x40,0x0d,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, exec_lo, v2 :: v_dual_subrev_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x60,0x0c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x60,0x0c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, exec_lo, v255 :: v_dual_mov_b32 v7, ttmp15 ; encoding: [0x7e,0x80,0x0c,0xcf,0x7b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7e,0x80,0x0c,0xcf,0x7b,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, m0, v2 :: v_dual_add_f32 v7, m0, v3 ; encoding: [0x7d,0x40,0x0c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x40,0x0c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, m0, v2 :: v_dual_add_nc_u32 v7, m0, v3 ; encoding: [0x7d,0x00,0x0d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x00,0x0d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + 
+# GFX1250: v_dual_mul_f32 v255, m0, v2 :: v_dual_ashrrev_i32 v7, m0, v3 ; encoding: [0x7d,0x60,0x0d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x60,0x0d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, m0, v2 :: v_dual_cndmask_b32 v7, m0, v3, vcc_lo ; encoding: [0x7d,0x90,0x0c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7d,0x90,0x0c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_mul_f32 v255, m0, v2 :: v_dual_fmac_f32 v7, m0, v3 ; encoding: [0x7d,0x00,0x0c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x00,0x0c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, m0, v2 :: v_dual_lshlrev_b32 v7, m0, v3 ; encoding: [0x7d,0x10,0x0d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x10,0x0d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, m0, v2 :: v_dual_lshrrev_b32 v7, m0, v3 ; encoding: [0x7d,0x50,0x0d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x50,0x0d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, m0, v2 :: v_dual_max_i32 v7, m0, v3 ; encoding: [0x7d,0x70,0x0d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x70,0x0d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, m0, v2 :: v_dual_max_num_f32 v7, m0, v3 ; encoding: [0x7d,0xa0,0x0c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0xa0,0x0c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, m0, v2 :: v_dual_min_i32 v7, m0, v3 ; encoding: [0x7d,0x80,0x0d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x80,0x0d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, m0, v2 :: v_dual_min_num_f32 v7, m0, v3 ; encoding: [0x7d,0xb0,0x0c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0xb0,0x0c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v7, m0, v3 ; encoding: 
[0x7d,0x70,0x0c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x70,0x0c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, m0, v2 :: v_dual_mul_f32 v7, m0, v3 ; encoding: [0x7d,0x30,0x0c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x30,0x0c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, m0, v2 :: v_dual_sub_f32 v7, m0, v3 ; encoding: [0x7d,0x50,0x0c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x50,0x0c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, m0, v2 :: v_dual_sub_nc_u32 v7, m0, v3 ; encoding: [0x7d,0x40,0x0d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x40,0x0d,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, m0, v2 :: v_dual_subrev_f32 v7, m0, v3 ; encoding: [0x7d,0x60,0x0c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x60,0x0c,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, m0, v255 :: v_dual_mov_b32 v7, m0 ; encoding: [0x7d,0x80,0x0c,0xcf,0x7d,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7d,0x80,0x0c,0xcf,0x7d,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, s1, v2 :: v_dual_add_f32 v7, s105, v3 ; encoding: [0x01,0x40,0x0c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x40,0x0c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, s1, v2 :: v_dual_add_nc_u32 v7, s105, v3 ; encoding: [0x01,0x00,0x0d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x00,0x0d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, s1, v2 :: v_dual_ashrrev_i32 v7, s105, v3 ; encoding: [0x01,0x60,0x0d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x60,0x0d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, s1, v2 :: v_dual_cndmask_b32 v7, s1, v3, vcc_lo ; encoding: [0x01,0x90,0x0c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] 
+0x01,0x90,0x0c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_mul_f32 v255, s1, v2 :: v_dual_fmac_f32 v7, s105, v3 ; encoding: [0x01,0x00,0x0c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x00,0x0c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, s1, v2 :: v_dual_lshlrev_b32 v7, s105, v3 ; encoding: [0x01,0x10,0x0d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x10,0x0d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, s1, v2 :: v_dual_lshrrev_b32 v7, s105, v3 ; encoding: [0x01,0x50,0x0d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x50,0x0d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, s1, v2 :: v_dual_max_i32 v7, s105, v3 ; encoding: [0x01,0x70,0x0d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x70,0x0d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, s1, v2 :: v_dual_max_num_f32 v7, s105, v3 ; encoding: [0x01,0xa0,0x0c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xa0,0x0c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, s1, v2 :: v_dual_min_i32 v7, s105, v3 ; encoding: [0x01,0x80,0x0d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x80,0x0d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, s1, v2 :: v_dual_min_num_f32 v7, s105, v3 ; encoding: [0x01,0xb0,0x0c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xb0,0x0c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v7, s105, v3 ; encoding: [0x01,0x70,0x0c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x70,0x0c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, s1, v2 :: v_dual_mul_f32 v7, s105, v3 ; encoding: [0x01,0x30,0x0c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x30,0x0c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, s1, 
v2 :: v_dual_sub_f32 v7, s105, v3 ; encoding: [0x01,0x50,0x0c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x50,0x0c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, s1, v2 :: v_dual_sub_nc_u32 v7, s105, v3 ; encoding: [0x01,0x40,0x0d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x40,0x0d,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, s1, v2 :: v_dual_subrev_f32 v7, s105, v3 ; encoding: [0x01,0x60,0x0c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x60,0x0c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, s1, v255 :: v_dual_mov_b32 v7, s105 ; encoding: [0x01,0x80,0x0c,0xcf,0x69,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x01,0x80,0x0c,0xcf,0x69,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, s105, v2 :: v_dual_add_f32 v7, s1, v3 ; encoding: [0x69,0x40,0x0c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x40,0x0c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, s105, v2 :: v_dual_add_nc_u32 v7, s1, v3 ; encoding: [0x69,0x00,0x0d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x00,0x0d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, s105, v2 :: v_dual_ashrrev_i32 v7, s1, v3 ; encoding: [0x69,0x60,0x0d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x60,0x0d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, s105, v2 :: v_dual_cndmask_b32 v7, s105, v3, vcc_lo ; encoding: [0x69,0x90,0x0c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x69,0x90,0x0c,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_mul_f32 v255, s105, v2 :: v_dual_fmac_f32 v7, s1, v3 ; encoding: [0x69,0x00,0x0c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x00,0x0c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, s105, v2 :: v_dual_lshlrev_b32 v7, s1, v3 ; encoding: 
[0x69,0x10,0x0d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x10,0x0d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, s105, v2 :: v_dual_lshrrev_b32 v7, s1, v3 ; encoding: [0x69,0x50,0x0d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x50,0x0d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, s105, v2 :: v_dual_max_i32 v7, s1, v3 ; encoding: [0x69,0x70,0x0d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x70,0x0d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, s105, v2 :: v_dual_max_num_f32 v7, s1, v3 ; encoding: [0x69,0xa0,0x0c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0xa0,0x0c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, s105, v2 :: v_dual_min_i32 v7, s1, v3 ; encoding: [0x69,0x80,0x0d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x80,0x0d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, s105, v2 :: v_dual_min_num_f32 v7, s1, v3 ; encoding: [0x69,0xb0,0x0c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0xb0,0x0c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v7, s1, v3 ; encoding: [0x69,0x70,0x0c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x70,0x0c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, s105, v2 :: v_dual_mul_f32 v7, s1, v3 ; encoding: [0x69,0x30,0x0c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x30,0x0c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, s105, v2 :: v_dual_sub_f32 v7, s1, v3 ; encoding: [0x69,0x50,0x0c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x50,0x0c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, s105, v2 :: v_dual_sub_nc_u32 v7, s1, v3 ; encoding: [0x69,0x40,0x0d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x69,0x40,0x0d,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, s105, v2 :: v_dual_subrev_f32 v7, s1, v3 ; encoding: [0x69,0x60,0x0c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x60,0x0c,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, s105, v255 :: v_dual_mov_b32 v7, s1 ; encoding: [0x69,0x80,0x0c,0xcf,0x01,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x69,0x80,0x0c,0xcf,0x01,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_add_f32 v7, -1, v3 ; encoding: [0xfd,0x40,0x0c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x40,0x0c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_add_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x00,0x0d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x00,0x0d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v7, -1, v3 ; encoding: [0xfd,0x60,0x0d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x60,0x0d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo ; encoding: [0xfd,0x90,0x0c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0xfd,0x90,0x0c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_fmac_f32 v7, -1, v3 ; encoding: [0xfd,0x00,0x0c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x00,0x0c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v7, -1, v3 ; encoding: [0xfd,0x10,0x0d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x10,0x0d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v7, -1, v3 ; encoding: [0xfd,0x50,0x0d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x50,0x0d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: 
v_dual_mul_f32 v255, src_scc, v2 :: v_dual_max_i32 v7, -1, v3 ; encoding: [0xfd,0x70,0x0d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x70,0x0d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_max_num_f32 v7, -1, v3 ; encoding: [0xfd,0xa0,0x0c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0xa0,0x0c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_min_i32 v7, -1, v3 ; encoding: [0xfd,0x80,0x0d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x80,0x0d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_min_num_f32 v7, -1, v3 ; encoding: [0xfd,0xb0,0x0c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0xb0,0x0c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v7, -1, v3 ; encoding: [0xfd,0x70,0x0c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x70,0x0c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_mul_f32 v7, -1, v3 ; encoding: [0xfd,0x30,0x0c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x30,0x0c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_sub_f32 v7, -1, v3 ; encoding: [0xfd,0x50,0x0c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x50,0x0c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x40,0x0d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x40,0x0d,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, src_scc, v2 :: v_dual_subrev_f32 v7, -1, v3 ; encoding: [0xfd,0x60,0x0c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x60,0x0c,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, src_scc, v255 :: v_dual_mov_b32 v7, -1 ; encoding: 
[0xfd,0x80,0x0c,0xcf,0xc1,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0xfd,0x80,0x0c,0xcf,0xc1,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_add_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x40,0x0c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x40,0x0c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x0d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x00,0x0d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v7, vcc_lo, v3 ; encoding: [0x7b,0x60,0x0d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x60,0x0d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v7, ttmp15, v3, vcc_lo ; encoding: [0x7b,0x90,0x0c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7b,0x90,0x0c,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_fmac_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x0c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x00,0x0c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v7, vcc_lo, v3 ; encoding: [0x7b,0x10,0x0d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x10,0x0d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v7, vcc_lo, v3 ; encoding: [0x7b,0x50,0x0d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x50,0x0d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_max_i32 v7, vcc_lo, v3 ; encoding: [0x7b,0x70,0x0d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x70,0x0d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_max_num_f32 v7, vcc_lo, v3 ; encoding: 
[0x7b,0xa0,0x0c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0xa0,0x0c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_min_i32 v7, vcc_lo, v3 ; encoding: [0x7b,0x80,0x0d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x80,0x0d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_min_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xb0,0x0c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0xb0,0x0c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x70,0x0c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x70,0x0c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_mul_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x30,0x0c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x30,0x0c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_sub_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x50,0x0c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x50,0x0c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_sub_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7b,0x40,0x0d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x40,0x0d,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, ttmp15, v2 :: v_dual_subrev_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x60,0x0c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x60,0x0c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, ttmp15, v255 :: v_dual_mov_b32 v7, vcc_lo ; encoding: [0x7b,0x80,0x0c,0xcf,0x6a,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7b,0x80,0x0c,0xcf,0x6a,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v1, v2 :: v_dual_add_f32 v7, v255, v3 ; encoding: [0x01,0x41,0x0c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x01,0x41,0x0c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v1, v2 :: v_dual_add_nc_u32 v7, v255, v3 ; encoding: [0x01,0x01,0x0d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x01,0x0d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v1, v2 :: v_dual_ashrrev_i32 v7, v255, v3 ; encoding: [0x01,0x61,0x0d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x61,0x0d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v1, v2 :: v_dual_cndmask_b32 v7, v255, v3, vcc_lo ; encoding: [0x01,0x91,0x0c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0x01,0x91,0x0c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_mul_f32 v255, v1, v2 :: v_dual_fmac_f32 v7, v255, v3 ; encoding: [0x01,0x01,0x0c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x01,0x0c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v1, v2 :: v_dual_lshlrev_b32 v7, v255, v3 ; encoding: [0x01,0x11,0x0d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x11,0x0d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v1, v2 :: v_dual_lshrrev_b32 v7, v255, v3 ; encoding: [0x01,0x51,0x0d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x51,0x0d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v1, v2 :: v_dual_max_i32 v7, v255, v3 ; encoding: [0x01,0x71,0x0d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x71,0x0d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v1, v2 :: v_dual_max_num_f32 v7, v255, v3 ; encoding: [0x01,0xa1,0x0c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xa1,0x0c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v1, v2 :: v_dual_min_i32 v7, v255, v3 ; encoding: [0x01,0x81,0x0d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x81,0x0d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, 
v1, v2 :: v_dual_min_num_f32 v7, v255, v3 ; encoding: [0x01,0xb1,0x0c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xb1,0x0c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v7, v255, v3 ; encoding: [0x01,0x71,0x0c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x71,0x0c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v1, v2 :: v_dual_mul_f32 v7, v255, v3 ; encoding: [0x01,0x31,0x0c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x31,0x0c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v1, v2 :: v_dual_sub_f32 v7, v255, v3 ; encoding: [0x01,0x51,0x0c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x51,0x0c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v1, v2 :: v_dual_sub_nc_u32 v7, v255, v3 ; encoding: [0x01,0x41,0x0d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x41,0x0d,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v1, v2 :: v_dual_subrev_f32 v7, v255, v3 ; encoding: [0x01,0x61,0x0c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x61,0x0c,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v1, v255 :: v_dual_mov_b32 v7, v255 ; encoding: [0x01,0x81,0x0c,0xcf,0xff,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0x01,0x81,0x0c,0xcf,0xff,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v2, v2 :: v_dual_add_f32 v7, v3, v3 ; encoding: [0x02,0x41,0x0c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x41,0x0c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v2, v2 :: v_dual_add_nc_u32 v7, v3, v3 ; encoding: [0x02,0x01,0x0d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x01,0x0d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v2, v2 :: v_dual_ashrrev_i32 v7, v3, v3 ; encoding: 
[0x02,0x61,0x0d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x61,0x0d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v2, v2 :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo ; encoding: [0x02,0x91,0x0c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0x02,0x91,0x0c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_mul_f32 v255, v2, v2 :: v_dual_fmac_f32 v7, v3, v3 ; encoding: [0x02,0x01,0x0c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x01,0x0c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v2, v2 :: v_dual_lshlrev_b32 v7, v3, v3 ; encoding: [0x02,0x11,0x0d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x11,0x0d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v2, v2 :: v_dual_lshrrev_b32 v7, v3, v3 ; encoding: [0x02,0x51,0x0d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x51,0x0d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v2, v2 :: v_dual_max_i32 v7, v3, v3 ; encoding: [0x02,0x71,0x0d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x71,0x0d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v2, v2 :: v_dual_max_num_f32 v7, v3, v3 ; encoding: [0x02,0xa1,0x0c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0xa1,0x0c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v2, v2 :: v_dual_min_i32 v7, v3, v3 ; encoding: [0x02,0x81,0x0d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x81,0x0d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v2, v2 :: v_dual_min_num_f32 v7, v3, v3 ; encoding: [0x02,0xb1,0x0c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0xb1,0x0c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v7, v3, v3 ; encoding: [0x02,0x71,0x0c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x02,0x71,0x0c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v2, v2 :: v_dual_mul_f32 v7, v3, v3 ; encoding: [0x02,0x31,0x0c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x31,0x0c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v2, v2 :: v_dual_sub_f32 v7, v3, v3 ; encoding: [0x02,0x51,0x0c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x51,0x0c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v2, v2 :: v_dual_sub_nc_u32 v7, v3, v3 ; encoding: [0x02,0x41,0x0d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x41,0x0d,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v2, v2 :: v_dual_subrev_f32 v7, v3, v3 ; encoding: [0x02,0x61,0x0c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x61,0x0c,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v2, v255 :: v_dual_mov_b32 v7, v3 ; encoding: [0x02,0x81,0x0c,0xcf,0x03,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0x02,0x81,0x0c,0xcf,0x03,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v255, v2 :: v_dual_add_f32 v7, v2, v3 ; encoding: [0xff,0x41,0x0c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x41,0x0c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v255, v2 :: v_dual_add_nc_u32 v7, v2, v3 ; encoding: [0xff,0x01,0x0d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x01,0x0d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v255, v2 :: v_dual_ashrrev_i32 v7, v2, v3 ; encoding: [0xff,0x61,0x0d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x61,0x0d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v255, v2 :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo ; encoding: [0xff,0x91,0x0c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0xff,0x91,0x0c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_mul_f32 v255, v255, v2 :: 
v_dual_fmac_f32 v7, v2, v3 ; encoding: [0xff,0x01,0x0c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x01,0x0c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v255, v2 :: v_dual_lshlrev_b32 v7, v2, v3 ; encoding: [0xff,0x11,0x0d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x11,0x0d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v255, v2 :: v_dual_lshrrev_b32 v7, v2, v3 ; encoding: [0xff,0x51,0x0d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x51,0x0d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v255, v2 :: v_dual_max_i32 v7, v2, v3 ; encoding: [0xff,0x71,0x0d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x71,0x0d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v255, v2 :: v_dual_max_num_f32 v7, v2, v3 ; encoding: [0xff,0xa1,0x0c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0xa1,0x0c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v255, v2 :: v_dual_min_i32 v7, v2, v3 ; encoding: [0xff,0x81,0x0d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x81,0x0d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v255, v2 :: v_dual_min_num_f32 v7, v2, v3 ; encoding: [0xff,0xb1,0x0c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0xb1,0x0c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v7, v2, v3 ; encoding: [0xff,0x71,0x0c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x71,0x0c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v255, v2 :: v_dual_mul_f32 v7, v2, v3 ; encoding: [0xff,0x31,0x0c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x31,0x0c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v255, v2 :: v_dual_sub_f32 v7, v2, v3 ; encoding: [0xff,0x51,0x0c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] 
+0xff,0x51,0x0c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v255, v2 :: v_dual_sub_nc_u32 v7, v2, v3 ; encoding: [0xff,0x41,0x0d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x41,0x0d,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v255, v2 :: v_dual_subrev_f32 v7, v2, v3 ; encoding: [0xff,0x61,0x0c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x61,0x0c,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v255, v255 :: v_dual_mov_b32 v7, v2 ; encoding: [0xff,0x81,0x0c,0xcf,0x02,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0xff,0x81,0x0c,0xcf,0x02,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v3, v2 :: v_dual_add_f32 v7, v4, v3 ; encoding: [0x03,0x41,0x0c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x41,0x0c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v3, v2 :: v_dual_add_nc_u32 v7, v4, v3 ; encoding: [0x03,0x01,0x0d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x01,0x0d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v3, v2 :: v_dual_ashrrev_i32 v7, v4, v3 ; encoding: [0x03,0x61,0x0d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x61,0x0d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v3, v2 :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo ; encoding: [0x03,0x91,0x0c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0x03,0x91,0x0c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_mul_f32 v255, v3, v2 :: v_dual_fmac_f32 v7, v4, v3 ; encoding: [0x03,0x01,0x0c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x01,0x0c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v3, v2 :: v_dual_lshlrev_b32 v7, v4, v3 ; encoding: [0x03,0x11,0x0d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x11,0x0d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v3, v2 :: 
v_dual_lshrrev_b32 v7, v4, v3 ; encoding: [0x03,0x51,0x0d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x51,0x0d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v3, v2 :: v_dual_max_i32 v7, v4, v3 ; encoding: [0x03,0x71,0x0d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x71,0x0d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v3, v2 :: v_dual_max_num_f32 v7, v4, v3 ; encoding: [0x03,0xa1,0x0c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0xa1,0x0c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v3, v2 :: v_dual_min_i32 v7, v4, v3 ; encoding: [0x03,0x81,0x0d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x81,0x0d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v3, v2 :: v_dual_min_num_f32 v7, v4, v3 ; encoding: [0x03,0xb1,0x0c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0xb1,0x0c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v7, v4, v3 ; encoding: [0x03,0x71,0x0c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x71,0x0c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v3, v2 :: v_dual_mul_f32 v7, v4, v3 ; encoding: [0x03,0x31,0x0c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x31,0x0c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v3, v2 :: v_dual_sub_f32 v7, v4, v3 ; encoding: [0x03,0x51,0x0c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x51,0x0c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v3, v2 :: v_dual_sub_nc_u32 v7, v4, v3 ; encoding: [0x03,0x41,0x0d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x41,0x0d,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v3, v2 :: v_dual_subrev_f32 v7, v4, v3 ; encoding: [0x03,0x61,0x0c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x03,0x61,0x0c,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v3, v255 :: v_dual_mov_b32 v7, v4 ; encoding: [0x03,0x81,0x0c,0xcf,0x04,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0x03,0x81,0x0c,0xcf,0x04,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_add_f32 v7, v1, v3 ; encoding: [0x04,0x41,0x0c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x41,0x0c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_add_nc_u32 v7, v1, v3 ; encoding: [0x04,0x01,0x0d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x01,0x0d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_ashrrev_i32 v7, v1, v3 ; encoding: [0x04,0x61,0x0d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x61,0x0d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0x71 ; encoding: [0x04,0x21,0x0d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x71,0x07] +0x04,0x21,0x0d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x71,0x07 + +# GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, s96 ; encoding: [0x04,0x91,0x0c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x60,0x07] +0x04,0x91,0x0c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x60,0x07 + +# GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo ; encoding: [0x04,0x91,0x0c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0x04,0x91,0x0c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_fma_f32 v7, v1, v3, v4 ; encoding: [0x04,0x31,0x0d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x04,0x07] +0x04,0x31,0x0d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x04,0x07 + +# GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_fmac_f32 v7, v1, v3 ; encoding: [0x04,0x01,0x0c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x01,0x0c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v4, 
v2 :: v_dual_lshlrev_b32 v7, v1, v3 ; encoding: [0x04,0x11,0x0d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x11,0x0d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_lshrrev_b32 v7, v1, v3 ; encoding: [0x04,0x51,0x0d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x51,0x0d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_max_i32 v7, v1, v3 ; encoding: [0x04,0x71,0x0d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x71,0x0d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_max_num_f32 v7, v1, v3 ; encoding: [0x04,0xa1,0x0c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0xa1,0x0c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_min_i32 v7, v1, v3 ; encoding: [0x04,0x81,0x0d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x81,0x0d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_min_num_f32 v7, v1, v3 ; encoding: [0x04,0xb1,0x0c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0xb1,0x0c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v7, v1, v3 ; encoding: [0x04,0x71,0x0c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x71,0x0c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_mul_f32 v7, v1, v3 ; encoding: [0x04,0x31,0x0c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x31,0x0c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_sub_f32 v7, v1, v3 ; encoding: [0x04,0x51,0x0c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x51,0x0c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_sub_nc_u32 v7, v1, v3 ; encoding: [0x04,0x41,0x0d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x04,0x41,0x0d,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v4, v2 :: v_dual_subrev_f32 v7, v1, v3 ; encoding: [0x04,0x61,0x0c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x61,0x0c,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, v4, v255 :: v_dual_mov_b32 v7, v1 ; encoding: [0x04,0x81,0x0c,0xcf,0x01,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0x04,0x81,0x0c,0xcf,0x01,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_add_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x40,0x0c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x40,0x0c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x0d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x00,0x0d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v7, exec_lo, v3 ; encoding: [0x6b,0x60,0x0d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x60,0x0d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v7, vcc_hi, v3, vcc_lo ; encoding: [0x6b,0x90,0x0c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x6b,0x90,0x0c,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x0c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x00,0x0c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v7, exec_lo, v3 ; encoding: [0x6b,0x10,0x0d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x10,0x0d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v7, exec_lo, v3 ; encoding: [0x6b,0x50,0x0d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x6b,0x50,0x0d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_max_i32 v7, exec_lo, v3 ; encoding: [0x6b,0x70,0x0d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x70,0x0d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xa0,0x0c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0xa0,0x0c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_min_i32 v7, exec_lo, v3 ; encoding: [0x6b,0x80,0x0d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x80,0x0d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xb0,0x0c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0xb0,0x0c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x70,0x0c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x70,0x0c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_mul_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x30,0x0c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x30,0x0c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_sub_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x50,0x0c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x50,0x0c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v7, exec_lo, v3 ; encoding: [0x6b,0x40,0x0d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x40,0x0d,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x60,0x0c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x6b,0x60,0x0c,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, vcc_hi, v255 :: v_dual_mov_b32 v7, exec_lo ; encoding: [0x6b,0x80,0x0c,0xcf,0x7e,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x6b,0x80,0x0c,0xcf,0x7e,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_add_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x0c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x40,0x0c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x0d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x00,0x0d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x0d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x60,0x0d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo ; encoding: [0x6a,0x90,0x0c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x6a,0x90,0x0c,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x0c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x00,0x0c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x10,0x0d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x10,0x0d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x0d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x50,0x0d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_max_i32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x0d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x6a,0x70,0x0d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xa0,0x0c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0xa0,0x0c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_min_i32 v7, exec_hi, v3 ; encoding: [0x6a,0x80,0x0d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x80,0x0d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xb0,0x0c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0xb0,0x0c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x0c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x70,0x0c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_mul_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x30,0x0c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x30,0x0c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_sub_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x0c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x50,0x0c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x0d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x40,0x0d,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x0c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x60,0x0c,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f32 v255, vcc_lo, v255 :: v_dual_mov_b32 v7, exec_hi ; encoding: [0x6a,0x80,0x0c,0xcf,0x7f,0x00,0xff,0x00,0xff,0x00,0x00,0x07] 
+0x6a,0x80,0x0c,0xcf,0x7f,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[0:1], -v[8:9], v[4:5] :: v_dual_add_f32 v5, -v6, v7 ; encoding: [0x08,0x41,0x88,0xcf,0x06,0x13,0x04,0x00,0x00,0x07,0x00,0x05] +0x08,0x41,0x88,0xcf,0x06,0x13,0x04,0x00,0x00,0x07,0x00,0x05 + +# GFX1250: v_dual_mul_f64 v[0:1], v[8:9], -v[4:5] :: v_dual_lshlrev_b32 v5, v6, v7 ; encoding: [0x08,0x11,0x89,0xcf,0x06,0x05,0x04,0x00,0x00,0x07,0x00,0x05] +0x08,0x11,0x89,0xcf,0x06,0x05,0x04,0x00,0x00,0x07,0x00,0x05 + +# GFX1250: v_dual_mul_f64 v[252:253], v[6:7], v[4:5] :: v_dual_add_f32 v8, v1, v3 ; encoding: [0x06,0x41,0x88,0xcf,0x01,0x01,0x04,0x00,0xfc,0x03,0x00,0x08] +0x06,0x41,0x88,0xcf,0x01,0x01,0x04,0x00,0xfc,0x03,0x00,0x08 + +# GFX1250: v_dual_mul_f64 v[254:255], -1, v[6:7] :: v_dual_add_f32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x88,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +0xc1,0x40,0x88,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], -1, v[6:7] :: v_dual_add_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x89,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +0xc1,0x00,0x89,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], -1, v[6:7] :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo ; encoding: [0xc1,0x90,0x88,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x6a,0x07] +0xc1,0x90,0x88,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x6a,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], -1, v[6:7] :: v_dual_fmac_f32 v9, src_scc, v5 ; encoding: [0xc1,0x00,0x88,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x09] +0xc1,0x00,0x88,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x09 + +# GFX1250: v_dual_mul_f64 v[254:255], -1, v[6:7] :: v_dual_lshlrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x10,0x89,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +0xc1,0x10,0x89,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], -1, v[6:7] :: v_dual_max_num_f32 v7, src_scc, v5 ; encoding: 
[0xc1,0xa0,0x88,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +0xc1,0xa0,0x88,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], -1, v[6:7] :: v_dual_min_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xb0,0x88,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +0xc1,0xb0,0x88,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], -1, v[6:7] :: v_dual_mov_b32 v7, src_scc ; encoding: [0xc1,0x80,0x88,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x00,0x00,0x07] +0xc1,0x80,0x88,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], -1, v[6:7] :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x88,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +0xc1,0x70,0x88,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], -1, v[6:7] :: v_dual_mul_f32 v7, src_scc, v5 ; encoding: [0xc1,0x30,0x88,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +0xc1,0x30,0x88,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], -1, v[6:7] :: v_dual_sub_f32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x88,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +0xc1,0x50,0x88,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], -1, v[6:7] :: v_dual_subrev_f32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x88,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07] +0xc1,0x60,0x88,0xcf,0xfd,0x00,0x06,0x00,0xfe,0x05,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], 0.5, v[8:9] :: v_dual_add_f32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x88,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +0xf0,0x40,0x88,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], 0.5, v[8:9] :: v_dual_add_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x89,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +0xf0,0x00,0x89,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], 0.5, v[8:9] :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo ; 
encoding: [0xf0,0x90,0x88,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x6a,0x07] +0xf0,0x90,0x88,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x6a,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], 0.5, v[8:9] :: v_dual_fmac_f32 v9, 0.5, v2 ; encoding: [0xf0,0x00,0x88,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x09] +0xf0,0x00,0x88,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x09 + +# GFX1250: v_dual_mul_f64 v[254:255], 0.5, v[8:9] :: v_dual_lshlrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x10,0x89,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +0xf0,0x10,0x89,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], 0.5, v[8:9] :: v_dual_max_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xa0,0x88,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +0xf0,0xa0,0x88,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], 0.5, v[8:9] :: v_dual_min_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xb0,0x88,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +0xf0,0xb0,0x88,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], 0.5, v[8:9] :: v_dual_mov_b32 v7, 0.5 ; encoding: [0xf0,0x80,0x88,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x00,0x00,0x07] +0xf0,0x80,0x88,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], 0.5, v[8:9] :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x88,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +0xf0,0x70,0x88,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], 0.5, v[8:9] :: v_dual_mul_f32 v7, 0.5, v2 ; encoding: [0xf0,0x30,0x88,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +0xf0,0x30,0x88,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], 0.5, v[8:9] :: v_dual_sub_f32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x88,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +0xf0,0x50,0x88,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], 0.5, v[8:9] :: v_dual_subrev_f32 v7, 0.5, v2 ; encoding: 
[0xf0,0x60,0x88,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07] +0xf0,0x60,0x88,0xcf,0xf0,0x00,0x08,0x00,0xfe,0x02,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], exec, v[254:255] :: v_dual_mov_b32 v7, ttmp15 ; encoding: [0x7e,0x80,0x88,0xcf,0x7b,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07] +0x7e,0x80,0x88,0xcf,0x7b,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], exec, v[4:5] :: v_dual_add_f32 v7, v15, v3 ; encoding: [0x7e,0x40,0x88,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7e,0x40,0x88,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], exec, v[4:5] :: v_dual_add_nc_u32 v7, v15, v3 ; encoding: [0x7e,0x00,0x89,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7e,0x00,0x89,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], exec, v[4:5] :: v_dual_cndmask_b32 v7, v10, v3, vcc_lo ; encoding: [0x7e,0x90,0x88,0xcf,0x0a,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +0x7e,0x90,0x88,0xcf,0x0a,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], exec, v[4:5] :: v_dual_fmac_f32 v9, v15, v3 ; encoding: [0x7e,0x00,0x88,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +0x7e,0x00,0x88,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_mul_f64 v[254:255], exec, v[4:5] :: v_dual_lshlrev_b32 v7, v15, v3 ; encoding: [0x7e,0x10,0x89,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7e,0x10,0x89,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], exec, v[4:5] :: v_dual_max_num_f32 v7, v15, v3 ; encoding: [0x7e,0xa0,0x88,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7e,0xa0,0x88,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], exec, v[4:5] :: v_dual_min_num_f32 v7, v15, v3 ; encoding: [0x7e,0xb0,0x88,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7e,0xb0,0x88,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], exec, v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v15, v3 
; encoding: [0x7e,0x70,0x88,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7e,0x70,0x88,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], exec, v[4:5] :: v_dual_mul_f32 v7, v15, v3 ; encoding: [0x7e,0x30,0x88,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7e,0x30,0x88,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], exec, v[4:5] :: v_dual_sub_f32 v7, v15, v3 ; encoding: [0x7e,0x50,0x88,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7e,0x50,0x88,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], exec, v[4:5] :: v_dual_subrev_f32 v7, v15, v3 ; encoding: [0x7e,0x60,0x88,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7e,0x60,0x88,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], s[104:105], v[254:255] :: v_dual_mov_b32 v7, s1 ; encoding: [0x68,0x80,0x88,0xcf,0x01,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07] +0x68,0x80,0x88,0xcf,0x01,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], s[104:105], v[4:5] :: v_dual_add_f32 v7, v1, v3 ; encoding: [0x68,0x40,0x88,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x68,0x40,0x88,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], s[104:105], v[4:5] :: v_dual_add_nc_u32 v7, v1, v3 ; encoding: [0x68,0x00,0x89,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x68,0x00,0x89,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], s[104:105], v[4:5] :: v_dual_fmac_f32 v9, v1, v3 ; encoding: [0x68,0x00,0x88,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +0x68,0x00,0x88,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_mul_f64 v[254:255], s[104:105], v[4:5] :: v_dual_lshlrev_b32 v7, v1, v3 ; encoding: [0x68,0x10,0x89,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x68,0x10,0x89,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], s[104:105], v[4:5] :: 
v_dual_max_num_f32 v7, v1, v3 ; encoding: [0x68,0xa0,0x88,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x68,0xa0,0x88,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], s[104:105], v[4:5] :: v_dual_min_num_f32 v7, v1, v3 ; encoding: [0x68,0xb0,0x88,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x68,0xb0,0x88,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], s[104:105], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v1, v3 ; encoding: [0x68,0x70,0x88,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x68,0x70,0x88,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], s[104:105], v[4:5] :: v_dual_mul_f32 v7, v1, v3 ; encoding: [0x68,0x30,0x88,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x68,0x30,0x88,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], s[104:105], v[4:5] :: v_dual_sub_f32 v7, v1, v3 ; encoding: [0x68,0x50,0x88,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x68,0x50,0x88,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], s[104:105], v[4:5] :: v_dual_subrev_f32 v7, v1, v3 ; encoding: [0x68,0x60,0x88,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x68,0x60,0x88,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], s[2:3], v[254:255] :: v_dual_mov_b32 v7, s105 ; encoding: [0x02,0x80,0x88,0xcf,0x69,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07] +0x02,0x80,0x88,0xcf,0x69,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], s[2:3], v[4:5] :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo ; encoding: [0x02,0x90,0x88,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +0x02,0x90,0x88,0xcf,0x01,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], src_scc, v[254:255] :: v_dual_mov_b32 v7, -1 ; encoding: [0xfd,0x80,0x88,0xcf,0xc1,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07] +0xfd,0x80,0x88,0xcf,0xc1,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07 + +# GFX1250: 
v_dual_mul_f64 v[254:255], src_scc, v[4:5] :: v_dual_add_f32 v7, -1, v3 ; encoding: [0xfd,0x40,0x88,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfd,0x40,0x88,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], src_scc, v[4:5] :: v_dual_add_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x00,0x89,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfd,0x00,0x89,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], src_scc, v[4:5] :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo ; encoding: [0xfd,0x90,0x88,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x6a,0x07] +0xfd,0x90,0x88,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x6a,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], src_scc, v[4:5] :: v_dual_fmac_f32 v9, -1, v3 ; encoding: [0xfd,0x00,0x88,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x09] +0xfd,0x00,0x88,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_mul_f64 v[254:255], src_scc, v[4:5] :: v_dual_lshlrev_b32 v7, -1, v3 ; encoding: [0xfd,0x10,0x89,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfd,0x10,0x89,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], src_scc, v[4:5] :: v_dual_max_num_f32 v7, -1, v3 ; encoding: [0xfd,0xa0,0x88,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfd,0xa0,0x88,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], src_scc, v[4:5] :: v_dual_min_num_f32 v7, -1, v3 ; encoding: [0xfd,0xb0,0x88,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfd,0xb0,0x88,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], src_scc, v[4:5] :: v_dual_mul_dx9_zero_f32 v7, -1, v3 ; encoding: [0xfd,0x70,0x88,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfd,0x70,0x88,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], src_scc, v[4:5] :: v_dual_mul_f32 v7, -1, v3 ; encoding: [0xfd,0x30,0x88,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] 
+0xfd,0x30,0x88,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], src_scc, v[4:5] :: v_dual_sub_f32 v7, -1, v3 ; encoding: [0xfd,0x50,0x88,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfd,0x50,0x88,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], src_scc, v[4:5] :: v_dual_subrev_f32 v7, -1, v3 ; encoding: [0xfd,0x60,0x88,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfd,0x60,0x88,0xcf,0xc1,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], ttmp[14:15], v[254:255] :: v_dual_mov_b32 v7, vcc_lo ; encoding: [0x7a,0x80,0x88,0xcf,0x6a,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07] +0x7a,0x80,0x88,0xcf,0x6a,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_add_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0x40,0x88,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7a,0x40,0x88,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_add_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7a,0x00,0x89,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7a,0x00,0x89,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_cndmask_b32 v7, v15, v3, vcc_lo ; encoding: [0x7a,0x90,0x88,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +0x7a,0x90,0x88,0xcf,0x0f,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_fmac_f32 v9, vcc_lo, v3 ; encoding: [0x7a,0x00,0x88,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x09] +0x7a,0x00,0x88,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_mul_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_lshlrev_b32 v7, vcc_lo, v3 ; encoding: [0x7a,0x10,0x89,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7a,0x10,0x89,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_max_num_f32 v7, vcc_lo, v3 ; 
encoding: [0x7a,0xa0,0x88,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7a,0xa0,0x88,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_min_num_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0xb0,0x88,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7a,0xb0,0x88,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0x70,0x88,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7a,0x70,0x88,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_mul_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0x30,0x88,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7a,0x30,0x88,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_sub_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0x50,0x88,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7a,0x50,0x88,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], ttmp[14:15], v[4:5] :: v_dual_subrev_f32 v7, vcc_lo, v3 ; encoding: [0x7a,0x60,0x88,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x7a,0x60,0x88,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[254:255], v[254:255] :: v_dual_mov_b32 v7, v4 ; encoding: [0xfe,0x81,0x88,0xcf,0x04,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07] +0xfe,0x81,0x88,0xcf,0x04,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[254:255], v[4:5] :: v_dual_add_f32 v7, v5, v3 ; encoding: [0xfe,0x41,0x88,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfe,0x41,0x88,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[254:255], v[4:5] :: v_dual_add_nc_u32 v7, v5, v3 ; encoding: [0xfe,0x01,0x89,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfe,0x01,0x89,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 
v[254:255], v[254:255], v[4:5] :: v_dual_cndmask_b32 v7, v5, v3, vcc_lo ; encoding: [0xfe,0x91,0x88,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +0xfe,0x91,0x88,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[254:255], v[4:5] :: v_dual_fmac_f32 v9, v5, v3 ; encoding: [0xfe,0x01,0x88,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +0xfe,0x01,0x88,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_mul_f64 v[254:255], v[254:255], v[4:5] :: v_dual_lshlrev_b32 v7, v5, v3 ; encoding: [0xfe,0x11,0x89,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfe,0x11,0x89,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[254:255], v[4:5] :: v_dual_max_num_f32 v7, v5, v3 ; encoding: [0xfe,0xa1,0x88,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfe,0xa1,0x88,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[254:255], v[4:5] :: v_dual_min_num_f32 v7, v5, v3 ; encoding: [0xfe,0xb1,0x88,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfe,0xb1,0x88,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[254:255], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v5, v3 ; encoding: [0xfe,0x71,0x88,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfe,0x71,0x88,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[254:255], v[4:5] :: v_dual_mul_f32 v7, v5, v3 ; encoding: [0xfe,0x31,0x88,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfe,0x31,0x88,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[254:255], v[4:5] :: v_dual_sub_f32 v7, v5, v3 ; encoding: [0xfe,0x51,0x88,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0xfe,0x51,0x88,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[254:255], v[4:5] :: v_dual_subrev_f32 v7, v5, v3 ; encoding: [0xfe,0x61,0x88,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] 
+0xfe,0x61,0x88,0xcf,0x05,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[2:3], v[254:255] :: v_dual_mov_b32 v7, v253 ; encoding: [0x02,0x81,0x88,0xcf,0xfd,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07] +0x02,0x81,0x88,0xcf,0xfd,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[2:3], v[4:5] :: v_dual_add_f32 v7, v253, v3 ; encoding: [0x02,0x41,0x88,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x02,0x41,0x88,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[2:3], v[4:5] :: v_dual_add_nc_u32 v7, v253, v3 ; encoding: [0x02,0x01,0x89,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x02,0x01,0x89,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[2:3], v[4:5] :: v_dual_cndmask_b32 v7, v253, v3, vcc_lo ; encoding: [0x02,0x91,0x88,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +0x02,0x91,0x88,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[2:3], v[4:5] :: v_dual_fmac_f32 v9, v253, v3 ; encoding: [0x02,0x01,0x88,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +0x02,0x01,0x88,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_mul_f64 v[254:255], v[2:3], v[4:5] :: v_dual_lshlrev_b32 v7, v253, v3 ; encoding: [0x02,0x11,0x89,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x02,0x11,0x89,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[2:3], v[4:5] :: v_dual_max_num_f32 v7, v253, v3 ; encoding: [0x02,0xa1,0x88,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x02,0xa1,0x88,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[2:3], v[4:5] :: v_dual_min_num_f32 v7, v253, v3 ; encoding: [0x02,0xb1,0x88,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x02,0xb1,0x88,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[2:3], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v253, v3 ; encoding: 
[0x02,0x71,0x88,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x02,0x71,0x88,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[2:3], v[4:5] :: v_dual_mul_f32 v7, v253, v3 ; encoding: [0x02,0x31,0x88,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x02,0x31,0x88,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[2:3], v[4:5] :: v_dual_sub_f32 v7, v253, v3 ; encoding: [0x02,0x51,0x88,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x02,0x51,0x88,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[2:3], v[4:5] :: v_dual_subrev_f32 v7, v253, v3 ; encoding: [0x02,0x61,0x88,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x02,0x61,0x88,0xcf,0xfd,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[4:5], v[254:255] :: v_dual_mov_b32 v7, v3 ; encoding: [0x04,0x81,0x88,0xcf,0x03,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07] +0x04,0x81,0x88,0xcf,0x03,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_add_f32 v7, v3, v3 ; encoding: [0x04,0x41,0x88,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x41,0x88,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_add_nc_u32 v7, v2, v3 ; encoding: [0x04,0x01,0x89,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x01,0x89,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_add_nc_u32 v7, v3, v3 ; encoding: [0x04,0x01,0x89,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x01,0x89,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_cndmask_b32 v7, v2, v3, s96 ; encoding: [0x04,0x91,0x88,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x60,0x07] +0x04,0x91,0x88,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x60,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_cndmask_b32 v7, v2, v3, 
vcc_lo ; encoding: [0x04,0x91,0x88,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +0x04,0x91,0x88,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo ; encoding: [0x04,0x91,0x88,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +0x04,0x91,0x88,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_fmac_f32 v9, v2, v3 ; encoding: [0x04,0x01,0x88,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +0x04,0x01,0x88,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_fmac_f32 v9, v3, v3 ; encoding: [0x04,0x01,0x88,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +0x04,0x01,0x88,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_lshlrev_b32 v7, v2, v3 ; encoding: [0x04,0x11,0x89,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x11,0x89,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_lshlrev_b32 v7, v3, v3 ; encoding: [0x04,0x11,0x89,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x11,0x89,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_max_num_f32 v7, v2, v3 ; encoding: [0x04,0xa1,0x88,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0xa1,0x88,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_max_num_f32 v7, v3, v3 ; encoding: [0x04,0xa1,0x88,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0xa1,0x88,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_min_num_f32 v7, v2, v3 ; encoding: [0x04,0xb1,0x88,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0xb1,0x88,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: 
v_dual_min_num_f32 v7, v3, v3 ; encoding: [0x04,0xb1,0x88,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0xb1,0x88,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v2, v3 ; encoding: [0x04,0x71,0x88,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x71,0x88,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v3, v3 ; encoding: [0x04,0x71,0x88,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x71,0x88,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_mul_f32 v7, v2, v3 ; encoding: [0x04,0x31,0x88,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x31,0x88,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_mul_f32 v7, v3, v3 ; encoding: [0x04,0x31,0x88,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x31,0x88,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_sub_f32 v7, v2, v3 ; encoding: [0x04,0x51,0x88,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x51,0x88,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_sub_f32 v7, v3, v3 ; encoding: [0x04,0x51,0x88,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x51,0x88,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_subrev_f32 v7, v2, v3 ; encoding: [0x04,0x61,0x88,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x61,0x88,0xcf,0x02,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[4:5], v[4:5] :: v_dual_subrev_f32 v7, v3, v3 ; encoding: [0x04,0x61,0x88,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x04,0x61,0x88,0xcf,0x03,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[6:7], 
v[254:255] :: v_dual_mov_b32 v7, v4 ; encoding: [0x06,0x81,0x88,0xcf,0x04,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07] +0x06,0x81,0x88,0xcf,0x04,0x01,0xfe,0x00,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[6:7], v[4:5] :: v_dual_add_f32 v7, v4, v3 ; encoding: [0x06,0x41,0x88,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x06,0x41,0x88,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[6:7], v[4:5] :: v_dual_add_nc_u32 v7, v4, v3 ; encoding: [0x06,0x01,0x89,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x06,0x01,0x89,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[6:7], v[4:5] :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo ; encoding: [0x06,0x91,0x88,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07] +0x06,0x91,0x88,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x6a,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[6:7], v[4:5] :: v_dual_fmac_f32 v9, v4, v3 ; encoding: [0x06,0x01,0x88,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x09] +0x06,0x01,0x88,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_mul_f64 v[254:255], v[6:7], v[4:5] :: v_dual_lshlrev_b32 v7, v4, v3 ; encoding: [0x06,0x11,0x89,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x06,0x11,0x89,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[6:7], v[4:5] :: v_dual_max_num_f32 v7, v4, v3 ; encoding: [0x06,0xa1,0x88,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x06,0xa1,0x88,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[6:7], v[4:5] :: v_dual_min_num_f32 v7, v4, v3 ; encoding: [0x06,0xb1,0x88,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x06,0xb1,0x88,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[6:7], v[4:5] :: v_dual_mul_dx9_zero_f32 v7, v4, v3 ; encoding: [0x06,0x71,0x88,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x06,0x71,0x88,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 
v[254:255], v[6:7], v[4:5] :: v_dual_mul_f32 v7, v4, v3 ; encoding: [0x06,0x31,0x88,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x06,0x31,0x88,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[6:7], v[4:5] :: v_dual_sub_f32 v7, v4, v3 ; encoding: [0x06,0x51,0x88,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x06,0x51,0x88,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], v[6:7], v[4:5] :: v_dual_subrev_f32 v7, v4, v3 ; encoding: [0x06,0x61,0x88,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07] +0x06,0x61,0x88,0xcf,0x04,0x01,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], vcc, v[254:255] :: v_dual_mov_b32 v7, exec_hi ; encoding: [0x6a,0x80,0x88,0xcf,0x7f,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07] +0x6a,0x80,0x88,0xcf,0x7f,0x00,0xfe,0x00,0xfe,0x00,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], vcc, v[4:5] :: v_dual_add_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x88,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x6a,0x40,0x88,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], vcc, v[4:5] :: v_dual_add_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x89,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x6a,0x00,0x89,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], vcc, v[4:5] :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo ; encoding: [0x6a,0x90,0x88,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x6a,0x07] +0x6a,0x90,0x88,0xcf,0x6a,0x00,0x04,0x00,0xfe,0x03,0x6a,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], vcc, v[4:5] :: v_dual_fmac_f32 v9, exec_hi, v3 ; encoding: [0x6a,0x00,0x88,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x09] +0x6a,0x00,0x88,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x09 + +# GFX1250: v_dual_mul_f64 v[254:255], vcc, v[4:5] :: v_dual_lshlrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x10,0x89,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x6a,0x10,0x89,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: 
v_dual_mul_f64 v[254:255], vcc, v[4:5] :: v_dual_max_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xa0,0x88,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x6a,0xa0,0x88,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], vcc, v[4:5] :: v_dual_min_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xb0,0x88,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x6a,0xb0,0x88,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], vcc, v[4:5] :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x88,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x6a,0x70,0x88,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], vcc, v[4:5] :: v_dual_mul_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x30,0x88,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x6a,0x30,0x88,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], vcc, v[4:5] :: v_dual_sub_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x88,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x6a,0x50,0x88,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[254:255], vcc, v[4:5] :: v_dual_subrev_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x88,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07] +0x6a,0x60,0x88,0xcf,0x7f,0x00,0x04,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_mul_f64 v[2:3], v[6:7], v[4:5] :: v_dual_ashrrev_i32 v9, v1, v14 ; encoding: [0x06,0x61,0x89,0xcf,0x01,0x01,0x04,0x00,0x02,0x0e,0x00,0x09] +0x06,0x61,0x89,0xcf,0x01,0x01,0x04,0x00,0x02,0x0e,0x00,0x09 + +# GFX1250: v_dual_mul_f64 v[2:3], v[6:7], v[4:5] :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0x93 ; encoding: [0x06,0x21,0x89,0xcf,0x01,0x01,0x04,0x00,0x02,0x03,0x93,0x07] +0x06,0x21,0x89,0xcf,0x01,0x01,0x04,0x00,0x02,0x03,0x93,0x07 + +# GFX1250: v_dual_mul_f64 v[2:3], v[6:7], v[4:5] :: v_dual_fma_f32 v9, v1, v14, v4 ; encoding: [0x06,0x31,0x89,0xcf,0x01,0x01,0x04,0x00,0x02,0x0e,0x04,0x09] 
+0x06,0x31,0x89,0xcf,0x01,0x01,0x04,0x00,0x02,0x0e,0x04,0x09 + +# GFX1250: v_dual_mul_f64 v[2:3], v[6:7], v[4:5] :: v_dual_lshrrev_b32 v9, v1, v14 ; encoding: [0x06,0x51,0x89,0xcf,0x01,0x01,0x04,0x00,0x02,0x0e,0x00,0x09] +0x06,0x51,0x89,0xcf,0x01,0x01,0x04,0x00,0x02,0x0e,0x00,0x09 + +# GFX1250: v_dual_mul_f64 v[2:3], v[6:7], v[4:5] :: v_dual_sub_nc_u32 v9, v1, v14 ; encoding: [0x06,0x41,0x89,0xcf,0x01,0x01,0x04,0x00,0x02,0x0e,0x00,0x09] +0x06,0x41,0x89,0xcf,0x01,0x01,0x04,0x00,0x02,0x0e,0x00,0x09 + +# GFX1250: v_dual_sub_f32 v0, -v1, -v2 :: v_dual_lshrrev_b32 v5, v6, v7 ; encoding: [0x01,0x51,0x15,0xcf,0x06,0x07,0x02,0x00,0x00,0x07,0x00,0x05] +0x01,0x51,0x15,0xcf,0x06,0x07,0x02,0x00,0x00,0x07,0x00,0x05 + +# GFX1250: v_dual_sub_f32 v0, -v1, v2 :: v_dual_mul_f32 v5, -v6, -v7 ; encoding: [0x01,0x31,0x14,0xcf,0x06,0x33,0x02,0x00,0x00,0x07,0x00,0x05] +0x01,0x31,0x14,0xcf,0x06,0x33,0x02,0x00,0x00,0x07,0x00,0x05 + +# GFX1250: v_dual_sub_f32 v0, v1, -v2 :: v_dual_max_i32 v5, v6, v7 ; encoding: [0x01,0x71,0x15,0xcf,0x06,0x05,0x02,0x00,0x00,0x07,0x00,0x05] +0x01,0x71,0x15,0xcf,0x06,0x05,0x02,0x00,0x00,0x07,0x00,0x05 + +# GFX1250: v_dual_sub_f32 v0, v1, v2 :: v_dual_min_num_f32 v5, v6, -v7 ; encoding: [0x01,0xb1,0x14,0xcf,0x06,0x21,0x02,0x00,0x00,0x07,0x00,0x05] +0x01,0xb1,0x14,0xcf,0x06,0x21,0x02,0x00,0x00,0x07,0x00,0x05 + +# GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_add_f32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x14,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x40,0x14,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_add_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x15,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x00,0x15,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_ashrrev_i32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x15,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x60,0x15,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: 
v_dual_sub_f32 v255, -1, v4 :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo ; encoding: [0xc1,0x90,0x14,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x6a,0x07] +0xc1,0x90,0x14,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x6a,0x07 + +# GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_fmac_f32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x14,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x00,0x14,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_lshlrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x10,0x15,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x10,0x15,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_lshrrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x15,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x50,0x15,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_max_i32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x15,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x70,0x15,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_max_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xa0,0x14,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0xa0,0x14,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_min_i32 v7, src_scc, v5 ; encoding: [0xc1,0x80,0x15,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x80,0x15,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_min_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xb0,0x14,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0xb0,0x14,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_mov_b32 v7, src_scc ; encoding: [0xc1,0x80,0x14,0xcf,0xfd,0x00,0x04,0x00,0xff,0x00,0x00,0x07] +0xc1,0x80,0x14,0xcf,0xfd,0x00,0x04,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 ; 
encoding: [0xc1,0x70,0x14,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x70,0x14,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_mul_f32 v7, src_scc, v5 ; encoding: [0xc1,0x30,0x14,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x30,0x14,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_sub_f32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x14,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x50,0x14,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_sub_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x15,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x40,0x15,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, -1, v4 :: v_dual_subrev_f32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x14,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x60,0x14,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_add_f32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x14,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x40,0x14,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_add_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x15,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x00,0x15,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x15,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x60,0x15,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo ; encoding: [0xf0,0x90,0x14,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x6a,0x07] +0xf0,0x90,0x14,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x6a,0x07 + +# GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_fmac_f32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x14,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] 
+0xf0,0x00,0x14,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x10,0x15,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x10,0x15,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x15,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x50,0x15,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_max_i32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x15,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x70,0x15,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_max_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xa0,0x14,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0xa0,0x14,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_min_i32 v7, 0.5, v2 ; encoding: [0xf0,0x80,0x15,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x80,0x15,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_min_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xb0,0x14,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0xb0,0x14,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_mov_b32 v7, 0.5 ; encoding: [0xf0,0x80,0x14,0xcf,0xf0,0x00,0x03,0x00,0xff,0x00,0x00,0x07] +0xf0,0x80,0x14,0xcf,0xf0,0x00,0x03,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x14,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x70,0x14,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_mul_f32 v7, 0.5, v2 ; encoding: [0xf0,0x30,0x14,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x30,0x14,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: 
v_dual_sub_f32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x14,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x50,0x14,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x15,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x40,0x15,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, 0.5, v3 :: v_dual_subrev_f32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x14,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x60,0x14,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_add_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x40,0x14,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x40,0x14,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x15,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x00,0x15,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v7, vcc_hi, v3 ; encoding: [0x7f,0x60,0x15,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x60,0x15,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v7, exec_hi, v3, vcc_lo ; encoding: [0x7f,0x90,0x14,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7f,0x90,0x14,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_fmac_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x14,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x00,0x14,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v7, vcc_hi, v3 ; encoding: [0x7f,0x10,0x15,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x10,0x15,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v7, vcc_hi, v3 ; encoding: 
[0x7f,0x50,0x15,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x50,0x15,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_max_i32 v7, vcc_hi, v3 ; encoding: [0x7f,0x70,0x15,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x70,0x15,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_max_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xa0,0x14,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0xa0,0x14,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_min_i32 v7, vcc_hi, v3 ; encoding: [0x7f,0x80,0x15,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x80,0x15,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_min_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xb0,0x14,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0xb0,0x14,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x70,0x14,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x70,0x14,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_mul_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x30,0x14,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x30,0x14,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_sub_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x50,0x14,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x50,0x14,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v7, vcc_hi, v3 ; encoding: [0x7f,0x40,0x15,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x40,0x15,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, exec_hi, v2 :: v_dual_subrev_f32 v7, vcc_hi, v3 ; encoding: 
[0x7f,0x60,0x14,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x60,0x14,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, exec_hi, v255 :: v_dual_mov_b32 v7, vcc_hi ; encoding: [0x7f,0x80,0x14,0xcf,0x6b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7f,0x80,0x14,0xcf,0x6b,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_add_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x40,0x14,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x40,0x14,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_add_nc_u32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x15,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x00,0x15,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v7, ttmp15, v3 ; encoding: [0x7e,0x60,0x15,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x60,0x15,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v7, exec_lo, v3, vcc_lo ; encoding: [0x7e,0x90,0x14,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7e,0x90,0x14,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_fmac_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x14,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x00,0x14,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v7, ttmp15, v3 ; encoding: [0x7e,0x10,0x15,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x10,0x15,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_lshrrev_b32 v7, ttmp15, v3 ; encoding: [0x7e,0x50,0x15,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x50,0x15,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_max_i32 v7, ttmp15, v3 ; encoding: 
[0x7e,0x70,0x15,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x70,0x15,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_max_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xa0,0x14,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0xa0,0x14,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_min_i32 v7, ttmp15, v3 ; encoding: [0x7e,0x80,0x15,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x80,0x15,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_min_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xb0,0x14,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0xb0,0x14,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x70,0x14,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x70,0x14,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_mul_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x30,0x14,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x30,0x14,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_sub_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x50,0x14,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x50,0x14,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_sub_nc_u32 v7, ttmp15, v3 ; encoding: [0x7e,0x40,0x15,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x40,0x15,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, exec_lo, v2 :: v_dual_subrev_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x60,0x14,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x60,0x14,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, exec_lo, v255 :: v_dual_mov_b32 v7, ttmp15 ; encoding: 
[0x7e,0x80,0x14,0xcf,0x7b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7e,0x80,0x14,0xcf,0x7b,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, m0, v2 :: v_dual_add_f32 v7, m0, v3 ; encoding: [0x7d,0x40,0x14,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x40,0x14,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, m0, v2 :: v_dual_add_nc_u32 v7, m0, v3 ; encoding: [0x7d,0x00,0x15,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x00,0x15,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, m0, v2 :: v_dual_ashrrev_i32 v7, m0, v3 ; encoding: [0x7d,0x60,0x15,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x60,0x15,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, m0, v2 :: v_dual_cndmask_b32 v7, m0, v3, vcc_lo ; encoding: [0x7d,0x90,0x14,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7d,0x90,0x14,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_sub_f32 v255, m0, v2 :: v_dual_fmac_f32 v7, m0, v3 ; encoding: [0x7d,0x00,0x14,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x00,0x14,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, m0, v2 :: v_dual_lshlrev_b32 v7, m0, v3 ; encoding: [0x7d,0x10,0x15,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x10,0x15,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, m0, v2 :: v_dual_lshrrev_b32 v7, m0, v3 ; encoding: [0x7d,0x50,0x15,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x50,0x15,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, m0, v2 :: v_dual_max_i32 v7, m0, v3 ; encoding: [0x7d,0x70,0x15,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x70,0x15,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, m0, v2 :: v_dual_max_num_f32 v7, m0, v3 ; encoding: [0x7d,0xa0,0x14,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x7d,0xa0,0x14,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, m0, v2 :: v_dual_min_i32 v7, m0, v3 ; encoding: [0x7d,0x80,0x15,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x80,0x15,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, m0, v2 :: v_dual_min_num_f32 v7, m0, v3 ; encoding: [0x7d,0xb0,0x14,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0xb0,0x14,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v7, m0, v3 ; encoding: [0x7d,0x70,0x14,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x70,0x14,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, m0, v2 :: v_dual_mul_f32 v7, m0, v3 ; encoding: [0x7d,0x30,0x14,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x30,0x14,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, m0, v2 :: v_dual_sub_f32 v7, m0, v3 ; encoding: [0x7d,0x50,0x14,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x50,0x14,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, m0, v2 :: v_dual_sub_nc_u32 v7, m0, v3 ; encoding: [0x7d,0x40,0x15,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x40,0x15,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, m0, v2 :: v_dual_subrev_f32 v7, m0, v3 ; encoding: [0x7d,0x60,0x14,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x60,0x14,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, m0, v255 :: v_dual_mov_b32 v7, m0 ; encoding: [0x7d,0x80,0x14,0xcf,0x7d,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7d,0x80,0x14,0xcf,0x7d,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, s1, v2 :: v_dual_add_f32 v7, s105, v3 ; encoding: [0x01,0x40,0x14,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x40,0x14,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, s1, v2 :: v_dual_add_nc_u32 
v7, s105, v3 ; encoding: [0x01,0x00,0x15,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x00,0x15,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, s1, v2 :: v_dual_ashrrev_i32 v7, s105, v3 ; encoding: [0x01,0x60,0x15,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x60,0x15,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, s1, v2 :: v_dual_cndmask_b32 v7, s1, v3, vcc_lo ; encoding: [0x01,0x90,0x14,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x01,0x90,0x14,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_sub_f32 v255, s1, v2 :: v_dual_fmac_f32 v7, s105, v3 ; encoding: [0x01,0x00,0x14,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x00,0x14,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, s1, v2 :: v_dual_lshlrev_b32 v7, s105, v3 ; encoding: [0x01,0x10,0x15,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x10,0x15,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, s1, v2 :: v_dual_lshrrev_b32 v7, s105, v3 ; encoding: [0x01,0x50,0x15,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x50,0x15,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, s1, v2 :: v_dual_max_i32 v7, s105, v3 ; encoding: [0x01,0x70,0x15,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x70,0x15,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, s1, v2 :: v_dual_max_num_f32 v7, s105, v3 ; encoding: [0x01,0xa0,0x14,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xa0,0x14,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, s1, v2 :: v_dual_min_i32 v7, s105, v3 ; encoding: [0x01,0x80,0x15,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x80,0x15,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, s1, v2 :: v_dual_min_num_f32 v7, s105, v3 ; encoding: [0x01,0xb0,0x14,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x01,0xb0,0x14,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v7, s105, v3 ; encoding: [0x01,0x70,0x14,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x70,0x14,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, s1, v2 :: v_dual_mul_f32 v7, s105, v3 ; encoding: [0x01,0x30,0x14,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x30,0x14,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, s1, v2 :: v_dual_sub_f32 v7, s105, v3 ; encoding: [0x01,0x50,0x14,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x50,0x14,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, s1, v2 :: v_dual_sub_nc_u32 v7, s105, v3 ; encoding: [0x01,0x40,0x15,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x40,0x15,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, s1, v2 :: v_dual_subrev_f32 v7, s105, v3 ; encoding: [0x01,0x60,0x14,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x60,0x14,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, s1, v255 :: v_dual_mov_b32 v7, s105 ; encoding: [0x01,0x80,0x14,0xcf,0x69,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x01,0x80,0x14,0xcf,0x69,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, s105, v2 :: v_dual_add_f32 v7, s1, v3 ; encoding: [0x69,0x40,0x14,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x40,0x14,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, s105, v2 :: v_dual_add_nc_u32 v7, s1, v3 ; encoding: [0x69,0x00,0x15,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x00,0x15,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, s105, v2 :: v_dual_ashrrev_i32 v7, s1, v3 ; encoding: [0x69,0x60,0x15,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x60,0x15,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, s105, v2 :: 
v_dual_cndmask_b32 v7, s105, v3, vcc_lo ; encoding: [0x69,0x90,0x14,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x69,0x90,0x14,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_sub_f32 v255, s105, v2 :: v_dual_fmac_f32 v7, s1, v3 ; encoding: [0x69,0x00,0x14,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x00,0x14,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, s105, v2 :: v_dual_lshlrev_b32 v7, s1, v3 ; encoding: [0x69,0x10,0x15,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x10,0x15,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, s105, v2 :: v_dual_lshrrev_b32 v7, s1, v3 ; encoding: [0x69,0x50,0x15,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x50,0x15,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, s105, v2 :: v_dual_max_i32 v7, s1, v3 ; encoding: [0x69,0x70,0x15,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x70,0x15,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, s105, v2 :: v_dual_max_num_f32 v7, s1, v3 ; encoding: [0x69,0xa0,0x14,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0xa0,0x14,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, s105, v2 :: v_dual_min_i32 v7, s1, v3 ; encoding: [0x69,0x80,0x15,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x80,0x15,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, s105, v2 :: v_dual_min_num_f32 v7, s1, v3 ; encoding: [0x69,0xb0,0x14,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0xb0,0x14,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v7, s1, v3 ; encoding: [0x69,0x70,0x14,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x70,0x14,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, s105, v2 :: v_dual_mul_f32 v7, s1, v3 ; encoding: 
[0x69,0x30,0x14,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x30,0x14,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, s105, v2 :: v_dual_sub_f32 v7, s1, v3 ; encoding: [0x69,0x50,0x14,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x50,0x14,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, s105, v2 :: v_dual_sub_nc_u32 v7, s1, v3 ; encoding: [0x69,0x40,0x15,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x40,0x15,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, s105, v2 :: v_dual_subrev_f32 v7, s1, v3 ; encoding: [0x69,0x60,0x14,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x60,0x14,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, s105, v255 :: v_dual_mov_b32 v7, s1 ; encoding: [0x69,0x80,0x14,0xcf,0x01,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x69,0x80,0x14,0xcf,0x01,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_add_f32 v7, -1, v3 ; encoding: [0xfd,0x40,0x14,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x40,0x14,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_add_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x00,0x15,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x00,0x15,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v7, -1, v3 ; encoding: [0xfd,0x60,0x15,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x60,0x15,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo ; encoding: [0xfd,0x90,0x14,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0xfd,0x90,0x14,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_fmac_f32 v7, -1, v3 ; encoding: [0xfd,0x00,0x14,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0xfd,0x00,0x14,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v7, -1, v3 ; encoding: [0xfd,0x10,0x15,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x10,0x15,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v7, -1, v3 ; encoding: [0xfd,0x50,0x15,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x50,0x15,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_max_i32 v7, -1, v3 ; encoding: [0xfd,0x70,0x15,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x70,0x15,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_max_num_f32 v7, -1, v3 ; encoding: [0xfd,0xa0,0x14,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0xa0,0x14,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_min_i32 v7, -1, v3 ; encoding: [0xfd,0x80,0x15,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x80,0x15,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_min_num_f32 v7, -1, v3 ; encoding: [0xfd,0xb0,0x14,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0xb0,0x14,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v7, -1, v3 ; encoding: [0xfd,0x70,0x14,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x70,0x14,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_mul_f32 v7, -1, v3 ; encoding: [0xfd,0x30,0x14,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x30,0x14,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_sub_f32 v7, -1, v3 ; encoding: [0xfd,0x50,0x14,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x50,0x14,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: 
v_dual_sub_f32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x40,0x15,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x40,0x15,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, src_scc, v2 :: v_dual_subrev_f32 v7, -1, v3 ; encoding: [0xfd,0x60,0x14,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x60,0x14,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, src_scc, v255 :: v_dual_mov_b32 v7, -1 ; encoding: [0xfd,0x80,0x14,0xcf,0xc1,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0xfd,0x80,0x14,0xcf,0xc1,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_add_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x40,0x14,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x40,0x14,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x15,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x00,0x15,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v7, vcc_lo, v3 ; encoding: [0x7b,0x60,0x15,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x60,0x15,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v7, ttmp15, v3, vcc_lo ; encoding: [0x7b,0x90,0x14,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7b,0x90,0x14,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_fmac_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x14,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x00,0x14,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v7, vcc_lo, v3 ; encoding: [0x7b,0x10,0x15,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x10,0x15,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 
v7, vcc_lo, v3 ; encoding: [0x7b,0x50,0x15,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x50,0x15,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_max_i32 v7, vcc_lo, v3 ; encoding: [0x7b,0x70,0x15,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x70,0x15,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_max_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xa0,0x14,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0xa0,0x14,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_min_i32 v7, vcc_lo, v3 ; encoding: [0x7b,0x80,0x15,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x80,0x15,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_min_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xb0,0x14,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0xb0,0x14,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x70,0x14,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x70,0x14,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_mul_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x30,0x14,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x30,0x14,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_sub_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x50,0x14,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x50,0x14,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_sub_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7b,0x40,0x15,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x40,0x15,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, ttmp15, v2 :: v_dual_subrev_f32 v7, vcc_lo, v3 ; encoding: 
[0x7b,0x60,0x14,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x60,0x14,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, ttmp15, v255 :: v_dual_mov_b32 v7, vcc_lo ; encoding: [0x7b,0x80,0x14,0xcf,0x6a,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7b,0x80,0x14,0xcf,0x6a,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v1, v2 :: v_dual_add_f32 v7, v255, v3 ; encoding: [0x01,0x41,0x14,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x41,0x14,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v1, v2 :: v_dual_add_nc_u32 v7, v255, v3 ; encoding: [0x01,0x01,0x15,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x01,0x15,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v1, v2 :: v_dual_ashrrev_i32 v7, v255, v3 ; encoding: [0x01,0x61,0x15,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x61,0x15,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v1, v2 :: v_dual_cndmask_b32 v7, v255, v3, vcc_lo ; encoding: [0x01,0x91,0x14,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0x01,0x91,0x14,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_sub_f32 v255, v1, v2 :: v_dual_fmac_f32 v7, v255, v3 ; encoding: [0x01,0x01,0x14,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x01,0x14,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v1, v2 :: v_dual_lshlrev_b32 v7, v255, v3 ; encoding: [0x01,0x11,0x15,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x11,0x15,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v1, v2 :: v_dual_lshrrev_b32 v7, v255, v3 ; encoding: [0x01,0x51,0x15,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x51,0x15,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v1, v2 :: v_dual_max_i32 v7, v255, v3 ; encoding: [0x01,0x71,0x15,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x01,0x71,0x15,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v1, v2 :: v_dual_max_num_f32 v7, v255, v3 ; encoding: [0x01,0xa1,0x14,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xa1,0x14,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v1, v2 :: v_dual_min_i32 v7, v255, v3 ; encoding: [0x01,0x81,0x15,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x81,0x15,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v1, v2 :: v_dual_min_num_f32 v7, v255, v3 ; encoding: [0x01,0xb1,0x14,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xb1,0x14,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v7, v255, v3 ; encoding: [0x01,0x71,0x14,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x71,0x14,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v1, v2 :: v_dual_mul_f32 v7, v255, v3 ; encoding: [0x01,0x31,0x14,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x31,0x14,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v1, v2 :: v_dual_sub_f32 v7, v255, v3 ; encoding: [0x01,0x51,0x14,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x51,0x14,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v1, v2 :: v_dual_sub_nc_u32 v7, v255, v3 ; encoding: [0x01,0x41,0x15,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x41,0x15,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v1, v2 :: v_dual_subrev_f32 v7, v255, v3 ; encoding: [0x01,0x61,0x14,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x61,0x14,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v1, v255 :: v_dual_mov_b32 v7, v255 ; encoding: [0x01,0x81,0x14,0xcf,0xff,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0x01,0x81,0x14,0xcf,0xff,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v2, v2 :: 
v_dual_add_f32 v7, v3, v3 ; encoding: [0x02,0x41,0x14,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x41,0x14,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v2, v2 :: v_dual_add_nc_u32 v7, v3, v3 ; encoding: [0x02,0x01,0x15,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x01,0x15,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v2, v2 :: v_dual_ashrrev_i32 v7, v3, v3 ; encoding: [0x02,0x61,0x15,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x61,0x15,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v2, v2 :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo ; encoding: [0x02,0x91,0x14,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0x02,0x91,0x14,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_sub_f32 v255, v2, v2 :: v_dual_fmac_f32 v7, v3, v3 ; encoding: [0x02,0x01,0x14,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x01,0x14,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v2, v2 :: v_dual_lshlrev_b32 v7, v3, v3 ; encoding: [0x02,0x11,0x15,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x11,0x15,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v2, v2 :: v_dual_lshrrev_b32 v7, v3, v3 ; encoding: [0x02,0x51,0x15,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x51,0x15,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v2, v2 :: v_dual_max_i32 v7, v3, v3 ; encoding: [0x02,0x71,0x15,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x71,0x15,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v2, v2 :: v_dual_max_num_f32 v7, v3, v3 ; encoding: [0x02,0xa1,0x14,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0xa1,0x14,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v2, v2 :: v_dual_min_i32 v7, v3, v3 ; encoding: [0x02,0x81,0x15,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x02,0x81,0x15,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v2, v2 :: v_dual_min_num_f32 v7, v3, v3 ; encoding: [0x02,0xb1,0x14,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0xb1,0x14,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v7, v3, v3 ; encoding: [0x02,0x71,0x14,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x71,0x14,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v2, v2 :: v_dual_mul_f32 v7, v3, v3 ; encoding: [0x02,0x31,0x14,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x31,0x14,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v2, v2 :: v_dual_sub_f32 v7, v3, v3 ; encoding: [0x02,0x51,0x14,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x51,0x14,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v2, v2 :: v_dual_sub_nc_u32 v7, v3, v3 ; encoding: [0x02,0x41,0x15,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x41,0x15,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v2, v2 :: v_dual_subrev_f32 v7, v3, v3 ; encoding: [0x02,0x61,0x14,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x61,0x14,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v2, v255 :: v_dual_mov_b32 v7, v3 ; encoding: [0x02,0x81,0x14,0xcf,0x03,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0x02,0x81,0x14,0xcf,0x03,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v255, v2 :: v_dual_add_f32 v7, v2, v3 ; encoding: [0xff,0x41,0x14,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x41,0x14,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v255, v2 :: v_dual_add_nc_u32 v7, v2, v3 ; encoding: [0xff,0x01,0x15,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x01,0x15,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v255, v2 :: 
v_dual_ashrrev_i32 v7, v2, v3 ; encoding: [0xff,0x61,0x15,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x61,0x15,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v255, v2 :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo ; encoding: [0xff,0x91,0x14,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0xff,0x91,0x14,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_sub_f32 v255, v255, v2 :: v_dual_fmac_f32 v7, v2, v3 ; encoding: [0xff,0x01,0x14,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x01,0x14,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v255, v2 :: v_dual_lshlrev_b32 v7, v2, v3 ; encoding: [0xff,0x11,0x15,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x11,0x15,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v255, v2 :: v_dual_lshrrev_b32 v7, v2, v3 ; encoding: [0xff,0x51,0x15,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x51,0x15,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v255, v2 :: v_dual_max_i32 v7, v2, v3 ; encoding: [0xff,0x71,0x15,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x71,0x15,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v255, v2 :: v_dual_max_num_f32 v7, v2, v3 ; encoding: [0xff,0xa1,0x14,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0xa1,0x14,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v255, v2 :: v_dual_min_i32 v7, v2, v3 ; encoding: [0xff,0x81,0x15,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x81,0x15,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v255, v2 :: v_dual_min_num_f32 v7, v2, v3 ; encoding: [0xff,0xb1,0x14,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0xb1,0x14,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v7, v2, v3 ; encoding: 
[0xff,0x71,0x14,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x71,0x14,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v255, v2 :: v_dual_mul_f32 v7, v2, v3 ; encoding: [0xff,0x31,0x14,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x31,0x14,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v255, v2 :: v_dual_sub_f32 v7, v2, v3 ; encoding: [0xff,0x51,0x14,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x51,0x14,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v255, v2 :: v_dual_sub_nc_u32 v7, v2, v3 ; encoding: [0xff,0x41,0x15,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x41,0x15,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v255, v2 :: v_dual_subrev_f32 v7, v2, v3 ; encoding: [0xff,0x61,0x14,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x61,0x14,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v255, v255 :: v_dual_mov_b32 v7, v2 ; encoding: [0xff,0x81,0x14,0xcf,0x02,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0xff,0x81,0x14,0xcf,0x02,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v3, v2 :: v_dual_add_f32 v7, v4, v3 ; encoding: [0x03,0x41,0x14,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x41,0x14,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v3, v2 :: v_dual_add_nc_u32 v7, v4, v3 ; encoding: [0x03,0x01,0x15,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x01,0x15,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v3, v2 :: v_dual_ashrrev_i32 v7, v4, v3 ; encoding: [0x03,0x61,0x15,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x61,0x15,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v3, v2 :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo ; encoding: [0x03,0x91,0x14,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] 
+0x03,0x91,0x14,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_sub_f32 v255, v3, v2 :: v_dual_fmac_f32 v7, v4, v3 ; encoding: [0x03,0x01,0x14,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x01,0x14,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v3, v2 :: v_dual_lshlrev_b32 v7, v4, v3 ; encoding: [0x03,0x11,0x15,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x11,0x15,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v3, v2 :: v_dual_lshrrev_b32 v7, v4, v3 ; encoding: [0x03,0x51,0x15,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x51,0x15,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v3, v2 :: v_dual_max_i32 v7, v4, v3 ; encoding: [0x03,0x71,0x15,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x71,0x15,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v3, v2 :: v_dual_max_num_f32 v7, v4, v3 ; encoding: [0x03,0xa1,0x14,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0xa1,0x14,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v3, v2 :: v_dual_min_i32 v7, v4, v3 ; encoding: [0x03,0x81,0x15,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x81,0x15,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v3, v2 :: v_dual_min_num_f32 v7, v4, v3 ; encoding: [0x03,0xb1,0x14,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0xb1,0x14,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v7, v4, v3 ; encoding: [0x03,0x71,0x14,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x71,0x14,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v3, v2 :: v_dual_mul_f32 v7, v4, v3 ; encoding: [0x03,0x31,0x14,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x31,0x14,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v3, v2 :: 
v_dual_sub_f32 v7, v4, v3 ; encoding: [0x03,0x51,0x14,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x51,0x14,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v3, v2 :: v_dual_sub_nc_u32 v7, v4, v3 ; encoding: [0x03,0x41,0x15,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x41,0x15,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v3, v2 :: v_dual_subrev_f32 v7, v4, v3 ; encoding: [0x03,0x61,0x14,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x61,0x14,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v3, v255 :: v_dual_mov_b32 v7, v4 ; encoding: [0x03,0x81,0x14,0xcf,0x04,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0x03,0x81,0x14,0xcf,0x04,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_add_f32 v7, v1, v3 ; encoding: [0x04,0x41,0x14,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x41,0x14,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_add_nc_u32 v7, v1, v3 ; encoding: [0x04,0x01,0x15,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x01,0x15,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_ashrrev_i32 v7, v1, v3 ; encoding: [0x04,0x61,0x15,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x61,0x15,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0x82 ; encoding: [0x04,0x21,0x15,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x82,0x07] +0x04,0x21,0x15,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x82,0x07 + +# GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo ; encoding: [0x04,0x91,0x14,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0x04,0x91,0x14,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_fma_f32 v7, v1, v3, v4 ; encoding: [0x04,0x31,0x15,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x04,0x07] 
+0x04,0x31,0x15,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x04,0x07 + +# GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_fmac_f32 v7, v1, v3 ; encoding: [0x04,0x01,0x14,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x01,0x14,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_lshlrev_b32 v7, v1, v3 ; encoding: [0x04,0x11,0x15,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x11,0x15,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_lshrrev_b32 v7, v1, v3 ; encoding: [0x04,0x51,0x15,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x51,0x15,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_max_i32 v7, v1, v3 ; encoding: [0x04,0x71,0x15,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x71,0x15,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_max_num_f32 v7, v1, v3 ; encoding: [0x04,0xa1,0x14,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0xa1,0x14,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_min_i32 v7, v1, v3 ; encoding: [0x04,0x81,0x15,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x81,0x15,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_min_num_f32 v7, v1, v3 ; encoding: [0x04,0xb1,0x14,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0xb1,0x14,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v7, v1, v3 ; encoding: [0x04,0x71,0x14,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x71,0x14,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_mul_f32 v7, v1, v3 ; encoding: [0x04,0x31,0x14,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x31,0x14,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v4, v2 :: 
v_dual_sub_f32 v7, v1, v3 ; encoding: [0x04,0x51,0x14,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x51,0x14,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_sub_nc_u32 v7, v1, v3 ; encoding: [0x04,0x41,0x15,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x41,0x15,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v4, v2 :: v_dual_subrev_f32 v7, v1, v3 ; encoding: [0x04,0x61,0x14,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x61,0x14,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, v4, v255 :: v_dual_mov_b32 v7, v1 ; encoding: [0x04,0x81,0x14,0xcf,0x01,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0x04,0x81,0x14,0xcf,0x01,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_add_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x40,0x14,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x40,0x14,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x15,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x00,0x15,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v7, exec_lo, v3 ; encoding: [0x6b,0x60,0x15,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x60,0x15,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v7, vcc_hi, v3, vcc_lo ; encoding: [0x6b,0x90,0x14,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x6b,0x90,0x14,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x14,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x00,0x14,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v7, exec_lo, v3 ; encoding: 
[0x6b,0x10,0x15,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x10,0x15,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v7, exec_lo, v3 ; encoding: [0x6b,0x50,0x15,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x50,0x15,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_max_i32 v7, exec_lo, v3 ; encoding: [0x6b,0x70,0x15,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x70,0x15,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xa0,0x14,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0xa0,0x14,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_min_i32 v7, exec_lo, v3 ; encoding: [0x6b,0x80,0x15,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x80,0x15,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xb0,0x14,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0xb0,0x14,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x70,0x14,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x70,0x14,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_mul_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x30,0x14,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x30,0x14,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_sub_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x50,0x14,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x50,0x14,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v7, exec_lo, v3 ; encoding: 
[0x6b,0x40,0x15,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x40,0x15,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x60,0x14,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x60,0x14,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, vcc_hi, v255 :: v_dual_mov_b32 v7, exec_lo ; encoding: [0x6b,0x80,0x14,0xcf,0x7e,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x6b,0x80,0x14,0xcf,0x7e,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_add_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x14,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x40,0x14,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x15,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x00,0x15,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x15,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x60,0x15,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo ; encoding: [0x6a,0x90,0x14,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x6a,0x90,0x14,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x14,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x00,0x14,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x10,0x15,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x10,0x15,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v7, exec_hi, v3 ; encoding: 
[0x6a,0x50,0x15,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x50,0x15,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_max_i32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x15,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x70,0x15,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xa0,0x14,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0xa0,0x14,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_min_i32 v7, exec_hi, v3 ; encoding: [0x6a,0x80,0x15,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x80,0x15,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xb0,0x14,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0xb0,0x14,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x14,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x70,0x14,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_mul_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x30,0x14,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x30,0x14,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_sub_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x14,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x50,0x14,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x15,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x40,0x15,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v7, exec_hi, v3 ; encoding: 
[0x6a,0x60,0x14,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x60,0x14,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_f32 v255, vcc_lo, v255 :: v_dual_mov_b32 v7, exec_hi ; encoding: [0x6a,0x80,0x14,0xcf,0x7f,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x6a,0x80,0x14,0xcf,0x7f,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v0, v1, v2 :: v_dual_mul_dx9_zero_f32 v5, v6, v7 ; encoding: [0x01,0x71,0x50,0xcf,0x06,0x01,0x02,0x00,0x00,0x07,0x00,0x05] +0x01,0x71,0x50,0xcf,0x06,0x01,0x02,0x00,0x00,0x07,0x00,0x05 + +# GFX1250: v_dual_sub_nc_u32 v254, v4, v2 :: v_dual_add_f32 v7, v1, v3 ; encoding: [0x04,0x41,0x50,0xcf,0x01,0x01,0x02,0x00,0xfe,0x03,0x00,0x07] +0x04,0x41,0x50,0xcf,0x01,0x01,0x02,0x00,0xfe,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v254, v4, v2 :: v_dual_ashrrev_i32 v9, v1, v13 ; encoding: [0x04,0x61,0x51,0xcf,0x01,0x01,0x02,0x00,0xfe,0x0d,0x00,0x09] +0x04,0x61,0x51,0xcf,0x01,0x01,0x02,0x00,0xfe,0x0d,0x00,0x09 + +# GFX1250: v_dual_sub_nc_u32 v254, v4, v2 :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0x86 ; encoding: [0x04,0x21,0x51,0xcf,0x01,0x01,0x02,0x00,0xfe,0x03,0x86,0x07] +0x04,0x21,0x51,0xcf,0x01,0x01,0x02,0x00,0xfe,0x03,0x86,0x07 + +# GFX1250: v_dual_sub_nc_u32 v254, v4, v2 :: v_dual_fma_f32 v7, v1, v3, v4 ; encoding: [0x04,0x31,0x51,0xcf,0x01,0x01,0x02,0x00,0xfe,0x03,0x04,0x07] +0x04,0x31,0x51,0xcf,0x01,0x01,0x02,0x00,0xfe,0x03,0x04,0x07 + +# GFX1250: v_dual_sub_nc_u32 v254, v4, v2 :: v_dual_lshrrev_b32 v9, v1, v13 ; encoding: [0x04,0x51,0x51,0xcf,0x01,0x01,0x02,0x00,0xfe,0x0d,0x00,0x09] +0x04,0x51,0x51,0xcf,0x01,0x01,0x02,0x00,0xfe,0x0d,0x00,0x09 + +# GFX1250: v_dual_sub_nc_u32 v254, v4, v2 :: v_dual_sub_nc_u32 v9, v1, v13 ; encoding: [0x04,0x41,0x51,0xcf,0x01,0x01,0x02,0x00,0xfe,0x0d,0x00,0x09] +0x04,0x41,0x51,0xcf,0x01,0x01,0x02,0x00,0xfe,0x0d,0x00,0x09 + +# GFX1250: v_dual_sub_nc_u32 v255, -1, v4 :: v_dual_add_f32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x50,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] 
+0xc1,0x40,0x50,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, -1, v4 :: v_dual_add_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x51,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x00,0x51,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, -1, v4 :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo ; encoding: [0xc1,0x90,0x50,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x6a,0x07] +0xc1,0x90,0x50,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x6a,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, -1, v4 :: v_dual_fmac_f32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x50,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x00,0x50,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, -1, v4 :: v_dual_lshlrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x10,0x51,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x10,0x51,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, -1, v4 :: v_dual_max_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xa0,0x50,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0xa0,0x50,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, -1, v4 :: v_dual_min_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xb0,0x50,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0xb0,0x50,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, -1, v4 :: v_dual_mov_b32 v7, src_scc ; encoding: [0xc1,0x80,0x50,0xcf,0xfd,0x00,0x04,0x00,0xff,0x00,0x00,0x07] +0xc1,0x80,0x50,0xcf,0xfd,0x00,0x04,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x50,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x70,0x50,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, -1, v4 :: v_dual_mul_f32 v7, src_scc, v5 ; encoding: [0xc1,0x30,0x50,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] 
+0xc1,0x30,0x50,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, -1, v4 :: v_dual_sub_f32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x50,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x50,0x50,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, -1, v4 :: v_dual_subrev_f32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x50,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x60,0x50,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, 0.5, v3 :: v_dual_add_f32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x50,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x40,0x50,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, 0.5, v3 :: v_dual_add_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x51,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x00,0x51,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, 0.5, v3 :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo ; encoding: [0xf0,0x90,0x50,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x6a,0x07] +0xf0,0x90,0x50,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x6a,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, 0.5, v3 :: v_dual_fmac_f32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x50,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x00,0x50,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x10,0x51,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x10,0x51,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, 0.5, v3 :: v_dual_max_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xa0,0x50,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0xa0,0x50,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, 0.5, v3 :: v_dual_min_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xb0,0x50,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0xb0,0x50,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + 
+# GFX1250: v_dual_sub_nc_u32 v255, 0.5, v3 :: v_dual_mov_b32 v7, 0.5 ; encoding: [0xf0,0x80,0x50,0xcf,0xf0,0x00,0x03,0x00,0xff,0x00,0x00,0x07] +0xf0,0x80,0x50,0xcf,0xf0,0x00,0x03,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x50,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x70,0x50,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, 0.5, v3 :: v_dual_mul_f32 v7, 0.5, v2 ; encoding: [0xf0,0x30,0x50,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x30,0x50,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, 0.5, v3 :: v_dual_sub_f32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x50,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x50,0x50,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, 0.5, v3 :: v_dual_subrev_f32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x50,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x60,0x50,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, exec_hi, v2 :: v_dual_add_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x40,0x50,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x40,0x50,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x51,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x00,0x51,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v7, exec_hi, v3, vcc_lo ; encoding: [0x7f,0x90,0x50,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7f,0x90,0x50,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, exec_hi, v2 :: v_dual_fmac_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x50,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x00,0x50,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, exec_hi, v2 
:: v_dual_lshlrev_b32 v7, vcc_hi, v3 ; encoding: [0x7f,0x10,0x51,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x10,0x51,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, exec_hi, v2 :: v_dual_max_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xa0,0x50,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0xa0,0x50,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, exec_hi, v2 :: v_dual_min_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xb0,0x50,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0xb0,0x50,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x70,0x50,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x70,0x50,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, exec_hi, v2 :: v_dual_mul_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x30,0x50,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x30,0x50,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, exec_hi, v2 :: v_dual_sub_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x50,0x50,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x50,0x50,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, exec_hi, v2 :: v_dual_subrev_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x60,0x50,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x60,0x50,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, exec_hi, v255 :: v_dual_mov_b32 v7, vcc_hi ; encoding: [0x7f,0x80,0x50,0xcf,0x6b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7f,0x80,0x50,0xcf,0x6b,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, exec_lo, v2 :: v_dual_add_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x40,0x50,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x40,0x50,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, exec_lo, v2 :: 
v_dual_add_nc_u32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x51,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x00,0x51,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v7, exec_lo, v3, vcc_lo ; encoding: [0x7e,0x90,0x50,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7e,0x90,0x50,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, exec_lo, v2 :: v_dual_fmac_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x50,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x00,0x50,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v7, ttmp15, v3 ; encoding: [0x7e,0x10,0x51,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x10,0x51,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, exec_lo, v2 :: v_dual_max_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xa0,0x50,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0xa0,0x50,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, exec_lo, v2 :: v_dual_min_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xb0,0x50,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0xb0,0x50,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x70,0x50,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x70,0x50,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, exec_lo, v2 :: v_dual_mul_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x30,0x50,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x30,0x50,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, exec_lo, v2 :: v_dual_sub_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x50,0x50,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x50,0x50,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, exec_lo, v2 :: 
v_dual_subrev_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x60,0x50,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x60,0x50,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, exec_lo, v255 :: v_dual_mov_b32 v7, ttmp15 ; encoding: [0x7e,0x80,0x50,0xcf,0x7b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7e,0x80,0x50,0xcf,0x7b,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, m0, v2 :: v_dual_add_f32 v7, m0, v3 ; encoding: [0x7d,0x40,0x50,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x40,0x50,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, m0, v2 :: v_dual_add_nc_u32 v7, m0, v3 ; encoding: [0x7d,0x00,0x51,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x00,0x51,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, m0, v2 :: v_dual_cndmask_b32 v7, m0, v3, vcc_lo ; encoding: [0x7d,0x90,0x50,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7d,0x90,0x50,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, m0, v2 :: v_dual_fmac_f32 v7, m0, v3 ; encoding: [0x7d,0x00,0x50,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x00,0x50,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, m0, v2 :: v_dual_lshlrev_b32 v7, m0, v3 ; encoding: [0x7d,0x10,0x51,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x10,0x51,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, m0, v2 :: v_dual_max_num_f32 v7, m0, v3 ; encoding: [0x7d,0xa0,0x50,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0xa0,0x50,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, m0, v2 :: v_dual_min_num_f32 v7, m0, v3 ; encoding: [0x7d,0xb0,0x50,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0xb0,0x50,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v7, m0, v3 ; encoding: 
[0x7d,0x70,0x50,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x70,0x50,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, m0, v2 :: v_dual_mul_f32 v7, m0, v3 ; encoding: [0x7d,0x30,0x50,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x30,0x50,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, m0, v2 :: v_dual_sub_f32 v7, m0, v3 ; encoding: [0x7d,0x50,0x50,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x50,0x50,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, m0, v2 :: v_dual_subrev_f32 v7, m0, v3 ; encoding: [0x7d,0x60,0x50,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x60,0x50,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, m0, v255 :: v_dual_mov_b32 v7, m0 ; encoding: [0x7d,0x80,0x50,0xcf,0x7d,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7d,0x80,0x50,0xcf,0x7d,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, s1, v2 :: v_dual_add_f32 v7, s105, v3 ; encoding: [0x01,0x40,0x50,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x40,0x50,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, s1, v2 :: v_dual_add_nc_u32 v7, s105, v3 ; encoding: [0x01,0x00,0x51,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x00,0x51,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, s1, v2 :: v_dual_cndmask_b32 v7, s1, v3, vcc_lo ; encoding: [0x01,0x90,0x50,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x01,0x90,0x50,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, s1, v2 :: v_dual_fmac_f32 v7, s105, v3 ; encoding: [0x01,0x00,0x50,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x00,0x50,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, s1, v2 :: v_dual_lshlrev_b32 v7, s105, v3 ; encoding: [0x01,0x10,0x51,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x01,0x10,0x51,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, s1, v2 :: v_dual_max_num_f32 v7, s105, v3 ; encoding: [0x01,0xa0,0x50,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xa0,0x50,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, s1, v2 :: v_dual_min_num_f32 v7, s105, v3 ; encoding: [0x01,0xb0,0x50,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xb0,0x50,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v7, s105, v3 ; encoding: [0x01,0x70,0x50,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x70,0x50,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, s1, v2 :: v_dual_mul_f32 v7, s105, v3 ; encoding: [0x01,0x30,0x50,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x30,0x50,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, s1, v2 :: v_dual_sub_f32 v7, s105, v3 ; encoding: [0x01,0x50,0x50,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x50,0x50,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, s1, v2 :: v_dual_subrev_f32 v7, s105, v3 ; encoding: [0x01,0x60,0x50,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x60,0x50,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, s1, v255 :: v_dual_mov_b32 v7, s105 ; encoding: [0x01,0x80,0x50,0xcf,0x69,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x01,0x80,0x50,0xcf,0x69,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, s105, v2 :: v_dual_add_f32 v7, s1, v3 ; encoding: [0x69,0x40,0x50,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x40,0x50,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, s105, v2 :: v_dual_add_nc_u32 v7, s1, v3 ; encoding: [0x69,0x00,0x51,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x00,0x51,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: 
v_dual_sub_nc_u32 v255, s105, v2 :: v_dual_cndmask_b32 v7, s105, v3, vcc_lo ; encoding: [0x69,0x90,0x50,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x69,0x90,0x50,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, s105, v2 :: v_dual_fmac_f32 v7, s1, v3 ; encoding: [0x69,0x00,0x50,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x00,0x50,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, s105, v2 :: v_dual_lshlrev_b32 v7, s1, v3 ; encoding: [0x69,0x10,0x51,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x10,0x51,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, s105, v2 :: v_dual_max_num_f32 v7, s1, v3 ; encoding: [0x69,0xa0,0x50,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0xa0,0x50,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, s105, v2 :: v_dual_min_num_f32 v7, s1, v3 ; encoding: [0x69,0xb0,0x50,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0xb0,0x50,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v7, s1, v3 ; encoding: [0x69,0x70,0x50,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x70,0x50,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, s105, v2 :: v_dual_mul_f32 v7, s1, v3 ; encoding: [0x69,0x30,0x50,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x30,0x50,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, s105, v2 :: v_dual_sub_f32 v7, s1, v3 ; encoding: [0x69,0x50,0x50,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x50,0x50,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, s105, v2 :: v_dual_subrev_f32 v7, s1, v3 ; encoding: [0x69,0x60,0x50,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x60,0x50,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, s105, v255 :: v_dual_mov_b32 v7, s1 ; 
encoding: [0x69,0x80,0x50,0xcf,0x01,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x69,0x80,0x50,0xcf,0x01,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, src_scc, v2 :: v_dual_add_f32 v7, -1, v3 ; encoding: [0xfd,0x40,0x50,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x40,0x50,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, src_scc, v2 :: v_dual_add_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x00,0x51,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x00,0x51,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, src_scc, v2 :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo ; encoding: [0xfd,0x90,0x50,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0xfd,0x90,0x50,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, src_scc, v2 :: v_dual_fmac_f32 v7, -1, v3 ; encoding: [0xfd,0x00,0x50,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x00,0x50,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v7, -1, v3 ; encoding: [0xfd,0x10,0x51,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x10,0x51,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, src_scc, v2 :: v_dual_max_num_f32 v7, -1, v3 ; encoding: [0xfd,0xa0,0x50,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0xa0,0x50,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, src_scc, v2 :: v_dual_min_num_f32 v7, -1, v3 ; encoding: [0xfd,0xb0,0x50,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0xb0,0x50,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v7, -1, v3 ; encoding: [0xfd,0x70,0x50,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x70,0x50,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, src_scc, v2 :: v_dual_mul_f32 v7, -1, v3 ; encoding: 
[0xfd,0x30,0x50,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x30,0x50,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, src_scc, v2 :: v_dual_sub_f32 v7, -1, v3 ; encoding: [0xfd,0x50,0x50,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x50,0x50,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, src_scc, v2 :: v_dual_subrev_f32 v7, -1, v3 ; encoding: [0xfd,0x60,0x50,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x60,0x50,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, src_scc, v255 :: v_dual_mov_b32 v7, -1 ; encoding: [0xfd,0x80,0x50,0xcf,0xc1,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0xfd,0x80,0x50,0xcf,0xc1,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, ttmp15, v2 :: v_dual_add_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x40,0x50,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x40,0x50,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x51,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x00,0x51,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v7, ttmp15, v3, vcc_lo ; encoding: [0x7b,0x90,0x50,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7b,0x90,0x50,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, ttmp15, v2 :: v_dual_fmac_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x50,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x00,0x50,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v7, vcc_lo, v3 ; encoding: [0x7b,0x10,0x51,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x10,0x51,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, ttmp15, v2 :: v_dual_max_num_f32 v7, vcc_lo, v3 ; encoding: 
[0x7b,0xa0,0x50,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0xa0,0x50,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, ttmp15, v2 :: v_dual_min_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xb0,0x50,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0xb0,0x50,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x70,0x50,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x70,0x50,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, ttmp15, v2 :: v_dual_mul_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x30,0x50,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x30,0x50,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, ttmp15, v2 :: v_dual_sub_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x50,0x50,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x50,0x50,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, ttmp15, v2 :: v_dual_subrev_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x60,0x50,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x60,0x50,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, ttmp15, v255 :: v_dual_mov_b32 v7, vcc_lo ; encoding: [0x7b,0x80,0x50,0xcf,0x6a,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7b,0x80,0x50,0xcf,0x6a,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v1, v2 :: v_dual_add_f32 v7, v255, v3 ; encoding: [0x01,0x41,0x50,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x41,0x50,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v1, v2 :: v_dual_add_nc_u32 v7, v255, v3 ; encoding: [0x01,0x01,0x51,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x01,0x51,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v1, v2 :: v_dual_cndmask_b32 v7, v255, v3, vcc_lo ; encoding: 
[0x01,0x91,0x50,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0x01,0x91,0x50,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v1, v2 :: v_dual_fmac_f32 v7, v255, v3 ; encoding: [0x01,0x01,0x50,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x01,0x50,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v1, v2 :: v_dual_lshlrev_b32 v7, v255, v3 ; encoding: [0x01,0x11,0x51,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x11,0x51,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v1, v2 :: v_dual_max_num_f32 v7, v255, v3 ; encoding: [0x01,0xa1,0x50,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xa1,0x50,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v1, v2 :: v_dual_min_num_f32 v7, v255, v3 ; encoding: [0x01,0xb1,0x50,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xb1,0x50,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v7, v255, v3 ; encoding: [0x01,0x71,0x50,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x71,0x50,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v1, v2 :: v_dual_mul_f32 v7, v255, v3 ; encoding: [0x01,0x31,0x50,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x31,0x50,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v1, v2 :: v_dual_sub_f32 v7, v255, v3 ; encoding: [0x01,0x51,0x50,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x51,0x50,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v1, v2 :: v_dual_subrev_f32 v7, v255, v3 ; encoding: [0x01,0x61,0x50,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x61,0x50,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v1, v255 :: v_dual_mov_b32 v7, v255 ; encoding: [0x01,0x81,0x50,0xcf,0xff,0x01,0xff,0x00,0xff,0x00,0x00,0x07] 
+0x01,0x81,0x50,0xcf,0xff,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v2, v2 :: v_dual_add_f32 v7, v3, v3 ; encoding: [0x02,0x41,0x50,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x41,0x50,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v2, v2 :: v_dual_add_nc_u32 v7, v3, v3 ; encoding: [0x02,0x01,0x51,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x01,0x51,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v2, v2 :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo ; encoding: [0x02,0x91,0x50,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0x02,0x91,0x50,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v2, v2 :: v_dual_fmac_f32 v7, v3, v3 ; encoding: [0x02,0x01,0x50,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x01,0x50,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v2, v2 :: v_dual_lshlrev_b32 v7, v3, v3 ; encoding: [0x02,0x11,0x51,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x11,0x51,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v2, v2 :: v_dual_max_num_f32 v7, v3, v3 ; encoding: [0x02,0xa1,0x50,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0xa1,0x50,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v2, v2 :: v_dual_min_num_f32 v7, v3, v3 ; encoding: [0x02,0xb1,0x50,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0xb1,0x50,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v7, v3, v3 ; encoding: [0x02,0x71,0x50,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x71,0x50,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v2, v2 :: v_dual_mul_f32 v7, v3, v3 ; encoding: [0x02,0x31,0x50,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x31,0x50,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: 
v_dual_sub_nc_u32 v255, v2, v2 :: v_dual_sub_f32 v7, v3, v3 ; encoding: [0x02,0x51,0x50,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x51,0x50,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v2, v2 :: v_dual_subrev_f32 v7, v3, v3 ; encoding: [0x02,0x61,0x50,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x61,0x50,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v2, v255 :: v_dual_mov_b32 v7, v3 ; encoding: [0x02,0x81,0x50,0xcf,0x03,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0x02,0x81,0x50,0xcf,0x03,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v255, v2 :: v_dual_add_f32 v7, v2, v3 ; encoding: [0xff,0x41,0x50,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x41,0x50,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v255, v2 :: v_dual_add_nc_u32 v7, v2, v3 ; encoding: [0xff,0x01,0x51,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x01,0x51,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v255, v2 :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo ; encoding: [0xff,0x91,0x50,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0xff,0x91,0x50,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v255, v2 :: v_dual_fmac_f32 v7, v2, v3 ; encoding: [0xff,0x01,0x50,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x01,0x50,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v255, v2 :: v_dual_lshlrev_b32 v7, v2, v3 ; encoding: [0xff,0x11,0x51,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x11,0x51,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v255, v2 :: v_dual_max_num_f32 v7, v2, v3 ; encoding: [0xff,0xa1,0x50,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0xa1,0x50,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v255, v2 :: v_dual_min_num_f32 v7, v2, v3 ; encoding: 
[0xff,0xb1,0x50,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0xb1,0x50,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v7, v2, v3 ; encoding: [0xff,0x71,0x50,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x71,0x50,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v255, v2 :: v_dual_mul_f32 v7, v2, v3 ; encoding: [0xff,0x31,0x50,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x31,0x50,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v255, v2 :: v_dual_sub_f32 v7, v2, v3 ; encoding: [0xff,0x51,0x50,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x51,0x50,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v255, v2 :: v_dual_subrev_f32 v7, v2, v3 ; encoding: [0xff,0x61,0x50,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x61,0x50,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v255, v255 :: v_dual_mov_b32 v7, v2 ; encoding: [0xff,0x81,0x50,0xcf,0x02,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0xff,0x81,0x50,0xcf,0x02,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v3, v2 :: v_dual_add_f32 v7, v4, v3 ; encoding: [0x03,0x41,0x50,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x41,0x50,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v3, v2 :: v_dual_add_nc_u32 v7, v4, v3 ; encoding: [0x03,0x01,0x51,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x01,0x51,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v3, v2 :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo ; encoding: [0x03,0x91,0x50,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0x03,0x91,0x50,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v3, v2 :: v_dual_fmac_f32 v7, v4, v3 ; encoding: [0x03,0x01,0x50,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x03,0x01,0x50,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v3, v2 :: v_dual_lshlrev_b32 v7, v4, v3 ; encoding: [0x03,0x11,0x51,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x11,0x51,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v3, v2 :: v_dual_max_num_f32 v7, v4, v3 ; encoding: [0x03,0xa1,0x50,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0xa1,0x50,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v3, v2 :: v_dual_min_num_f32 v7, v4, v3 ; encoding: [0x03,0xb1,0x50,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0xb1,0x50,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v7, v4, v3 ; encoding: [0x03,0x71,0x50,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x71,0x50,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v3, v2 :: v_dual_mul_f32 v7, v4, v3 ; encoding: [0x03,0x31,0x50,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x31,0x50,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v3, v2 :: v_dual_sub_f32 v7, v4, v3 ; encoding: [0x03,0x51,0x50,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x51,0x50,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v3, v2 :: v_dual_subrev_f32 v7, v4, v3 ; encoding: [0x03,0x61,0x50,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x61,0x50,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v3, v255 :: v_dual_mov_b32 v7, v4 ; encoding: [0x03,0x81,0x50,0xcf,0x04,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0x03,0x81,0x50,0xcf,0x04,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v4, v2 :: v_dual_add_nc_u32 v7, v1, v3 ; encoding: [0x04,0x01,0x51,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x01,0x51,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 
v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, s96 ; encoding: [0x04,0x91,0x50,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x60,0x07] +0x04,0x91,0x50,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x60,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo ; encoding: [0x04,0x91,0x50,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0x04,0x91,0x50,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v4, v2 :: v_dual_fmac_f32 v7, v1, v3 ; encoding: [0x04,0x01,0x50,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x01,0x50,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v4, v2 :: v_dual_lshlrev_b32 v7, v1, v3 ; encoding: [0x04,0x11,0x51,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x11,0x51,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v4, v2 :: v_dual_max_num_f32 v7, v1, v3 ; encoding: [0x04,0xa1,0x50,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0xa1,0x50,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v4, v2 :: v_dual_min_num_f32 v7, v1, v3 ; encoding: [0x04,0xb1,0x50,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0xb1,0x50,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v7, v1, v3 ; encoding: [0x04,0x71,0x50,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x71,0x50,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v4, v2 :: v_dual_mul_f32 v7, v1, v3 ; encoding: [0x04,0x31,0x50,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x31,0x50,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v4, v2 :: v_dual_sub_f32 v7, v1, v3 ; encoding: [0x04,0x51,0x50,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x51,0x50,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v4, v2 :: v_dual_subrev_f32 v7, v1, v3 ; encoding: 
[0x04,0x61,0x50,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x61,0x50,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, v4, v255 :: v_dual_mov_b32 v7, v1 ; encoding: [0x04,0x81,0x50,0xcf,0x01,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0x04,0x81,0x50,0xcf,0x01,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, vcc_hi, v2 :: v_dual_add_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x40,0x50,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x40,0x50,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x51,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x00,0x51,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v7, vcc_hi, v3, vcc_lo ; encoding: [0x6b,0x90,0x50,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x6b,0x90,0x50,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x50,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x00,0x50,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v7, exec_lo, v3 ; encoding: [0x6b,0x10,0x51,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x10,0x51,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xa0,0x50,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0xa0,0x50,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xb0,0x50,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0xb0,0x50,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_lo, v3 ; encoding: 
[0x6b,0x70,0x50,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x70,0x50,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, vcc_hi, v2 :: v_dual_mul_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x30,0x50,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x30,0x50,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, vcc_hi, v2 :: v_dual_sub_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x50,0x50,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x50,0x50,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x60,0x50,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x60,0x50,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, vcc_hi, v255 :: v_dual_mov_b32 v7, exec_lo ; encoding: [0x6b,0x80,0x50,0xcf,0x7e,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x6b,0x80,0x50,0xcf,0x7e,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, vcc_lo, v2 :: v_dual_add_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x50,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x40,0x50,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x51,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x00,0x51,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo ; encoding: [0x6a,0x90,0x50,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x6a,0x90,0x50,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x50,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x00,0x50,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v7, exec_hi, v3 ; encoding: 
[0x6a,0x10,0x51,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x10,0x51,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xa0,0x50,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0xa0,0x50,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xb0,0x50,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0xb0,0x50,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x50,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x70,0x50,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, vcc_lo, v2 :: v_dual_mul_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x30,0x50,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x30,0x50,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, vcc_lo, v2 :: v_dual_sub_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x50,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x50,0x50,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x50,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x60,0x50,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_sub_nc_u32 v255, vcc_lo, v255 :: v_dual_mov_b32 v7, exec_hi ; encoding: [0x6a,0x80,0x50,0xcf,0x7f,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x6a,0x80,0x50,0xcf,0x7f,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v0, -s1, -v2 :: v_dual_mul_f32 v5, -s6, -v7 ; encoding: [0x01,0x30,0x18,0xcf,0x06,0x36,0x02,0x00,0x00,0x07,0x00,0x05] +0x01,0x30,0x18,0xcf,0x06,0x36,0x02,0x00,0x00,0x07,0x00,0x05 + +# GFX1250: v_dual_subrev_f32 v0, v1, -v2 :: v_dual_max_i32 v5, v6, v7 ; encoding: 
[0x01,0x71,0x19,0xcf,0x06,0x05,0x02,0x00,0x00,0x07,0x00,0x05] +0x01,0x71,0x19,0xcf,0x06,0x05,0x02,0x00,0x00,0x07,0x00,0x05 + +# GFX1250: v_dual_subrev_f32 v0, v1, -v2 :: v_dual_min_i32 v5, v6, v7 ; encoding: [0x01,0x81,0x19,0xcf,0x06,0x05,0x02,0x00,0x00,0x07,0x00,0x05] +0x01,0x81,0x19,0xcf,0x06,0x05,0x02,0x00,0x00,0x07,0x00,0x05 + +# GFX1250: v_dual_subrev_f32 v0, v1, -v2 :: v_dual_sub_f32 v5, -v6, v7 ; encoding: [0x01,0x51,0x18,0xcf,0x06,0x15,0x02,0x00,0x00,0x07,0x00,0x05] +0x01,0x51,0x18,0xcf,0x06,0x15,0x02,0x00,0x00,0x07,0x00,0x05 + +# GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_add_f32 v7, src_scc, v5 ; encoding: [0xc1,0x40,0x18,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x40,0x18,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_add_nc_u32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x19,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x00,0x19,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_ashrrev_i32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x19,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x60,0x19,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_cndmask_b32 v7, src_scc, v5, vcc_lo ; encoding: [0xc1,0x90,0x18,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x6a,0x07] +0xc1,0x90,0x18,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x6a,0x07 + +# GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_fmac_f32 v7, src_scc, v5 ; encoding: [0xc1,0x00,0x18,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x00,0x18,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_lshlrev_b32 v7, src_scc, v5 ; encoding: [0xc1,0x10,0x19,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x10,0x19,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_lshrrev_b32 v7, src_scc, v5 ; encoding: 
[0xc1,0x50,0x19,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x50,0x19,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_max_i32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x19,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x70,0x19,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_max_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xa0,0x18,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0xa0,0x18,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_min_i32 v7, src_scc, v5 ; encoding: [0xc1,0x80,0x19,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x80,0x19,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_min_num_f32 v7, src_scc, v5 ; encoding: [0xc1,0xb0,0x18,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0xb0,0x18,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_mov_b32 v7, src_scc ; encoding: [0xc1,0x80,0x18,0xcf,0xfd,0x00,0x04,0x00,0xff,0x00,0x00,0x07] +0xc1,0x80,0x18,0xcf,0xfd,0x00,0x04,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_mul_dx9_zero_f32 v7, src_scc, v5 ; encoding: [0xc1,0x70,0x18,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x70,0x18,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_mul_f32 v7, src_scc, v5 ; encoding: [0xc1,0x30,0x18,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x30,0x18,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_sub_f32 v7, src_scc, v5 ; encoding: [0xc1,0x50,0x18,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x50,0x18,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_sub_nc_u32 v7, src_scc, v5 ; encoding: 
[0xc1,0x40,0x19,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x40,0x19,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, -1, v4 :: v_dual_subrev_f32 v7, src_scc, v5 ; encoding: [0xc1,0x60,0x18,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07] +0xc1,0x60,0x18,0xcf,0xfd,0x00,0x04,0x00,0xff,0x05,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_add_f32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x18,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x40,0x18,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_add_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x19,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x00,0x19,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_ashrrev_i32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x19,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x60,0x19,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_cndmask_b32 v7, 0.5, v2, vcc_lo ; encoding: [0xf0,0x90,0x18,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x6a,0x07] +0xf0,0x90,0x18,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x6a,0x07 + +# GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_fmac_f32 v7, 0.5, v2 ; encoding: [0xf0,0x00,0x18,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x00,0x18,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_lshlrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x10,0x19,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x10,0x19,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_lshrrev_b32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x19,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x50,0x19,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_max_i32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x19,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] 
+0xf0,0x70,0x19,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_max_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xa0,0x18,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0xa0,0x18,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_min_i32 v7, 0.5, v2 ; encoding: [0xf0,0x80,0x19,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x80,0x19,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_min_num_f32 v7, 0.5, v2 ; encoding: [0xf0,0xb0,0x18,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0xb0,0x18,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_mov_b32 v7, 0.5 ; encoding: [0xf0,0x80,0x18,0xcf,0xf0,0x00,0x03,0x00,0xff,0x00,0x00,0x07] +0xf0,0x80,0x18,0xcf,0xf0,0x00,0x03,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_mul_dx9_zero_f32 v7, 0.5, v2 ; encoding: [0xf0,0x70,0x18,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x70,0x18,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_mul_f32 v7, 0.5, v2 ; encoding: [0xf0,0x30,0x18,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x30,0x18,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_sub_f32 v7, 0.5, v2 ; encoding: [0xf0,0x50,0x18,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x50,0x18,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_sub_nc_u32 v7, 0.5, v2 ; encoding: [0xf0,0x40,0x19,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x40,0x19,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, 0.5, v3 :: v_dual_subrev_f32 v7, 0.5, v2 ; encoding: [0xf0,0x60,0x18,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07] +0xf0,0x60,0x18,0xcf,0xf0,0x00,0x03,0x00,0xff,0x02,0x00,0x07 + +# GFX1250: 
v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_add_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x40,0x18,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x40,0x18,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_add_nc_u32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x19,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x00,0x19,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_ashrrev_i32 v7, vcc_hi, v3 ; encoding: [0x7f,0x60,0x19,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x60,0x19,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_cndmask_b32 v7, exec_hi, v3, vcc_lo ; encoding: [0x7f,0x90,0x18,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7f,0x90,0x18,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_fmac_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x00,0x18,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x00,0x18,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_lshlrev_b32 v7, vcc_hi, v3 ; encoding: [0x7f,0x10,0x19,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x10,0x19,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_lshrrev_b32 v7, vcc_hi, v3 ; encoding: [0x7f,0x50,0x19,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x50,0x19,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_max_i32 v7, vcc_hi, v3 ; encoding: [0x7f,0x70,0x19,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x70,0x19,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_max_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xa0,0x18,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0xa0,0x18,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: 
v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_min_i32 v7, vcc_hi, v3 ; encoding: [0x7f,0x80,0x19,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x80,0x19,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_min_num_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0xb0,0x18,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0xb0,0x18,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x70,0x18,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x70,0x18,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_mul_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x30,0x18,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x30,0x18,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_sub_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x50,0x18,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x50,0x18,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_sub_nc_u32 v7, vcc_hi, v3 ; encoding: [0x7f,0x40,0x19,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x40,0x19,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, exec_hi, v2 :: v_dual_subrev_f32 v7, vcc_hi, v3 ; encoding: [0x7f,0x60,0x18,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7f,0x60,0x18,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, exec_hi, v255 :: v_dual_mov_b32 v7, vcc_hi ; encoding: [0x7f,0x80,0x18,0xcf,0x6b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7f,0x80,0x18,0xcf,0x6b,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_add_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x40,0x18,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x40,0x18,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 
v255, exec_lo, v2 :: v_dual_add_nc_u32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x19,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x00,0x19,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_ashrrev_i32 v7, ttmp15, v3 ; encoding: [0x7e,0x60,0x19,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x60,0x19,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_cndmask_b32 v7, exec_lo, v3, vcc_lo ; encoding: [0x7e,0x90,0x18,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7e,0x90,0x18,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_fmac_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x00,0x18,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x00,0x18,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_lshlrev_b32 v7, ttmp15, v3 ; encoding: [0x7e,0x10,0x19,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x10,0x19,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_lshrrev_b32 v7, ttmp15, v3 ; encoding: [0x7e,0x50,0x19,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x50,0x19,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_max_i32 v7, ttmp15, v3 ; encoding: [0x7e,0x70,0x19,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x70,0x19,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_max_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xa0,0x18,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0xa0,0x18,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_min_i32 v7, ttmp15, v3 ; encoding: [0x7e,0x80,0x19,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x80,0x19,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 
v255, exec_lo, v2 :: v_dual_min_num_f32 v7, ttmp15, v3 ; encoding: [0x7e,0xb0,0x18,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0xb0,0x18,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x70,0x18,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x70,0x18,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_mul_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x30,0x18,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x30,0x18,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_sub_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x50,0x18,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x50,0x18,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_sub_nc_u32 v7, ttmp15, v3 ; encoding: [0x7e,0x40,0x19,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x40,0x19,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, exec_lo, v2 :: v_dual_subrev_f32 v7, ttmp15, v3 ; encoding: [0x7e,0x60,0x18,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7e,0x60,0x18,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, exec_lo, v255 :: v_dual_mov_b32 v7, ttmp15 ; encoding: [0x7e,0x80,0x18,0xcf,0x7b,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7e,0x80,0x18,0xcf,0x7b,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, m0, v2 :: v_dual_add_f32 v7, m0, v3 ; encoding: [0x7d,0x40,0x18,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x40,0x18,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, m0, v2 :: v_dual_add_nc_u32 v7, m0, v3 ; encoding: [0x7d,0x00,0x19,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x00,0x19,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, m0, v2 :: v_dual_ashrrev_i32 
v7, m0, v3 ; encoding: [0x7d,0x60,0x19,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x60,0x19,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, m0, v2 :: v_dual_cndmask_b32 v7, m0, v3, vcc_lo ; encoding: [0x7d,0x90,0x18,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7d,0x90,0x18,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_subrev_f32 v255, m0, v2 :: v_dual_fmac_f32 v7, m0, v3 ; encoding: [0x7d,0x00,0x18,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x00,0x18,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, m0, v2 :: v_dual_lshlrev_b32 v7, m0, v3 ; encoding: [0x7d,0x10,0x19,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x10,0x19,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, m0, v2 :: v_dual_lshrrev_b32 v7, m0, v3 ; encoding: [0x7d,0x50,0x19,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x50,0x19,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, m0, v2 :: v_dual_max_i32 v7, m0, v3 ; encoding: [0x7d,0x70,0x19,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x70,0x19,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, m0, v2 :: v_dual_max_num_f32 v7, m0, v3 ; encoding: [0x7d,0xa0,0x18,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0xa0,0x18,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, m0, v2 :: v_dual_min_i32 v7, m0, v3 ; encoding: [0x7d,0x80,0x19,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x80,0x19,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, m0, v2 :: v_dual_min_num_f32 v7, m0, v3 ; encoding: [0x7d,0xb0,0x18,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0xb0,0x18,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, m0, v2 :: v_dual_mul_dx9_zero_f32 v7, m0, v3 ; encoding: 
[0x7d,0x70,0x18,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x70,0x18,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, m0, v2 :: v_dual_mul_f32 v7, m0, v3 ; encoding: [0x7d,0x30,0x18,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x30,0x18,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, m0, v2 :: v_dual_sub_f32 v7, m0, v3 ; encoding: [0x7d,0x50,0x18,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x50,0x18,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, m0, v2 :: v_dual_sub_nc_u32 v7, m0, v3 ; encoding: [0x7d,0x40,0x19,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x40,0x19,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, m0, v2 :: v_dual_subrev_f32 v7, m0, v3 ; encoding: [0x7d,0x60,0x18,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7d,0x60,0x18,0xcf,0x7d,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, m0, v255 :: v_dual_mov_b32 v7, m0 ; encoding: [0x7d,0x80,0x18,0xcf,0x7d,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x7d,0x80,0x18,0xcf,0x7d,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, s1, v2 :: v_dual_add_f32 v7, s105, v3 ; encoding: [0x01,0x40,0x18,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x40,0x18,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, s1, v2 :: v_dual_add_nc_u32 v7, s105, v3 ; encoding: [0x01,0x00,0x19,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x00,0x19,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, s1, v2 :: v_dual_ashrrev_i32 v7, s105, v3 ; encoding: [0x01,0x60,0x19,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x60,0x19,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, s1, v2 :: v_dual_cndmask_b32 v7, s1, v3, vcc_lo ; encoding: [0x01,0x90,0x18,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] 
+0x01,0x90,0x18,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_subrev_f32 v255, s1, v2 :: v_dual_fmac_f32 v7, s105, v3 ; encoding: [0x01,0x00,0x18,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x00,0x18,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, s1, v2 :: v_dual_lshlrev_b32 v7, s105, v3 ; encoding: [0x01,0x10,0x19,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x10,0x19,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, s1, v2 :: v_dual_lshrrev_b32 v7, s105, v3 ; encoding: [0x01,0x50,0x19,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x50,0x19,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, s1, v2 :: v_dual_max_i32 v7, s105, v3 ; encoding: [0x01,0x70,0x19,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x70,0x19,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, s1, v2 :: v_dual_max_num_f32 v7, s105, v3 ; encoding: [0x01,0xa0,0x18,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xa0,0x18,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, s1, v2 :: v_dual_min_i32 v7, s105, v3 ; encoding: [0x01,0x80,0x19,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x80,0x19,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, s1, v2 :: v_dual_min_num_f32 v7, s105, v3 ; encoding: [0x01,0xb0,0x18,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xb0,0x18,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, s1, v2 :: v_dual_mul_dx9_zero_f32 v7, s105, v3 ; encoding: [0x01,0x70,0x18,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x70,0x18,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, s1, v2 :: v_dual_mul_f32 v7, s105, v3 ; encoding: [0x01,0x30,0x18,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x30,0x18,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: 
v_dual_subrev_f32 v255, s1, v2 :: v_dual_sub_f32 v7, s105, v3 ; encoding: [0x01,0x50,0x18,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x50,0x18,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, s1, v2 :: v_dual_sub_nc_u32 v7, s105, v3 ; encoding: [0x01,0x40,0x19,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x40,0x19,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, s1, v2 :: v_dual_subrev_f32 v7, s105, v3 ; encoding: [0x01,0x60,0x18,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x60,0x18,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, s1, v255 :: v_dual_mov_b32 v7, s105 ; encoding: [0x01,0x80,0x18,0xcf,0x69,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x01,0x80,0x18,0xcf,0x69,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, s105, v2 :: v_dual_add_f32 v7, s1, v3 ; encoding: [0x69,0x40,0x18,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x40,0x18,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, s105, v2 :: v_dual_add_nc_u32 v7, s1, v3 ; encoding: [0x69,0x00,0x19,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x00,0x19,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, s105, v2 :: v_dual_ashrrev_i32 v7, s1, v3 ; encoding: [0x69,0x60,0x19,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x60,0x19,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, s105, v2 :: v_dual_cndmask_b32 v7, s105, v3, vcc_lo ; encoding: [0x69,0x90,0x18,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x69,0x90,0x18,0xcf,0x69,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_subrev_f32 v255, s105, v2 :: v_dual_fmac_f32 v7, s1, v3 ; encoding: [0x69,0x00,0x18,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x00,0x18,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, s105, v2 :: v_dual_lshlrev_b32 v7, s1, v3 ; 
encoding: [0x69,0x10,0x19,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x10,0x19,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, s105, v2 :: v_dual_lshrrev_b32 v7, s1, v3 ; encoding: [0x69,0x50,0x19,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x50,0x19,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, s105, v2 :: v_dual_max_i32 v7, s1, v3 ; encoding: [0x69,0x70,0x19,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x70,0x19,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, s105, v2 :: v_dual_max_num_f32 v7, s1, v3 ; encoding: [0x69,0xa0,0x18,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0xa0,0x18,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, s105, v2 :: v_dual_min_i32 v7, s1, v3 ; encoding: [0x69,0x80,0x19,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x80,0x19,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, s105, v2 :: v_dual_min_num_f32 v7, s1, v3 ; encoding: [0x69,0xb0,0x18,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0xb0,0x18,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, s105, v2 :: v_dual_mul_dx9_zero_f32 v7, s1, v3 ; encoding: [0x69,0x70,0x18,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x70,0x18,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, s105, v2 :: v_dual_mul_f32 v7, s1, v3 ; encoding: [0x69,0x30,0x18,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x30,0x18,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, s105, v2 :: v_dual_sub_f32 v7, s1, v3 ; encoding: [0x69,0x50,0x18,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x50,0x18,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, s105, v2 :: v_dual_sub_nc_u32 v7, s1, v3 ; encoding: [0x69,0x40,0x19,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x69,0x40,0x19,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, s105, v2 :: v_dual_subrev_f32 v7, s1, v3 ; encoding: [0x69,0x60,0x18,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x69,0x60,0x18,0xcf,0x01,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, s105, v255 :: v_dual_mov_b32 v7, s1 ; encoding: [0x69,0x80,0x18,0xcf,0x01,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x69,0x80,0x18,0xcf,0x01,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_add_f32 v7, -1, v3 ; encoding: [0xfd,0x40,0x18,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x40,0x18,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_add_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x00,0x19,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x00,0x19,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_ashrrev_i32 v7, -1, v3 ; encoding: [0xfd,0x60,0x19,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x60,0x19,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_cndmask_b32 v7, -1, v3, vcc_lo ; encoding: [0xfd,0x90,0x18,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0xfd,0x90,0x18,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_fmac_f32 v7, -1, v3 ; encoding: [0xfd,0x00,0x18,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x00,0x18,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_lshlrev_b32 v7, -1, v3 ; encoding: [0xfd,0x10,0x19,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x10,0x19,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_lshrrev_b32 v7, -1, v3 ; encoding: [0xfd,0x50,0x19,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0xfd,0x50,0x19,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_max_i32 v7, -1, v3 ; encoding: [0xfd,0x70,0x19,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x70,0x19,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_max_num_f32 v7, -1, v3 ; encoding: [0xfd,0xa0,0x18,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0xa0,0x18,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_min_i32 v7, -1, v3 ; encoding: [0xfd,0x80,0x19,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x80,0x19,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_min_num_f32 v7, -1, v3 ; encoding: [0xfd,0xb0,0x18,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0xb0,0x18,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_mul_dx9_zero_f32 v7, -1, v3 ; encoding: [0xfd,0x70,0x18,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x70,0x18,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_mul_f32 v7, -1, v3 ; encoding: [0xfd,0x30,0x18,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x30,0x18,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_sub_f32 v7, -1, v3 ; encoding: [0xfd,0x50,0x18,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x50,0x18,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_sub_nc_u32 v7, -1, v3 ; encoding: [0xfd,0x40,0x19,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0xfd,0x40,0x19,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, src_scc, v2 :: v_dual_subrev_f32 v7, -1, v3 ; encoding: [0xfd,0x60,0x18,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0xfd,0x60,0x18,0xcf,0xc1,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, src_scc, v255 :: v_dual_mov_b32 v7, -1 ; encoding: [0xfd,0x80,0x18,0xcf,0xc1,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0xfd,0x80,0x18,0xcf,0xc1,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_add_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x40,0x18,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x40,0x18,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_add_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x19,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x00,0x19,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_ashrrev_i32 v7, vcc_lo, v3 ; encoding: [0x7b,0x60,0x19,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x60,0x19,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_cndmask_b32 v7, ttmp15, v3, vcc_lo ; encoding: [0x7b,0x90,0x18,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x7b,0x90,0x18,0xcf,0x7b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_fmac_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x00,0x18,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x00,0x18,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_lshlrev_b32 v7, vcc_lo, v3 ; encoding: [0x7b,0x10,0x19,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x10,0x19,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_lshrrev_b32 v7, vcc_lo, v3 ; encoding: [0x7b,0x50,0x19,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x50,0x19,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_max_i32 v7, vcc_lo, v3 ; encoding: [0x7b,0x70,0x19,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x7b,0x70,0x19,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_max_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xa0,0x18,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0xa0,0x18,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_min_i32 v7, vcc_lo, v3 ; encoding: [0x7b,0x80,0x19,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x80,0x19,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_min_num_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0xb0,0x18,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0xb0,0x18,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_mul_dx9_zero_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x70,0x18,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x70,0x18,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_mul_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x30,0x18,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x30,0x18,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_sub_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x50,0x18,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x50,0x18,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_sub_nc_u32 v7, vcc_lo, v3 ; encoding: [0x7b,0x40,0x19,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x40,0x19,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, ttmp15, v2 :: v_dual_subrev_f32 v7, vcc_lo, v3 ; encoding: [0x7b,0x60,0x18,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x7b,0x60,0x18,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, ttmp15, v255 :: v_dual_mov_b32 v7, vcc_lo ; encoding: [0x7b,0x80,0x18,0xcf,0x6a,0x00,0xff,0x00,0xff,0x00,0x00,0x07] 
+0x7b,0x80,0x18,0xcf,0x6a,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v1, v2 :: v_dual_add_f32 v7, v255, v3 ; encoding: [0x01,0x41,0x18,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x41,0x18,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v1, v2 :: v_dual_add_nc_u32 v7, v255, v3 ; encoding: [0x01,0x01,0x19,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x01,0x19,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v1, v2 :: v_dual_ashrrev_i32 v7, v255, v3 ; encoding: [0x01,0x61,0x19,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x61,0x19,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v1, v2 :: v_dual_cndmask_b32 v7, v255, v3, vcc_lo ; encoding: [0x01,0x91,0x18,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0x01,0x91,0x18,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v1, v2 :: v_dual_fmac_f32 v7, v255, v3 ; encoding: [0x01,0x01,0x18,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x01,0x18,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v1, v2 :: v_dual_lshlrev_b32 v7, v255, v3 ; encoding: [0x01,0x11,0x19,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x11,0x19,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v1, v2 :: v_dual_lshrrev_b32 v7, v255, v3 ; encoding: [0x01,0x51,0x19,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x51,0x19,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v1, v2 :: v_dual_max_i32 v7, v255, v3 ; encoding: [0x01,0x71,0x19,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x71,0x19,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v1, v2 :: v_dual_max_num_f32 v7, v255, v3 ; encoding: [0x01,0xa1,0x18,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xa1,0x18,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# 
GFX1250: v_dual_subrev_f32 v255, v1, v2 :: v_dual_min_i32 v7, v255, v3 ; encoding: [0x01,0x81,0x19,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x81,0x19,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v1, v2 :: v_dual_min_num_f32 v7, v255, v3 ; encoding: [0x01,0xb1,0x18,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0xb1,0x18,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v1, v2 :: v_dual_mul_dx9_zero_f32 v7, v255, v3 ; encoding: [0x01,0x71,0x18,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x71,0x18,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v1, v2 :: v_dual_mul_f32 v7, v255, v3 ; encoding: [0x01,0x31,0x18,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x31,0x18,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v1, v2 :: v_dual_sub_f32 v7, v255, v3 ; encoding: [0x01,0x51,0x18,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x51,0x18,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v1, v2 :: v_dual_sub_nc_u32 v7, v255, v3 ; encoding: [0x01,0x41,0x19,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x41,0x19,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v1, v2 :: v_dual_subrev_f32 v7, v255, v3 ; encoding: [0x01,0x61,0x18,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x01,0x61,0x18,0xcf,0xff,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v1, v255 :: v_dual_mov_b32 v7, v255 ; encoding: [0x01,0x81,0x18,0xcf,0xff,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0x01,0x81,0x18,0xcf,0xff,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v2, v2 :: v_dual_add_f32 v7, v3, v3 ; encoding: [0x02,0x41,0x18,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x41,0x18,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v2, v2 :: v_dual_add_nc_u32 v7, v3, v3 ; encoding: 
[0x02,0x01,0x19,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x01,0x19,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v2, v2 :: v_dual_ashrrev_i32 v7, v3, v3 ; encoding: [0x02,0x61,0x19,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x61,0x19,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v2, v2 :: v_dual_cndmask_b32 v7, v3, v3, vcc_lo ; encoding: [0x02,0x91,0x18,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0x02,0x91,0x18,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v2, v2 :: v_dual_fmac_f32 v7, v3, v3 ; encoding: [0x02,0x01,0x18,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x01,0x18,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v2, v2 :: v_dual_lshlrev_b32 v7, v3, v3 ; encoding: [0x02,0x11,0x19,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x11,0x19,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v2, v2 :: v_dual_lshrrev_b32 v7, v3, v3 ; encoding: [0x02,0x51,0x19,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x51,0x19,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v2, v2 :: v_dual_max_i32 v7, v3, v3 ; encoding: [0x02,0x71,0x19,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x71,0x19,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v2, v2 :: v_dual_max_num_f32 v7, v3, v3 ; encoding: [0x02,0xa1,0x18,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0xa1,0x18,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v2, v2 :: v_dual_min_i32 v7, v3, v3 ; encoding: [0x02,0x81,0x19,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x81,0x19,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v2, v2 :: v_dual_min_num_f32 v7, v3, v3 ; encoding: [0x02,0xb1,0x18,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x02,0xb1,0x18,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v2, v2 :: v_dual_mul_dx9_zero_f32 v7, v3, v3 ; encoding: [0x02,0x71,0x18,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x71,0x18,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v2, v2 :: v_dual_mul_f32 v7, v3, v3 ; encoding: [0x02,0x31,0x18,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x31,0x18,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v2, v2 :: v_dual_sub_f32 v7, v3, v3 ; encoding: [0x02,0x51,0x18,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x51,0x18,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v2, v2 :: v_dual_sub_nc_u32 v7, v3, v3 ; encoding: [0x02,0x41,0x19,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x41,0x19,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v2, v2 :: v_dual_subrev_f32 v7, v3, v3 ; encoding: [0x02,0x61,0x18,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x02,0x61,0x18,0xcf,0x03,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v2, v255 :: v_dual_mov_b32 v7, v3 ; encoding: [0x02,0x81,0x18,0xcf,0x03,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0x02,0x81,0x18,0xcf,0x03,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v255, v2 :: v_dual_add_f32 v7, v2, v3 ; encoding: [0xff,0x41,0x18,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x41,0x18,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v255, v2 :: v_dual_add_nc_u32 v7, v2, v3 ; encoding: [0xff,0x01,0x19,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x01,0x19,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v255, v2 :: v_dual_ashrrev_i32 v7, v2, v3 ; encoding: [0xff,0x61,0x19,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x61,0x19,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 
v255, v255, v2 :: v_dual_cndmask_b32 v7, v2, v3, vcc_lo ; encoding: [0xff,0x91,0x18,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0xff,0x91,0x18,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v255, v2 :: v_dual_fmac_f32 v7, v2, v3 ; encoding: [0xff,0x01,0x18,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x01,0x18,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v255, v2 :: v_dual_lshlrev_b32 v7, v2, v3 ; encoding: [0xff,0x11,0x19,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x11,0x19,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v255, v2 :: v_dual_lshrrev_b32 v7, v2, v3 ; encoding: [0xff,0x51,0x19,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x51,0x19,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v255, v2 :: v_dual_max_i32 v7, v2, v3 ; encoding: [0xff,0x71,0x19,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x71,0x19,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v255, v2 :: v_dual_max_num_f32 v7, v2, v3 ; encoding: [0xff,0xa1,0x18,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0xa1,0x18,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v255, v2 :: v_dual_min_i32 v7, v2, v3 ; encoding: [0xff,0x81,0x19,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x81,0x19,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v255, v2 :: v_dual_min_num_f32 v7, v2, v3 ; encoding: [0xff,0xb1,0x18,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0xb1,0x18,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v255, v2 :: v_dual_mul_dx9_zero_f32 v7, v2, v3 ; encoding: [0xff,0x71,0x18,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x71,0x18,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v255, v2 :: v_dual_mul_f32 v7, v2, v3 ; encoding: 
[0xff,0x31,0x18,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x31,0x18,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v255, v2 :: v_dual_sub_f32 v7, v2, v3 ; encoding: [0xff,0x51,0x18,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x51,0x18,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v255, v2 :: v_dual_sub_nc_u32 v7, v2, v3 ; encoding: [0xff,0x41,0x19,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x41,0x19,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v255, v2 :: v_dual_subrev_f32 v7, v2, v3 ; encoding: [0xff,0x61,0x18,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0xff,0x61,0x18,0xcf,0x02,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v255, v255 :: v_dual_mov_b32 v7, v2 ; encoding: [0xff,0x81,0x18,0xcf,0x02,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0xff,0x81,0x18,0xcf,0x02,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v3, v2 :: v_dual_add_f32 v7, v4, v3 ; encoding: [0x03,0x41,0x18,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x41,0x18,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v3, v2 :: v_dual_add_nc_u32 v7, v4, v3 ; encoding: [0x03,0x01,0x19,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x01,0x19,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v3, v2 :: v_dual_ashrrev_i32 v7, v4, v3 ; encoding: [0x03,0x61,0x19,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x61,0x19,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v3, v2 :: v_dual_cndmask_b32 v7, v4, v3, vcc_lo ; encoding: [0x03,0x91,0x18,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0x03,0x91,0x18,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v3, v2 :: v_dual_fmac_f32 v7, v4, v3 ; encoding: [0x03,0x01,0x18,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x03,0x01,0x18,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v3, v2 :: v_dual_lshlrev_b32 v7, v4, v3 ; encoding: [0x03,0x11,0x19,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x11,0x19,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v3, v2 :: v_dual_lshrrev_b32 v7, v4, v3 ; encoding: [0x03,0x51,0x19,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x51,0x19,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v3, v2 :: v_dual_max_i32 v7, v4, v3 ; encoding: [0x03,0x71,0x19,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x71,0x19,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v3, v2 :: v_dual_max_num_f32 v7, v4, v3 ; encoding: [0x03,0xa1,0x18,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0xa1,0x18,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v3, v2 :: v_dual_min_i32 v7, v4, v3 ; encoding: [0x03,0x81,0x19,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x81,0x19,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v3, v2 :: v_dual_min_num_f32 v7, v4, v3 ; encoding: [0x03,0xb1,0x18,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0xb1,0x18,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v3, v2 :: v_dual_mul_dx9_zero_f32 v7, v4, v3 ; encoding: [0x03,0x71,0x18,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x71,0x18,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v3, v2 :: v_dual_mul_f32 v7, v4, v3 ; encoding: [0x03,0x31,0x18,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x31,0x18,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v3, v2 :: v_dual_sub_f32 v7, v4, v3 ; encoding: [0x03,0x51,0x18,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x51,0x18,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 
v255, v3, v2 :: v_dual_sub_nc_u32 v7, v4, v3 ; encoding: [0x03,0x41,0x19,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x41,0x19,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v3, v2 :: v_dual_subrev_f32 v7, v4, v3 ; encoding: [0x03,0x61,0x18,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x03,0x61,0x18,0xcf,0x04,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v3, v255 :: v_dual_mov_b32 v7, v4 ; encoding: [0x03,0x81,0x18,0xcf,0x04,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0x03,0x81,0x18,0xcf,0x04,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_add_f32 v7, v1, v3 ; encoding: [0x04,0x41,0x18,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x41,0x18,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_add_nc_u32 v7, v1, v3 ; encoding: [0x04,0x01,0x19,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x01,0x19,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_ashrrev_i32 v7, v1, v3 ; encoding: [0x04,0x61,0x19,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x61,0x19,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_bitop2_b32 v7, v1, v3 bitop3:0x83 ; encoding: [0x04,0x21,0x19,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x83,0x07] +0x04,0x21,0x19,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x83,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, s96 ; encoding: [0x04,0x91,0x18,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x60,0x07] +0x04,0x91,0x18,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x60,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_cndmask_b32 v7, v1, v3, vcc_lo ; encoding: [0x04,0x91,0x18,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x6a,0x07] +0x04,0x91,0x18,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_fma_f32 v7, v1, v3, v4 ; encoding: 
[0x04,0x31,0x19,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x04,0x07] +0x04,0x31,0x19,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x04,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_fmac_f32 v7, v1, v3 ; encoding: [0x04,0x01,0x18,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x01,0x18,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_lshlrev_b32 v7, v1, v3 ; encoding: [0x04,0x11,0x19,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x11,0x19,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_lshrrev_b32 v7, v1, v3 ; encoding: [0x04,0x51,0x19,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x51,0x19,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_max_i32 v7, v1, v3 ; encoding: [0x04,0x71,0x19,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x71,0x19,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_max_num_f32 v7, v1, v3 ; encoding: [0x04,0xa1,0x18,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0xa1,0x18,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_min_i32 v7, v1, v3 ; encoding: [0x04,0x81,0x19,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x81,0x19,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_min_num_f32 v7, v1, v3 ; encoding: [0x04,0xb1,0x18,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0xb1,0x18,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_mul_dx9_zero_f32 v7, v1, v3 ; encoding: [0x04,0x71,0x18,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x71,0x18,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_mul_f32 v7, v1, v3 ; encoding: [0x04,0x31,0x18,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x04,0x31,0x18,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_sub_f32 v7, v1, v3 ; encoding: [0x04,0x51,0x18,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x51,0x18,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_sub_nc_u32 v7, v1, v3 ; encoding: [0x04,0x41,0x19,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x41,0x19,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v4, v2 :: v_dual_subrev_f32 v7, v1, v3 ; encoding: [0x04,0x61,0x18,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07] +0x04,0x61,0x18,0xcf,0x01,0x01,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, v4, v255 :: v_dual_mov_b32 v7, v1 ; encoding: [0x04,0x81,0x18,0xcf,0x01,0x01,0xff,0x00,0xff,0x00,0x00,0x07] +0x04,0x81,0x18,0xcf,0x01,0x01,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_add_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x40,0x18,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x40,0x18,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_add_nc_u32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x19,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x00,0x19,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_ashrrev_i32 v7, exec_lo, v3 ; encoding: [0x6b,0x60,0x19,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x60,0x19,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_cndmask_b32 v7, vcc_hi, v3, vcc_lo ; encoding: [0x6b,0x90,0x18,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x6b,0x90,0x18,0xcf,0x6b,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_fmac_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x00,0x18,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x6b,0x00,0x18,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_lshlrev_b32 v7, exec_lo, v3 ; encoding: [0x6b,0x10,0x19,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x10,0x19,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_lshrrev_b32 v7, exec_lo, v3 ; encoding: [0x6b,0x50,0x19,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x50,0x19,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_max_i32 v7, exec_lo, v3 ; encoding: [0x6b,0x70,0x19,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x70,0x19,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_max_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xa0,0x18,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0xa0,0x18,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_min_i32 v7, exec_lo, v3 ; encoding: [0x6b,0x80,0x19,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x80,0x19,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_min_num_f32 v7, exec_lo, v3 ; encoding: [0x6b,0xb0,0x18,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0xb0,0x18,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x70,0x18,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x70,0x18,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_mul_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x30,0x18,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x30,0x18,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_sub_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x50,0x18,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x6b,0x50,0x18,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_sub_nc_u32 v7, exec_lo, v3 ; encoding: [0x6b,0x40,0x19,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x40,0x19,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, vcc_hi, v2 :: v_dual_subrev_f32 v7, exec_lo, v3 ; encoding: [0x6b,0x60,0x18,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6b,0x60,0x18,0xcf,0x7e,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, vcc_hi, v255 :: v_dual_mov_b32 v7, exec_lo ; encoding: [0x6b,0x80,0x18,0xcf,0x7e,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x6b,0x80,0x18,0xcf,0x7e,0x00,0xff,0x00,0xff,0x00,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_add_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x18,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x40,0x18,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_add_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x19,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x00,0x19,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_ashrrev_i32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x19,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x60,0x19,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v7, vcc_lo, v3, vcc_lo ; encoding: [0x6a,0x90,0x18,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x6a,0x07] +0x6a,0x90,0x18,0xcf,0x6a,0x00,0x02,0x00,0xff,0x03,0x6a,0x07 + +# GFX1250: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_fmac_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x00,0x18,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x00,0x18,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_lshlrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x10,0x19,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x6a,0x10,0x19,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_lshrrev_b32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x19,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x50,0x19,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_max_i32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x19,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x70,0x19,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_max_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xa0,0x18,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0xa0,0x18,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_min_i32 v7, exec_hi, v3 ; encoding: [0x6a,0x80,0x19,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x80,0x19,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_min_num_f32 v7, exec_hi, v3 ; encoding: [0x6a,0xb0,0x18,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0xb0,0x18,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_mul_dx9_zero_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x70,0x18,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x70,0x18,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_mul_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x30,0x18,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x30,0x18,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_sub_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x50,0x18,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x50,0x18,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_sub_nc_u32 v7, exec_hi, v3 ; encoding: [0x6a,0x40,0x19,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] 
+0x6a,0x40,0x19,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, vcc_lo, v2 :: v_dual_subrev_f32 v7, exec_hi, v3 ; encoding: [0x6a,0x60,0x18,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07] +0x6a,0x60,0x18,0xcf,0x7f,0x00,0x02,0x00,0xff,0x03,0x00,0x07 + +# GFX1250: v_dual_subrev_f32 v255, vcc_lo, v255 :: v_dual_mov_b32 v7, exec_hi ; encoding: [0x6a,0x80,0x18,0xcf,0x7f,0x00,0xff,0x00,0xff,0x00,0x00,0x07] +0x6a,0x80,0x18,0xcf,0x7f,0x00,0xff,0x00,0xff,0x00,0x00,0x07 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop2_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop2_features.txt index 2b8d58853847b..55fdc2b15bf05 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop2_features.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop2_features.txt @@ -87,6 +87,7 @@ # CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x05 -# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, sext(v2) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +# FIXME: The instruction gets printed using the wrong function (AMDGPUInstPrinter::printOperandAndIntInputMods) and hence the "-" modifier is not printed. 
+# COM: v_pk_fmac_f16_sdwa v5, v1, -v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x16 diff --git a/llvm/test/MC/ELF/mc-dump.s b/llvm/test/MC/ELF/mc-dump.s index 389941db23e3b..36d3a05768dc6 100644 --- a/llvm/test/MC/ELF/mc-dump.s +++ b/llvm/test/MC/ELF/mc-dump.s @@ -12,7 +12,7 @@ # CHECK-NEXT:0 Data Size:0 [] # CHECK-NEXT: Symbol @0 _start # CHECK-NEXT:0 Org Offset:3 Value:0 -# CHECK-NEXT:3 Relaxable Size:2 > +# CHECK-NEXT:3 Relaxable Size:2 > # CHECK-NEXT: Fixup @1 Value:.Ltmp0 Kind:4001 # CHECK-NEXT:5 Data Size:16 [48,8b,04,25,00,00,00,00,48,8b,04,25,00,00,00,00] # CHECK-NEXT: Fixup @4 Value:f0@ Kind:4017 diff --git a/llvm/test/TableGen/GlobalISelEmitter/GlobalISelEmitter.td b/llvm/test/TableGen/GlobalISelEmitter/GlobalISelEmitter.td index e9c2069fdbd98..c3895b524e85e 100644 --- a/llvm/test/TableGen/GlobalISelEmitter/GlobalISelEmitter.td +++ b/llvm/test/TableGen/GlobalISelEmitter/GlobalISelEmitter.td @@ -535,7 +535,7 @@ def : Pat<(frag GPR32:$src1, complex:$src2, complex:$src3), // R00O-NEXT: GIM_Reject, // R00O: // Label [[DEFAULT_NUM]]: @[[DEFAULT]] // R00O-NEXT: GIM_Reject, -// R00O-NEXT: }; // Size: 1890 bytes +// R00O-NEXT: }; // Size: 1894 bytes def INSNBOB : I<(outs GPR32:$dst), (ins GPR32:$src1, GPR32:$src2, GPR32:$src3, GPR32:$src4), [(set GPR32:$dst, diff --git a/llvm/test/ThinLTO/X86/memprof-icp-recursive.ll b/llvm/test/ThinLTO/X86/memprof-icp-recursive.ll index f8dcd80d4e141..3394efd52a3ba 100644 --- a/llvm/test/ThinLTO/X86/memprof-icp-recursive.ll +++ b/llvm/test/ThinLTO/X86/memprof-icp-recursive.ll @@ -54,7 +54,40 @@ ; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=STATS \ ; RUN: --check-prefix=REMARKS -; RUN: llvm-dis %t.out.2.4.opt.bc -o - | FileCheck %s --check-prefix=IR +; RUN: llvm-dis %t.out.2.4.opt.bc -o - | FileCheck %s --check-prefixes=IR,IR-INLINE + +;; Next, add a threshold to prevent inlining of the promoted calls which have +;; count 2 (the default threshold of 2 means they 
are inlinable by default). +; RUN: llvm-lto2 run %t/main.o %t/foo.o -enable-memprof-context-disambiguation \ +; RUN: -memprof-icp-noinline-threshold=3 \ +; RUN: -enable-memprof-indirect-call-support=true \ +; RUN: -memprof-allow-recursive-callsites \ +; RUN: -supports-hot-cold-new \ +; RUN: -r=%t/foo.o,_Z3fooR2B0j,plx \ +; RUN: -r=%t/foo.o,_ZN2B03barEj, \ +; RUN: -r=%t/foo.o,_ZN1B3barEj, \ +; RUN: -r=%t/main.o,_Z3fooR2B0j, \ +; RUN: -r=%t/main.o,_Znwm, \ +; RUN: -r=%t/main.o,_ZdlPvm, \ +; RUN: -r=%t/main.o,_Z8externalPi, \ +; RUN: -r=%t/main.o,main,plx \ +; RUN: -r=%t/main.o,_ZN2B03barEj,plx \ +; RUN: -r=%t/main.o,_ZN1B3barEj,plx \ +; RUN: -r=%t/main.o,_ZTV1B,plx \ +; RUN: -r=%t/main.o,_ZTVN10__cxxabiv120__si_class_type_infoE,plx \ +; RUN: -r=%t/main.o,_ZTS1B,plx \ +; RUN: -r=%t/main.o,_ZTVN10__cxxabiv117__class_type_infoE,plx \ +; RUN: -r=%t/main.o,_ZTS2B0,plx \ +; RUN: -r=%t/main.o,_ZTI2B0,plx \ +; RUN: -r=%t/main.o,_ZTI1B,plx \ +; RUN: -r=%t/main.o,_ZTV2B0,plx \ +; RUN: -thinlto-threads=1 \ +; RUN: -memprof-verify-ccg -memprof-verify-nodes -stats \ +; RUN: -pass-remarks=. 
-save-temps \ +; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=STATS \ +; RUN: --check-prefix=REMARKS + +; RUN: llvm-dis %t.out.2.4.opt.bc -o - | FileCheck %s --check-prefixes=IR,IR-NOINLINE ; REMARKS: call in clone main assigned to call function clone _Z3fooR2B0j.memprof.1 ; REMARKS: call in clone main assigned to call function clone _Z3fooR2B0j.memprof.1 @@ -98,12 +131,14 @@ ; IR: %[[R1:[0-9]+]] = icmp eq ptr %0, @_ZN1B3barEj ; IR: br i1 %[[R1]], label %if.true.direct_targ, label %if.false.orig_indirect ; IR: if.true.direct_targ: -; IR: call {{.*}} @_Znwm(i64 noundef 4) #[[NOTCOLD:[0-9]+]] +; IR-INLINE: call {{.*}} @_Znwm(i64 noundef 4) #[[NOTCOLD:[0-9]+]] +; IR-NOINLINE: call {{.*}} @_ZN1B3barEj(ptr null, i32 0) #[[NOINLINE:[0-9]+]] ; IR: if.false.orig_indirect: ; IR: %[[R2:[0-9]+]] = icmp eq ptr %0, @_ZN2B03barEj ; IR: br i1 %[[R2]], label %if.true.direct_targ1, label %if.false.orig_indirect2 ; IR: if.true.direct_targ1: -; IR: call {{.*}} @_Znwm(i64 noundef 4) #[[NOTCOLD]] +; IR-INLINE: call {{.*}} @_Znwm(i64 noundef 4) #[[NOTCOLD]] +; IR-NOINLINE: call {{.*}} @_ZN2B03barEj(ptr null, i32 0) #[[NOINLINE]] ; IR: if.false.orig_indirect2: ; IR: call {{.*}} %0 @@ -114,17 +149,20 @@ ; IR: %[[R3:[0-9]+]] = icmp eq ptr %0, @_ZN1B3barEj ; IR: br i1 %[[R3]], label %if.true.direct_targ, label %if.false.orig_indirect ; IR: if.true.direct_targ: -; IR: call {{.*}} @_Znwm(i64 noundef 4) #[[COLD:[0-9]+]] +; IR-INLINE: call {{.*}} @_Znwm(i64 noundef 4) #[[COLD:[0-9]+]] +; IR-NOINLINE: call {{.*}} @_ZN1B3barEj.memprof.1(ptr null, i32 0) #[[NOINLINE]] ; IR: if.false.orig_indirect: ; IR: %[[R4:[0-9]+]] = icmp eq ptr %0, @_ZN2B03barEj ; IR: br i1 %[[R4]], label %if.true.direct_targ1, label %if.false.orig_indirect2 ; IR: if.true.direct_targ1: -; IR: call {{.*}} @_Znwm(i64 noundef 4) #[[COLD]] +; IR-INLINE: call {{.*}} @_Znwm(i64 noundef 4) #[[COLD]] +; IR-NOINLINE: call {{.*}} @_ZN2B03barEj.memprof.1(ptr null, i32 0) #[[NOINLINE]] ; IR: if.false.orig_indirect2: ; IR: call {{.*}} 
%0 -; IR: attributes #[[NOTCOLD]] = {{.*}} "memprof"="notcold" -; IR: attributes #[[COLD]] = {{.*}} "memprof"="cold" +; IR-INLINE: attributes #[[NOTCOLD]] = {{.*}} "memprof"="notcold" +; IR-INLINE: attributes #[[COLD]] = {{.*}} "memprof"="cold" +; IR-NOINLINE: attributes #[[NOINLINE]] = { noinline } ;--- foo.ll target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/ThinLTO/X86/memprof_callee_type_mismatch.ll b/llvm/test/ThinLTO/X86/memprof_callee_type_mismatch.ll new file mode 100644 index 0000000000000..a2cca00515732 --- /dev/null +++ b/llvm/test/ThinLTO/X86/memprof_callee_type_mismatch.ll @@ -0,0 +1,62 @@ +;; Test to ensure the callite when updated to call a clone does not mutate the +;; callee function type. In rare cases we may end up with a callee declaration +;; that does not match the call type, because it was imported from a different +;; module with an incomplete return type (in which case clang gives it a void +;; return type). + +; RUN: rm -rf %t && split-file %s %t && cd %t +; RUN: llvm-as src.ll -o src.o +; RUN: llvm-as src.o.thinlto.ll -o src.o.thinlto.bc +; RUN: opt -passes=memprof-context-disambiguation src.o -S -memprof-import-summary=src.o.thinlto.bc | FileCheck %s + +;--- src.ll +; ModuleID = 'src.o' +source_filename = "src.c" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @main(ptr %b) { +entry: + ;; This call is not changed as the summary specifies clone 0. + ; CHECK: call ptr @_Z3foov() + %call = call ptr @_Z3foov(), !callsite !5 + ;; After changing this call to call a clone, the function type should still + ;; be ptr, despite the void on the callee declaration. 
+ ; CHECK: call ptr @_Z3foov.memprof.1() + %call1 = call ptr @_Z3foov(), !callsite !6 + %0 = load ptr, ptr %b, align 8 + ;; Although the summary indicates this should call clone 1, and the VP + ;; metadata indicates the callee is _Z3foov, it is not updated because + ;; the ICP facility requires the function types to match. + ; CHECK: call ptr %0() + %call2 = call ptr %0(), !prof !7, !callsite !8 + ret i32 0 +} + +;; Both the original callee function declaration and its clone have void return +;; type. +; CHECK: declare void @_Z3foov() +; CHECK: declare void @_Z3foov.memprof.1() +declare void @_Z3foov() + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3, !4} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 21.0.0git (git@github.com:llvm/llvm-project.git e391301e0e4d9183fe06e69602e87b0bc889aeda)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "src.cc", directory: "", checksumkind: CSK_MD5, checksum: "8636c46e81402013b9d54e8307d2f149") +!2 = !{i32 7, !"Dwarf Version", i32 5} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"EnableSplitLTOUnit", i32 0} +!5 = !{i64 8632435727821051414} +!6 = !{i64 -3421689549917153178} +!7 = !{!"VP", i32 0, i64 4, i64 9191153033785521275, i64 4} +!8 = !{i64 1234} + +;--- src.o.thinlto.ll +; ModuleID = 'src.o.thinlto.bc' +source_filename = "src.o.thinlto.bc" + +^0 = module: (path: "src.o", hash: (2823430083, 3994560862, 899296057, 1055405378, 2961356784)) +^1 = gv: (guid: 15822663052811949562, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 3, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), callsites: ((callee: null, clones: (0), 
stackIds: (8632435727821051414)), (callee: null, clones: (1), stackIds: (15025054523792398438)), (callee: null, clones: (1), stackIds: (1234)))))) +^2 = flags: 353 +^3 = blockcount: 0 diff --git a/llvm/test/Transforms/Coroutines/coro-split-dbg-nested-struct.ll b/llvm/test/Transforms/Coroutines/coro-split-dbg-nested-struct.ll new file mode 100644 index 0000000000000..12dfa16991326 --- /dev/null +++ b/llvm/test/Transforms/Coroutines/coro-split-dbg-nested-struct.ll @@ -0,0 +1,61 @@ +; RUN: opt < %s -passes='cgscc(coro-split)' -S | FileCheck %s + +; Test that nested structs in coroutine frames have correct debug info scoping. + +; Minimal nested struct types that used to trigger a scoping issue: +; we used to set the wrong `scope` for the `DIDerivedType` member entries of the `DICompositeType` +; as well as the `scope` for `DICompositeType` for the inner struct itself. +%"struct.Inner" = type { i32, ptr } +%"struct.Outer" = type { %"struct.Inner", i64 } +%"class.Promise" = type { %"struct.Outer" } + +define void @test_coro_function() presplitcoroutine !dbg !10 { +entry: + %__promise = alloca %"class.Promise", align 8 + %0 = call token @llvm.coro.id(i32 0, ptr %__promise, ptr null, ptr null) + %1 = call ptr @llvm.coro.begin(token %0, ptr null) + %2 = call token @llvm.coro.save(ptr null) + ret void +} + +; CHECK: define void @test_coro_function() + +; Check that frame debug info is generated +; CHECK: ![[FRAME_TYPE:[0-9]+]] = !DICompositeType(tag: DW_TAG_structure_type, name: "{{.*}}.coro_frame_ty" + +; Key validation: Check that nested structs have the correct scope hierarchy +; 1. Promise should be scoped to the frame +; CHECK: ![[PROMISE:[0-9]+]] = !DICompositeType(tag: DW_TAG_structure_type, name: "class_Promise", scope: ![[FRAME_TYPE]] + +; 2. Members of Promise should be scoped to Promise (check this before Outer since it comes first in output) +; CHECK: !DIDerivedType(tag: DW_TAG_member, name: "struct_Outer", scope: ![[PROMISE]] + +; 3. 
Outer should be scoped to Promise (not the frame!) +; CHECK: ![[OUTER:[0-9]+]] = !DICompositeType(tag: DW_TAG_structure_type, name: "struct_Outer", scope: ![[PROMISE]] + +; 4. First Outer member should be scoped to Outer +; CHECK: !DIDerivedType(tag: DW_TAG_member, name: "struct_Inner", scope: ![[OUTER]] + +; 5. Inner should be scoped to Outer (proper nesting) +; CHECK: ![[INNER:[0-9]+]] = !DICompositeType(tag: DW_TAG_structure_type, name: "struct_Inner", scope: ![[OUTER]] + +; 6. Members of Inner should be scoped to Inner +; CHECK: !DIDerivedType(tag: DW_TAG_member, name: "__int_32", scope: ![[INNER]] +; CHECK: !DIDerivedType(tag: DW_TAG_member, name: "PointerType", scope: ![[INNER]] + +; 7. Second Outer member comes after Inner (due to output order) +; CHECK: !DIDerivedType(tag: DW_TAG_member, name: "__int_64", scope: ![[OUTER]] + +declare token @llvm.coro.id(i32, ptr readnone, ptr readonly, ptr) +declare ptr @llvm.coro.begin(token, ptr writeonly) +declare token @llvm.coro.save(ptr) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!9} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug) +!1 = !DIFile(filename: "test.cpp", directory: ".") +!9 = !{i32 2, !"Debug Info Version", i32 3} +!10 = distinct !DISubprogram(name: "test_coro_function", scope: !1, file: !1, line: 1, type: !11, spFlags: DISPFlagDefinition, unit: !0) +!11 = !DISubroutineType(types: !12) +!12 = !{null} diff --git a/llvm/test/Transforms/InstCombine/icmp_or_umul_overflow.ll b/llvm/test/Transforms/InstCombine/icmp_or_umul_overflow.ll new file mode 100644 index 0000000000000..13c7fce38ef01 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/icmp_or_umul_overflow.ll @@ -0,0 +1,238 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -passes=instcombine < %s | FileCheck %s + +declare void @use.i1(i1 %x) +declare void @use.i64(i64 %x) 
+declare void @use.i64i1({i64, i1} %x) + +define i1 @umul_greater_than_or_overflow_const(i64 %in) { +; CHECK-LABEL: define i1 @umul_greater_than_or_overflow_const( +; CHECK-SAME: i64 [[IN:%.*]]) { +; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i64 [[IN]], 109802048057794950 +; CHECK-NEXT: ret i1 [[TMP6]] +; + %mwo = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %in, i64 168) + %mul = extractvalue { i64, i1 } %mwo, 0 + %ovf = extractvalue { i64, i1 } %mwo, 1 + %cmp = icmp ugt i64 %mul, -16 + %ret = or i1 %ovf, %cmp + ret i1 %ret +} + +define i1 @umul_greater_than_or_overflow_const_i8(i8 %in) { +; CHECK-LABEL: define i1 @umul_greater_than_or_overflow_const_i8( +; CHECK-SAME: i8 [[IN:%.*]]) { +; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i8 [[IN]], 10 +; CHECK-NEXT: ret i1 [[TMP6]] +; + %mwo = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 %in, i8 24) + %mul = extractvalue { i8, i1 } %mwo, 0 + %ovf = extractvalue { i8, i1 } %mwo, 1 + %cmp = icmp ugt i8 %mul, -16 + %ret = or i1 %ovf, %cmp + ret i1 %ret +} + +define i1 @umul_greater_than_or_overflow_const_commuted(i64 %in) { +; CHECK-LABEL: define i1 @umul_greater_than_or_overflow_const_commuted( +; CHECK-SAME: i64 [[IN:%.*]]) { +; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i64 [[IN]], 192153584101141162 +; CHECK-NEXT: ret i1 [[TMP6]] +; + %mwo = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %in, i64 48) + %mul = extractvalue { i64, i1 } %mwo, 0 + %ovf = extractvalue { i64, i1 } %mwo, 1 + %cmp = icmp ugt i64 %mul, 9223372036854775800 + %ret = or i1 %cmp, %ovf + ret i1 %ret +} + +define i1 @umul_greater_than_or_overflow_const_disjoint(i64 %in) { +; CHECK-LABEL: define i1 @umul_greater_than_or_overflow_const_disjoint( +; CHECK-SAME: i64 [[IN:%.*]]) { +; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i64 [[IN]], 230584300921369395 +; CHECK-NEXT: ret i1 [[TMP6]] +; + %mwo = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %in, i64 40) + %mul = extractvalue { i64, i1 } %mwo, 0 + %ovf = extractvalue { i64, i1 } %mwo, 1 + %cmp = icmp ugt 
i64 %mul, 9223372036854775800 + %ret = or disjoint i1 %ovf, %cmp + ret i1 %ret +} + +define i1 @umul_greater_than_or_overflow_const_multiuse_mul(i64 %in) { +; CHECK-LABEL: define i1 @umul_greater_than_or_overflow_const_multiuse_mul( +; CHECK-SAME: i64 [[IN:%.*]]) { +; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[IN]], 48 +; CHECK-NEXT: [[RET:%.*]] = icmp ugt i64 [[IN]], 192153584101141162 +; CHECK-NEXT: tail call void @use.i64(i64 [[MUL]]) +; CHECK-NEXT: ret i1 [[RET]] +; + %mwo = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %in, i64 48) + %mul = extractvalue { i64, i1 } %mwo, 0 + %ovf = extractvalue { i64, i1 } %mwo, 1 + %cmp = icmp ugt i64 %mul, 9223372036854775800 + %ret = or i1 %ovf, %cmp + tail call void @use.i64(i64 %mul) + ret i1 %ret +} + +define i1 @umul_greater_than_or_overflow_const_multiuse_overflow(i64 %in) { +; CHECK-LABEL: define i1 @umul_greater_than_or_overflow_const_multiuse_overflow( +; CHECK-SAME: i64 [[IN:%.*]]) { +; CHECK-NEXT: [[OVF:%.*]] = icmp ugt i64 [[IN]], 384307168202282325 +; CHECK-NEXT: [[RET:%.*]] = icmp ugt i64 [[IN]], 192153584101141162 +; CHECK-NEXT: tail call void @use.i1(i1 [[OVF]]) +; CHECK-NEXT: ret i1 [[RET]] +; + %mwo = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %in, i64 48) + %mul = extractvalue { i64, i1 } %mwo, 0 + %ovf = extractvalue { i64, i1 } %mwo, 1 + %cmp = icmp ugt i64 %mul, 9223372036854775800 + %ret = or i1 %ovf, %cmp + tail call void @use.i1(i1 %ovf) + ret i1 %ret +} + +define i1 @umul_greater_than_or_overflow_const_multiuse_umul_call(i64 %in) { +; CHECK-LABEL: define i1 @umul_greater_than_or_overflow_const_multiuse_umul_call( +; CHECK-SAME: i64 [[IN:%.*]]) { +; CHECK-NEXT: [[MWO:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[IN]], i64 48) +; CHECK-NEXT: [[RET:%.*]] = icmp ugt i64 [[IN]], 192153584101141162 +; CHECK-NEXT: tail call void @use.i64i1({ i64, i1 } [[MWO]]) +; CHECK-NEXT: ret i1 [[RET]] +; + %mwo = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %in, i64 48) + 
%mul = extractvalue { i64, i1 } %mwo, 0 + %ovf = extractvalue { i64, i1 } %mwo, 1 + %cmp = icmp ugt i64 %mul, 9223372036854775800 + %ret = or i1 %ovf, %cmp + tail call void @use.i64i1({ i64, i1 } %mwo) + ret i1 %ret +} + +define <2 x i1> @umul_greater_than_or_overflow_const_vector_splat(<2 x i64> %in) { +; CHECK-LABEL: define <2 x i1> @umul_greater_than_or_overflow_const_vector_splat( +; CHECK-SAME: <2 x i64> [[IN:%.*]]) { +; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt <2 x i64> [[IN]], splat (i64 6477087104532848) +; CHECK-NEXT: ret <2 x i1> [[TMP6]] +; + %mwo = tail call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> %in, <2 x i64> ) + %mul = extractvalue { <2 x i64>, <2 x i1> } %mwo, 0 + %ovf = extractvalue { <2 x i64>, <2 x i1> } %mwo, 1 + %cmp = icmp ugt <2 x i64> %mul, + %ret = or <2 x i1> %ovf, %cmp + ret <2 x i1> %ret +} + +; Negative test +define <4 x i1> @umul_greater_than_or_overflow_const_vector_non_splat_negative(<4 x i64> %in) { +; CHECK-LABEL: define <4 x i1> @umul_greater_than_or_overflow_const_vector_non_splat_negative( +; CHECK-SAME: <4 x i64> [[IN:%.*]]) { +; CHECK-NEXT: [[MWO:%.*]] = tail call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> [[IN]], <4 x i64> ) +; CHECK-NEXT: [[MUL:%.*]] = extractvalue { <4 x i64>, <4 x i1> } [[MWO]], 0 +; CHECK-NEXT: [[OVF:%.*]] = extractvalue { <4 x i64>, <4 x i1> } [[MWO]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <4 x i64> [[MUL]], +; CHECK-NEXT: [[RET:%.*]] = or <4 x i1> [[OVF]], [[CMP]] +; CHECK-NEXT: ret <4 x i1> [[RET]] +; + %mwo = tail call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v2i64(<4 x i64> %in, <4 x i64> ) + %mul = extractvalue { <4 x i64>, <4 x i1> } %mwo, 0 + %ovf = extractvalue { <4 x i64>, <4 x i1> } %mwo, 1 + %cmp = icmp ugt <4 x i64> %mul, + %ret = or <4 x i1> %ovf, %cmp + ret <4 x i1> %ret +} + +; Negative test +define <2 x i1> @umul_greater_than_or_overflow_const_vector_poison_non_splat_negative(<2 x i64> %in) { +; CHECK-LABEL: define <2 x i1> 
@umul_greater_than_or_overflow_const_vector_poison_non_splat_negative( +; CHECK-SAME: <2 x i64> [[IN:%.*]]) { +; CHECK-NEXT: [[MWO:%.*]] = tail call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> [[IN]], <2 x i64> ) +; CHECK-NEXT: [[MUL:%.*]] = extractvalue { <2 x i64>, <2 x i1> } [[MWO]], 0 +; CHECK-NEXT: [[OVF:%.*]] = extractvalue { <2 x i64>, <2 x i1> } [[MWO]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i64> [[MUL]], +; CHECK-NEXT: [[RET:%.*]] = or <2 x i1> [[OVF]], [[CMP]] +; CHECK-NEXT: ret <2 x i1> [[RET]] +; + %mwo = tail call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> %in, <2 x i64> ) + %mul = extractvalue { <2 x i64>, <2 x i1> } %mwo, 0 + %ovf = extractvalue { <2 x i64>, <2 x i1> } %mwo, 1 + %cmp = icmp ugt <2 x i64> %mul, + %ret = or <2 x i1> %ovf, %cmp + ret <2 x i1> %ret +} + +; Negative test +define i1 @umul_greater_than_and_overflow_const_negative(i64 %in) { +; CHECK-LABEL: define i1 @umul_greater_than_and_overflow_const_negative( +; CHECK-SAME: i64 [[IN:%.*]]) { +; CHECK-NEXT: [[TMP2:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[IN]], i64 48) +; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP3]], 9223372036854775800 +; CHECK-NEXT: [[TMP6:%.*]] = and i1 [[TMP4]], [[TMP5]] +; CHECK-NEXT: ret i1 [[TMP6]] +; + %mwo = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %in, i64 48) + %mul = extractvalue { i64, i1 } %mwo, 0 + %ovf = extractvalue { i64, i1 } %mwo, 1 + %cmp = icmp ult i64 %mul, 9223372036854775800 + %ret = and i1 %ovf, %cmp + ret i1 %ret +} + +; Negative test +define i1 @umul_less_than_or_overflow_const_negative(i64 %in) { +; CHECK-LABEL: define i1 @umul_less_than_or_overflow_const_negative( +; CHECK-SAME: i64 [[IN:%.*]]) { +; CHECK-NEXT: [[TMP2:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[IN]], i64 48) +; CHECK-NEXT: 
[[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP3]], 9223372036854775800 +; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]] +; CHECK-NEXT: ret i1 [[TMP6]] +; + %mwo = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %in, i64 48) + %mul = extractvalue { i64, i1 } %mwo, 0 + %ovf = extractvalue { i64, i1 } %mwo, 1 + %cmp = icmp ult i64 %mul, 9223372036854775800 + %ret = or i1 %ovf, %cmp + ret i1 %ret +} + +; Negative test +define i1 @umul_greater_than_or_overflow_const_multiuse_icmp_negative(i64 %in) { +; CHECK-LABEL: define i1 @umul_greater_than_or_overflow_const_multiuse_icmp_negative( +; CHECK-SAME: i64 [[IN:%.*]]) { +; CHECK-NEXT: [[TMP2:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[IN]], i64 48) +; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], 9223372036854775800 +; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]] +; CHECK-NEXT: tail call void @use.i1(i1 [[TMP5]]) +; CHECK-NEXT: ret i1 [[TMP6]] +; + %mwo = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %in, i64 48) + %mul = extractvalue { i64, i1 } %mwo, 0 + %ovf = extractvalue { i64, i1 } %mwo, 1 + %cmp = icmp ugt i64 %mul, 9223372036854775800 + %ret = or i1 %ovf, %cmp + tail call void @use.i1(i1 %cmp) + ret i1 %ret +} + +; Negative test. The umul.with.overflow should be folded away before. 
+define i1 @umul_greater_than_or_overflow_const_0_negative(i64 %in) { +; CHECK-LABEL: define i1 @umul_greater_than_or_overflow_const_0_negative( +; CHECK-SAME: i64 [[IN:%.*]]) { +; CHECK-NEXT: ret i1 false +; + %mwo = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %in, i64 0) + %mul = extractvalue { i64, i1 } %mwo, 0 + %ovf = extractvalue { i64, i1 } %mwo, 1 + %cmp = icmp ugt i64 %mul, 0 + %ret = or i1 %ovf, %cmp + ret i1 %ret +} diff --git a/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll b/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll index 3c1094f2ee31d..ff2527d5bb6ad 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll @@ -131,12 +131,11 @@ define i32 @negative_test_type_is_struct(i32 %c, ptr %a, ptr %b) { ; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: .LBB2_2: // %for.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldr w9, [x1] +; CHECK-NEXT: ldr w9, [x1], #4 ; CHECK-NEXT: cbnz w9, .LBB2_5 ; CHECK-NEXT: // %bb.3: // %for.cond ; CHECK-NEXT: // in Loop: Header=BB2_2 Depth=1 ; CHECK-NEXT: subs x8, x8, #1 -; CHECK-NEXT: add x1, x1, #4 ; CHECK-NEXT: b.ne .LBB2_2 ; CHECK-NEXT: .LBB2_4: ; CHECK-NEXT: mov w0, wzr diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll b/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll index 9fee8a390504a..61ef3cef603fa 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll @@ -14,16 +14,15 @@ define i64 @same_exit_block_pre_inc_use1() #0 { ; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) ; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 64 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 510, [[TMP1]] -; CHECK-NEXT: 
br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16 +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 64 +; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 510, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 510, [[N_MOD_VF]] ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 64 +; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 16 ; CHECK-NEXT: [[INDEX_NEXT:%.*]] = add i64 3, [[N_VEC]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: vector.body: @@ -31,43 +30,13 @@ define i64 @same_exit_block_pre_inc_use1() #0 { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 0 -; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP18]], 16 -; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 [[TMP19]] -; CHECK-NEXT: [[TMP36:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP37:%.*]] = mul nuw i64 [[TMP36]], 32 -; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 [[TMP37]] -; CHECK-NEXT: [[TMP39:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP40:%.*]] = mul nuw i64 [[TMP39]], 48 -; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 [[TMP40]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 1 -; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load , ptr [[TMP29]], align 1 -; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP38]], align 1 -; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load , ptr [[TMP41]], align 1 +; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load , ptr 
[[TMP8]], align 1 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 0 -; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 16 -; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 [[TMP21]] -; CHECK-NEXT: [[TMP23:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP24:%.*]] = mul nuw i64 [[TMP23]], 32 -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 [[TMP24]] -; CHECK-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP27:%.*]] = mul nuw i64 [[TMP26]], 48 -; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 [[TMP27]] -; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP10]], align 1 -; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load , ptr [[TMP22]], align 1 -; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load , ptr [[TMP25]], align 1 -; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load , ptr [[TMP28]], align 1 -; CHECK-NEXT: [[TMP11:%.*]] = icmp ne [[WIDE_LOAD]], [[WIDE_LOAD2]] -; CHECK-NEXT: [[TMP30:%.*]] = icmp ne [[WIDE_LOAD5]], [[WIDE_LOAD6]] -; CHECK-NEXT: [[TMP31:%.*]] = icmp ne [[WIDE_LOAD3]], [[WIDE_LOAD7]] +; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load , ptr [[TMP10]], align 1 ; CHECK-NEXT: [[TMP32:%.*]] = icmp ne [[WIDE_LOAD4]], [[WIDE_LOAD8]] ; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], [[TMP5]] -; CHECK-NEXT: [[TMP33:%.*]] = or [[TMP11]], [[TMP30]] -; CHECK-NEXT: [[TMP34:%.*]] = or [[TMP33]], [[TMP31]] -; CHECK-NEXT: [[TMP35:%.*]] = or [[TMP34]], [[TMP32]] -; CHECK-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1( [[TMP35]]) +; CHECK-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1( [[TMP32]]) ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT3]], [[N_VEC]] ; CHECK-NEXT: [[TMP14:%.*]] = or i1 [[TMP12]], [[TMP13]] ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP]], 
!llvm.loop [[LOOP0:![0-9]+]] @@ -77,26 +46,7 @@ define i64 @same_exit_block_pre_inc_use1() #0 { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 510, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_END:%.*]], label [[SCALAR_PH]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[TMP63:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP42:%.*]] = mul nuw i64 [[TMP63]], 16 -; CHECK-NEXT: [[TMP44:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( [[TMP32]], i1 true) -; CHECK-NEXT: [[TMP62:%.*]] = mul i64 [[TMP42]], 3 -; CHECK-NEXT: [[TMP45:%.*]] = add i64 [[TMP62]], [[TMP44]] -; CHECK-NEXT: [[TMP46:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( [[TMP31]], i1 true) -; CHECK-NEXT: [[TMP58:%.*]] = mul i64 [[TMP42]], 2 -; CHECK-NEXT: [[TMP50:%.*]] = add i64 [[TMP58]], [[TMP46]] -; CHECK-NEXT: [[TMP47:%.*]] = icmp ne i64 [[TMP46]], [[TMP42]] -; CHECK-NEXT: [[TMP51:%.*]] = select i1 [[TMP47]], i64 [[TMP50]], i64 [[TMP45]] -; CHECK-NEXT: [[TMP52:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( [[TMP30]], i1 true) -; CHECK-NEXT: [[TMP64:%.*]] = mul i64 [[TMP42]], 1 -; CHECK-NEXT: [[TMP56:%.*]] = add i64 [[TMP64]], [[TMP52]] -; CHECK-NEXT: [[TMP53:%.*]] = icmp ne i64 [[TMP52]], [[TMP42]] -; CHECK-NEXT: [[TMP57:%.*]] = select i1 [[TMP53]], i64 [[TMP56]], i64 [[TMP51]] -; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( [[TMP11]], i1 true) -; CHECK-NEXT: [[TMP65:%.*]] = mul i64 [[TMP42]], 0 -; CHECK-NEXT: [[TMP60:%.*]] = add i64 [[TMP65]], [[TMP15]] -; CHECK-NEXT: [[TMP59:%.*]] = icmp ne i64 [[TMP15]], [[TMP42]] -; CHECK-NEXT: [[TMP61:%.*]] = select i1 [[TMP59]], i64 [[TMP60]], i64 [[TMP57]] +; CHECK-NEXT: [[TMP61:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( [[TMP32]], i1 true) ; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX1]], [[TMP61]] ; CHECK-NEXT: [[TMP17:%.*]] = add i64 3, [[TMP16]] ; CHECK-NEXT: br label [[LOOP_END]] diff --git 
a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-derived-ivs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-derived-ivs.ll index 5efd821ba990f..3cde3f3422cf9 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-derived-ivs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-derived-ivs.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "scalar.ph\:" --version 5 ; RUN: opt -p loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S %s | FileCheck --check-prefixes=VF2 %s +; RUN: opt -p loop-vectorize -force-vector-width=2 -force-vector-interleave=2 -S %s | FileCheck --check-prefixes=VF2IC2 %s ; RUN: opt -p loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S %s | FileCheck --check-prefixes=VF4 %s target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32" @@ -25,14 +26,10 @@ define void @derived_int_ivs(ptr noalias %a, ptr noalias %b, i64 %end) { ; VF2-NEXT: [[TMP5:%.*]] = mul i64 [[INDEX]], 16 ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 16, [[TMP5]] ; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[OFFSET_IDX]] -; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x double>, ptr [[TMP6]], align 8 -; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x double> [[WIDE_VEC]], <4 x double> poison, <2 x i32> -; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x double> [[WIDE_VEC]], <4 x double> poison, <2 x i32> +; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP6]], align 8 ; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[OFFSET_IDX]] -; VF2-NEXT: [[TMP8:%.*]] = shufflevector <2 x double> [[STRIDED_VEC]], <2 x double> [[STRIDED_VEC1]], <4 x i32> -; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x double> 
[[TMP8]], <4 x double> poison, <4 x i32> -; VF2-NEXT: store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 8 -; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VF2-NEXT: store <2 x double> [[WIDE_LOAD]], ptr [[TMP7]], align 8 +; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 1 ; VF2-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VF2-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VF2: [[MIDDLE_BLOCK]]: @@ -40,6 +37,41 @@ define void @derived_int_ivs(ptr noalias %a, ptr noalias %b, i64 %end) { ; VF2-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]] ; VF2: [[SCALAR_PH]]: ; +; VF2IC2-LABEL: define void @derived_int_ivs( +; VF2IC2-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i64 [[END:%.*]]) { +; VF2IC2-NEXT: [[ENTRY:.*:]] +; VF2IC2-NEXT: [[TMP0:%.*]] = add i64 [[END]], -32 +; VF2IC2-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 4 +; VF2IC2-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 +; VF2IC2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4 +; VF2IC2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; VF2IC2: [[VECTOR_PH]]: +; VF2IC2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4 +; VF2IC2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] +; VF2IC2-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], 16 +; VF2IC2-NEXT: [[TMP4:%.*]] = add i64 16, [[TMP3]] +; VF2IC2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2IC2: [[VECTOR_BODY]]: +; VF2IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF2IC2-NEXT: [[TMP5:%.*]] = mul i64 [[INDEX]], 16 +; VF2IC2-NEXT: [[OFFSET_IDX:%.*]] = add i64 16, [[TMP5]] +; VF2IC2-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 16 +; VF2IC2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[OFFSET_IDX]] +; VF2IC2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP6]] +; VF2IC2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP7]], 
align 8 +; VF2IC2-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP8]], align 8 +; VF2IC2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[OFFSET_IDX]] +; VF2IC2-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP6]] +; VF2IC2-NEXT: store <2 x double> [[WIDE_LOAD]], ptr [[TMP9]], align 8 +; VF2IC2-NEXT: store <2 x double> [[WIDE_LOAD1]], ptr [[TMP10]], align 8 +; VF2IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VF2IC2-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VF2IC2-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; VF2IC2: [[MIDDLE_BLOCK]]: +; VF2IC2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] +; VF2IC2-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]] +; VF2IC2: [[SCALAR_PH]]: +; ; VF4-LABEL: define void @derived_int_ivs( ; VF4-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i64 [[END:%.*]]) { ; VF4-NEXT: [[ENTRY:.*:]] @@ -135,13 +167,9 @@ define void @derived_pointer_ivs(ptr noalias %a, ptr noalias %b, ptr %end) { ; VF2-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX]] ; VF2-NEXT: [[OFFSET_IDX6:%.*]] = mul i64 [[INDEX]], 16 ; VF2-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[B]], i64 [[OFFSET_IDX6]] -; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x double>, ptr [[NEXT_GEP]], align 8 -; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x double> [[WIDE_VEC]], <4 x double> poison, <2 x i32> -; VF2-NEXT: [[STRIDED_VEC8:%.*]] = shufflevector <4 x double> [[WIDE_VEC]], <4 x double> poison, <2 x i32> -; VF2-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[STRIDED_VEC]], <2 x double> [[STRIDED_VEC8]], <4 x i32> -; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x double> [[TMP13]], <4 x double> poison, <4 x i32> -; VF2-NEXT: store <4 x double> [[INTERLEAVED_VEC]], ptr [[NEXT_GEP7]], align 8 -; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x 
double>, ptr [[NEXT_GEP]], align 8 +; VF2-NEXT: store <2 x double> [[WIDE_LOAD]], ptr [[NEXT_GEP7]], align 8 +; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 1 ; VF2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VF2-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; VF2: [[MIDDLE_BLOCK]]: @@ -149,6 +177,61 @@ define void @derived_pointer_ivs(ptr noalias %a, ptr noalias %b, ptr %end) { ; VF2-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]] ; VF2: [[SCALAR_PH]]: ; +; VF2IC2-LABEL: define void @derived_pointer_ivs( +; VF2IC2-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr [[END:%.*]]) { +; VF2IC2-NEXT: [[ENTRY:.*:]] +; VF2IC2-NEXT: [[A5:%.*]] = ptrtoint ptr [[A]] to i64 +; VF2IC2-NEXT: [[END4:%.*]] = ptrtoint ptr [[END]] to i64 +; VF2IC2-NEXT: [[A2:%.*]] = ptrtoint ptr [[A]] to i64 +; VF2IC2-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64 +; VF2IC2-NEXT: [[TMP0:%.*]] = add i64 [[END4]], -16 +; VF2IC2-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[A5]] +; VF2IC2-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 4 +; VF2IC2-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 +; VF2IC2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4 +; VF2IC2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] +; VF2IC2: [[VECTOR_MEMCHECK]]: +; VF2IC2-NEXT: [[TMP4:%.*]] = add i64 [[END1]], -16 +; VF2IC2-NEXT: [[TMP5:%.*]] = sub i64 [[TMP4]], [[A2]] +; VF2IC2-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP5]], 4 +; VF2IC2-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP6]], 4 +; VF2IC2-NEXT: [[TMP8:%.*]] = add i64 [[TMP7]], 16 +; VF2IC2-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP8]] +; VF2IC2-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP8]] +; VF2IC2-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[B]], [[SCEVGEP3]] +; VF2IC2-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[A]], [[SCEVGEP]] +; VF2IC2-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; 
VF2IC2-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; VF2IC2: [[VECTOR_PH]]: +; VF2IC2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4 +; VF2IC2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] +; VF2IC2-NEXT: [[TMP9:%.*]] = mul i64 [[N_VEC]], 16 +; VF2IC2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP9]] +; VF2IC2-NEXT: [[TMP11:%.*]] = mul i64 [[N_VEC]], 16 +; VF2IC2-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP11]] +; VF2IC2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2IC2: [[VECTOR_BODY]]: +; VF2IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF2IC2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 16 +; VF2IC2-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 16 +; VF2IC2-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX]] +; VF2IC2-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP13]] +; VF2IC2-NEXT: [[OFFSET_IDX7:%.*]] = mul i64 [[INDEX]], 16 +; VF2IC2-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX7]], 16 +; VF2IC2-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[B]], i64 [[OFFSET_IDX7]] +; VF2IC2-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP14]] +; VF2IC2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[NEXT_GEP]], align 8 +; VF2IC2-NEXT: [[WIDE_LOAD10:%.*]] = load <2 x double>, ptr [[NEXT_GEP6]], align 8 +; VF2IC2-NEXT: store <2 x double> [[WIDE_LOAD]], ptr [[NEXT_GEP8]], align 8 +; VF2IC2-NEXT: store <2 x double> [[WIDE_LOAD10]], ptr [[NEXT_GEP9]], align 8 +; VF2IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VF2IC2-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VF2IC2-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; VF2IC2: [[MIDDLE_BLOCK]]: +; VF2IC2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] +; VF2IC2-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]] +; VF2IC2: [[SCALAR_PH]]: +; ; VF4-LABEL: 
define void @derived_pointer_ivs( ; VF4-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr [[END:%.*]]) { ; VF4-NEXT: [[ENTRY:.*:]] @@ -235,21 +318,43 @@ define void @narrow_with_uniform_add_and_gep(ptr noalias %p) { ; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 ; VF2-NEXT: [[TMP0:%.*]] = add nuw nsw i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP0]] -; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8 -; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> -; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> -; VF2-NEXT: [[TMP2:%.*]] = add <2 x i64> [[STRIDED_VEC]], splat (i64 1) +; VF2-NEXT: [[STRIDED_VEC1:%.*]] = load <2 x i64>, ptr [[TMP1]], align 8 ; VF2-NEXT: [[TMP3:%.*]] = add <2 x i64> [[STRIDED_VEC1]], splat (i64 1) -; VF2-NEXT: [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <4 x i32> -; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> poison, <4 x i32> -; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8 -; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VF2-NEXT: store <2 x i64> [[TMP3]], ptr [[TMP1]], align 8 +; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 1 ; VF2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512 ; VF2-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; VF2: [[MIDDLE_BLOCK]]: ; VF2-NEXT: br i1 true, [[EXIT:label %.*]], label %[[SCALAR_PH]] ; VF2: [[SCALAR_PH]]: ; +; VF2IC2-LABEL: define void @narrow_with_uniform_add_and_gep( +; VF2IC2-SAME: ptr noalias [[P:%.*]]) { +; VF2IC2-NEXT: [[ENTRY:.*:]] +; VF2IC2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; VF2IC2: [[VECTOR_PH]]: +; VF2IC2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2IC2: [[VECTOR_BODY]]: +; VF2IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ 
[[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF2IC2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 +; VF2IC2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 2 +; VF2IC2-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[OFFSET_IDX]], 0 +; VF2IC2-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP0]], 0 +; VF2IC2-NEXT: [[TMP3:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP1]] +; VF2IC2-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP2]] +; VF2IC2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8 +; VF2IC2-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8 +; VF2IC2-NEXT: [[TMP5:%.*]] = add <2 x i64> [[WIDE_LOAD]], splat (i64 1) +; VF2IC2-NEXT: [[TMP6:%.*]] = add <2 x i64> [[WIDE_LOAD1]], splat (i64 1) +; VF2IC2-NEXT: store <2 x i64> [[TMP5]], ptr [[TMP3]], align 8 +; VF2IC2-NEXT: store <2 x i64> [[TMP6]], ptr [[TMP4]], align 8 +; VF2IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VF2IC2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512 +; VF2IC2-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; VF2IC2: [[MIDDLE_BLOCK]]: +; VF2IC2-NEXT: br i1 true, [[EXIT:label %.*]], label %[[SCALAR_PH]] +; VF2IC2: [[SCALAR_PH]]: +; ; VF4-LABEL: define void @narrow_with_uniform_add_and_gep( ; VF4-SAME: ptr noalias [[P:%.*]]) { ; VF4-NEXT: [[ENTRY:.*:]] diff --git a/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll b/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll index 6dc17e0993a22..0f99ed576f1fe 100644 --- a/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll @@ -15,22 +15,10 @@ define i64 @multi_exiting_to_different_exits_live_in_exit_values() { ; VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] ; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
i32, ptr [[TMP0]], i32 0 -; VF4IC4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 -; VF4IC4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 8 -; VF4IC4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 12 -; VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 -; VF4IC4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 4 -; VF4IC4-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP13]], align 4 -; VF4IC4-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP14]], align 4 -; VF4IC4-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 10) -; VF4IC4-NEXT: [[TMP6:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD1]], splat (i32 10) -; VF4IC4-NEXT: [[TMP7:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD2]], splat (i32 10) +; VF4IC4-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; VF4IC4-NEXT: [[TMP8:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD3]], splat (i32 10) -; VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; VF4IC4-NEXT: [[TMP9:%.*]] = or <4 x i1> [[TMP2]], [[TMP6]] -; VF4IC4-NEXT: [[TMP10:%.*]] = or <4 x i1> [[TMP9]], [[TMP7]] -; VF4IC4-NEXT: [[TMP11:%.*]] = or <4 x i1> [[TMP10]], [[TMP8]] -; VF4IC4-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP11]]) +; VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; VF4IC4-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP8]]) ; VF4IC4-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 ; VF4IC4-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]] ; VF4IC4-NEXT: br i1 [[TMP5]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -101,31 +89,13 @@ define i64 @same_exit_block_pre_inc_use1() { ; VF4IC4-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX]] ; VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]] ; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 0 -; VF4IC4-NEXT: [[TMP2:%.*]] = 
getelementptr inbounds i8, ptr [[TMP0]], i32 4 -; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 8 -; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 12 -; VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 -; VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1 -; VF4IC4-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 -; VF4IC4-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP17]], align 1 +; VF4IC4-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 ; VF4IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]] ; VF4IC4-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP18]], i32 0 -; VF4IC4-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[TMP18]], i32 4 -; VF4IC4-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[TMP18]], i32 8 -; VF4IC4-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[TMP18]], i32 12 -; VF4IC4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP19]], align 1 -; VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP20]], align 1 -; VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i8>, ptr [[TMP21]], align 1 -; VF4IC4-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i8>, ptr [[TMP22]], align 1 -; VF4IC4-NEXT: [[TMP4:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]] -; VF4IC4-NEXT: [[TMP11:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD4]], [[WIDE_LOAD5]] -; VF4IC4-NEXT: [[TMP12:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD2]], [[WIDE_LOAD6]] +; VF4IC4-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i8>, ptr [[TMP19]], align 1 ; VF4IC4-NEXT: [[TMP13:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD3]], [[WIDE_LOAD7]] -; VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; VF4IC4-NEXT: [[TMP14:%.*]] = or <4 x i1> [[TMP4]], [[TMP11]] -; VF4IC4-NEXT: [[TMP15:%.*]] = or <4 x i1> [[TMP14]], [[TMP12]] -; VF4IC4-NEXT: [[TMP16:%.*]] = or <4 x i1> [[TMP15]], [[TMP13]] -; VF4IC4-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 
x i1> [[TMP16]]) +; VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; VF4IC4-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP13]]) ; VF4IC4-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 ; VF4IC4-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] ; VF4IC4-NEXT: br i1 [[TMP7]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -134,20 +104,7 @@ define i64 @same_exit_block_pre_inc_use1() { ; VF4IC4: middle.block: ; VF4IC4-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]] ; VF4IC4: vector.early.exit: -; VF4IC4-NEXT: [[TMP33:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP13]], i1 true) -; VF4IC4-NEXT: [[TMP34:%.*]] = add i64 12, [[TMP33]] -; VF4IC4-NEXT: [[TMP35:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP12]], i1 true) -; VF4IC4-NEXT: [[TMP24:%.*]] = add i64 8, [[TMP35]] -; VF4IC4-NEXT: [[TMP23:%.*]] = icmp ne i64 [[TMP35]], 4 -; VF4IC4-NEXT: [[TMP25:%.*]] = select i1 [[TMP23]], i64 [[TMP24]], i64 [[TMP34]] -; VF4IC4-NEXT: [[TMP26:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP11]], i1 true) -; VF4IC4-NEXT: [[TMP28:%.*]] = add i64 4, [[TMP26]] -; VF4IC4-NEXT: [[TMP27:%.*]] = icmp ne i64 [[TMP26]], 4 -; VF4IC4-NEXT: [[TMP29:%.*]] = select i1 [[TMP27]], i64 [[TMP28]], i64 [[TMP25]] -; VF4IC4-NEXT: [[TMP30:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 true) -; VF4IC4-NEXT: [[TMP32:%.*]] = add i64 0, [[TMP30]] -; VF4IC4-NEXT: [[TMP31:%.*]] = icmp ne i64 [[TMP30]], 4 -; VF4IC4-NEXT: [[TMP8:%.*]] = select i1 [[TMP31]], i64 [[TMP32]], i64 [[TMP29]] +; VF4IC4-NEXT: [[TMP8:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP13]], i1 true) ; VF4IC4-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], [[TMP8]] ; VF4IC4-NEXT: [[TMP10:%.*]] = add i64 3, [[TMP9]] ; VF4IC4-NEXT: br label [[LOOP_END]] @@ -210,22 +167,10 @@ define ptr @same_exit_block_pre_inc_use1_ivptr() { ; VF4IC4-NEXT: [[INDEX:%.*]] = phi 
i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4IC4-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[P1]], i64 [[INDEX]] ; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0 -; VF4IC4-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 4 -; VF4IC4-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 8 -; VF4IC4-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 12 -; VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 -; VF4IC4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP12]], align 1 -; VF4IC4-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP13]], align 1 -; VF4IC4-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP14]], align 1 -; VF4IC4-NEXT: [[TMP2:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], splat (i8 72) -; VF4IC4-NEXT: [[TMP15:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD1]], splat (i8 72) -; VF4IC4-NEXT: [[TMP16:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD2]], splat (i8 72) +; VF4IC4-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 ; VF4IC4-NEXT: [[TMP17:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD3]], splat (i8 72) -; VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; VF4IC4-NEXT: [[TMP9:%.*]] = or <4 x i1> [[TMP2]], [[TMP15]] -; VF4IC4-NEXT: [[TMP10:%.*]] = or <4 x i1> [[TMP9]], [[TMP16]] -; VF4IC4-NEXT: [[TMP11:%.*]] = or <4 x i1> [[TMP10]], [[TMP17]] -; VF4IC4-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP11]]) +; VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; VF4IC4-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP17]]) ; VF4IC4-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; VF4IC4-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]] ; VF4IC4-NEXT: br i1 [[TMP5]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -234,20 +179,7 @@ define ptr @same_exit_block_pre_inc_use1_ivptr() { ; VF4IC4: middle.block: ; VF4IC4-NEXT: br i1 true, label [[LOOP_END:%.*]], label 
[[SCALAR_PH]] ; VF4IC4: vector.early.exit: -; VF4IC4-NEXT: [[TMP28:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP17]], i1 true) -; VF4IC4-NEXT: [[TMP29:%.*]] = add i64 12, [[TMP28]] -; VF4IC4-NEXT: [[TMP30:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP16]], i1 true) -; VF4IC4-NEXT: [[TMP19:%.*]] = add i64 8, [[TMP30]] -; VF4IC4-NEXT: [[TMP18:%.*]] = icmp ne i64 [[TMP30]], 4 -; VF4IC4-NEXT: [[TMP20:%.*]] = select i1 [[TMP18]], i64 [[TMP19]], i64 [[TMP29]] -; VF4IC4-NEXT: [[TMP21:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP15]], i1 true) -; VF4IC4-NEXT: [[TMP23:%.*]] = add i64 4, [[TMP21]] -; VF4IC4-NEXT: [[TMP22:%.*]] = icmp ne i64 [[TMP21]], 4 -; VF4IC4-NEXT: [[TMP24:%.*]] = select i1 [[TMP22]], i64 [[TMP23]], i64 [[TMP20]] -; VF4IC4-NEXT: [[TMP25:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP2]], i1 true) -; VF4IC4-NEXT: [[TMP27:%.*]] = add i64 0, [[TMP25]] -; VF4IC4-NEXT: [[TMP26:%.*]] = icmp ne i64 [[TMP25]], 4 -; VF4IC4-NEXT: [[TMP6:%.*]] = select i1 [[TMP26]], i64 [[TMP27]], i64 [[TMP24]] +; VF4IC4-NEXT: [[TMP6:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP17]], i1 true) ; VF4IC4-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], [[TMP6]] ; VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP7]] ; VF4IC4-NEXT: br label [[LOOP_END]] @@ -304,31 +236,13 @@ define i64 @same_exit_block_post_inc_use() { ; VF4IC4-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX]] ; VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]] ; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 0 -; VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 4 -; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 8 -; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 12 -; VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 -; VF4IC4-NEXT: 
[[WIDE_LOAD4:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1 -; VF4IC4-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 -; VF4IC4-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP17]], align 1 +; VF4IC4-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 ; VF4IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]] ; VF4IC4-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP18]], i32 0 -; VF4IC4-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[TMP18]], i32 4 -; VF4IC4-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[TMP18]], i32 8 -; VF4IC4-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[TMP18]], i32 12 -; VF4IC4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP19]], align 1 -; VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP20]], align 1 -; VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i8>, ptr [[TMP21]], align 1 -; VF4IC4-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i8>, ptr [[TMP22]], align 1 -; VF4IC4-NEXT: [[TMP4:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]] -; VF4IC4-NEXT: [[TMP11:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD4]], [[WIDE_LOAD5]] -; VF4IC4-NEXT: [[TMP12:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD2]], [[WIDE_LOAD6]] +; VF4IC4-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i8>, ptr [[TMP19]], align 1 ; VF4IC4-NEXT: [[TMP13:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD3]], [[WIDE_LOAD7]] -; VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; VF4IC4-NEXT: [[TMP14:%.*]] = or <4 x i1> [[TMP4]], [[TMP11]] -; VF4IC4-NEXT: [[TMP15:%.*]] = or <4 x i1> [[TMP14]], [[TMP12]] -; VF4IC4-NEXT: [[TMP16:%.*]] = or <4 x i1> [[TMP15]], [[TMP13]] -; VF4IC4-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP16]]) +; VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; VF4IC4-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP13]]) ; VF4IC4-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 ; VF4IC4-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] ; VF4IC4-NEXT: 
br i1 [[TMP7]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] @@ -337,20 +251,7 @@ define i64 @same_exit_block_post_inc_use() { ; VF4IC4: middle.block: ; VF4IC4-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]] ; VF4IC4: vector.early.exit: -; VF4IC4-NEXT: [[TMP33:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP13]], i1 true) -; VF4IC4-NEXT: [[TMP34:%.*]] = add i64 12, [[TMP33]] -; VF4IC4-NEXT: [[TMP35:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP12]], i1 true) -; VF4IC4-NEXT: [[TMP24:%.*]] = add i64 8, [[TMP35]] -; VF4IC4-NEXT: [[TMP23:%.*]] = icmp ne i64 [[TMP35]], 4 -; VF4IC4-NEXT: [[TMP25:%.*]] = select i1 [[TMP23]], i64 [[TMP24]], i64 [[TMP34]] -; VF4IC4-NEXT: [[TMP26:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP11]], i1 true) -; VF4IC4-NEXT: [[TMP28:%.*]] = add i64 4, [[TMP26]] -; VF4IC4-NEXT: [[TMP27:%.*]] = icmp ne i64 [[TMP26]], 4 -; VF4IC4-NEXT: [[TMP29:%.*]] = select i1 [[TMP27]], i64 [[TMP28]], i64 [[TMP25]] -; VF4IC4-NEXT: [[TMP30:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 true) -; VF4IC4-NEXT: [[TMP32:%.*]] = add i64 0, [[TMP30]] -; VF4IC4-NEXT: [[TMP31:%.*]] = icmp ne i64 [[TMP30]], 4 -; VF4IC4-NEXT: [[TMP8:%.*]] = select i1 [[TMP31]], i64 [[TMP32]], i64 [[TMP29]] +; VF4IC4-NEXT: [[TMP8:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP13]], i1 true) ; VF4IC4-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], [[TMP8]] ; VF4IC4-NEXT: [[TMP10:%.*]] = add i64 3, [[TMP9]] ; VF4IC4-NEXT: br label [[LOOP_END]] @@ -414,31 +315,13 @@ define i64 @diff_exit_block_pre_inc_use1() { ; VF4IC4-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX]] ; VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]] ; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 0 -; VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 4 -; VF4IC4-NEXT: [[TMP3:%.*]] = 
getelementptr inbounds i8, ptr [[TMP0]], i32 8 -; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 12 -; VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 -; VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1 -; VF4IC4-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 -; VF4IC4-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP17]], align 1 +; VF4IC4-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 ; VF4IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]] ; VF4IC4-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP18]], i32 0 -; VF4IC4-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[TMP18]], i32 4 -; VF4IC4-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[TMP18]], i32 8 -; VF4IC4-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[TMP18]], i32 12 -; VF4IC4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP19]], align 1 -; VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP20]], align 1 -; VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i8>, ptr [[TMP21]], align 1 -; VF4IC4-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i8>, ptr [[TMP22]], align 1 -; VF4IC4-NEXT: [[TMP4:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]] -; VF4IC4-NEXT: [[TMP11:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD4]], [[WIDE_LOAD5]] -; VF4IC4-NEXT: [[TMP12:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD2]], [[WIDE_LOAD6]] +; VF4IC4-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i8>, ptr [[TMP19]], align 1 ; VF4IC4-NEXT: [[TMP13:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD3]], [[WIDE_LOAD7]] -; VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; VF4IC4-NEXT: [[TMP14:%.*]] = or <4 x i1> [[TMP4]], [[TMP11]] -; VF4IC4-NEXT: [[TMP15:%.*]] = or <4 x i1> [[TMP14]], [[TMP12]] -; VF4IC4-NEXT: [[TMP16:%.*]] = or <4 x i1> [[TMP15]], [[TMP13]] -; VF4IC4-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP16]]) +; VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; 
VF4IC4-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP13]]) ; VF4IC4-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 ; VF4IC4-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] ; VF4IC4-NEXT: br i1 [[TMP7]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] @@ -447,20 +330,7 @@ define i64 @diff_exit_block_pre_inc_use1() { ; VF4IC4: middle.block: ; VF4IC4-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]] ; VF4IC4: vector.early.exit: -; VF4IC4-NEXT: [[TMP33:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP13]], i1 true) -; VF4IC4-NEXT: [[TMP34:%.*]] = add i64 12, [[TMP33]] -; VF4IC4-NEXT: [[TMP35:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP12]], i1 true) -; VF4IC4-NEXT: [[TMP24:%.*]] = add i64 8, [[TMP35]] -; VF4IC4-NEXT: [[TMP23:%.*]] = icmp ne i64 [[TMP35]], 4 -; VF4IC4-NEXT: [[TMP25:%.*]] = select i1 [[TMP23]], i64 [[TMP24]], i64 [[TMP34]] -; VF4IC4-NEXT: [[TMP26:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP11]], i1 true) -; VF4IC4-NEXT: [[TMP28:%.*]] = add i64 4, [[TMP26]] -; VF4IC4-NEXT: [[TMP27:%.*]] = icmp ne i64 [[TMP26]], 4 -; VF4IC4-NEXT: [[TMP29:%.*]] = select i1 [[TMP27]], i64 [[TMP28]], i64 [[TMP25]] -; VF4IC4-NEXT: [[TMP30:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 true) -; VF4IC4-NEXT: [[TMP32:%.*]] = add i64 0, [[TMP30]] -; VF4IC4-NEXT: [[TMP31:%.*]] = icmp ne i64 [[TMP30]], 4 -; VF4IC4-NEXT: [[TMP8:%.*]] = select i1 [[TMP31]], i64 [[TMP32]], i64 [[TMP29]] +; VF4IC4-NEXT: [[TMP8:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP13]], i1 true) ; VF4IC4-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], [[TMP8]] ; VF4IC4-NEXT: [[TMP10:%.*]] = add i64 3, [[TMP9]] ; VF4IC4-NEXT: br label [[LOOP_EARLY_EXIT:%.*]] @@ -531,31 +401,13 @@ define i64 @diff_exit_block_post_inc_use1() { ; VF4IC4-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX]] ; VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr 
inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]] ; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 0 -; VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 4 -; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 8 -; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 12 -; VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 -; VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1 -; VF4IC4-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 -; VF4IC4-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP17]], align 1 +; VF4IC4-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 ; VF4IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]] ; VF4IC4-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP18]], i32 0 -; VF4IC4-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[TMP18]], i32 4 -; VF4IC4-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[TMP18]], i32 8 -; VF4IC4-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[TMP18]], i32 12 -; VF4IC4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP19]], align 1 -; VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP20]], align 1 -; VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i8>, ptr [[TMP21]], align 1 -; VF4IC4-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i8>, ptr [[TMP22]], align 1 -; VF4IC4-NEXT: [[TMP4:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]] -; VF4IC4-NEXT: [[TMP11:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD4]], [[WIDE_LOAD5]] -; VF4IC4-NEXT: [[TMP12:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD2]], [[WIDE_LOAD6]] +; VF4IC4-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i8>, ptr [[TMP19]], align 1 ; VF4IC4-NEXT: [[TMP13:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD3]], [[WIDE_LOAD7]] -; VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; VF4IC4-NEXT: [[TMP14:%.*]] = or <4 x i1> [[TMP4]], [[TMP11]] -; VF4IC4-NEXT: [[TMP15:%.*]] = or <4 x i1> 
[[TMP14]], [[TMP12]] -; VF4IC4-NEXT: [[TMP16:%.*]] = or <4 x i1> [[TMP15]], [[TMP13]] -; VF4IC4-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP16]]) +; VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; VF4IC4-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP13]]) ; VF4IC4-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 ; VF4IC4-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] ; VF4IC4-NEXT: br i1 [[TMP7]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] @@ -564,20 +416,7 @@ define i64 @diff_exit_block_post_inc_use1() { ; VF4IC4: middle.block: ; VF4IC4-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]] ; VF4IC4: vector.early.exit: -; VF4IC4-NEXT: [[TMP33:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP13]], i1 true) -; VF4IC4-NEXT: [[TMP34:%.*]] = add i64 12, [[TMP33]] -; VF4IC4-NEXT: [[TMP35:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP12]], i1 true) -; VF4IC4-NEXT: [[TMP24:%.*]] = add i64 8, [[TMP35]] -; VF4IC4-NEXT: [[TMP23:%.*]] = icmp ne i64 [[TMP35]], 4 -; VF4IC4-NEXT: [[TMP25:%.*]] = select i1 [[TMP23]], i64 [[TMP24]], i64 [[TMP34]] -; VF4IC4-NEXT: [[TMP26:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP11]], i1 true) -; VF4IC4-NEXT: [[TMP28:%.*]] = add i64 4, [[TMP26]] -; VF4IC4-NEXT: [[TMP27:%.*]] = icmp ne i64 [[TMP26]], 4 -; VF4IC4-NEXT: [[TMP29:%.*]] = select i1 [[TMP27]], i64 [[TMP28]], i64 [[TMP25]] -; VF4IC4-NEXT: [[TMP30:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 true) -; VF4IC4-NEXT: [[TMP32:%.*]] = add i64 0, [[TMP30]] -; VF4IC4-NEXT: [[TMP31:%.*]] = icmp ne i64 [[TMP30]], 4 -; VF4IC4-NEXT: [[TMP8:%.*]] = select i1 [[TMP31]], i64 [[TMP32]], i64 [[TMP29]] +; VF4IC4-NEXT: [[TMP8:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP13]], i1 true) ; VF4IC4-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], [[TMP8]] ; VF4IC4-NEXT: [[TMP10:%.*]] = add 
i64 3, [[TMP9]] ; VF4IC4-NEXT: br label [[LOOP_EARLY_EXIT:%.*]] @@ -648,48 +487,18 @@ define i64 @same_exit_block_pre_inc_use1_reverse() { ; VF4IC4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] ; VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]] ; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 0 -; VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 -3 -; VF4IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 -4 -; VF4IC4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP18]], i32 -3 -; VF4IC4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 -8 -; VF4IC4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i32 -3 -; VF4IC4-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 -12 -; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 -3 -; VF4IC4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1 -; VF4IC4-NEXT: [[REVERSE2:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD1]], <4 x i8> poison, <4 x i32> -; VF4IC4-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x i8>, ptr [[TMP13]], align 1 -; VF4IC4-NEXT: [[REVERSE10:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD9]], <4 x i8> poison, <4 x i32> -; VF4IC4-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x i8>, ptr [[TMP15]], align 1 -; VF4IC4-NEXT: [[REVERSE12:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD11]], <4 x i8> poison, <4 x i32> +; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 -3 ; VF4IC4-NEXT: [[WIDE_LOAD13:%.*]] = load <4 x i8>, ptr [[TMP17]], align 1 ; VF4IC4-NEXT: [[REVERSE14:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD13]], <4 x i8> poison, <4 x i32> ; VF4IC4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]] ; VF4IC4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i32 0 -; VF4IC4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[TMP26]], i32 -3 -; VF4IC4-NEXT: [[TMP28:%.*]] = 
getelementptr inbounds i8, ptr [[TMP25]], i32 -4 -; VF4IC4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[TMP28]], i32 -3 -; VF4IC4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i32 -8 -; VF4IC4-NEXT: [[TMP44:%.*]] = getelementptr inbounds i8, ptr [[TMP30]], i32 -3 -; VF4IC4-NEXT: [[TMP45:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i32 -12 -; VF4IC4-NEXT: [[TMP46:%.*]] = getelementptr inbounds i8, ptr [[TMP45]], i32 -3 -; VF4IC4-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i8>, ptr [[TMP27]], align 1 -; VF4IC4-NEXT: [[REVERSE8:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD7]], <4 x i8> poison, <4 x i32> -; VF4IC4-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x i8>, ptr [[TMP29]], align 1 -; VF4IC4-NEXT: [[REVERSE11:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD10]], <4 x i8> poison, <4 x i32> -; VF4IC4-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x i8>, ptr [[TMP44]], align 1 -; VF4IC4-NEXT: [[REVERSE13:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD12]], <4 x i8> poison, <4 x i32> +; VF4IC4-NEXT: [[TMP46:%.*]] = getelementptr inbounds i8, ptr [[TMP26]], i32 -3 ; VF4IC4-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x i8>, ptr [[TMP46]], align 1 ; VF4IC4-NEXT: [[REVERSE15:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD14]], <4 x i8> poison, <4 x i32> -; VF4IC4-NEXT: [[TMP6:%.*]] = icmp ne <4 x i8> [[REVERSE2]], [[REVERSE8]] -; VF4IC4-NEXT: [[TMP19:%.*]] = icmp ne <4 x i8> [[REVERSE10]], [[REVERSE11]] -; VF4IC4-NEXT: [[TMP20:%.*]] = icmp ne <4 x i8> [[REVERSE12]], [[REVERSE13]] ; VF4IC4-NEXT: [[TMP21:%.*]] = icmp ne <4 x i8> [[REVERSE14]], [[REVERSE15]] -; VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; VF4IC4-NEXT: [[TMP22:%.*]] = or <4 x i1> [[TMP6]], [[TMP19]] -; VF4IC4-NEXT: [[TMP23:%.*]] = or <4 x i1> [[TMP22]], [[TMP20]] -; VF4IC4-NEXT: [[TMP24:%.*]] = or <4 x i1> [[TMP23]], [[TMP21]] -; VF4IC4-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP24]]) -; VF4IC4-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1008 +; VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw 
i64 [[INDEX]], 4 +; VF4IC4-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP21]]) +; VF4IC4-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1020 ; VF4IC4-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] ; VF4IC4-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; VF4IC4: middle.split: @@ -697,25 +506,12 @@ define i64 @same_exit_block_pre_inc_use1_reverse() { ; VF4IC4: middle.block: ; VF4IC4-NEXT: br i1 false, label [[LOOP_END:%.*]], label [[SCALAR_PH]] ; VF4IC4: vector.early.exit: -; VF4IC4-NEXT: [[TMP41:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP21]], i1 true) -; VF4IC4-NEXT: [[TMP42:%.*]] = add i64 12, [[TMP41]] -; VF4IC4-NEXT: [[TMP43:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP20]], i1 true) -; VF4IC4-NEXT: [[TMP32:%.*]] = add i64 8, [[TMP43]] -; VF4IC4-NEXT: [[TMP31:%.*]] = icmp ne i64 [[TMP43]], 4 -; VF4IC4-NEXT: [[TMP33:%.*]] = select i1 [[TMP31]], i64 [[TMP32]], i64 [[TMP42]] -; VF4IC4-NEXT: [[TMP34:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP19]], i1 true) -; VF4IC4-NEXT: [[TMP36:%.*]] = add i64 4, [[TMP34]] -; VF4IC4-NEXT: [[TMP35:%.*]] = icmp ne i64 [[TMP34]], 4 -; VF4IC4-NEXT: [[TMP37:%.*]] = select i1 [[TMP35]], i64 [[TMP36]], i64 [[TMP33]] -; VF4IC4-NEXT: [[TMP38:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) -; VF4IC4-NEXT: [[TMP40:%.*]] = add i64 0, [[TMP38]] -; VF4IC4-NEXT: [[TMP39:%.*]] = icmp ne i64 [[TMP38]], 4 -; VF4IC4-NEXT: [[TMP10:%.*]] = select i1 [[TMP39]], i64 [[TMP40]], i64 [[TMP37]] +; VF4IC4-NEXT: [[TMP10:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP21]], i1 true) ; VF4IC4-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], [[TMP10]] ; VF4IC4-NEXT: [[TMP12:%.*]] = sub i64 1023, [[TMP11]] ; VF4IC4-NEXT: br label [[LOOP_END]] ; VF4IC4: scalar.ph: -; VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 15, [[MIDDLE_BLOCK]] ], [ 1023, [[ENTRY:%.*]] ] 
+; VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 3, [[MIDDLE_BLOCK]] ], [ 1023, [[ENTRY:%.*]] ] ; VF4IC4-NEXT: br label [[LOOP:%.*]] ; VF4IC4: loop: ; VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -774,31 +570,13 @@ define i8 @same_exit_block_use_loaded_value() { ; VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] ; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 0 -; VF4IC4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 4 -; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 8 -; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 12 -; VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 -; VF4IC4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP10]], align 1 -; VF4IC4-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 -; VF4IC4-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP17]], align 1 +; VF4IC4-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 ; VF4IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] ; VF4IC4-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP18]], i32 0 -; VF4IC4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[TMP18]], i32 4 -; VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP18]], i32 8 -; VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP18]], i32 12 -; VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i8>, ptr [[TMP19]], align 1 -; VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP29]], align 1 -; VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i8>, ptr [[TMP8]], align 1 -; VF4IC4-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i8>, ptr [[TMP9]], align 1 -; VF4IC4-NEXT: [[TMP4:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD4]] -; VF4IC4-NEXT: 
[[TMP11:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD1]], [[WIDE_LOAD5]] -; VF4IC4-NEXT: [[TMP12:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD2]], [[WIDE_LOAD6]] +; VF4IC4-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i8>, ptr [[TMP19]], align 1 ; VF4IC4-NEXT: [[TMP13:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD3]], [[WIDE_LOAD7]] -; VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; VF4IC4-NEXT: [[TMP14:%.*]] = or <4 x i1> [[TMP4]], [[TMP11]] -; VF4IC4-NEXT: [[TMP15:%.*]] = or <4 x i1> [[TMP14]], [[TMP12]] -; VF4IC4-NEXT: [[TMP16:%.*]] = or <4 x i1> [[TMP15]], [[TMP13]] -; VF4IC4-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP16]]) +; VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; VF4IC4-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP13]]) ; VF4IC4-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; VF4IC4-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] ; VF4IC4-NEXT: br i1 [[TMP7]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] @@ -808,20 +586,7 @@ define i8 @same_exit_block_use_loaded_value() { ; VF4IC4-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]] ; VF4IC4: vector.early.exit: ; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE1:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP13]], i1 true) -; VF4IC4-NEXT: [[TMP20:%.*]] = add i64 12, [[FIRST_ACTIVE_LANE1]] -; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE8:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP12]], i1 true) -; VF4IC4-NEXT: [[TMP22:%.*]] = add i64 8, [[FIRST_ACTIVE_LANE8]] -; VF4IC4-NEXT: [[TMP21:%.*]] = icmp ne i64 [[FIRST_ACTIVE_LANE8]], 4 -; VF4IC4-NEXT: [[TMP23:%.*]] = select i1 [[TMP21]], i64 [[TMP22]], i64 [[TMP20]] -; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE9:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP11]], i1 true) -; VF4IC4-NEXT: [[TMP25:%.*]] = add i64 4, [[FIRST_ACTIVE_LANE9]] -; VF4IC4-NEXT: [[TMP24:%.*]] = icmp ne i64 [[FIRST_ACTIVE_LANE9]], 4 -; VF4IC4-NEXT: [[TMP26:%.*]] = 
select i1 [[TMP24]], i64 [[TMP25]], i64 [[TMP23]] -; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE10:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 true) -; VF4IC4-NEXT: [[TMP28:%.*]] = add i64 0, [[FIRST_ACTIVE_LANE10]] -; VF4IC4-NEXT: [[TMP27:%.*]] = icmp ne i64 [[FIRST_ACTIVE_LANE10]], 4 -; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = select i1 [[TMP27]], i64 [[TMP28]], i64 [[TMP26]] -; VF4IC4-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <4 x i8> [[WIDE_LOAD]], i64 [[FIRST_ACTIVE_LANE]] +; VF4IC4-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <4 x i8> [[WIDE_LOAD3]], i64 [[FIRST_ACTIVE_LANE1]] ; VF4IC4-NEXT: br label [[LOOP_END]] ; VF4IC4: scalar.ph: ; VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -884,48 +649,18 @@ define i8 @same_exit_block_reverse_use_loaded_value() { ; VF4IC4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] ; VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]] ; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 0 -; VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 -3 -; VF4IC4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 -4 -; VF4IC4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 -3 -; VF4IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 -8 -; VF4IC4-NEXT: [[TMP37:%.*]] = getelementptr inbounds i8, ptr [[TMP18]], i32 -3 -; VF4IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 -12 -; VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 -3 -; VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1 -; VF4IC4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD]], <4 x i8> poison, <4 x i32> -; VF4IC4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP11]], align 1 -; VF4IC4-NEXT: [[REVERSE2:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD1]], <4 x i8> poison, <4 x i32> -; 
VF4IC4-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP37]], align 1 -; VF4IC4-NEXT: [[REVERSE4:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD3]], <4 x i8> poison, <4 x i32> +; VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 -3 ; VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP8]], align 1 ; VF4IC4-NEXT: [[REVERSE6:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD5]], <4 x i8> poison, <4 x i32> ; VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]] ; VF4IC4-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 0 -; VF4IC4-NEXT: [[TMP39:%.*]] = getelementptr inbounds i8, ptr [[TMP38]], i32 -3 -; VF4IC4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 -4 -; VF4IC4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i32 -3 -; VF4IC4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 -8 -; VF4IC4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i32 -3 -; VF4IC4-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 -12 -; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 -3 -; VF4IC4-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i8>, ptr [[TMP39]], align 1 -; VF4IC4-NEXT: [[REVERSE8:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD7]], <4 x i8> poison, <4 x i32> -; VF4IC4-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x i8>, ptr [[TMP13]], align 1 -; VF4IC4-NEXT: [[REVERSE10:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD9]], <4 x i8> poison, <4 x i32> -; VF4IC4-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x i8>, ptr [[TMP15]], align 1 -; VF4IC4-NEXT: [[REVERSE12:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD11]], <4 x i8> poison, <4 x i32> +; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP38]], i32 -3 ; VF4IC4-NEXT: [[WIDE_LOAD13:%.*]] = load <4 x i8>, ptr [[TMP17]], align 1 ; VF4IC4-NEXT: [[REVERSE14:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD13]], <4 x i8> poison, <4 x i32> -; VF4IC4-NEXT: [[TMP6:%.*]] = icmp ne <4 x i8> 
[[REVERSE]], [[REVERSE8]] -; VF4IC4-NEXT: [[TMP19:%.*]] = icmp ne <4 x i8> [[REVERSE2]], [[REVERSE10]] -; VF4IC4-NEXT: [[TMP20:%.*]] = icmp ne <4 x i8> [[REVERSE4]], [[REVERSE12]] ; VF4IC4-NEXT: [[TMP21:%.*]] = icmp ne <4 x i8> [[REVERSE6]], [[REVERSE14]] -; VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; VF4IC4-NEXT: [[TMP22:%.*]] = or <4 x i1> [[TMP6]], [[TMP19]] -; VF4IC4-NEXT: [[TMP23:%.*]] = or <4 x i1> [[TMP22]], [[TMP20]] -; VF4IC4-NEXT: [[TMP24:%.*]] = or <4 x i1> [[TMP23]], [[TMP21]] -; VF4IC4-NEXT: [[TMP25:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP24]]) -; VF4IC4-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1008 +; VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; VF4IC4-NEXT: [[TMP25:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP21]]) +; VF4IC4-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1020 ; VF4IC4-NEXT: [[TMP27:%.*]] = or i1 [[TMP25]], [[TMP26]] ; VF4IC4-NEXT: br i1 [[TMP27]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; VF4IC4: middle.split: @@ -934,23 +669,10 @@ define i8 @same_exit_block_reverse_use_loaded_value() { ; VF4IC4-NEXT: br i1 false, label [[LOOP_END:%.*]], label [[SCALAR_PH]] ; VF4IC4: vector.early.exit: ; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE1:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP21]], i1 true) -; VF4IC4-NEXT: [[TMP28:%.*]] = add i64 12, [[FIRST_ACTIVE_LANE1]] -; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE15:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP20]], i1 true) -; VF4IC4-NEXT: [[TMP30:%.*]] = add i64 8, [[FIRST_ACTIVE_LANE15]] -; VF4IC4-NEXT: [[TMP29:%.*]] = icmp ne i64 [[FIRST_ACTIVE_LANE15]], 4 -; VF4IC4-NEXT: [[TMP31:%.*]] = select i1 [[TMP29]], i64 [[TMP30]], i64 [[TMP28]] -; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE16:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP19]], i1 true) -; VF4IC4-NEXT: [[TMP33:%.*]] = add i64 4, [[FIRST_ACTIVE_LANE16]] -; VF4IC4-NEXT: [[TMP32:%.*]] = 
icmp ne i64 [[FIRST_ACTIVE_LANE16]], 4 -; VF4IC4-NEXT: [[TMP34:%.*]] = select i1 [[TMP32]], i64 [[TMP33]], i64 [[TMP31]] -; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE17:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) -; VF4IC4-NEXT: [[TMP36:%.*]] = add i64 0, [[FIRST_ACTIVE_LANE17]] -; VF4IC4-NEXT: [[TMP35:%.*]] = icmp ne i64 [[FIRST_ACTIVE_LANE17]], 4 -; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = select i1 [[TMP35]], i64 [[TMP36]], i64 [[TMP34]] -; VF4IC4-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <4 x i8> [[REVERSE]], i64 [[FIRST_ACTIVE_LANE]] +; VF4IC4-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <4 x i8> [[REVERSE6]], i64 [[FIRST_ACTIVE_LANE1]] ; VF4IC4-NEXT: br label [[LOOP_END]] ; VF4IC4: scalar.ph: -; VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 15, [[MIDDLE_BLOCK]] ], [ 1023, [[ENTRY:%.*]] ] +; VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 3, [[MIDDLE_BLOCK]] ], [ 1023, [[ENTRY:%.*]] ] ; VF4IC4-NEXT: br label [[LOOP:%.*]] ; VF4IC4: loop: ; VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/vect.stats.ll b/llvm/test/Transforms/LoopVectorize/vect.stats.ll index 9a55dc99c316b..018e2c213ddf2 100644 --- a/llvm/test/Transforms/LoopVectorize/vect.stats.ll +++ b/llvm/test/Transforms/LoopVectorize/vect.stats.ll @@ -1,12 +1,12 @@ -; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -debug-only=loop-vectorize --disable-output -stats -S 2>&1 | FileCheck %s +; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -debug-only=loop-vectorize -enable-early-exit-vectorization --disable-output -stats -S 2>&1 | FileCheck %s ; REQUIRES: asserts -; -; We have 2 loops, one of them is vectorizable and the second one is not. -; +; We have 3 loops, two of them are vectorizable (with one being early-exit +; vectorized) and the third one is not. 
-; CHECK: 2 loop-vectorize - Number of loops analyzed for vectorization -; CHECK: 1 loop-vectorize - Number of loops vectorized +; CHECK: 3 loop-vectorize - Number of loops analyzed for vectorization +; CHECK: 1 loop-vectorize - Number of early exit loops vectorized +; CHECK: 2 loop-vectorize - Number of loops vectorized target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -31,6 +31,36 @@ for.end: ; preds = %entry, %for.body ret void } +define i32 @early_exit_vectorized(i64 %end) { +entry: + %p1 = alloca [1024 x i32] + %p2 = alloca [1024 x i32] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + %end.clamped = and i64 %end, 1023 + br label %for.body + +for.body: + %ind = phi i64 [ %ind.next, %for.inc ], [ 0, %entry ] + %arrayidx1 = getelementptr inbounds i32, ptr %p1, i64 %ind + %0 = load i32, ptr %arrayidx1, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %p2, i64 %ind + %1 = load i32, ptr %arrayidx2, align 4 + %cmp.early = icmp eq i32 %0, %1 + br i1 %cmp.early, label %found, label %for.inc + +for.inc: + %ind.next = add i64 %ind, 1 + %cmp = icmp ult i64 %ind.next, %end.clamped + br i1 %cmp, label %for.body, label %exit + +found: + ret i32 1 + +exit: + ret i32 0 +} + define void @not_vectorized(ptr nocapture %a, i64 %size) { entry: %cmp1 = icmp sle i64 %size, 0 @@ -56,3 +86,5 @@ for.body: ; preds = %entry, %for.body for.end: ; preds = %entry, %for.body ret void } + +declare void @init_mem(ptr, i64); diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll index 2f192ad7a9ea9..fdd5e0e7958ec 100644 --- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll +++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll @@ -55,17 +55,18 @@ 
define i8 @test_early_exit_max_tc_less_than_16(ptr dereferenceable(16) %A) nosyn ; VF8UF2: [[VECTOR_PH]]: ; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]] ; VF8UF2: [[VECTOR_BODY]]: -; VF8UF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 0 -; VF8UF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 8 -; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1 +; VF8UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF8UF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX]] +; VF8UF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 0 ; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1 -; VF8UF2-NEXT: [[TMP3:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer ; VF8UF2-NEXT: [[TMP6:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD1]], zeroinitializer -; VF8UF2-NEXT: [[TMP4:%.*]] = or <8 x i1> [[TMP3]], [[TMP6]] -; VF8UF2-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP4]]) -; VF8UF2-NEXT: br label %[[MIDDLE_SPLIT:.*]] +; VF8UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; VF8UF2-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP6]]) +; VF8UF2-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 +; VF8UF2-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]] +; VF8UF2-NEXT: br i1 [[TMP5]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VF8UF2: [[MIDDLE_SPLIT]]: -; VF8UF2-NEXT: br i1 [[TMP5]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]] +; VF8UF2-NEXT: br i1 [[TMP3]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]] ; VF8UF2: [[MIDDLE_BLOCK]]: ; VF8UF2-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] ; VF8UF2: [[VECTOR_EARLY_EXIT]]: @@ -82,7 +83,7 @@ define i8 @test_early_exit_max_tc_less_than_16(ptr dereferenceable(16) %A) nosyn ; VF8UF2: [[LOOP_LATCH]]: ; VF8UF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV1]], 1 ; VF8UF2-NEXT: 
[[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 16 -; VF8UF2-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP0:![0-9]+]] +; VF8UF2-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] ; VF8UF2: [[EXIT]]: ; VF8UF2-NEXT: [[RES:%.*]] = phi i8 [ 0, %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_EARLY_EXIT]] ] ; VF8UF2-NEXT: ret i8 [[RES]] @@ -192,27 +193,23 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer ; VF8UF2: [[VECTOR_PH]]: ; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]] ; VF8UF2: [[VECTOR_BODY]]: -; VF8UF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 0 -; VF8UF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 8 -; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1 +; VF8UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF8UF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX]] +; VF8UF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 0 ; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1 -; VF8UF2-NEXT: [[TMP3:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer ; VF8UF2-NEXT: [[TMP6:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD1]], zeroinitializer -; VF8UF2-NEXT: [[TMP4:%.*]] = or <8 x i1> [[TMP3]], [[TMP6]] -; VF8UF2-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP4]]) -; VF8UF2-NEXT: br label %[[MIDDLE_SPLIT:.*]] +; VF8UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; VF8UF2-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP6]]) +; VF8UF2-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 +; VF8UF2-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]] +; VF8UF2-NEXT: br i1 [[TMP5]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; VF8UF2: [[MIDDLE_SPLIT]]: -; VF8UF2-NEXT: br i1 [[TMP7]], label 
%[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]] +; VF8UF2-NEXT: br i1 [[TMP3]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]] ; VF8UF2: [[MIDDLE_BLOCK]]: ; VF8UF2-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] ; VF8UF2: [[VECTOR_EARLY_EXIT]]: ; VF8UF2-NEXT: [[TMP8:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP6]], i1 true) -; VF8UF2-NEXT: [[TMP10:%.*]] = add i64 8, [[TMP8]] -; VF8UF2-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP3]], i1 true) -; VF8UF2-NEXT: [[TMP5:%.*]] = add i64 0, [[FIRST_ACTIVE_LANE]] -; VF8UF2-NEXT: [[TMP9:%.*]] = icmp ne i64 [[FIRST_ACTIVE_LANE]], 8 -; VF8UF2-NEXT: [[TMP11:%.*]] = select i1 [[TMP9]], i64 [[TMP5]], i64 [[TMP10]] -; VF8UF2-NEXT: [[TMP12:%.*]] = add i64 0, [[TMP11]] +; VF8UF2-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], [[TMP8]] ; VF8UF2-NEXT: br label %[[EXIT]] ; VF8UF2: [[SCALAR_PH]]: ; VF8UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] @@ -226,9 +223,9 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer ; VF8UF2: [[LOOP_LATCH]]: ; VF8UF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV1]], 1 ; VF8UF2-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 16 -; VF8UF2-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] +; VF8UF2-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]] ; VF8UF2: [[EXIT]]: -; VF8UF2-NEXT: [[RES:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ [[TMP12]], %[[VECTOR_EARLY_EXIT]] ] +; VF8UF2-NEXT: [[RES:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ [[TMP7]], %[[VECTOR_EARLY_EXIT]] ] ; VF8UF2-NEXT: ret i64 [[RES]] ; ; VF16UF1-LABEL: define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside( diff --git a/llvm/test/Transforms/PhaseOrdering/X86/fmaddsub.ll 
b/llvm/test/Transforms/PhaseOrdering/X86/fmaddsub.ll index 5cb2c4530aa57..8e25c9c5547d6 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/fmaddsub.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/fmaddsub.ll @@ -567,22 +567,19 @@ define <8 x float> @buildvector_mul_subadd_ps256(<8 x float> %C, <8 x float> %D, ; ; SSE4-LABEL: @buildvector_mul_subadd_ps256( ; SSE4-NEXT: [[A:%.*]] = fmul <8 x float> [[C:%.*]], [[D:%.*]] -; SSE4-NEXT: [[TMP0:%.*]] = fsub <8 x float> [[A]], [[B:%.*]] -; SSE4-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <4 x i32> -; SSE4-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[B]] +; SSE4-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[B:%.*]] ; SSE4-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> poison, <8 x i32> -; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <8 x i32> -; SSE4-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> [[TMP4]], <8 x i32> -; SSE4-NEXT: [[TMP6:%.*]] = shufflevector <8 x float> [[TMP5]], <8 x float> poison, <8 x i32> +; SSE4-NEXT: [[TMP5:%.*]] = fsub <8 x float> [[A]], [[B]] +; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP5]], <8 x float> poison, <8 x i32> +; SSE4-NEXT: [[TMP6:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> [[TMP4]], <8 x i32> ; SSE4-NEXT: ret <8 x float> [[TMP6]] ; ; AVX_FMA4-LABEL: @buildvector_mul_subadd_ps256( ; AVX_FMA4-NEXT: [[A:%.*]] = fmul <8 x float> [[C:%.*]], [[D:%.*]] -; AVX_FMA4-NEXT: [[TMP0:%.*]] = fsub <8 x float> [[A]], [[B:%.*]] -; AVX_FMA4-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <4 x i32> -; AVX_FMA4-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[B]] +; AVX_FMA4-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[B:%.*]] ; AVX_FMA4-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> poison, <8 x i32> -; AVX_FMA4-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <8 x i32> +; 
AVX_FMA4-NEXT: [[TMP7:%.*]] = fsub <8 x float> [[A]], [[B]] +; AVX_FMA4-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP7]], <8 x float> poison, <8 x i32> ; AVX_FMA4-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> [[TMP4]], <8 x i32> ; AVX_FMA4-NEXT: [[TMP6:%.*]] = shufflevector <8 x float> [[TMP5]], <8 x float> poison, <8 x i32> ; AVX_FMA4-NEXT: ret <8 x float> [[TMP6]] @@ -677,13 +674,11 @@ define <16 x float> @buildvector_mul_subadd_ps512(<16 x float> %C, <16 x float> ; ; AVX_FMA-LABEL: @buildvector_mul_subadd_ps512( ; AVX_FMA-NEXT: [[A:%.*]] = fmul <16 x float> [[C:%.*]], [[D:%.*]] -; AVX_FMA-NEXT: [[TMP1:%.*]] = fsub <16 x float> [[A]], [[B:%.*]] -; AVX_FMA-NEXT: [[TMP2:%.*]] = shufflevector <16 x float> [[TMP1]], <16 x float> poison, <8 x i32> -; AVX_FMA-NEXT: [[TMP3:%.*]] = fadd <16 x float> [[A]], [[B]] +; AVX_FMA-NEXT: [[TMP3:%.*]] = fadd <16 x float> [[A]], [[B:%.*]] ; AVX_FMA-NEXT: [[TMP4:%.*]] = shufflevector <16 x float> [[TMP3]], <16 x float> poison, <16 x i32> -; AVX_FMA-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> poison, <16 x i32> -; AVX_FMA-NEXT: [[TMP6:%.*]] = shufflevector <16 x float> [[TMP4]], <16 x float> [[TMP5]], <16 x i32> -; AVX_FMA-NEXT: [[TMP7:%.*]] = shufflevector <16 x float> [[TMP6]], <16 x float> poison, <16 x i32> +; AVX_FMA-NEXT: [[TMP5:%.*]] = fsub <16 x float> [[A]], [[B]] +; AVX_FMA-NEXT: [[TMP6:%.*]] = shufflevector <16 x float> [[TMP5]], <16 x float> poison, <16 x i32> +; AVX_FMA-NEXT: [[TMP7:%.*]] = shufflevector <16 x float> [[TMP4]], <16 x float> [[TMP6]], <16 x i32> ; AVX_FMA-NEXT: ret <16 x float> [[TMP7]] ; ; AVX512-LABEL: @buildvector_mul_subadd_ps512( @@ -880,13 +875,11 @@ define <8 x double> @buildvector_mul_subadd_pd512(<8 x double> %C, <8 x double> ; ; AVX_FMA-LABEL: @buildvector_mul_subadd_pd512( ; AVX_FMA-NEXT: [[A:%.*]] = fmul <8 x double> [[C:%.*]], [[D:%.*]] -; AVX_FMA-NEXT: [[TMP1:%.*]] = fsub <8 x double> [[A]], [[B:%.*]] -; AVX_FMA-NEXT: [[TMP2:%.*]] = 
shufflevector <8 x double> [[TMP1]], <8 x double> poison, <4 x i32> -; AVX_FMA-NEXT: [[TMP3:%.*]] = fadd <8 x double> [[A]], [[B]] +; AVX_FMA-NEXT: [[TMP3:%.*]] = fadd <8 x double> [[A]], [[B:%.*]] ; AVX_FMA-NEXT: [[TMP4:%.*]] = shufflevector <8 x double> [[TMP3]], <8 x double> poison, <8 x i32> -; AVX_FMA-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> poison, <8 x i32> -; AVX_FMA-NEXT: [[TMP6:%.*]] = shufflevector <8 x double> [[TMP4]], <8 x double> [[TMP5]], <8 x i32> -; AVX_FMA-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[TMP6]], <8 x double> poison, <8 x i32> +; AVX_FMA-NEXT: [[TMP5:%.*]] = fsub <8 x double> [[A]], [[B]] +; AVX_FMA-NEXT: [[TMP6:%.*]] = shufflevector <8 x double> [[TMP5]], <8 x double> poison, <8 x i32> +; AVX_FMA-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[TMP4]], <8 x double> [[TMP6]], <8 x i32> ; AVX_FMA-NEXT: ret <8 x double> [[TMP7]] ; ; AVX512-LABEL: @buildvector_mul_subadd_pd512( diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/InstructionsState-is-invalid-0.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/InstructionsState-is-invalid-0.ll index 9f9e9d84108e6..9c615bb4757fa 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/InstructionsState-is-invalid-0.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/InstructionsState-is-invalid-0.ll @@ -12,9 +12,10 @@ define void @foo(ptr %0) { ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x ptr> [[TMP2]], <2 x ptr> poison, <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x ptr> [[TMP6]], <4 x ptr> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP11:%.*]] = call <8 x ptr> @llvm.vector.insert.v8p0.v4p0(<8 x ptr> poison, <4 x ptr> [[TMP3]], i64 0) -; CHECK-NEXT: [[TMP7:%.*]] = call <8 x ptr> @llvm.vector.insert.v8p0.v4p0(<8 x ptr> [[TMP11]], <4 x ptr> [[TMP5]], i64 4) -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x ptr> [[TMP7]], <8 x ptr> poison, <8 x i32> +; 
CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x ptr> [[TMP3]], <4 x ptr> poison, <8 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x ptr> [[TMP5]], <4 x ptr> poison, <8 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x ptr> [[TMP3]], <4 x ptr> [[TMP5]], <8 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x ptr> [[TMP12]], <8 x ptr> poison, <8 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = icmp ult <8 x ptr> [[TMP8]], zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = and <8 x i1> [[TMP9]], zeroinitializer ; CHECK-NEXT: [[OP_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP10]]) diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/alternate-vectorization-split-node.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/alternate-vectorization-split-node.ll index 9327fe8995d45..8d44d03e0e5cc 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/alternate-vectorization-split-node.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/alternate-vectorization-split-node.ll @@ -11,7 +11,7 @@ define i32 @test(ptr %c) { ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> poison, <6 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = lshr <6 x i64> [[TMP1]], zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> poison, <8 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v6i64(<8 x i64> poison, <6 x i64> [[TMP2]], i64 0) +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <6 x i64> [[TMP2]], <6 x i64> poison, <8 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x i64> [[TMP4]], <8 x i64> [[TMP3]], <8 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = trunc <8 x i64> [[TMP5]] to <8 x i8> ; CHECK-NEXT: store <8 x i8> [[TMP6]], ptr [[INCDEC_PTR_3_1]], align 1 diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/commute.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/commute.ll index 19b6d82818532..442769937ac12 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/commute.ll +++ 
b/llvm/test/Transforms/SLPVectorizer/AArch64/commute.ll @@ -16,9 +16,7 @@ define void @test1(ptr nocapture readonly %J, i32 %xmin, i32 %ymin) { ; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, ptr [[J:%.*]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = fsub fast <2 x float> [[TMP2]], [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = fmul fast <2 x float> [[TMP5]], [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x float> [[TMP6]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP6]], i32 1 -; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[ADD:%.*]] = call fast float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> [[TMP6]]) ; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[ADD]], 0.000000e+00 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY3_LR_PH]], label [[FOR_END27:%.*]] ; CHECK: for.end27: @@ -57,9 +55,7 @@ define void @test2(ptr nocapture readonly %J, i32 %xmin, i32 %ymin) { ; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, ptr [[J:%.*]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = fsub fast <2 x float> [[TMP2]], [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = fmul fast <2 x float> [[TMP5]], [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x float> [[TMP6]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP6]], i32 1 -; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[ADD:%.*]] = call fast float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> [[TMP6]]) ; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[ADD]], 0.000000e+00 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY3_LR_PH]], label [[FOR_END27:%.*]] ; CHECK: for.end27: diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll index 7ae44c274ff6d..fcbe2d631ba8b 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll @@ -164,7 +164,8 @@ define i32 
@getelementptr_2x32(ptr nocapture readonly %g, i32 %n, i32 %x, i32 %y ; CHECK-NEXT: [[T12:%.*]] = load i32, ptr [[ARRAYIDX15]], align 4 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[T10]], i32 2 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[T12]], i32 3 -; CHECK-NEXT: [[TMP13:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP8]], <2 x i32> [[TMP6]], i64 0) +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP10]], <4 x i32> ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP13]]) ; CHECK-NEXT: [[OP_RDX]] = add i32 [[TMP14]], [[SUM_032]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1 diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll index 6c5220d13b7a2..bb05440910130 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll @@ -420,27 +420,26 @@ define i32 @reduce_blockstrided4x4(ptr nocapture noundef readonly %p1, i32 nound ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, ptr [[P2:%.*]], i64 4 ; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IDX_EXT]] ; CHECK-NEXT: [[ADD_PTR64:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[IDX_EXT63]] -; TODO: Dead code must be removed below. 
; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR]], i64 4 ; CHECK-NEXT: [[ARRAYIDX5_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64]], i64 4 ; CHECK-NEXT: [[ADD_PTR_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR]], i64 [[IDX_EXT]] -; CHECK-NEXT: [[TMP32:%.*]] = load i8, ptr [[ADD_PTR_1]], align 1 -; CHECK-NEXT: [[CONV_2:%.*]] = zext i8 [[TMP32]] to i32 +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ADD_PTR_1]], align 1 +; CHECK-NEXT: [[CONV_2:%.*]] = zext i8 [[TMP0]] to i32 ; CHECK-NEXT: [[ADD_PTR64_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64]], i64 [[IDX_EXT63]] -; CHECK-NEXT: [[TMP33:%.*]] = load i8, ptr [[ADD_PTR64_1]], align 1 -; CHECK-NEXT: [[CONV2_2:%.*]] = zext i8 [[TMP33]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[ADD_PTR64_1]], align 1 +; CHECK-NEXT: [[CONV2_2:%.*]] = zext i8 [[TMP1]] to i32 ; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_1]], i64 4 -; CHECK-NEXT: [[TMP34:%.*]] = load i8, ptr [[ARRAYIDX3_2]], align 1 -; CHECK-NEXT: [[CONV4_2:%.*]] = zext i8 [[TMP34]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX3_2]], align 1 +; CHECK-NEXT: [[CONV4_2:%.*]] = zext i8 [[TMP2]] to i32 ; CHECK-NEXT: [[ARRAYIDX5_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64_1]], i64 4 -; CHECK-NEXT: [[TMP35:%.*]] = load i8, ptr [[ARRAYIDX5_2]], align 1 -; CHECK-NEXT: [[CONV6_2:%.*]] = zext i8 [[TMP35]] to i32 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX5_2]], align 1 +; CHECK-NEXT: [[CONV6_2:%.*]] = zext i8 [[TMP3]] to i32 ; CHECK-NEXT: [[ARRAYIDX8_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_1]], i64 1 -; CHECK-NEXT: [[TMP36:%.*]] = load i8, ptr [[ARRAYIDX8_2]], align 1 -; CHECK-NEXT: [[CONV9_2:%.*]] = zext i8 [[TMP36]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[ARRAYIDX8_2]], align 1 +; CHECK-NEXT: [[CONV9_2:%.*]] = zext i8 [[TMP4]] to i32 ; CHECK-NEXT: [[ARRAYIDX10_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64_1]], i64 1 -; CHECK-NEXT: 
[[TMP37:%.*]] = load i8, ptr [[ARRAYIDX10_2]], align 1 -; CHECK-NEXT: [[CONV11_2:%.*]] = zext i8 [[TMP37]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[ARRAYIDX10_2]], align 1 +; CHECK-NEXT: [[CONV11_2:%.*]] = zext i8 [[TMP5]] to i32 ; CHECK-NEXT: [[ARRAYIDX13_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_1]], i64 5 ; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[ARRAYIDX13_2]], align 1 ; CHECK-NEXT: [[CONV14_2:%.*]] = zext i8 [[TMP6]] to i32 @@ -454,17 +453,17 @@ define i32 @reduce_blockstrided4x4(ptr nocapture noundef readonly %p1, i32 nound ; CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr [[ARRAYIDX22_2]], align 1 ; CHECK-NEXT: [[CONV23_2:%.*]] = zext i8 [[TMP9]] to i32 ; CHECK-NEXT: [[ARRAYIDX25_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_1]], i64 6 -; CHECK-NEXT: [[TMP41:%.*]] = load i8, ptr [[ARRAYIDX25_2]], align 1 -; CHECK-NEXT: [[CONV26_2:%.*]] = zext i8 [[TMP41]] to i32 +; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[ARRAYIDX25_2]], align 1 +; CHECK-NEXT: [[CONV26_2:%.*]] = zext i8 [[TMP10]] to i32 ; CHECK-NEXT: [[ARRAYIDX27_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64_1]], i64 6 -; CHECK-NEXT: [[TMP42:%.*]] = load i8, ptr [[ARRAYIDX27_2]], align 1 -; CHECK-NEXT: [[CONV28_2:%.*]] = zext i8 [[TMP42]] to i32 +; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[ARRAYIDX27_2]], align 1 +; CHECK-NEXT: [[CONV28_2:%.*]] = zext i8 [[TMP11]] to i32 ; CHECK-NEXT: [[ARRAYIDX32_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_1]], i64 3 -; CHECK-NEXT: [[TMP43:%.*]] = load i8, ptr [[ARRAYIDX32_2]], align 1 -; CHECK-NEXT: [[CONV33_2:%.*]] = zext i8 [[TMP43]] to i32 +; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr [[ARRAYIDX32_2]], align 1 +; CHECK-NEXT: [[CONV33_2:%.*]] = zext i8 [[TMP12]] to i32 ; CHECK-NEXT: [[ARRAYIDX34_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64_1]], i64 3 -; CHECK-NEXT: [[TMP44:%.*]] = load i8, ptr [[ARRAYIDX34_2]], align 1 -; CHECK-NEXT: [[CONV35_2:%.*]] = zext i8 [[TMP44]] to i32 +; CHECK-NEXT: [[TMP13:%.*]] = load i8, ptr 
[[ARRAYIDX34_2]], align 1 +; CHECK-NEXT: [[CONV35_2:%.*]] = zext i8 [[TMP13]] to i32 ; CHECK-NEXT: [[ARRAYIDX37_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_1]], i64 7 ; CHECK-NEXT: [[TMP14:%.*]] = load i8, ptr [[ARRAYIDX37_2]], align 1 ; CHECK-NEXT: [[CONV38_2:%.*]] = zext i8 [[TMP14]] to i32 @@ -478,17 +477,17 @@ define i32 @reduce_blockstrided4x4(ptr nocapture noundef readonly %p1, i32 nound ; CHECK-NEXT: [[TMP17:%.*]] = load i8, ptr [[ADD_PTR64_2]], align 1 ; CHECK-NEXT: [[CONV2_3:%.*]] = zext i8 [[TMP17]] to i32 ; CHECK-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_2]], i64 4 -; CHECK-NEXT: [[TMP48:%.*]] = load i8, ptr [[ARRAYIDX3_3]], align 1 -; CHECK-NEXT: [[CONV4_3:%.*]] = zext i8 [[TMP48]] to i32 +; CHECK-NEXT: [[TMP18:%.*]] = load i8, ptr [[ARRAYIDX3_3]], align 1 +; CHECK-NEXT: [[CONV4_3:%.*]] = zext i8 [[TMP18]] to i32 ; CHECK-NEXT: [[ARRAYIDX5_3:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64_2]], i64 4 -; CHECK-NEXT: [[TMP49:%.*]] = load i8, ptr [[ARRAYIDX5_3]], align 1 -; CHECK-NEXT: [[CONV6_3:%.*]] = zext i8 [[TMP49]] to i32 +; CHECK-NEXT: [[TMP19:%.*]] = load i8, ptr [[ARRAYIDX5_3]], align 1 +; CHECK-NEXT: [[CONV6_3:%.*]] = zext i8 [[TMP19]] to i32 ; CHECK-NEXT: [[ARRAYIDX8_3:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_2]], i64 1 -; CHECK-NEXT: [[TMP50:%.*]] = load i8, ptr [[ARRAYIDX8_3]], align 1 -; CHECK-NEXT: [[CONV9_3:%.*]] = zext i8 [[TMP50]] to i32 +; CHECK-NEXT: [[TMP20:%.*]] = load i8, ptr [[ARRAYIDX8_3]], align 1 +; CHECK-NEXT: [[CONV9_3:%.*]] = zext i8 [[TMP20]] to i32 ; CHECK-NEXT: [[ARRAYIDX10_3:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64_2]], i64 1 -; CHECK-NEXT: [[TMP51:%.*]] = load i8, ptr [[ARRAYIDX10_3]], align 1 -; CHECK-NEXT: [[CONV11_3:%.*]] = zext i8 [[TMP51]] to i32 +; CHECK-NEXT: [[TMP21:%.*]] = load i8, ptr [[ARRAYIDX10_3]], align 1 +; CHECK-NEXT: [[CONV11_3:%.*]] = zext i8 [[TMP21]] to i32 ; CHECK-NEXT: [[ARRAYIDX13_3:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_2]], i64 5 ; 
CHECK-NEXT: [[TMP22:%.*]] = load i8, ptr [[ARRAYIDX13_3]], align 1 ; CHECK-NEXT: [[CONV14_3:%.*]] = zext i8 [[TMP22]] to i32 @@ -519,28 +518,35 @@ define i32 @reduce_blockstrided4x4(ptr nocapture noundef readonly %p1, i32 nound ; CHECK-NEXT: [[ARRAYIDX39_3:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64_2]], i64 7 ; CHECK-NEXT: [[TMP31:%.*]] = load i8, ptr [[ARRAYIDX39_3]], align 1 ; CHECK-NEXT: [[CONV40_3:%.*]] = zext i8 [[TMP31]] to i32 -; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[P1]], align 1 -; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[P2]], align 1 -; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3]], align 1 -; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5]], align 1 -; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i8>, ptr [[ADD_PTR]], align 1 -; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i8>, ptr [[ADD_PTR64]], align 1 -; CHECK-NEXT: [[TMP38:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> poison, <4 x i8> [[TMP0]], i64 0) -; CHECK-NEXT: [[TMP39:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP38]], <4 x i8> [[TMP4]], i64 4) -; CHECK-NEXT: [[TMP40:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP39]], <4 x i8> [[TMP1]], i64 8) -; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP40]], <4 x i8> [[TMP5]], i64 12) -; CHECK-NEXT: [[TMP11:%.*]] = zext <16 x i8> [[TMP10]] to <16 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_1]], align 1 -; CHECK-NEXT: [[TMP13:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_1]], align 1 -; CHECK-NEXT: [[TMP45:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> poison, <4 x i8> [[TMP2]], i64 0) -; CHECK-NEXT: [[TMP46:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP45]], <4 x i8> [[TMP12]], i64 4) -; CHECK-NEXT: [[TMP47:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP46]], <4 x i8> [[TMP3]], i64 8) -; CHECK-NEXT: [[TMP18:%.*]] = call <16 x i8> 
@llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP47]], <4 x i8> [[TMP13]], i64 12) -; CHECK-NEXT: [[TMP19:%.*]] = zext <16 x i8> [[TMP18]] to <16 x i32> -; CHECK-NEXT: [[TMP20:%.*]] = mul <16 x i32> [[TMP11]], [[TMP19]] -; CHECK-NEXT: [[TMP21:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP20]]) -; CHECK-NEXT: ret i32 [[TMP21]] +; CHECK-NEXT: [[TMP32:%.*]] = load <4 x i8>, ptr [[P1]], align 1 +; CHECK-NEXT: [[TMP33:%.*]] = load <4 x i8>, ptr [[P2]], align 1 +; CHECK-NEXT: [[TMP34:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3]], align 1 +; CHECK-NEXT: [[TMP35:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5]], align 1 +; CHECK-NEXT: [[TMP36:%.*]] = load <4 x i8>, ptr [[ADD_PTR]], align 1 +; CHECK-NEXT: [[TMP37:%.*]] = load <4 x i8>, ptr [[ADD_PTR64]], align 1 +; CHECK-NEXT: [[TMP38:%.*]] = shufflevector <4 x i8> [[TMP32]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP39:%.*]] = shufflevector <4 x i8> [[TMP36]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP40:%.*]] = shufflevector <4 x i8> [[TMP32]], <4 x i8> [[TMP36]], <16 x i32> +; CHECK-NEXT: [[TMP41:%.*]] = shufflevector <4 x i8> [[TMP33]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP42:%.*]] = shufflevector <16 x i8> [[TMP40]], <16 x i8> [[TMP41]], <16 x i32> +; CHECK-NEXT: [[TMP43:%.*]] = shufflevector <4 x i8> [[TMP37]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP44:%.*]] = shufflevector <16 x i8> [[TMP42]], <16 x i8> [[TMP43]], <16 x i32> +; CHECK-NEXT: [[TMP45:%.*]] = zext <16 x i8> [[TMP44]] to <16 x i32> +; CHECK-NEXT: [[TMP46:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_1]], align 1 +; CHECK-NEXT: [[TMP47:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_1]], align 1 +; CHECK-NEXT: [[TMP48:%.*]] = shufflevector <4 x i8> [[TMP34]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP49:%.*]] = shufflevector <4 x i8> [[TMP46]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP50:%.*]] = shufflevector <4 x i8> [[TMP34]], <4 x i8> [[TMP46]], <16 x i32> +; CHECK-NEXT: [[TMP51:%.*]] = shufflevector <4 x i8> [[TMP35]], <4 
x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP52:%.*]] = shufflevector <16 x i8> [[TMP50]], <16 x i8> [[TMP51]], <16 x i32> +; CHECK-NEXT: [[TMP53:%.*]] = shufflevector <4 x i8> [[TMP47]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP54:%.*]] = shufflevector <16 x i8> [[TMP52]], <16 x i8> [[TMP53]], <16 x i32> +; CHECK-NEXT: [[TMP55:%.*]] = zext <16 x i8> [[TMP54]] to <16 x i32> +; CHECK-NEXT: [[TMP56:%.*]] = mul <16 x i32> [[TMP45]], [[TMP55]] +; CHECK-NEXT: [[TMP57:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP56]]) +; CHECK-NEXT: ret i32 [[TMP57]] ; +; TODO: Dead code must be removed below. entry: %idx.ext = sext i32 %off1 to i64 %idx.ext63 = sext i32 %off2 to i64 @@ -1016,69 +1022,68 @@ define void @store_blockstrided4x4(ptr nocapture noundef readonly %p1, i32 nound ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, ptr [[P2:%.*]], i64 4 ; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IDX_EXT]] ; CHECK-NEXT: [[ADD_PTR64:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[IDX_EXT63]] -; TODO: Dead code must be removed below. 
; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR]], i64 4 ; CHECK-NEXT: [[ARRAYIDX5_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64]], i64 4 ; CHECK-NEXT: [[ADD_PTR_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR]], i64 [[IDX_EXT]] -; CHECK-NEXT: [[TMP32:%.*]] = load i8, ptr [[ADD_PTR_1]], align 1 -; CHECK-NEXT: [[CONV_2:%.*]] = zext i8 [[TMP32]] to i32 +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ADD_PTR_1]], align 1 +; CHECK-NEXT: [[CONV_2:%.*]] = zext i8 [[TMP0]] to i32 ; CHECK-NEXT: [[ADD_PTR64_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64]], i64 [[IDX_EXT63]] -; CHECK-NEXT: [[TMP33:%.*]] = load i8, ptr [[ADD_PTR64_1]], align 1 -; CHECK-NEXT: [[CONV2_2:%.*]] = zext i8 [[TMP33]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[ADD_PTR64_1]], align 1 +; CHECK-NEXT: [[CONV2_2:%.*]] = zext i8 [[TMP1]] to i32 ; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_1]], i64 4 -; CHECK-NEXT: [[TMP34:%.*]] = load i8, ptr [[ARRAYIDX3_2]], align 1 -; CHECK-NEXT: [[CONV4_2:%.*]] = zext i8 [[TMP34]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX3_2]], align 1 +; CHECK-NEXT: [[CONV4_2:%.*]] = zext i8 [[TMP2]] to i32 ; CHECK-NEXT: [[ARRAYIDX5_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64_1]], i64 4 -; CHECK-NEXT: [[TMP35:%.*]] = load i8, ptr [[ARRAYIDX5_2]], align 1 -; CHECK-NEXT: [[CONV6_2:%.*]] = zext i8 [[TMP35]] to i32 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX5_2]], align 1 +; CHECK-NEXT: [[CONV6_2:%.*]] = zext i8 [[TMP3]] to i32 ; CHECK-NEXT: [[ARRAYIDX8_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_1]], i64 1 -; CHECK-NEXT: [[TMP36:%.*]] = load i8, ptr [[ARRAYIDX8_2]], align 1 -; CHECK-NEXT: [[CONV9_2:%.*]] = zext i8 [[TMP36]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[ARRAYIDX8_2]], align 1 +; CHECK-NEXT: [[CONV9_2:%.*]] = zext i8 [[TMP4]] to i32 ; CHECK-NEXT: [[ARRAYIDX10_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64_1]], i64 1 -; CHECK-NEXT: 
[[TMP37:%.*]] = load i8, ptr [[ARRAYIDX10_2]], align 1 -; CHECK-NEXT: [[CONV11_2:%.*]] = zext i8 [[TMP37]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[ARRAYIDX10_2]], align 1 +; CHECK-NEXT: [[CONV11_2:%.*]] = zext i8 [[TMP5]] to i32 ; CHECK-NEXT: [[ARRAYIDX13_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_1]], i64 5 -; CHECK-NEXT: [[TMP38:%.*]] = load i8, ptr [[ARRAYIDX13_2]], align 1 -; CHECK-NEXT: [[CONV14_2:%.*]] = zext i8 [[TMP38]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[ARRAYIDX13_2]], align 1 +; CHECK-NEXT: [[CONV14_2:%.*]] = zext i8 [[TMP6]] to i32 ; CHECK-NEXT: [[ARRAYIDX15_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64_1]], i64 5 -; CHECK-NEXT: [[TMP39:%.*]] = load i8, ptr [[ARRAYIDX15_2]], align 1 -; CHECK-NEXT: [[CONV16_2:%.*]] = zext i8 [[TMP39]] to i32 +; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX15_2]], align 1 +; CHECK-NEXT: [[CONV16_2:%.*]] = zext i8 [[TMP7]] to i32 ; CHECK-NEXT: [[ARRAYIDX20_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_1]], i64 2 -; CHECK-NEXT: [[TMP40:%.*]] = load i8, ptr [[ARRAYIDX20_2]], align 1 -; CHECK-NEXT: [[CONV21_2:%.*]] = zext i8 [[TMP40]] to i32 +; CHECK-NEXT: [[TMP8:%.*]] = load i8, ptr [[ARRAYIDX20_2]], align 1 +; CHECK-NEXT: [[CONV21_2:%.*]] = zext i8 [[TMP8]] to i32 ; CHECK-NEXT: [[ARRAYIDX22_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64_1]], i64 2 -; CHECK-NEXT: [[TMP41:%.*]] = load i8, ptr [[ARRAYIDX22_2]], align 1 -; CHECK-NEXT: [[CONV23_2:%.*]] = zext i8 [[TMP41]] to i32 +; CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr [[ARRAYIDX22_2]], align 1 +; CHECK-NEXT: [[CONV23_2:%.*]] = zext i8 [[TMP9]] to i32 ; CHECK-NEXT: [[ARRAYIDX25_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_1]], i64 6 -; CHECK-NEXT: [[TMP42:%.*]] = load i8, ptr [[ARRAYIDX25_2]], align 1 -; CHECK-NEXT: [[CONV26_2:%.*]] = zext i8 [[TMP42]] to i32 +; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[ARRAYIDX25_2]], align 1 +; CHECK-NEXT: [[CONV26_2:%.*]] = zext i8 [[TMP10]] to i32 ; CHECK-NEXT: 
[[ARRAYIDX27_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64_1]], i64 6 -; CHECK-NEXT: [[TMP43:%.*]] = load i8, ptr [[ARRAYIDX27_2]], align 1 -; CHECK-NEXT: [[CONV28_2:%.*]] = zext i8 [[TMP43]] to i32 +; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[ARRAYIDX27_2]], align 1 +; CHECK-NEXT: [[CONV28_2:%.*]] = zext i8 [[TMP11]] to i32 ; CHECK-NEXT: [[ARRAYIDX32_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_1]], i64 3 -; CHECK-NEXT: [[TMP44:%.*]] = load i8, ptr [[ARRAYIDX32_2]], align 1 -; CHECK-NEXT: [[CONV33_2:%.*]] = zext i8 [[TMP44]] to i32 +; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr [[ARRAYIDX32_2]], align 1 +; CHECK-NEXT: [[CONV33_2:%.*]] = zext i8 [[TMP12]] to i32 ; CHECK-NEXT: [[ARRAYIDX34_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64_1]], i64 3 -; CHECK-NEXT: [[TMP45:%.*]] = load i8, ptr [[ARRAYIDX34_2]], align 1 -; CHECK-NEXT: [[CONV35_2:%.*]] = zext i8 [[TMP45]] to i32 +; CHECK-NEXT: [[TMP13:%.*]] = load i8, ptr [[ARRAYIDX34_2]], align 1 +; CHECK-NEXT: [[CONV35_2:%.*]] = zext i8 [[TMP13]] to i32 ; CHECK-NEXT: [[ARRAYIDX37_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_1]], i64 7 -; CHECK-NEXT: [[TMP46:%.*]] = load i8, ptr [[ARRAYIDX37_2]], align 1 -; CHECK-NEXT: [[CONV38_2:%.*]] = zext i8 [[TMP46]] to i32 +; CHECK-NEXT: [[TMP14:%.*]] = load i8, ptr [[ARRAYIDX37_2]], align 1 +; CHECK-NEXT: [[CONV38_2:%.*]] = zext i8 [[TMP14]] to i32 ; CHECK-NEXT: [[ARRAYIDX39_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64_1]], i64 7 -; CHECK-NEXT: [[TMP47:%.*]] = load i8, ptr [[ARRAYIDX39_2]], align 1 -; CHECK-NEXT: [[CONV40_2:%.*]] = zext i8 [[TMP47]] to i32 +; CHECK-NEXT: [[TMP15:%.*]] = load i8, ptr [[ARRAYIDX39_2]], align 1 +; CHECK-NEXT: [[CONV40_2:%.*]] = zext i8 [[TMP15]] to i32 ; CHECK-NEXT: [[ADD_PTR_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_1]], i64 [[IDX_EXT]] -; CHECK-NEXT: [[TMP48:%.*]] = load i8, ptr [[ADD_PTR_2]], align 1 -; CHECK-NEXT: [[CONV_3:%.*]] = zext i8 [[TMP48]] to i32 +; CHECK-NEXT: [[TMP16:%.*]] = load i8, ptr 
[[ADD_PTR_2]], align 1 +; CHECK-NEXT: [[CONV_3:%.*]] = zext i8 [[TMP16]] to i32 ; CHECK-NEXT: [[ADD_PTR64_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64_1]], i64 [[IDX_EXT63]] -; CHECK-NEXT: [[TMP49:%.*]] = load i8, ptr [[ADD_PTR64_2]], align 1 -; CHECK-NEXT: [[CONV2_3:%.*]] = zext i8 [[TMP49]] to i32 +; CHECK-NEXT: [[TMP17:%.*]] = load i8, ptr [[ADD_PTR64_2]], align 1 +; CHECK-NEXT: [[CONV2_3:%.*]] = zext i8 [[TMP17]] to i32 ; CHECK-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_2]], i64 4 -; CHECK-NEXT: [[TMP50:%.*]] = load i8, ptr [[ARRAYIDX3_3]], align 1 -; CHECK-NEXT: [[CONV4_3:%.*]] = zext i8 [[TMP50]] to i32 +; CHECK-NEXT: [[TMP18:%.*]] = load i8, ptr [[ARRAYIDX3_3]], align 1 +; CHECK-NEXT: [[CONV4_3:%.*]] = zext i8 [[TMP18]] to i32 ; CHECK-NEXT: [[ARRAYIDX5_3:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64_2]], i64 4 -; CHECK-NEXT: [[TMP51:%.*]] = load i8, ptr [[ARRAYIDX5_3]], align 1 -; CHECK-NEXT: [[CONV6_3:%.*]] = zext i8 [[TMP51]] to i32 +; CHECK-NEXT: [[TMP19:%.*]] = load i8, ptr [[ARRAYIDX5_3]], align 1 +; CHECK-NEXT: [[CONV6_3:%.*]] = zext i8 [[TMP19]] to i32 ; CHECK-NEXT: [[ARRAYIDX8_3:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_2]], i64 1 ; CHECK-NEXT: [[TMP20:%.*]] = load i8, ptr [[ARRAYIDX8_3]], align 1 ; CHECK-NEXT: [[CONV9_3:%.*]] = zext i8 [[TMP20]] to i32 @@ -1118,32 +1123,33 @@ define void @store_blockstrided4x4(ptr nocapture noundef readonly %p1, i32 nound ; CHECK-NEXT: [[DST4:%.*]] = getelementptr inbounds i32, ptr [[DST0:%.*]], i64 4 ; CHECK-NEXT: [[DST8:%.*]] = getelementptr inbounds i32, ptr [[DST0]], i64 8 ; CHECK-NEXT: [[DST12:%.*]] = getelementptr inbounds i32, ptr [[DST0]], i64 12 -; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[P1]], align 1 -; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i8> [[TMP0]] to <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3]], align 1 -; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> [[TMP1]], 
[[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i8>, ptr [[P2]], align 1 -; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i8> [[TMP5]] to <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5]], align 1 -; CHECK-NEXT: [[TMP8:%.*]] = zext <4 x i8> [[TMP7]] to <4 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = mul <4 x i32> [[TMP6]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = load <4 x i8>, ptr [[ADD_PTR]], align 1 -; CHECK-NEXT: [[TMP11:%.*]] = zext <4 x i8> [[TMP10]] to <4 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_1]], align 1 -; CHECK-NEXT: [[TMP13:%.*]] = zext <4 x i8> [[TMP12]] to <4 x i32> -; CHECK-NEXT: [[TMP14:%.*]] = mul <4 x i32> [[TMP11]], [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = load <4 x i8>, ptr [[ADD_PTR64]], align 1 -; CHECK-NEXT: [[TMP16:%.*]] = zext <4 x i8> [[TMP15]] to <4 x i32> -; CHECK-NEXT: [[TMP17:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_1]], align 1 -; CHECK-NEXT: [[TMP18:%.*]] = zext <4 x i8> [[TMP17]] to <4 x i32> -; CHECK-NEXT: [[TMP19:%.*]] = mul <4 x i32> [[TMP16]], [[TMP18]] -; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[DST0]], align 4 -; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr [[DST4]], align 4 -; CHECK-NEXT: store <4 x i32> [[TMP14]], ptr [[DST8]], align 4 -; CHECK-NEXT: store <4 x i32> [[TMP19]], ptr [[DST12]], align 4 +; CHECK-NEXT: [[TMP32:%.*]] = load <4 x i8>, ptr [[P1]], align 1 +; CHECK-NEXT: [[TMP33:%.*]] = zext <4 x i8> [[TMP32]] to <4 x i32> +; CHECK-NEXT: [[TMP34:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3]], align 1 +; CHECK-NEXT: [[TMP35:%.*]] = zext <4 x i8> [[TMP34]] to <4 x i32> +; CHECK-NEXT: [[TMP36:%.*]] = mul <4 x i32> [[TMP33]], [[TMP35]] +; CHECK-NEXT: [[TMP37:%.*]] = load <4 x i8>, ptr [[P2]], align 1 +; CHECK-NEXT: [[TMP38:%.*]] = zext <4 x i8> [[TMP37]] to <4 x i32> +; CHECK-NEXT: [[TMP39:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5]], align 1 +; CHECK-NEXT: [[TMP40:%.*]] = zext <4 x i8> [[TMP39]] to <4 x i32> +; CHECK-NEXT: [[TMP41:%.*]] = mul <4 x i32> [[TMP38]], [[TMP40]] +; CHECK-NEXT: 
[[TMP42:%.*]] = load <4 x i8>, ptr [[ADD_PTR]], align 1 +; CHECK-NEXT: [[TMP43:%.*]] = zext <4 x i8> [[TMP42]] to <4 x i32> +; CHECK-NEXT: [[TMP44:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_1]], align 1 +; CHECK-NEXT: [[TMP45:%.*]] = zext <4 x i8> [[TMP44]] to <4 x i32> +; CHECK-NEXT: [[TMP46:%.*]] = mul <4 x i32> [[TMP43]], [[TMP45]] +; CHECK-NEXT: [[TMP47:%.*]] = load <4 x i8>, ptr [[ADD_PTR64]], align 1 +; CHECK-NEXT: [[TMP48:%.*]] = zext <4 x i8> [[TMP47]] to <4 x i32> +; CHECK-NEXT: [[TMP49:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_1]], align 1 +; CHECK-NEXT: [[TMP50:%.*]] = zext <4 x i8> [[TMP49]] to <4 x i32> +; CHECK-NEXT: [[TMP51:%.*]] = mul <4 x i32> [[TMP48]], [[TMP50]] +; CHECK-NEXT: store <4 x i32> [[TMP36]], ptr [[DST0]], align 4 +; CHECK-NEXT: store <4 x i32> [[TMP41]], ptr [[DST4]], align 4 +; CHECK-NEXT: store <4 x i32> [[TMP46]], ptr [[DST8]], align 4 +; CHECK-NEXT: store <4 x i32> [[TMP51]], ptr [[DST12]], align 4 ; CHECK-NEXT: ret void ; +; TODO: Dead code must be removed below. 
entry: %idx.ext = sext i32 %off1 to i64 %idx.ext63 = sext i32 %off2 to i64 @@ -1422,29 +1428,41 @@ define dso_local i32 @full(ptr nocapture noundef readonly %p1, i32 noundef %st1, ; CHECK-NEXT: [[TMP10:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_2]], align 1 ; CHECK-NEXT: [[TMP11:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_2]], align 1 ; CHECK-NEXT: [[TMP12:%.*]] = load <4 x i8>, ptr [[ADD_PTR_2]], align 1 -; CHECK-NEXT: [[TMP13:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> poison, <4 x i8> [[TMP0]], i64 0) -; CHECK-NEXT: [[TMP14:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP13]], <4 x i8> [[TMP4]], i64 4) -; CHECK-NEXT: [[TMP15:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP14]], <4 x i8> [[TMP8]], i64 8) -; CHECK-NEXT: [[TMP17:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP15]], <4 x i8> [[TMP12]], i64 12) +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> [[TMP4]], <16 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <4 x i8> [[TMP8]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <16 x i8> [[TMP15]], <16 x i8> [[TMP16]], <16 x i32> +; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <4 x i8> [[TMP12]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <16 x i8> [[TMP20]], <16 x i8> [[TMP21]], <16 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = zext <16 x i8> [[TMP17]] to <16 x i32> ; CHECK-NEXT: [[TMP19:%.*]] = load <4 x i8>, ptr [[ADD_PTR64_2]], align 1 -; CHECK-NEXT: [[TMP22:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> poison, <4 x i8> [[TMP1]], i64 0) -; CHECK-NEXT: [[TMP20:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP22]], <4 x i8> [[TMP5]], i64 4) -; CHECK-NEXT: [[TMP21:%.*]] = call <16 x i8> 
@llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP20]], <4 x i8> [[TMP9]], i64 8) -; CHECK-NEXT: [[TMP24:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP21]], <4 x i8> [[TMP19]], i64 12) +; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <4 x i8> [[TMP5]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> [[TMP5]], <16 x i32> +; CHECK-NEXT: [[TMP29:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP30:%.*]] = shufflevector <16 x i8> [[TMP28]], <16 x i8> [[TMP29]], <16 x i32> +; CHECK-NEXT: [[TMP31:%.*]] = shufflevector <4 x i8> [[TMP19]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <16 x i8> [[TMP30]], <16 x i8> [[TMP31]], <16 x i32> ; CHECK-NEXT: [[TMP25:%.*]] = zext <16 x i8> [[TMP24]] to <16 x i32> ; CHECK-NEXT: [[TMP26:%.*]] = sub nsw <16 x i32> [[TMP18]], [[TMP25]] ; CHECK-NEXT: [[TMP27:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_3]], align 1 -; CHECK-NEXT: [[TMP29:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> poison, <4 x i8> [[TMP2]], i64 0) -; CHECK-NEXT: [[TMP30:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP29]], <4 x i8> [[TMP6]], i64 4) -; CHECK-NEXT: [[TMP28:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP30]], <4 x i8> [[TMP10]], i64 8) -; CHECK-NEXT: [[TMP32:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP28]], <4 x i8> [[TMP27]], i64 12) +; CHECK-NEXT: [[TMP38:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP66:%.*]] = shufflevector <4 x i8> [[TMP6]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP67:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> [[TMP6]], <16 x i32> +; CHECK-NEXT: [[TMP35:%.*]] = shufflevector <4 x i8> [[TMP10]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP36:%.*]] = shufflevector <16 x i8> 
[[TMP67]], <16 x i8> [[TMP35]], <16 x i32> +; CHECK-NEXT: [[TMP37:%.*]] = shufflevector <4 x i8> [[TMP27]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP32:%.*]] = shufflevector <16 x i8> [[TMP36]], <16 x i8> [[TMP37]], <16 x i32> ; CHECK-NEXT: [[TMP33:%.*]] = zext <16 x i8> [[TMP32]] to <16 x i32> ; CHECK-NEXT: [[TMP34:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_3]], align 1 -; CHECK-NEXT: [[TMP35:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> poison, <4 x i8> [[TMP3]], i64 0) -; CHECK-NEXT: [[TMP36:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP35]], <4 x i8> [[TMP7]], i64 4) -; CHECK-NEXT: [[TMP37:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP36]], <4 x i8> [[TMP11]], i64 8) -; CHECK-NEXT: [[TMP39:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP37]], <4 x i8> [[TMP34]], i64 12) +; CHECK-NEXT: [[TMP68:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP69:%.*]] = shufflevector <4 x i8> [[TMP7]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP70:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> [[TMP7]], <16 x i32> +; CHECK-NEXT: [[TMP71:%.*]] = shufflevector <4 x i8> [[TMP11]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP72:%.*]] = shufflevector <16 x i8> [[TMP70]], <16 x i8> [[TMP71]], <16 x i32> +; CHECK-NEXT: [[TMP73:%.*]] = shufflevector <4 x i8> [[TMP34]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP39:%.*]] = shufflevector <16 x i8> [[TMP72]], <16 x i8> [[TMP73]], <16 x i32> ; CHECK-NEXT: [[TMP40:%.*]] = zext <16 x i8> [[TMP39]] to <16 x i32> ; CHECK-NEXT: [[TMP41:%.*]] = sub nsw <16 x i32> [[TMP33]], [[TMP40]] ; CHECK-NEXT: [[TMP42:%.*]] = shl nsw <16 x i32> [[TMP41]], splat (i32 16) diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/multiple_reduction.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/multiple_reduction.ll index 07411cacb3626..9562e6d41f7cd 100644 --- 
a/llvm/test/Transforms/SLPVectorizer/AArch64/multiple_reduction.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/multiple_reduction.ll @@ -29,14 +29,21 @@ define i64 @straight(ptr nocapture noundef readonly %p, i32 noundef %st) { ; CHECK-NEXT: [[TMP5:%.*]] = load <8 x i16>, ptr [[ADD_PTR_4]], align 2 ; CHECK-NEXT: [[TMP6:%.*]] = load <8 x i16>, ptr [[ADD_PTR_5]], align 2 ; CHECK-NEXT: [[TMP7:%.*]] = load <8 x i16>, ptr [[ADD_PTR_6]], align 2 -; CHECK-NEXT: [[TMP8:%.*]] = call <64 x i16> @llvm.vector.insert.v64i16.v8i16(<64 x i16> poison, <8 x i16> [[TMP0]], i64 0) -; CHECK-NEXT: [[TMP9:%.*]] = call <64 x i16> @llvm.vector.insert.v64i16.v8i16(<64 x i16> [[TMP8]], <8 x i16> [[TMP1]], i64 8) -; CHECK-NEXT: [[TMP10:%.*]] = call <64 x i16> @llvm.vector.insert.v64i16.v8i16(<64 x i16> [[TMP9]], <8 x i16> [[TMP2]], i64 16) -; CHECK-NEXT: [[TMP11:%.*]] = call <64 x i16> @llvm.vector.insert.v64i16.v8i16(<64 x i16> [[TMP10]], <8 x i16> [[TMP3]], i64 24) -; CHECK-NEXT: [[TMP12:%.*]] = call <64 x i16> @llvm.vector.insert.v64i16.v8i16(<64 x i16> [[TMP11]], <8 x i16> [[TMP4]], i64 32) -; CHECK-NEXT: [[TMP13:%.*]] = call <64 x i16> @llvm.vector.insert.v64i16.v8i16(<64 x i16> [[TMP12]], <8 x i16> [[TMP5]], i64 40) -; CHECK-NEXT: [[TMP14:%.*]] = call <64 x i16> @llvm.vector.insert.v64i16.v8i16(<64 x i16> [[TMP13]], <8 x i16> [[TMP6]], i64 48) -; CHECK-NEXT: [[TMP15:%.*]] = call <64 x i16> @llvm.vector.insert.v64i16.v8i16(<64 x i16> [[TMP14]], <8 x i16> [[TMP7]], i64 56) +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> poison, <64 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <64 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <64 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> poison, <64 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <64 x i16> [[TMP10]], <64 x i16> [[TMP11]], <64 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = 
shufflevector <8 x i16> [[TMP3]], <8 x i16> poison, <64 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <64 x i16> [[TMP12]], <64 x i16> [[TMP13]], <64 x i32> +; CHECK-NEXT: [[TMP83:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> poison, <64 x i32> +; CHECK-NEXT: [[TMP84:%.*]] = shufflevector <64 x i16> [[TMP14]], <64 x i16> [[TMP83]], <64 x i32> +; CHECK-NEXT: [[TMP85:%.*]] = shufflevector <8 x i16> [[TMP5]], <8 x i16> poison, <64 x i32> +; CHECK-NEXT: [[TMP86:%.*]] = shufflevector <64 x i16> [[TMP84]], <64 x i16> [[TMP85]], <64 x i32> +; CHECK-NEXT: [[TMP87:%.*]] = shufflevector <8 x i16> [[TMP6]], <8 x i16> poison, <64 x i32> +; CHECK-NEXT: [[TMP88:%.*]] = shufflevector <64 x i16> [[TMP86]], <64 x i16> [[TMP87]], <64 x i32> +; CHECK-NEXT: [[TMP89:%.*]] = shufflevector <8 x i16> [[TMP7]], <8 x i16> poison, <64 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <64 x i16> [[TMP88]], <64 x i16> [[TMP89]], <64 x i32> ; CHECK-NEXT: [[TMP16:%.*]] = zext <64 x i16> [[TMP15]] to <64 x i32> ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <64 x i32> [[TMP16]], i32 0 ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <64 x i32> [[TMP16]], i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/reduce-fadd.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/reduce-fadd.ll index 677d52bf3b4c3..0e3d79900d435 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/reduce-fadd.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/reduce-fadd.ll @@ -3,13 +3,19 @@ ; RUN: opt < %s -S -passes=slp-vectorizer -mtriple=aarch64-unknown-linux -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,FULLFP16 define half @reduce_fast_half2(<2 x half> %vec2) { -; CHECK-LABEL: define half @reduce_fast_half2( -; CHECK-SAME: <2 x half> [[VEC2:%.*]]) #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[ELT0:%.*]] = extractelement <2 x half> [[VEC2]], i64 0 -; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x half> [[VEC2]], i64 1 -; CHECK-NEXT: [[ADD1:%.*]] = fadd fast half [[ELT1]], 
[[ELT0]] -; CHECK-NEXT: ret half [[ADD1]] +; NOFP16-LABEL: define half @reduce_fast_half2( +; NOFP16-SAME: <2 x half> [[VEC2:%.*]]) #[[ATTR0:[0-9]+]] { +; NOFP16-NEXT: [[ENTRY:.*:]] +; NOFP16-NEXT: [[ELT0:%.*]] = extractelement <2 x half> [[VEC2]], i64 0 +; NOFP16-NEXT: [[ELT1:%.*]] = extractelement <2 x half> [[VEC2]], i64 1 +; NOFP16-NEXT: [[ADD1:%.*]] = fadd fast half [[ELT1]], [[ELT0]] +; NOFP16-NEXT: ret half [[ADD1]] +; +; FULLFP16-LABEL: define half @reduce_fast_half2( +; FULLFP16-SAME: <2 x half> [[VEC2:%.*]]) #[[ATTR0:[0-9]+]] { +; FULLFP16-NEXT: [[ENTRY:.*:]] +; FULLFP16-NEXT: [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> [[VEC2]]) +; FULLFP16-NEXT: ret half [[TMP0]] ; entry: %elt0 = extractelement <2 x half> %vec2, i64 0 @@ -20,7 +26,7 @@ entry: define half @reduce_half2(<2 x half> %vec2) { ; CHECK-LABEL: define half @reduce_half2( -; CHECK-SAME: <2 x half> [[VEC2:%.*]]) #[[ATTR0]] { +; CHECK-SAME: <2 x half> [[VEC2:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <2 x half> [[VEC2]], i64 0 ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x half> [[VEC2]], i64 1 @@ -269,9 +275,7 @@ define float @reduce_fast_float2(<2 x float> %vec2) { ; CHECK-LABEL: define float @reduce_fast_float2( ; CHECK-SAME: <2 x float> [[VEC2:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[ELT0:%.*]] = extractelement <2 x float> [[VEC2]], i64 0 -; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[VEC2]], i64 1 -; CHECK-NEXT: [[ADD1:%.*]] = fadd fast float [[ELT1]], [[ELT0]] +; CHECK-NEXT: [[ADD1:%.*]] = call fast float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> [[VEC2]]) ; CHECK-NEXT: ret float [[ADD1]] ; entry: @@ -409,9 +413,7 @@ define double @reduce_fast_double2(<2 x double> %vec2) { ; CHECK-LABEL: define double @reduce_fast_double2( ; CHECK-SAME: <2 x double> [[VEC2:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[ELT0:%.*]] = 
extractelement <2 x double> [[VEC2]], i64 0 -; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x double> [[VEC2]], i64 1 -; CHECK-NEXT: [[ADD1:%.*]] = fadd fast double [[ELT1]], [[ELT0]] +; CHECK-NEXT: [[ADD1:%.*]] = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> [[VEC2]]) ; CHECK-NEXT: ret double [[ADD1]] ; entry: @@ -552,8 +554,9 @@ define float @reduce_fast_float_case2(ptr %a, ptr %b) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> poison, <4 x float> [[TMP1]], i64 0) -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP2]], <4 x float> [[TMP0]], i64 4) +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP0]], <8 x i32> ; CHECK-NEXT: [[RED3:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[TMP3]]) ; CHECK-NEXT: ret float [[RED3]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/scalarization-overhead.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/scalarization-overhead.ll index a504f3ed02014..64bdcf28af550 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/scalarization-overhead.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/scalarization-overhead.ll @@ -15,7 +15,8 @@ define fastcc i64 @zot(float %arg, float %arg1, float %arg2, float %arg3, float ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> , [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> , float [[ARG3]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = fadd fast <2 x float> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = call <4 x float> 
@llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP2]], <2 x float> [[TMP5]], i64 0) +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP9]], <4 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <4 x float> [[TMP6]], ; CHECK-NEXT: br i1 [[ARG6:%.*]], label [[BB18:%.*]], label [[BB57:%.*]] ; CHECK: bb18: diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/shuffle-vectors-mask-size.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/shuffle-vectors-mask-size.ll index 4f88182374622..0783a28f56d85 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/shuffle-vectors-mask-size.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/shuffle-vectors-mask-size.ll @@ -13,7 +13,8 @@ define void @p(double %0) { ; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> zeroinitializer, [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x double> [[TMP6]], zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = fmul <4 x double> [[TMP4]], zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = call <4 x double> @llvm.vector.insert.v4f64.v2f64(<4 x double> , <2 x double> [[TMP7]], i64 2) +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x double> , <4 x double> [[TMP9]], <4 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = fadd <4 x double> [[TMP8]], [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = fadd <4 x double> [[TMP11]], zeroinitializer ; CHECK-NEXT: [[TMP13:%.*]] = fptosi <4 x double> [[TMP12]] to <4 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-calls.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-calls.ll index 2191d04cd797d..833bc56c4ec6b 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-calls.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-calls.ll @@ -7,7 +7,8 @@ define void @vec3_vectorize_call(ptr %Colour, float %0) { ; NON-POWER-OF-2-NEXT: entry: ; 
NON-POWER-OF-2-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[COLOUR:%.*]], align 4 ; NON-POWER-OF-2-NEXT: [[TMP2:%.*]] = insertelement <3 x float> poison, float [[TMP0:%.*]], i32 2 -; NON-POWER-OF-2-NEXT: [[TMP4:%.*]] = call <3 x float> @llvm.vector.insert.v3f32.v2f32(<3 x float> [[TMP2]], <2 x float> [[TMP1]], i64 0) +; NON-POWER-OF-2-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <3 x i32> +; NON-POWER-OF-2-NEXT: [[TMP4:%.*]] = shufflevector <3 x float> [[TMP2]], <3 x float> [[TMP3]], <3 x i32> ; NON-POWER-OF-2-NEXT: [[TMP5:%.*]] = call <3 x float> @llvm.fmuladd.v3f32(<3 x float> [[TMP4]], <3 x float> zeroinitializer, <3 x float> zeroinitializer) ; NON-POWER-OF-2-NEXT: store <3 x float> [[TMP5]], ptr [[COLOUR]], align 4 ; NON-POWER-OF-2-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll index 61a944101586b..c728572313d77 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll @@ -253,13 +253,14 @@ define void @select_uniform_ugt_16xi8(ptr %ptr, i8 %x) { ; CHECK-NEXT: [[S_8:%.*]] = select i1 [[CMP_8]], i8 [[TMP1]], i8 [[X:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i8>, ptr [[GEP_9]], align 1 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i8>, ptr [[GEP_12]], align 1 -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x i8> [[TMP0]], <8 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i8> [[TMP0]], <8 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i8> [[TMP2]], <2 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i8> [[TMP2]], <2 x i8> poison, <8 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x i8> [[TMP0]], <8 x i8> [[TMP4]], <16 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x i8> [[TMP5]], i8 
[[L_11]], i32 11 -; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> [[TMP6]], <8 x i8> [[TMP0]], i64 0) -; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP7]], <4 x i8> [[TMP3]], i64 12) +; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <16 x i8> [[TMP6]], <16 x i8> [[TMP7]], <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i8> [[TMP15]], <16 x i8> [[TMP10]], <16 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = icmp ugt <16 x i8> [[TMP8]], splat (i8 -1) ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <16 x i8> poison, i8 [[X]], i32 0 ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x i8> [[TMP12]], <16 x i8> poison, <16 x i32> zeroinitializer diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/combined-loads-stored.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/combined-loads-stored.ll index cd79250e8fb6b..b772e4be3b0aa 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/combined-loads-stored.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/combined-loads-stored.ll @@ -7,8 +7,9 @@ define void @test(ptr noalias %p, ptr %p1) { ; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 16 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i16>, ptr [[P]], align 2 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i16>, ptr [[GEP2]], align 2 -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.vector.insert.v4i16.v2i16(<4 x i16> poison, <2 x i16> [[TMP1]], i64 0) -; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i16> @llvm.vector.insert.v4i16.v2i16(<4 x i16> [[TMP3]], <2 x i16> [[TMP2]], i64 2) +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> poison, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> [[TMP2]], <4 x i32> ; CHECK-NEXT: store <4 x i16> [[TMP5]], ptr [[P1]], align 2 ; 
CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll index 15425c38bbb04..5ee9f3ca46ca8 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll @@ -88,7 +88,8 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; CHECK-NEXT: [[TMP68:%.*]] = shufflevector <4 x i32> [[TMP67]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP69:%.*]] = insertelement <4 x i8> poison, i8 [[TMP115]], i32 0 ; CHECK-NEXT: [[TMP70:%.*]] = insertelement <4 x i8> [[TMP69]], i8 [[TMP0]], i32 1 -; CHECK-NEXT: [[TMP71:%.*]] = call <4 x i8> @llvm.vector.insert.v4i8.v2i8(<4 x i8> [[TMP70]], <2 x i8> [[TMP62]], i64 2) +; CHECK-NEXT: [[TMP117:%.*]] = shufflevector <2 x i8> [[TMP62]], <2 x i8> poison, <4 x i32> +; CHECK-NEXT: [[TMP71:%.*]] = shufflevector <4 x i8> [[TMP70]], <4 x i8> [[TMP117]], <4 x i32> ; CHECK-NEXT: [[TMP72:%.*]] = zext <4 x i8> [[TMP71]] to <4 x i32> ; CHECK-NEXT: [[TMP73:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_3]], align 1 ; CHECK-NEXT: [[TMP74:%.*]] = zext <4 x i8> [[TMP73]] to <4 x i32> @@ -112,7 +113,8 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; CHECK-NEXT: [[TMP91:%.*]] = add <4 x i32> [[TMP86]], [[TMP61]] ; CHECK-NEXT: [[TMP92:%.*]] = sub <4 x i32> [[TMP61]], [[TMP86]] ; CHECK-NEXT: [[TMP93:%.*]] = shufflevector <4 x i32> [[TMP92]], <4 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP94:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP93]], <4 x i32> [[TMP91]], i64 4) +; CHECK-NEXT: [[TMP118:%.*]] = shufflevector <4 x i32> [[TMP91]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP94:%.*]] = shufflevector <8 x i32> [[TMP93]], <8 x i32> [[TMP118]], <8 x i32> ; CHECK-NEXT: [[TMP95:%.*]] = add <8 x i32> [[TMP94]], [[TMP90]] ; CHECK-NEXT: [[TMP96:%.*]] = sub <8 x i32> [[TMP90]], [[TMP94]] ; CHECK-NEXT: 
[[TMP97:%.*]] = shufflevector <8 x i32> [[TMP95]], <8 x i32> [[TMP96]], <16 x i32> @@ -220,7 +222,8 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; THR15-NEXT: [[TMP68:%.*]] = shufflevector <4 x i32> [[TMP67]], <4 x i32> poison, <4 x i32> ; THR15-NEXT: [[TMP69:%.*]] = insertelement <4 x i8> poison, i8 [[TMP1]], i32 0 ; THR15-NEXT: [[TMP70:%.*]] = insertelement <4 x i8> [[TMP69]], i8 [[TMP0]], i32 1 -; THR15-NEXT: [[TMP71:%.*]] = call <4 x i8> @llvm.vector.insert.v4i8.v2i8(<4 x i8> [[TMP70]], <2 x i8> [[TMP62]], i64 2) +; THR15-NEXT: [[TMP116:%.*]] = shufflevector <2 x i8> [[TMP62]], <2 x i8> poison, <4 x i32> +; THR15-NEXT: [[TMP71:%.*]] = shufflevector <4 x i8> [[TMP70]], <4 x i8> [[TMP116]], <4 x i32> ; THR15-NEXT: [[TMP72:%.*]] = zext <4 x i8> [[TMP71]] to <4 x i32> ; THR15-NEXT: [[TMP73:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_3]], align 1 ; THR15-NEXT: [[TMP74:%.*]] = zext <4 x i8> [[TMP73]] to <4 x i32> @@ -244,7 +247,8 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; THR15-NEXT: [[TMP91:%.*]] = add <4 x i32> [[TMP86]], [[TMP61]] ; THR15-NEXT: [[TMP92:%.*]] = sub <4 x i32> [[TMP61]], [[TMP86]] ; THR15-NEXT: [[TMP93:%.*]] = shufflevector <4 x i32> [[TMP92]], <4 x i32> poison, <8 x i32> -; THR15-NEXT: [[TMP94:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP93]], <4 x i32> [[TMP91]], i64 4) +; THR15-NEXT: [[TMP117:%.*]] = shufflevector <4 x i32> [[TMP91]], <4 x i32> poison, <8 x i32> +; THR15-NEXT: [[TMP94:%.*]] = shufflevector <8 x i32> [[TMP93]], <8 x i32> [[TMP117]], <8 x i32> ; THR15-NEXT: [[TMP95:%.*]] = add <8 x i32> [[TMP94]], [[TMP90]] ; THR15-NEXT: [[TMP96:%.*]] = sub <8 x i32> [[TMP90]], [[TMP94]] ; THR15-NEXT: [[TMP97:%.*]] = shufflevector <8 x i32> [[TMP95]], <8 x i32> [[TMP96]], <16 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/gather-insert-point-restore.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/gather-insert-point-restore.ll index 
cc88718484172..82c940353ba5a 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/gather-insert-point-restore.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/gather-insert-point-restore.ll @@ -14,8 +14,9 @@ define i16 @test(ptr %i) { ; CHECK: [[FOR_COND5_US]]: ; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i16> @llvm.experimental.vp.strided.load.v4i16.p0.i64(ptr align 2 [[GEP_US154_2]], i64 4914, <4 x i1> splat (i1 true), i32 4) ; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> [[TMP3]], i32 2, <4 x i1> splat (i1 true), <4 x i16> poison) -; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i16> @llvm.vector.insert.v8i16.v4i16(<8 x i16> poison, <4 x i16> [[TMP4]], i64 0) -; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i16> @llvm.vector.insert.v8i16.v4i16(<8 x i16> [[TMP6]], <4 x i16> [[TMP5]], i64 4) +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> poison, <8 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i16> [[TMP5]], <4 x i16> poison, <8 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <8 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> [[TMP7]]) ; CHECK-NEXT: [[TMP9:%.*]] = call i16 @llvm.umax.i16(i16 [[TMP8]], i16 0) ; CHECK-NEXT: ret i16 [[TMP9]] diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/gather-node-with-no-users.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/gather-node-with-no-users.ll index 9269a710c61d3..8e80aee7070a9 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/gather-node-with-no-users.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/gather-node-with-no-users.ll @@ -11,11 +11,12 @@ define void @test(ptr %c) { ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <8 x ptr> [[TMP1]], <8 x i64> ; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> [[TMP2]], i32 1, <8 x i1> splat (i1 true), <8 x i8> poison) ; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> 
[[TMP3]], i32 1, <8 x i1> splat (i1 true), <8 x i8> poison) +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i8> [[TMP5]], <8 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <16 x i32> ; CHECK-NEXT: br label %[[FOR_COND:.*]] ; CHECK: [[FOR_COND]]: ; CHECK-NEXT: [[A_PROMOTED2226:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[TMP8:%.*]], %[[FOR_COND]] ] -; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> poison, <8 x i8> [[TMP4]], i64 0) -; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> [[TMP6]], <8 x i8> [[TMP5]], i64 8) +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x i8> [[TMP4]], <8 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP8]] = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> [[TMP7]]) ; CHECK-NEXT: br label %[[FOR_COND]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/horizontal-list.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/horizontal-list.ll index 78b5acad0df9a..457f2600b539f 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/horizontal-list.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/horizontal-list.ll @@ -45,12 +45,14 @@ define float @test(ptr %x) { ; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[ARRAYIDX_28]], align 4 ; CHECK-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, ptr [[X]], i64 30 ; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[ARRAYIDX_29]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = call fast <8 x float> @llvm.vector.extract.v8f32.v16f32(<16 x float> [[TMP0]], i64 0) +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x float> poison, <8 x i32> ; CHECK-NEXT: [[RDX_OP:%.*]] = fadd fast <8 x float> [[TMP5]], [[TMP1]] -; CHECK-NEXT: [[TMP6:%.*]] = call fast <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP0]], <8 x float> [[RDX_OP]], i64 0) -; CHECK-NEXT: [[RDX_OP4:%.*]] = call fast <4 x float> @llvm.vector.extract.v4f32.v16f32(<16 x float> [[TMP6]], i64 0) +; CHECK-NEXT: 
[[TMP6:%.*]] = shufflevector <8 x float> [[RDX_OP]], <8 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x float> [[TMP6]], <16 x i32> +; CHECK-NEXT: [[RDX_OP4:%.*]] = shufflevector <16 x float> [[TMP7]], <16 x float> poison, <4 x i32> ; CHECK-NEXT: [[RDX_OP5:%.*]] = fadd fast <4 x float> [[RDX_OP4]], [[TMP2]] -; CHECK-NEXT: [[TMP8:%.*]] = call fast <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> [[TMP6]], <4 x float> [[RDX_OP5]], i64 0) +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[RDX_OP5]], <4 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x float> [[TMP7]], <16 x float> [[TMP9]], <16 x i32> ; CHECK-NEXT: [[OP_RDX1:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> [[TMP8]]) ; CHECK-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[OP_RDX1]], [[TMP3]] ; CHECK-NEXT: [[OP_RDX3:%.*]] = fadd fast float [[OP_RDX2]], [[TMP4]] diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/loads-ordering.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/loads-ordering.ll index 1e7cc9c268cfa..b6a40f0162bbd 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/loads-ordering.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/loads-ordering.ll @@ -12,7 +12,8 @@ define fastcc void @rephase(ptr %phases_in, ptr %157, i64 %158) { ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[IMAG_1_251]], align 8 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x double> poison, double [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = call <4 x double> @llvm.vector.insert.v4f64.v2f64(<4 x double> , <2 x double> [[TMP3]], i64 2) +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x double> , <4 x double> [[TMP8]], <4 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = fmul <4 x double> 
[[TMP5]], [[TMP6]] ; CHECK-NEXT: store <4 x double> [[TMP7]], ptr [[IMAG_247]], align 8 ; CHECK-NEXT: store double [[TMP2]], ptr [[PHASES_IN]], align 8 diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll index db09843a6ef72..5bc2e94485432 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll @@ -1027,8 +1027,9 @@ define i32 @reduce_sum_2arrays_a(ptr noalias %p, ptr noalias %q) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[P:%.*]], align 1 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[Q:%.*]], align 1 -; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i8> @llvm.vector.insert.v8i8.v4i8(<8 x i8> poison, <4 x i8> [[TMP0]], i64 0) -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.vector.insert.v8i8.v4i8(<8 x i8> [[TMP2]], <4 x i8> [[TMP1]], i64 4) +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <8 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> [[TMP1]], <8 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = zext <8 x i8> [[TMP3]] to <8 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP4]]) ; CHECK-NEXT: ret i32 [[TMP5]] @@ -1075,8 +1076,9 @@ define i32 @reduce_sum_2arrays_b(ptr noalias noundef %x, ptr noalias %y) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[X:%.*]], align 1 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[Y:%.*]], align 1 -; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i8> @llvm.vector.insert.v8i8.v4i8(<8 x i8> poison, <4 x i8> [[TMP0]], i64 0) -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.vector.insert.v8i8.v4i8(<8 x i8> [[TMP2]], <4 x i8> [[TMP1]], i64 4) +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <8 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector 
<4 x i8> [[TMP1]], <4 x i8> poison, <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> [[TMP1]], <8 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = zext <8 x i8> [[TMP3]] to <8 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP4]]) ; CHECK-NEXT: ret i32 [[TMP5]] diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll index 258b0ec0bcfc7..f6e4643006816 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll @@ -17,12 +17,13 @@ define void @test1(<4 x float> %load6, <4 x float> %load7, <4 x float> %load8, < ; CHECK-NEXT: entry: ; CHECK-NEXT: [[VEXT165_I:%.*]] = shufflevector <4 x float> [[LOAD6:%.*]], <4 x float> [[LOAD7:%.*]], <4 x i32> ; CHECK-NEXT: [[VEXT309_I:%.*]] = shufflevector <4 x float> [[LOAD7]], <4 x float> [[LOAD8:%.*]], <4 x i32> -; CHECK-NEXT: [[TMP0:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> poison, <4 x float> [[VEXT165_I]], i64 0) -; CHECK-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP0]], <4 x float> [[VEXT309_I]], i64 4) -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> poison, <4 x float> [[LOAD17:%.*]], i64 0) -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <8 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> poison, <4 x float> [[FMULADD7:%.*]], i64 0) -; CHECK-NEXT: [[TMP6:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP5]], <4 x float> [[FMULADD16:%.*]], i64 4) +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x float> [[VEXT165_I]], <4 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[VEXT309_I]], <4 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = 
shufflevector <8 x float> [[TMP0]], <8 x float> [[TMP2]], <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[LOAD17:%.*]], <4 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x float> [[FMULADD7:%.*]], <4 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[FMULADD16:%.*]], <4 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x float> [[TMP8]], <8 x float> [[TMP5]], <8 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> [[TMP1]], <8 x float> [[TMP4]], <8 x float> [[TMP6]]) ; CHECK-NEXT: store <8 x float> [[TMP7]], ptr [[OUT_PTR:%.*]], align 4 ; CHECK-NEXT: ret void @@ -55,12 +56,13 @@ define void @test2(<8 x float> %load6, <8 x float> %load7, <8 x float> %load8, < ; CHECK-NEXT: entry: ; CHECK-NEXT: [[VEXT165_I:%.*]] = shufflevector <8 x float> [[LOAD6:%.*]], <8 x float> [[LOAD7:%.*]], <8 x i32> ; CHECK-NEXT: [[VEXT309_I:%.*]] = shufflevector <8 x float> [[LOAD7]], <8 x float> [[LOAD8:%.*]], <8 x i32> -; CHECK-NEXT: [[TMP0:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> [[VEXT165_I]], i64 0) -; CHECK-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP0]], <8 x float> [[VEXT309_I]], i64 8) -; CHECK-NEXT: [[TMP3:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> [[LOAD17:%.*]], i64 0) -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x float> [[TMP3]], <16 x float> poison, <16 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> [[FMULADD7:%.*]], i64 0) -; CHECK-NEXT: [[TMP6:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP5]], <8 x float> [[FMULADD16:%.*]], i64 8) +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x float> [[VEXT165_I]], <8 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> 
[[VEXT309_I]], <8 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x float> [[TMP2]], <16 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[LOAD17:%.*]], <8 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[FMULADD7:%.*]], <8 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[FMULADD16:%.*]], <8 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x float> [[TMP8]], <16 x float> [[TMP5]], <16 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> [[TMP1]], <16 x float> [[TMP4]], <16 x float> [[TMP6]]) ; CHECK-NEXT: store <16 x float> [[TMP7]], ptr [[OUT_PTR:%.*]], align 4 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll index 651f565412830..da08718d5c248 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll @@ -8,8 +8,7 @@ define i32 @test() { ; CHECK-NEXT: br label [[IF_END_I87:%.*]] ; CHECK: if.end.i87: ; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> getelementptr (i32, <4 x ptr> , <4 x i64> ), i32 4, <4 x i1> splat (i1 true), <4 x i32> poison) -; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> poison, <2 x i32> zeroinitializer, i64 2) -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> , <4 x i32> ; CHECK-NEXT: switch i32 0, label [[SW_BB509_I:%.*]] [ ; CHECK-NEXT: i32 1, label [[SW_BB509_I]] ; CHECK-NEXT: i32 0, label [[IF_THEN458_I:%.*]] @@ -51,21 +50,15 @@ define void @test2() { ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x float>, ptr [[TMP1]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = load <8 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: 
[[TMP5:%.*]] = load <16 x float>, ptr [[TMP0]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> poison, <8 x float> [[TMP4]], i64 0) -; CHECK-NEXT: [[TMP7:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> [[TMP6]], <8 x float> [[TMP3]], i64 8) -; CHECK-NEXT: [[TMP8:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v16f32(<32 x float> [[TMP7]], <16 x float> [[TMP5]], i64 16) +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x float> [[TMP4]], <8 x float> poison, <32 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <32 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <32 x float> [[TMP6]], <32 x float> [[TMP7]], <32 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x float> [[TMP5]], <16 x float> poison, <32 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <32 x float> [[TMP10]], <32 x float> [[TMP11]], <32 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = fpext <32 x float> [[TMP8]] to <32 x double> -; CHECK-NEXT: [[TMP10:%.*]] = call <32 x double> @llvm.vector.insert.v32f64.v8f64(<32 x double> poison, <8 x double> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP11:%.*]] = call <32 x double> @llvm.vector.insert.v32f64.v8f64(<32 x double> [[TMP10]], <8 x double> zeroinitializer, i64 8) -; CHECK-NEXT: [[TMP12:%.*]] = call <32 x double> @llvm.vector.insert.v32f64.v8f64(<32 x double> [[TMP11]], <8 x double> zeroinitializer, i64 16) -; CHECK-NEXT: [[TMP13:%.*]] = call <32 x double> @llvm.vector.insert.v32f64.v8f64(<32 x double> [[TMP12]], <8 x double> zeroinitializer, i64 24) -; CHECK-NEXT: [[TMP14:%.*]] = fadd <32 x double> [[TMP13]], [[TMP9]] +; CHECK-NEXT: [[TMP14:%.*]] = fadd <32 x double> zeroinitializer, [[TMP9]] ; CHECK-NEXT: [[TMP15:%.*]] = fptrunc <32 x double> [[TMP14]] to <32 x float> -; CHECK-NEXT: [[TMP16:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> poison, <8 x float> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP17:%.*]] 
= call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> [[TMP16]], <8 x float> zeroinitializer, i64 8) -; CHECK-NEXT: [[TMP18:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> [[TMP17]], <8 x float> zeroinitializer, i64 16) -; CHECK-NEXT: [[TMP19:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> [[TMP18]], <8 x float> zeroinitializer, i64 24) -; CHECK-NEXT: [[TMP20:%.*]] = fcmp ogt <32 x float> [[TMP19]], [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = fcmp ogt <32 x float> zeroinitializer, [[TMP15]] ; CHECK-NEXT: ret void ; entry: @@ -101,20 +94,17 @@ define void @test3(float %0) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR_BODY_LR_PH:%.*]] ; CHECK: for.body.lr.ph: -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> poison, <2 x float> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP1]], <2 x float> zeroinitializer, i64 2) ; CHECK-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[TMP3:%.*]] = phi <4 x float> [ [[TMP2]], [[FOR_BODY_LR_PH]] ], [ [[TMP10:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi <4 x float> [ zeroinitializer, [[FOR_BODY_LR_PH]] ], [ [[TMP7:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: ret void ; CHECK: for.body: ; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, ptr null, align 4 ; CHECK-NEXT: [[TMP5:%.*]] = fcmp olt <2 x float> zeroinitializer, [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = call <4 x i1> @llvm.vector.insert.v4i1.v2i1(<4 x i1> poison, <2 x i1> splat (i1 true), i64 0) -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i1> @llvm.vector.insert.v4i1.v2i1(<4 x i1> [[TMP6]], <2 x i1> [[TMP5]], i64 2) -; CHECK-NEXT: [[TMP8:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> poison, <2 x float> [[TMP4]], i64 0) -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> poison, <4 x i32> -; 
CHECK-NEXT: [[TMP10]] = select <4 x i1> [[TMP7]], <4 x float> [[TMP9]], <4 x float> [[TMP2]] +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i1> [[TMP5]], <2 x i1> poison, <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i1> , <4 x i1> [[TMP8]], <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP7]] = select <4 x i1> [[TMP9]], <4 x float> [[TMP6]], <4 x float> zeroinitializer ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; entry: @@ -142,24 +132,25 @@ define ptr @test4() { ; POWEROF2-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <2 x i32> ; POWEROF2-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <2 x i32> ; POWEROF2-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <2 x i32> -; POWEROF2-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> poison, <2 x float> [[TMP2]], i64 0) -; POWEROF2-NEXT: [[TMP6:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP5]], <2 x float> [[TMP3]], i64 2) +; POWEROF2-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> +; POWEROF2-NEXT: [[TMP16:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <4 x i32> +; POWEROF2-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP16]], <4 x i32> ; POWEROF2-NEXT: br label [[TMP8:%.*]] -; POWEROF2: 7: -; POWEROF2-NEXT: br label [[TMP8]] ; POWEROF2: 8: +; POWEROF2-NEXT: br label [[TMP8]] +; POWEROF2: 9: ; POWEROF2-NEXT: [[TMP9:%.*]] = phi <2 x float> [ poison, [[TMP7:%.*]] ], [ [[TMP4]], [[TMP0:%.*]] ] ; POWEROF2-NEXT: [[TMP10:%.*]] = phi <4 x float> [ poison, [[TMP7]] ], [ [[TMP6]], [[TMP0]] ] ; POWEROF2-NEXT: br label [[TMP11:%.*]] -; POWEROF2: 11: -; POWEROF2-NEXT: [[TMP12:%.*]] = call <2 x float> @llvm.vector.extract.v2f32.v4f32(<4 x float> [[TMP10]], i64 0) +; POWEROF2: 12: +; POWEROF2-NEXT: [[TMP12:%.*]] = 
shufflevector <4 x float> [[TMP10]], <4 x float> poison, <2 x i32> ; POWEROF2-NEXT: [[TMP13:%.*]] = fmul <2 x float> [[TMP12]], zeroinitializer -; POWEROF2-NEXT: [[TMP14:%.*]] = call <2 x float> @llvm.vector.extract.v2f32.v4f32(<4 x float> [[TMP10]], i64 2) +; POWEROF2-NEXT: [[TMP14:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> poison, <2 x i32> ; POWEROF2-NEXT: [[TMP15:%.*]] = fmul <2 x float> zeroinitializer, [[TMP14]] -; POWEROF2-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[TMP9]], i32 0 -; POWEROF2-NEXT: [[TMP17:%.*]] = fmul float 0.000000e+00, [[TMP18]] -; POWEROF2-NEXT: [[TMP30:%.*]] = extractelement <2 x float> [[TMP9]], i32 1 -; POWEROF2-NEXT: [[TMP19:%.*]] = fmul float [[TMP30]], 0.000000e+00 +; POWEROF2-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[TMP9]], i32 0 +; POWEROF2-NEXT: [[TMP17:%.*]] = fmul float 0.000000e+00, [[TMP16]] +; POWEROF2-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[TMP9]], i32 1 +; POWEROF2-NEXT: [[TMP19:%.*]] = fmul float [[TMP18]], 0.000000e+00 ; POWEROF2-NEXT: [[TMP20:%.*]] = extractelement <2 x float> [[TMP13]], i32 0 ; POWEROF2-NEXT: [[TMP21:%.*]] = fadd reassoc nsz float [[TMP20]], [[TMP17]] ; POWEROF2-NEXT: [[TMP22:%.*]] = extractelement <2 x float> [[TMP15]], i32 0 @@ -176,18 +167,19 @@ define ptr @test4() { ; NONPOWEROF2-NEXT: [[TMP1:%.*]] = fadd <8 x float> zeroinitializer, zeroinitializer ; NONPOWEROF2-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <3 x i32> ; NONPOWEROF2-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <3 x i32> -; NONPOWEROF2-NEXT: [[TMP4:%.*]] = call <6 x float> @llvm.vector.insert.v6f32.v3f32(<6 x float> poison, <3 x float> [[TMP2]], i64 0) -; NONPOWEROF2-NEXT: [[TMP5:%.*]] = call <6 x float> @llvm.vector.insert.v6f32.v3f32(<6 x float> [[TMP4]], <3 x float> [[TMP3]], i64 3) +; NONPOWEROF2-NEXT: [[TMP4:%.*]] = shufflevector <3 x float> [[TMP2]], <3 x float> poison, <6 x i32> +; NONPOWEROF2-NEXT: [[TMP18:%.*]] = 
shufflevector <3 x float> [[TMP3]], <3 x float> poison, <6 x i32> +; NONPOWEROF2-NEXT: [[TMP5:%.*]] = shufflevector <6 x float> [[TMP4]], <6 x float> [[TMP18]], <6 x i32> ; NONPOWEROF2-NEXT: br label [[TMP7:%.*]] -; NONPOWEROF2: 6: -; NONPOWEROF2-NEXT: br label [[TMP7]] ; NONPOWEROF2: 7: +; NONPOWEROF2-NEXT: br label [[TMP7]] +; NONPOWEROF2: 8: ; NONPOWEROF2-NEXT: [[TMP8:%.*]] = phi <6 x float> [ poison, [[TMP6:%.*]] ], [ [[TMP5]], [[TMP0:%.*]] ] ; NONPOWEROF2-NEXT: br label [[TMP9:%.*]] -; NONPOWEROF2: 9: -; NONPOWEROF2-NEXT: [[TMP10:%.*]] = call <3 x float> @llvm.vector.extract.v3f32.v6f32(<6 x float> [[TMP8]], i64 0) +; NONPOWEROF2: 10: +; NONPOWEROF2-NEXT: [[TMP10:%.*]] = shufflevector <6 x float> [[TMP8]], <6 x float> poison, <3 x i32> ; NONPOWEROF2-NEXT: [[TMP11:%.*]] = fmul <3 x float> zeroinitializer, [[TMP10]] -; NONPOWEROF2-NEXT: [[TMP12:%.*]] = call <3 x float> @llvm.vector.extract.v3f32.v6f32(<6 x float> [[TMP8]], i64 3) +; NONPOWEROF2-NEXT: [[TMP12:%.*]] = shufflevector <6 x float> [[TMP8]], <6 x float> poison, <3 x i32> ; NONPOWEROF2-NEXT: [[TMP13:%.*]] = fmul <3 x float> zeroinitializer, [[TMP12]] ; NONPOWEROF2-NEXT: [[TMP14:%.*]] = call reassoc nsz float @llvm.vector.reduce.fadd.v3f32(float 0.000000e+00, <3 x float> [[TMP11]]) ; NONPOWEROF2-NEXT: [[TMP15:%.*]] = call reassoc nsz float @llvm.vector.reduce.fadd.v3f32(float 0.000000e+00, <3 x float> [[TMP13]]) @@ -235,21 +227,9 @@ define ptr @test4() { define i32 @test5() { ; CHECK-LABEL: @test5( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call <4 x double> @llvm.vector.insert.v4f64.v2f64(<4 x double> poison, <2 x double> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x double> @llvm.vector.insert.v4f64.v2f64(<4 x double> [[TMP0]], <2 x double> zeroinitializer, i64 2) -; CHECK-NEXT: [[TMP2:%.*]] = fdiv <4 x double> [[TMP1]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v2f64(<8 x double> poison, <2 x double> zeroinitializer, i64 0) -; 
CHECK-NEXT: [[TMP4:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v2f64(<8 x double> [[TMP3]], <2 x double> zeroinitializer, i64 2) -; CHECK-NEXT: [[TMP5:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v2f64(<8 x double> [[TMP4]], <2 x double> zeroinitializer, i64 4) -; CHECK-NEXT: [[TMP6:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v2f64(<8 x double> [[TMP5]], <2 x double> zeroinitializer, i64 6) -; CHECK-NEXT: [[TMP7:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v2f64(<8 x double> poison, <2 x double> zeroinitializer, i64 2) -; CHECK-NEXT: [[TMP8:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v2f64(<8 x double> [[TMP7]], <2 x double> zeroinitializer, i64 6) -; CHECK-NEXT: [[TMP9:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v4f64(<8 x double> poison, <4 x double> [[TMP2]], i64 0) -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x double> [[TMP9]], <8 x double> [[TMP8]], <8 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = fadd <8 x double> [[TMP6]], [[TMP10]] ; CHECK-NEXT: br label [[FOR_END47:%.*]] ; CHECK: for.end47: -; CHECK-NEXT: [[TMP12:%.*]] = phi <8 x double> [ [[TMP11]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi <8 x double> [ , [[ENTRY:%.*]] ] ; CHECK-NEXT: ret i32 0 ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/unordered-loads-operands.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/unordered-loads-operands.ll index 4dd659a7ae802..510cf45edbb52 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/unordered-loads-operands.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/unordered-loads-operands.ll @@ -20,10 +20,10 @@ define void @test(ptr %mdct_forward_x) { ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <3 x float> [[TMP6]], <3 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> [[TMP3]], i32 4, <4 x i1> splat (i1 true), <4 x float> poison) ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <3 x float> [[TMP6]], <3 x float> poison, <4 x i32> -; CHECK-NEXT: 
[[TMP10:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <3 x float> [[TMP5]], <3 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x float> , <4 x float> [[TMP22]], <4 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP11]], <2 x float> [[TMP4]], i64 0) +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x float> [[TMP11]], <4 x float> [[TMP10]], <4 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = fsub <4 x float> [[TMP9]], [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = fadd <4 x float> [[TMP9]], [[TMP12]] ; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x float> [[TMP13]], <4 x float> [[TMP14]], <4 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll index 481d586e6658a..27de36e601512 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll @@ -513,21 +513,16 @@ define i32 @dot_product_i32(ptr %a, ptr %b) { ; ; POW2-ONLY-LABEL: @dot_product_i32( ; POW2-ONLY-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 0 -; POW2-ONLY-NEXT: [[L_A_0:%.*]] = load i32, ptr [[GEP_A_0]], align 4 -; POW2-ONLY-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 1 -; POW2-ONLY-NEXT: [[L_A_1:%.*]] = load i32, ptr [[GEP_A_1]], align 4 ; POW2-ONLY-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 2 ; POW2-ONLY-NEXT: [[L_A_2:%.*]] = load i32, ptr [[GEP_A_2]], align 4 ; POW2-ONLY-NEXT: [[GEP_B_0:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 0 -; POW2-ONLY-NEXT: [[L_B_0:%.*]] = load i32, ptr [[GEP_B_0]], align 4 -; POW2-ONLY-NEXT: [[GEP_B_1:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 1 -; POW2-ONLY-NEXT: [[L_B_1:%.*]] = load i32, ptr [[GEP_B_1]], align 4 ; 
POW2-ONLY-NEXT: [[GEP_B_2:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 2 ; POW2-ONLY-NEXT: [[L_B_2:%.*]] = load i32, ptr [[GEP_B_2]], align 4 -; POW2-ONLY-NEXT: [[MUL_0:%.*]] = mul nsw i32 [[L_A_0]], [[L_B_0]] -; POW2-ONLY-NEXT: [[MUL_1:%.*]] = mul nsw i32 [[L_A_1]], [[L_B_1]] +; POW2-ONLY-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[GEP_A_0]], align 4 +; POW2-ONLY-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[GEP_B_0]], align 4 +; POW2-ONLY-NEXT: [[TMP3:%.*]] = mul nsw <2 x i32> [[TMP1]], [[TMP2]] ; POW2-ONLY-NEXT: [[MUL_2:%.*]] = mul nsw i32 [[L_A_2]], [[L_B_2]] -; POW2-ONLY-NEXT: [[ADD_0:%.*]] = add i32 [[MUL_0]], [[MUL_1]] +; POW2-ONLY-NEXT: [[ADD_0:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[TMP3]]) ; POW2-ONLY-NEXT: [[ADD_1:%.*]] = add i32 [[ADD_0]], [[MUL_2]] ; POW2-ONLY-NEXT: ret i32 [[ADD_1]] ; @@ -568,21 +563,16 @@ define i32 @dot_product_i32_reorder(ptr %a, ptr %b) { ; ; POW2-ONLY-LABEL: @dot_product_i32_reorder( ; POW2-ONLY-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 0 -; POW2-ONLY-NEXT: [[L_A_0:%.*]] = load i32, ptr [[GEP_A_0]], align 4 -; POW2-ONLY-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 1 -; POW2-ONLY-NEXT: [[L_A_1:%.*]] = load i32, ptr [[GEP_A_1]], align 4 ; POW2-ONLY-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 2 ; POW2-ONLY-NEXT: [[L_A_2:%.*]] = load i32, ptr [[GEP_A_2]], align 4 ; POW2-ONLY-NEXT: [[GEP_B_0:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 0 -; POW2-ONLY-NEXT: [[L_B_0:%.*]] = load i32, ptr [[GEP_B_0]], align 4 -; POW2-ONLY-NEXT: [[GEP_B_1:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 1 -; POW2-ONLY-NEXT: [[L_B_1:%.*]] = load i32, ptr [[GEP_B_1]], align 4 ; POW2-ONLY-NEXT: [[GEP_B_2:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 2 ; POW2-ONLY-NEXT: [[L_B_2:%.*]] = load i32, ptr [[GEP_B_2]], align 4 -; POW2-ONLY-NEXT: [[MUL_0:%.*]] = mul nsw i32 [[L_A_0]], [[L_B_0]] -; POW2-ONLY-NEXT: [[MUL_1:%.*]] = mul nsw i32 [[L_A_1]], 
[[L_B_1]] +; POW2-ONLY-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[GEP_A_0]], align 4 +; POW2-ONLY-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[GEP_B_0]], align 4 +; POW2-ONLY-NEXT: [[TMP3:%.*]] = mul nsw <2 x i32> [[TMP1]], [[TMP2]] ; POW2-ONLY-NEXT: [[MUL_2:%.*]] = mul nsw i32 [[L_A_2]], [[L_B_2]] -; POW2-ONLY-NEXT: [[ADD_0:%.*]] = add i32 [[MUL_1]], [[MUL_0]] +; POW2-ONLY-NEXT: [[ADD_0:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[TMP3]]) ; POW2-ONLY-NEXT: [[ADD_1:%.*]] = add i32 [[ADD_0]], [[MUL_2]] ; POW2-ONLY-NEXT: ret i32 [[ADD_1]] ; @@ -630,9 +620,7 @@ define float @dot_product_fp32(ptr %a, ptr %b) { ; POW2-ONLY-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[GEP_B_0]], align 4 ; POW2-ONLY-NEXT: [[TMP3:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP2]] ; POW2-ONLY-NEXT: [[MUL_2:%.*]] = fmul fast float [[L_A_2]], [[L_B_2]] -; POW2-ONLY-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0 -; POW2-ONLY-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1 -; POW2-ONLY-NEXT: [[ADD_0:%.*]] = fadd fast float [[TMP4]], [[TMP5]] +; POW2-ONLY-NEXT: [[ADD_0:%.*]] = call fast float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> [[TMP3]]) ; POW2-ONLY-NEXT: [[ADD_1:%.*]] = fadd fast float [[ADD_0]], [[MUL_2]] ; POW2-ONLY-NEXT: ret float [[ADD_1]] ; @@ -682,9 +670,7 @@ define float @dot_product_fp32_reorder(ptr %a, ptr %b) { ; POW2-ONLY-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[GEP_B_0]], align 4 ; POW2-ONLY-NEXT: [[TMP3:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP2]] ; POW2-ONLY-NEXT: [[MUL_2:%.*]] = fmul fast float [[L_A_2]], [[L_B_2]] -; POW2-ONLY-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0 -; POW2-ONLY-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1 -; POW2-ONLY-NEXT: [[ADD_0:%.*]] = fadd fast float [[TMP5]], [[TMP4]] +; POW2-ONLY-NEXT: [[ADD_0:%.*]] = call fast float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> [[TMP3]]) ; POW2-ONLY-NEXT: [[ADD_1:%.*]] = fadd fast 
float [[ADD_0]], [[MUL_2]] ; POW2-ONLY-NEXT: ret float [[ADD_1]] ; @@ -733,9 +719,7 @@ define double @dot_product_fp64(ptr %a, ptr %b) { ; POW2-ONLY-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[GEP_B_0]], align 4 ; POW2-ONLY-NEXT: [[TMP3:%.*]] = fmul fast <2 x double> [[TMP1]], [[TMP2]] ; POW2-ONLY-NEXT: [[MUL_2:%.*]] = fmul fast double [[L_A_2]], [[L_B_2]] -; POW2-ONLY-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0 -; POW2-ONLY-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP3]], i32 1 -; POW2-ONLY-NEXT: [[ADD_0:%.*]] = fadd fast double [[TMP4]], [[TMP5]] +; POW2-ONLY-NEXT: [[ADD_0:%.*]] = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> [[TMP3]]) ; POW2-ONLY-NEXT: [[ADD_1:%.*]] = fadd fast double [[ADD_0]], [[MUL_2]] ; POW2-ONLY-NEXT: ret double [[ADD_1]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll index 9e6270376ddd4..0d1de729bf18c 100644 --- a/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll +++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll @@ -13,7 +13,8 @@ define void @foo() { ; CHECK-NEXT: [[ARRAYIDX372:%.*]] = getelementptr inbounds [4 x [4 x i32]], ptr @dct_luma, i64 0, i64 3, i64 0 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([4 x [4 x i32]], ptr @bar, i64 0, i64 3, i64 2), align 4 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> , i32 [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP2]], <2 x i32> [[TMP1]], i64 2) +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP7]], <4 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> , i32 [[ADD277]], i32 1 ; CHECK-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = ashr <4 x i32> [[TMP5]], splat (i32 6) diff --git 
a/llvm/test/Transforms/SLPVectorizer/SystemZ/reuse-non-power-of-2-reorder.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/reuse-non-power-of-2-reorder.ll index 5681fb7346124..dbeff25954085 100644 --- a/llvm/test/Transforms/SLPVectorizer/SystemZ/reuse-non-power-of-2-reorder.ll +++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/reuse-non-power-of-2-reorder.ll @@ -16,8 +16,10 @@ define void @test(i32 %0, i64 %1, i32 %2, i32 %3, ptr %4) { ; CHECK-NEXT: [[TMP15:%.*]] = insertelement <60 x i32> [[TMP14]], i32 [[TMP98]], i32 0 ; CHECK-NEXT: [[TMP16:%.*]] = insertelement <60 x i32> [[TMP15]], i32 [[TMP73]], i32 6 ; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <60 x i32> [[TMP16]], <60 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP18:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> [[TMP17]], <2 x i32> [[TMP8]], i64 2) -; CHECK-NEXT: [[TMP19:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> [[TMP18]], <2 x i32> [[TMP8]], i64 4) +; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> poison, <60 x i32> +; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <60 x i32> [[TMP16]], <60 x i32> [[TMP22]], <8 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x i32> [[TMP23]], <8 x i32> [[TMP18]], <8 x i32> ; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <8 x i32> [[TMP19]], <8 x i32> poison, <60 x i32> ; CHECK-NEXT: [[TMP21:%.*]] = xor <60 x i32> [[TMP12]], [[TMP20]] ; CHECK-NEXT: [[TMP130:%.*]] = call i32 @llvm.vector.reduce.or.v60i32(<60 x i32> [[TMP21]]) diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-128169.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-128169.ll index 1dd6c7b81fb73..3f4436f33fad6 100644 --- a/llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-128169.ll +++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-128169.ll @@ -25,23 +25,19 @@ define void @e(<4 x i16> %0) { ; ; THRESH-LABEL: @e( ; 
THRESH-NEXT: entry: -; THRESH-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.vector.insert.v8i16.v4i16(<8 x i16> poison, <4 x i16> zeroinitializer, i64 0) -; THRESH-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.vector.insert.v8i16.v4i16(<8 x i16> [[TMP1]], <4 x i16> zeroinitializer, i64 4) -; THRESH-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.vector.insert.v8i16.v4i16(<8 x i16> poison, <4 x i16> zeroinitializer, i64 4) -; THRESH-NEXT: [[TMP4:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> poison, <4 x i16> zeroinitializer, i64 0) -; THRESH-NEXT: [[TMP5:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> [[TMP4]], <4 x i16> zeroinitializer, i64 4) -; THRESH-NEXT: [[TMP6:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> [[TMP5]], <4 x i16> zeroinitializer, i64 8) -; THRESH-NEXT: [[TMP7:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> [[TMP6]], <4 x i16> zeroinitializer, i64 12) ; THRESH-NEXT: br label [[VECTOR_BODY:%.*]] ; THRESH: vector.body: ; THRESH-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ zeroinitializer, [[ENTRY:%.*]] ], [ zeroinitializer, [[VECTOR_BODY]] ] ; THRESH-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[ENTRY]] ], [ [[TMP26:%.*]], [[VECTOR_BODY]] ] -; THRESH-NEXT: [[TMP8:%.*]] = call <8 x i16> @llvm.vector.insert.v8i16.v4i16(<8 x i16> [[TMP3]], <4 x i16> [[VEC_IND]], i64 0) -; THRESH-NEXT: [[TMP9:%.*]] = add <8 x i16> [[TMP2]], [[TMP8]] +; THRESH-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[VEC_IND]], <4 x i16> poison, <8 x i32> +; THRESH-NEXT: [[TMP8:%.*]] = shufflevector <8 x i16> , <8 x i16> [[TMP1]], <8 x i32> +; THRESH-NEXT: [[TMP3:%.*]] = add <8 x i16> zeroinitializer, [[TMP8]] ; THRESH-NEXT: [[TMP10:%.*]] = shufflevector <8 x i16> [[TMP8]], <8 x i16> poison, <16 x i32> -; THRESH-NEXT: [[TMP11:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> [[TMP10]], <4 x i16> [[TMP0:%.*]], i64 4) -; THRESH-NEXT: [[TMP12:%.*]] = call <16 x i16> 
@llvm.vector.insert.v16i16.v8i16(<16 x i16> [[TMP11]], <8 x i16> [[TMP9]], i64 8) -; THRESH-NEXT: [[TMP13:%.*]] = icmp sgt <16 x i16> [[TMP12]], [[TMP7]] +; THRESH-NEXT: [[TMP5:%.*]] = shufflevector <4 x i16> [[TMP0:%.*]], <4 x i16> poison, <16 x i32> +; THRESH-NEXT: [[TMP6:%.*]] = shufflevector <16 x i16> [[TMP10]], <16 x i16> [[TMP5]], <16 x i32> +; THRESH-NEXT: [[TMP7:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> poison, <16 x i32> +; THRESH-NEXT: [[TMP9:%.*]] = shufflevector <16 x i16> [[TMP6]], <16 x i16> [[TMP7]], <16 x i32> +; THRESH-NEXT: [[TMP13:%.*]] = icmp sgt <16 x i16> [[TMP9]], zeroinitializer ; THRESH-NEXT: [[TMP14:%.*]] = shufflevector <16 x i1> [[TMP13]], <16 x i1> poison, <4 x i32> ; THRESH-NEXT: [[TMP15:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP14]]) ; THRESH-NEXT: [[TMP23:%.*]] = insertelement <4 x i1> poison, i1 [[TMP15]], i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll index 65e5458b25d2f..6be51062f6fa1 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll @@ -13,7 +13,8 @@ define <8 x float> @sitofp_uitofp(<8 x i32> %a) { ; SSE2-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> ; SSE2-NEXT: [[TMP4:%.*]] = uitofp <4 x i32> [[TMP3]] to <4 x float> ; SSE2-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <8 x i32> -; SSE2-NEXT: [[TMP5:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP6]], <4 x float> [[TMP4]], i64 4) +; SSE2-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> poison, <8 x i32> +; SSE2-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP6]], <8 x float> [[TMP7]], <8 x i32> ; SSE2-NEXT: ret <8 x float> [[TMP5]] ; ; SLM-LABEL: @sitofp_uitofp( @@ -22,7 +23,8 @@ define <8 x float> @sitofp_uitofp(<8 x 
i32> %a) { ; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> ; SLM-NEXT: [[TMP4:%.*]] = uitofp <4 x i32> [[TMP3]] to <4 x float> ; SLM-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <8 x i32> -; SLM-NEXT: [[TMP5:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP6]], <4 x float> [[TMP4]], i64 4) +; SLM-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> poison, <8 x i32> +; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP6]], <8 x float> [[TMP7]], <8 x i32> ; SLM-NEXT: ret <8 x float> [[TMP5]] ; ; AVX-LABEL: @sitofp_uitofp( @@ -77,7 +79,8 @@ define <8 x i32> @fptosi_fptoui(<8 x float> %a) { ; SSE2-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> ; SSE2-NEXT: [[TMP4:%.*]] = fptoui <4 x float> [[TMP3]] to <4 x i32> ; SSE2-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> -; SSE2-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP6]], <4 x i32> [[TMP4]], i64 4) +; SSE2-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <8 x i32> +; SSE2-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> [[TMP7]], <8 x i32> ; SSE2-NEXT: ret <8 x i32> [[TMP5]] ; ; SLM-LABEL: @fptosi_fptoui( @@ -86,7 +89,8 @@ define <8 x i32> @fptosi_fptoui(<8 x float> %a) { ; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> ; SLM-NEXT: [[TMP4:%.*]] = fptoui <4 x float> [[TMP3]] to <4 x i32> ; SLM-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> -; SLM-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP6]], <4 x i32> [[TMP4]], i64 4) +; SLM-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <8 x i32> +; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> [[TMP7]], <8 x i32> ; SLM-NEXT: ret <8 x i32> [[TMP5]] ; ; 
AVX-LABEL: @fptosi_fptoui( @@ -143,7 +147,8 @@ define <8 x float> @fneg_fabs(<8 x float> %a) { ; SSE2-NEXT: [[TMP5:%.*]] = xor <4 x i32> [[TMP3]], splat (i32 -2147483648) ; SSE2-NEXT: [[TMP6:%.*]] = and <4 x i32> [[TMP4]], splat (i32 2147483647) ; SSE2-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <8 x i32> -; SSE2-NEXT: [[TMP8:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP7]], <4 x i32> [[TMP6]], i64 4) +; SSE2-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <8 x i32> +; SSE2-NEXT: [[TMP8:%.*]] = shufflevector <8 x i32> [[TMP7]], <8 x i32> [[TMP9]], <8 x i32> ; SSE2-NEXT: [[DOTUNCASTED:%.*]] = bitcast <8 x i32> [[TMP8]] to <8 x float> ; SSE2-NEXT: ret <8 x float> [[DOTUNCASTED]] ; @@ -155,7 +160,8 @@ define <8 x float> @fneg_fabs(<8 x float> %a) { ; SLM-NEXT: [[TMP5:%.*]] = xor <4 x i32> [[TMP3]], splat (i32 -2147483648) ; SLM-NEXT: [[TMP6:%.*]] = and <4 x i32> [[TMP4]], splat (i32 2147483647) ; SLM-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <8 x i32> -; SLM-NEXT: [[TMP8:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP7]], <4 x i32> [[TMP6]], i64 4) +; SLM-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <8 x i32> +; SLM-NEXT: [[TMP8:%.*]] = shufflevector <8 x i32> [[TMP7]], <8 x i32> [[TMP9]], <8 x i32> ; SLM-NEXT: [[DOTUNCASTED:%.*]] = bitcast <8 x i32> [[TMP8]] to <8 x float> ; SLM-NEXT: ret <8 x float> [[DOTUNCASTED]] ; @@ -233,7 +239,8 @@ define <8 x i32> @sext_zext(<8 x i16> %a) { ; SSE2-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <4 x i32> ; SSE2-NEXT: [[TMP4:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32> ; SSE2-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> -; SSE2-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP6]], <4 x i32> [[TMP4]], i64 4) +; SSE2-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x 
i32> poison, <8 x i32> +; SSE2-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> [[TMP7]], <8 x i32> ; SSE2-NEXT: ret <8 x i32> [[TMP5]] ; ; SLM-LABEL: @sext_zext( @@ -242,7 +249,8 @@ define <8 x i32> @sext_zext(<8 x i16> %a) { ; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <4 x i32> ; SLM-NEXT: [[TMP4:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32> ; SLM-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> -; SLM-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP6]], <4 x i32> [[TMP4]], i64 4) +; SLM-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <8 x i32> +; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> [[TMP7]], <8 x i32> ; SLM-NEXT: ret <8 x i32> [[TMP5]] ; ; AVX-LABEL: @sext_zext( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll index fad46870ec475..1db428706047a 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll @@ -13,7 +13,8 @@ define <8 x float> @sitofp_uitofp(<8 x i32> %a) { ; SSE2-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> ; SSE2-NEXT: [[TMP4:%.*]] = uitofp <4 x i32> [[TMP3]] to <4 x float> ; SSE2-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <8 x i32> -; SSE2-NEXT: [[TMP5:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP6]], <4 x float> [[TMP4]], i64 4) +; SSE2-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> poison, <8 x i32> +; SSE2-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP6]], <8 x float> [[TMP7]], <8 x i32> ; SSE2-NEXT: ret <8 x float> [[TMP5]] ; ; SLM-LABEL: @sitofp_uitofp( @@ -22,7 +23,8 @@ define <8 x float> @sitofp_uitofp(<8 x i32> %a) { ; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 
x i32> ; SLM-NEXT: [[TMP4:%.*]] = uitofp <4 x i32> [[TMP3]] to <4 x float> ; SLM-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <8 x i32> -; SLM-NEXT: [[TMP5:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP6]], <4 x float> [[TMP4]], i64 4) +; SLM-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> poison, <8 x i32> +; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP6]], <8 x float> [[TMP7]], <8 x i32> ; SLM-NEXT: ret <8 x float> [[TMP5]] ; ; AVX-LABEL: @sitofp_uitofp( @@ -77,7 +79,8 @@ define <8 x i32> @fptosi_fptoui(<8 x float> %a) { ; SSE2-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> ; SSE2-NEXT: [[TMP4:%.*]] = fptoui <4 x float> [[TMP3]] to <4 x i32> ; SSE2-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> -; SSE2-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP6]], <4 x i32> [[TMP4]], i64 4) +; SSE2-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <8 x i32> +; SSE2-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> [[TMP7]], <8 x i32> ; SSE2-NEXT: ret <8 x i32> [[TMP5]] ; ; SLM-LABEL: @fptosi_fptoui( @@ -86,7 +89,8 @@ define <8 x i32> @fptosi_fptoui(<8 x float> %a) { ; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> ; SLM-NEXT: [[TMP4:%.*]] = fptoui <4 x float> [[TMP3]] to <4 x i32> ; SLM-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> -; SLM-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP6]], <4 x i32> [[TMP4]], i64 4) +; SLM-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <8 x i32> +; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> [[TMP7]], <8 x i32> ; SLM-NEXT: ret <8 x i32> [[TMP5]] ; ; AVX-LABEL: @fptosi_fptoui( @@ -143,7 +147,8 @@ define <8 x float> @fneg_fabs(<8 x float> %a) { 
; SSE2-NEXT: [[TMP5:%.*]] = xor <4 x i32> [[TMP3]], splat (i32 -2147483648) ; SSE2-NEXT: [[TMP6:%.*]] = and <4 x i32> [[TMP4]], splat (i32 2147483647) ; SSE2-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <8 x i32> -; SSE2-NEXT: [[TMP8:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP7]], <4 x i32> [[TMP6]], i64 4) +; SSE2-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <8 x i32> +; SSE2-NEXT: [[TMP8:%.*]] = shufflevector <8 x i32> [[TMP7]], <8 x i32> [[TMP9]], <8 x i32> ; SSE2-NEXT: [[DOTUNCASTED:%.*]] = bitcast <8 x i32> [[TMP8]] to <8 x float> ; SSE2-NEXT: ret <8 x float> [[DOTUNCASTED]] ; @@ -155,7 +160,8 @@ define <8 x float> @fneg_fabs(<8 x float> %a) { ; SLM-NEXT: [[TMP5:%.*]] = xor <4 x i32> [[TMP3]], splat (i32 -2147483648) ; SLM-NEXT: [[TMP6:%.*]] = and <4 x i32> [[TMP4]], splat (i32 2147483647) ; SLM-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <8 x i32> -; SLM-NEXT: [[TMP8:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP7]], <4 x i32> [[TMP6]], i64 4) +; SLM-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <8 x i32> +; SLM-NEXT: [[TMP8:%.*]] = shufflevector <8 x i32> [[TMP7]], <8 x i32> [[TMP9]], <8 x i32> ; SLM-NEXT: [[DOTUNCASTED:%.*]] = bitcast <8 x i32> [[TMP8]] to <8 x float> ; SLM-NEXT: ret <8 x float> [[DOTUNCASTED]] ; @@ -233,7 +239,8 @@ define <8 x i32> @sext_zext(<8 x i16> %a) { ; SSE2-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <4 x i32> ; SSE2-NEXT: [[TMP4:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32> ; SSE2-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> -; SSE2-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP6]], <4 x i32> [[TMP4]], i64 4) +; SSE2-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <8 x i32> +; SSE2-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> 
[[TMP7]], <8 x i32> ; SSE2-NEXT: ret <8 x i32> [[TMP5]] ; ; SLM-LABEL: @sext_zext( @@ -242,7 +249,8 @@ define <8 x i32> @sext_zext(<8 x i16> %a) { ; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <4 x i32> ; SLM-NEXT: [[TMP4:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32> ; SLM-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> -; SLM-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP6]], <4 x i32> [[TMP4]], i64 4) +; SLM-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <8 x i32> +; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> [[TMP7]], <8 x i32> ; SLM-NEXT: ret <8 x i32> [[TMP5]] ; ; AVX-LABEL: @sext_zext( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp-inseltpoison.ll index 99b13bdc05082..06498563a7d37 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp-inseltpoison.ll @@ -15,7 +15,8 @@ define <8 x float> @fadd_fsub_v8f32(<8 x float> %a, <8 x float> %b) { ; SSE-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> ; SSE-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[TMP11]], [[TMP4]] ; SSE-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <8 x i32> -; SSE-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP7]], <4 x float> [[TMP10]], i64 4) +; SSE-NEXT: [[TMP12:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> poison, <8 x i32> +; SSE-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[TMP7]], <8 x float> [[TMP12]], <8 x i32> ; SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP8]], <8 x float> poison, <8 x i32> ; SSE-NEXT: ret <8 x float> [[TMP5]] ; @@ -27,7 +28,8 @@ define <8 x float> @fadd_fsub_v8f32(<8 x float> %a, <8 x float> %b) { ; SLM-NEXT: [[TMP4:%.*]] = 
shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> ; SLM-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[TMP11]], [[TMP4]] ; SLM-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <8 x i32> -; SLM-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP7]], <4 x float> [[TMP10]], i64 4) +; SLM-NEXT: [[TMP12:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> poison, <8 x i32> +; SLM-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[TMP7]], <8 x float> [[TMP12]], <8 x i32> ; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP8]], <8 x float> poison, <8 x i32> ; SLM-NEXT: ret <8 x float> [[TMP5]] ; @@ -39,7 +41,8 @@ define <8 x float> @fadd_fsub_v8f32(<8 x float> %a, <8 x float> %b) { ; AVX-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> ; AVX-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[TMP11]], [[TMP4]] ; AVX-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <8 x i32> -; AVX-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP7]], <4 x float> [[TMP10]], i64 4) +; AVX-NEXT: [[TMP12:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> poison, <8 x i32> +; AVX-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[TMP7]], <8 x float> [[TMP12]], <8 x i32> ; AVX-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP8]], <8 x float> poison, <8 x i32> ; AVX-NEXT: ret <8 x float> [[TMP5]] ; @@ -99,7 +102,8 @@ define <8 x float> @fmul_fdiv_v8f32(<8 x float> %a, <8 x float> %b) { ; SSE-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> ; SSE-NEXT: [[TMP6:%.*]] = fmul <4 x float> [[TMP11]], [[TMP4]] ; SSE-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <8 x i32> -; SSE-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP7]], <4 x float> [[TMP10]], i64 4) +; SSE-NEXT: [[TMP12:%.*]] = shufflevector <4 x float> 
[[TMP10]], <4 x float> poison, <8 x i32> +; SSE-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[TMP7]], <8 x float> [[TMP12]], <8 x i32> ; SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP8]], <8 x float> poison, <8 x i32> ; SSE-NEXT: ret <8 x float> [[TMP5]] ; @@ -111,7 +115,8 @@ define <8 x float> @fmul_fdiv_v8f32(<8 x float> %a, <8 x float> %b) { ; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> ; SLM-NEXT: [[TMP6:%.*]] = fmul <4 x float> [[TMP11]], [[TMP4]] ; SLM-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <8 x i32> -; SLM-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP7]], <4 x float> [[TMP10]], i64 4) +; SLM-NEXT: [[TMP12:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> poison, <8 x i32> +; SLM-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[TMP7]], <8 x float> [[TMP12]], <8 x i32> ; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP8]], <8 x float> poison, <8 x i32> ; SLM-NEXT: ret <8 x float> [[TMP5]] ; @@ -123,7 +128,8 @@ define <8 x float> @fmul_fdiv_v8f32(<8 x float> %a, <8 x float> %b) { ; AVX-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> ; AVX-NEXT: [[TMP6:%.*]] = fmul <4 x float> [[TMP11]], [[TMP4]] ; AVX-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <8 x i32> -; AVX-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP7]], <4 x float> [[TMP10]], i64 4) +; AVX-NEXT: [[TMP12:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> poison, <8 x i32> +; AVX-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[TMP7]], <8 x float> [[TMP12]], <8 x i32> ; AVX-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP8]], <8 x float> poison, <8 x i32> ; AVX-NEXT: ret <8 x float> [[TMP5]] ; @@ -135,7 +141,8 @@ define <8 x float> @fmul_fdiv_v8f32(<8 x float> %a, <8 x float> %b) { ; AVX2-NEXT: [[TMP4:%.*]] = shufflevector <8 x 
float> [[TMP3]], <8 x float> poison, <4 x i32> ; AVX2-NEXT: [[TMP6:%.*]] = fmul <4 x float> [[TMP11]], [[TMP4]] ; AVX2-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <8 x i32> -; AVX2-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP7]], <4 x float> [[TMP10]], i64 4) +; AVX2-NEXT: [[TMP12:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> poison, <8 x i32> +; AVX2-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[TMP7]], <8 x float> [[TMP12]], <8 x i32> ; AVX2-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP8]], <8 x float> poison, <8 x i32> ; AVX2-NEXT: ret <8 x float> [[TMP5]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp.ll index 7f9475917b566..6275d984295c0 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp.ll @@ -15,7 +15,8 @@ define <8 x float> @fadd_fsub_v8f32(<8 x float> %a, <8 x float> %b) { ; SSE-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> ; SSE-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[TMP11]], [[TMP4]] ; SSE-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <8 x i32> -; SSE-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP7]], <4 x float> [[TMP10]], i64 4) +; SSE-NEXT: [[TMP12:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> poison, <8 x i32> +; SSE-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[TMP7]], <8 x float> [[TMP12]], <8 x i32> ; SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP8]], <8 x float> poison, <8 x i32> ; SSE-NEXT: ret <8 x float> [[TMP5]] ; @@ -27,7 +28,8 @@ define <8 x float> @fadd_fsub_v8f32(<8 x float> %a, <8 x float> %b) { ; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> ; SLM-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[TMP11]], [[TMP4]] ; 
SLM-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <8 x i32> -; SLM-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP7]], <4 x float> [[TMP10]], i64 4) +; SLM-NEXT: [[TMP12:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> poison, <8 x i32> +; SLM-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[TMP7]], <8 x float> [[TMP12]], <8 x i32> ; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP8]], <8 x float> poison, <8 x i32> ; SLM-NEXT: ret <8 x float> [[TMP5]] ; @@ -39,7 +41,8 @@ define <8 x float> @fadd_fsub_v8f32(<8 x float> %a, <8 x float> %b) { ; AVX-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> ; AVX-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[TMP11]], [[TMP4]] ; AVX-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <8 x i32> -; AVX-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP7]], <4 x float> [[TMP10]], i64 4) +; AVX-NEXT: [[TMP12:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> poison, <8 x i32> +; AVX-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[TMP7]], <8 x float> [[TMP12]], <8 x i32> ; AVX-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP8]], <8 x float> poison, <8 x i32> ; AVX-NEXT: ret <8 x float> [[TMP5]] ; @@ -99,7 +102,8 @@ define <8 x float> @fmul_fdiv_v8f32(<8 x float> %a, <8 x float> %b) { ; SSE-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> ; SSE-NEXT: [[TMP6:%.*]] = fmul <4 x float> [[TMP11]], [[TMP4]] ; SSE-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <8 x i32> -; SSE-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP7]], <4 x float> [[TMP10]], i64 4) +; SSE-NEXT: [[TMP12:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> poison, <8 x i32> +; SSE-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[TMP7]], <8 x float> [[TMP12]], <8 x i32> ; 
SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP8]], <8 x float> poison, <8 x i32> ; SSE-NEXT: ret <8 x float> [[TMP5]] ; @@ -111,7 +115,8 @@ define <8 x float> @fmul_fdiv_v8f32(<8 x float> %a, <8 x float> %b) { ; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> ; SLM-NEXT: [[TMP6:%.*]] = fmul <4 x float> [[TMP11]], [[TMP4]] ; SLM-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <8 x i32> -; SLM-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP7]], <4 x float> [[TMP10]], i64 4) +; SLM-NEXT: [[TMP12:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> poison, <8 x i32> +; SLM-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[TMP7]], <8 x float> [[TMP12]], <8 x i32> ; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP8]], <8 x float> poison, <8 x i32> ; SLM-NEXT: ret <8 x float> [[TMP5]] ; @@ -123,7 +128,8 @@ define <8 x float> @fmul_fdiv_v8f32(<8 x float> %a, <8 x float> %b) { ; AVX-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> ; AVX-NEXT: [[TMP6:%.*]] = fmul <4 x float> [[TMP11]], [[TMP4]] ; AVX-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <8 x i32> -; AVX-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP7]], <4 x float> [[TMP10]], i64 4) +; AVX-NEXT: [[TMP12:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> poison, <8 x i32> +; AVX-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[TMP7]], <8 x float> [[TMP12]], <8 x i32> ; AVX-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP8]], <8 x float> poison, <8 x i32> ; AVX-NEXT: ret <8 x float> [[TMP5]] ; @@ -135,7 +141,8 @@ define <8 x float> @fmul_fdiv_v8f32(<8 x float> %a, <8 x float> %b) { ; AVX2-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> ; AVX2-NEXT: [[TMP6:%.*]] = fmul <4 x float> [[TMP11]], [[TMP4]] ; AVX2-NEXT: [[TMP7:%.*]] = 
shufflevector <4 x float> [[TMP6]], <4 x float> poison, <8 x i32> -; AVX2-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP7]], <4 x float> [[TMP10]], i64 4) +; AVX2-NEXT: [[TMP12:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> poison, <8 x i32> +; AVX2-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[TMP7]], <8 x float> [[TMP12]], <8 x i32> ; AVX2-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP8]], <8 x float> poison, <8 x i32> ; AVX2-NEXT: ret <8 x float> [[TMP5]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll index 11ab7770a5383..d02df1ac92b4d 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll @@ -15,7 +15,8 @@ define <8 x i32> @add_sub_v8i32(<8 x i32> %a, <8 x i32> %b) { ; SSE-NEXT: [[TMP9:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> ; SSE-NEXT: [[TMP6:%.*]] = sub <4 x i32> [[TMP3]], [[TMP9]] ; SSE-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> poison, <8 x i32> -; SSE-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP7]], <4 x i32> [[TMP6]], i64 4) +; SSE-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <8 x i32> +; SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP7]], <8 x i32> [[TMP10]], <8 x i32> ; SSE-NEXT: ret <8 x i32> [[TMP5]] ; ; SLM-LABEL: @add_sub_v8i32( @@ -26,7 +27,8 @@ define <8 x i32> @add_sub_v8i32(<8 x i32> %a, <8 x i32> %b) { ; SLM-NEXT: [[TMP9:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> ; SLM-NEXT: [[TMP6:%.*]] = sub <4 x i32> [[TMP3]], [[TMP9]] ; SLM-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> poison, <8 x i32> -; SLM-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP7]], <4 x i32> [[TMP6]], i64 4) +; 
SLM-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <8 x i32> +; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP7]], <8 x i32> [[TMP10]], <8 x i32> ; SLM-NEXT: ret <8 x i32> [[TMP5]] ; ; AVX1-LABEL: @add_sub_v8i32( @@ -143,7 +145,8 @@ define <8 x i32> @ashr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) { ; SSE-NEXT: [[TMP9:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> ; SSE-NEXT: [[TMP6:%.*]] = shl <4 x i32> [[TMP3]], [[TMP9]] ; SSE-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> poison, <8 x i32> -; SSE-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP7]], <4 x i32> [[TMP6]], i64 4) +; SSE-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <8 x i32> +; SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP7]], <8 x i32> [[TMP10]], <8 x i32> ; SSE-NEXT: ret <8 x i32> [[TMP5]] ; ; SLM-LABEL: @ashr_shl_v8i32( @@ -154,7 +157,8 @@ define <8 x i32> @ashr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) { ; SLM-NEXT: [[TMP9:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> ; SLM-NEXT: [[TMP6:%.*]] = shl <4 x i32> [[TMP3]], [[TMP9]] ; SLM-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> poison, <8 x i32> -; SLM-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP7]], <4 x i32> [[TMP6]], i64 4) +; SLM-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <8 x i32> +; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP7]], <8 x i32> [[TMP10]], <8 x i32> ; SLM-NEXT: ret <8 x i32> [[TMP5]] ; ; AVX1-LABEL: @ashr_shl_v8i32( @@ -217,7 +221,8 @@ define <8 x i32> @ashr_shl_v8i32_const(<8 x i32> %a) { ; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> ; SSE-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], splat (i32 3) ; SSE-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> -; SSE-NEXT: [[TMP5:%.*]] = call <8 x i32> 
@llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP6]], <4 x i32> [[TMP4]], i64 4) +; SSE-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <8 x i32> +; SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> [[TMP7]], <8 x i32> ; SSE-NEXT: ret <8 x i32> [[TMP5]] ; ; SLM-LABEL: @ashr_shl_v8i32_const( @@ -226,7 +231,8 @@ define <8 x i32> @ashr_shl_v8i32_const(<8 x i32> %a) { ; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> ; SLM-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], splat (i32 3) ; SLM-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> -; SLM-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP6]], <4 x i32> [[TMP4]], i64 4) +; SLM-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <8 x i32> +; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> [[TMP7]], <8 x i32> ; SLM-NEXT: ret <8 x i32> [[TMP5]] ; ; AVX1-LABEL: @ashr_shl_v8i32_const( @@ -592,7 +598,8 @@ define <8 x i32> @add_sub_v8i32_splat(<8 x i32> %a, i32 %b) { ; SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> ; SSE-NEXT: [[TMP6:%.*]] = sub <4 x i32> [[TMP3]], [[TMP5]] ; SSE-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <8 x i32> -; SSE-NEXT: [[TMP7:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP8]], <4 x i32> [[TMP6]], i64 4) +; SSE-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <8 x i32> +; SSE-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> [[TMP9]], <8 x i32> ; SSE-NEXT: ret <8 x i32> [[TMP7]] ; ; SLM-LABEL: @add_sub_v8i32_splat( @@ -603,7 +610,8 @@ define <8 x i32> @add_sub_v8i32_splat(<8 x i32> %a, i32 %b) { ; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> ; SLM-NEXT: [[TMP6:%.*]] = sub <4 x i32> [[TMP3]], [[TMP5]] ; SLM-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> 
[[TMP4]], <4 x i32> poison, <8 x i32> -; SLM-NEXT: [[TMP7:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP8]], <4 x i32> [[TMP6]], i64 4) +; SLM-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <8 x i32> +; SLM-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> [[TMP9]], <8 x i32> ; SLM-NEXT: ret <8 x i32> [[TMP7]] ; ; AVX1-LABEL: @add_sub_v8i32_splat( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll index 9589ec24d49d4..d9a7586ecd23d 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll @@ -15,7 +15,8 @@ define <8 x i32> @add_sub_v8i32(<8 x i32> %a, <8 x i32> %b) { ; SSE-NEXT: [[TMP9:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> ; SSE-NEXT: [[TMP6:%.*]] = sub <4 x i32> [[TMP3]], [[TMP9]] ; SSE-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> poison, <8 x i32> -; SSE-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP7]], <4 x i32> [[TMP6]], i64 4) +; SSE-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <8 x i32> +; SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP7]], <8 x i32> [[TMP10]], <8 x i32> ; SSE-NEXT: ret <8 x i32> [[TMP5]] ; ; SLM-LABEL: @add_sub_v8i32( @@ -26,7 +27,8 @@ define <8 x i32> @add_sub_v8i32(<8 x i32> %a, <8 x i32> %b) { ; SLM-NEXT: [[TMP9:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> ; SLM-NEXT: [[TMP6:%.*]] = sub <4 x i32> [[TMP3]], [[TMP9]] ; SLM-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> poison, <8 x i32> -; SLM-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP7]], <4 x i32> [[TMP6]], i64 4) +; SLM-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <8 x i32> +; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP7]], <8 x i32> 
[[TMP10]], <8 x i32> ; SLM-NEXT: ret <8 x i32> [[TMP5]] ; ; AVX1-LABEL: @add_sub_v8i32( @@ -143,7 +145,8 @@ define <8 x i32> @ashr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) { ; SSE-NEXT: [[TMP9:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> ; SSE-NEXT: [[TMP6:%.*]] = shl <4 x i32> [[TMP3]], [[TMP9]] ; SSE-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> poison, <8 x i32> -; SSE-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP7]], <4 x i32> [[TMP6]], i64 4) +; SSE-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <8 x i32> +; SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP7]], <8 x i32> [[TMP10]], <8 x i32> ; SSE-NEXT: ret <8 x i32> [[TMP5]] ; ; SLM-LABEL: @ashr_shl_v8i32( @@ -154,7 +157,8 @@ define <8 x i32> @ashr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) { ; SLM-NEXT: [[TMP9:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> ; SLM-NEXT: [[TMP6:%.*]] = shl <4 x i32> [[TMP3]], [[TMP9]] ; SLM-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> poison, <8 x i32> -; SLM-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP7]], <4 x i32> [[TMP6]], i64 4) +; SLM-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <8 x i32> +; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP7]], <8 x i32> [[TMP10]], <8 x i32> ; SLM-NEXT: ret <8 x i32> [[TMP5]] ; ; AVX1-LABEL: @ashr_shl_v8i32( @@ -217,7 +221,8 @@ define <8 x i32> @ashr_shl_v8i32_const(<8 x i32> %a) { ; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> ; SSE-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], splat (i32 3) ; SSE-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> -; SSE-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP6]], <4 x i32> [[TMP4]], i64 4) +; SSE-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <8 x i32> 
+; SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> [[TMP7]], <8 x i32> ; SSE-NEXT: ret <8 x i32> [[TMP5]] ; ; SLM-LABEL: @ashr_shl_v8i32_const( @@ -226,7 +231,8 @@ define <8 x i32> @ashr_shl_v8i32_const(<8 x i32> %a) { ; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> ; SLM-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], splat (i32 3) ; SLM-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> -; SLM-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP6]], <4 x i32> [[TMP4]], i64 4) +; SLM-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <8 x i32> +; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> [[TMP7]], <8 x i32> ; SLM-NEXT: ret <8 x i32> [[TMP5]] ; ; AVX1-LABEL: @ashr_shl_v8i32_const( @@ -592,7 +598,8 @@ define <8 x i32> @add_sub_v8i32_splat(<8 x i32> %a, i32 %b) { ; SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> ; SSE-NEXT: [[TMP6:%.*]] = sub <4 x i32> [[TMP3]], [[TMP5]] ; SSE-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <8 x i32> -; SSE-NEXT: [[TMP7:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP8]], <4 x i32> [[TMP6]], i64 4) +; SSE-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <8 x i32> +; SSE-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> [[TMP9]], <8 x i32> ; SSE-NEXT: ret <8 x i32> [[TMP7]] ; ; SLM-LABEL: @add_sub_v8i32_splat( @@ -603,7 +610,8 @@ define <8 x i32> @add_sub_v8i32_splat(<8 x i32> %a, i32 %b) { ; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> ; SLM-NEXT: [[TMP6:%.*]] = sub <4 x i32> [[TMP3]], [[TMP5]] ; SLM-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <8 x i32> -; SLM-NEXT: [[TMP7:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP8]], <4 x i32> [[TMP6]], i64 4) +; 
SLM-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <8 x i32> +; SLM-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> [[TMP9]], <8 x i32> ; SLM-NEXT: ret <8 x i32> [[TMP7]] ; ; AVX1-LABEL: @add_sub_v8i32_splat( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/bool-mask.ll b/llvm/test/Transforms/SLPVectorizer/X86/bool-mask.ll index f2992cf044cd5..e1ee35217d187 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/bool-mask.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/bool-mask.ll @@ -40,9 +40,10 @@ define i64 @bitmask_16xi8(ptr nocapture noundef readonly %src) { ; SSE-NEXT: [[TMP9:%.*]] = load i8, ptr [[ARRAYIDX_15]], align 1 ; SSE-NEXT: [[TOBOOL_NOT_15:%.*]] = icmp eq i8 [[TMP9]], 0 ; SSE-NEXT: [[OR_15:%.*]] = select i1 [[TOBOOL_NOT_15]], i64 0, i64 32768 -; SSE-NEXT: [[TMP10:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.v8i64(<8 x i64> [[TMP3]], i64 0) +; SSE-NEXT: [[TMP10:%.*]] = shufflevector <8 x i64> [[TMP3]], <8 x i64> poison, <4 x i32> ; SSE-NEXT: [[RDX_OP:%.*]] = or <4 x i64> [[TMP10]], [[TMP6]] -; SSE-NEXT: [[TMP11:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v4i64(<8 x i64> [[TMP3]], <4 x i64> [[RDX_OP]], i64 0) +; SSE-NEXT: [[TMP12:%.*]] = shufflevector <4 x i64> [[RDX_OP]], <4 x i64> poison, <8 x i32> +; SSE-NEXT: [[TMP11:%.*]] = shufflevector <8 x i64> [[TMP3]], <8 x i64> [[TMP12]], <8 x i32> ; SSE-NEXT: [[TMP16:%.*]] = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> [[TMP11]]) ; SSE-NEXT: [[OP_RDX:%.*]] = or i64 [[TMP16]], [[OR_13]] ; SSE-NEXT: [[OP_RDX5:%.*]] = or i64 [[OR_14]], [[OR_15]] @@ -75,9 +76,10 @@ define i64 @bitmask_16xi8(ptr nocapture noundef readonly %src) { ; AVX-NEXT: [[TMP9:%.*]] = load i8, ptr [[ARRAYIDX_15]], align 1 ; AVX-NEXT: [[TOBOOL_NOT_15:%.*]] = icmp eq i8 [[TMP9]], 0 ; AVX-NEXT: [[OR_15:%.*]] = select i1 [[TOBOOL_NOT_15]], i64 0, i64 32768 -; AVX-NEXT: [[TMP10:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.v8i64(<8 x i64> [[TMP3]], i64 0) +; AVX-NEXT: [[TMP10:%.*]] = 
shufflevector <8 x i64> [[TMP3]], <8 x i64> poison, <4 x i32> ; AVX-NEXT: [[RDX_OP:%.*]] = or <4 x i64> [[TMP10]], [[TMP6]] -; AVX-NEXT: [[TMP11:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v4i64(<8 x i64> [[TMP3]], <4 x i64> [[RDX_OP]], i64 0) +; AVX-NEXT: [[TMP13:%.*]] = shufflevector <4 x i64> [[RDX_OP]], <4 x i64> poison, <8 x i32> +; AVX-NEXT: [[TMP11:%.*]] = shufflevector <8 x i64> [[TMP3]], <8 x i64> [[TMP13]], <8 x i32> ; AVX-NEXT: [[TMP12:%.*]] = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> [[TMP11]]) ; AVX-NEXT: [[OP_RDX:%.*]] = or i64 [[TMP12]], [[OR_13]] ; AVX-NEXT: [[OP_RDX2:%.*]] = or i64 [[OR_14]], [[OR_15]] @@ -110,9 +112,10 @@ define i64 @bitmask_16xi8(ptr nocapture noundef readonly %src) { ; AVX512-NEXT: [[TMP9:%.*]] = load i8, ptr [[ARRAYIDX_15]], align 1 ; AVX512-NEXT: [[TOBOOL_NOT_15:%.*]] = icmp eq i8 [[TMP9]], 0 ; AVX512-NEXT: [[OR_15:%.*]] = select i1 [[TOBOOL_NOT_15]], i64 0, i64 32768 -; AVX512-NEXT: [[TMP10:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.v8i64(<8 x i64> [[TMP3]], i64 0) +; AVX512-NEXT: [[TMP10:%.*]] = shufflevector <8 x i64> [[TMP3]], <8 x i64> poison, <4 x i32> ; AVX512-NEXT: [[RDX_OP:%.*]] = or <4 x i64> [[TMP10]], [[TMP6]] -; AVX512-NEXT: [[TMP11:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v4i64(<8 x i64> [[TMP3]], <4 x i64> [[RDX_OP]], i64 0) +; AVX512-NEXT: [[TMP13:%.*]] = shufflevector <4 x i64> [[RDX_OP]], <4 x i64> poison, <8 x i32> +; AVX512-NEXT: [[TMP11:%.*]] = shufflevector <8 x i64> [[TMP3]], <8 x i64> [[TMP13]], <8 x i32> ; AVX512-NEXT: [[TMP12:%.*]] = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> [[TMP11]]) ; AVX512-NEXT: [[OP_RDX:%.*]] = or i64 [[TMP12]], [[OR_13]] ; AVX512-NEXT: [[OP_RDX2:%.*]] = or i64 [[OR_14]], [[OR_15]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-reused-with-bv-subvector.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-reused-with-bv-subvector.ll index 5d2f059a8cf41..ff0887cf12447 100644 --- 
a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-reused-with-bv-subvector.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-reused-with-bv-subvector.ll @@ -16,7 +16,8 @@ define void @test(ptr %0, i64 %1, i64 %2) { ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i64> poison, i64 [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x i64> [[TMP9]], i64 [[TMP2]], i32 1 ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <8 x i64> [[TMP10]], <8 x i64> poison, <8 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v4i64(<8 x i64> [[TMP11]], <4 x i64> [[TMP6]], i64 4) +; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <4 x i64> [[TMP6]], <4 x i64> poison, <8 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <8 x i64> [[TMP11]], <8 x i64> [[TMP16]], <8 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i64> [[TMP12]], [[TMP8]] ; CHECK-NEXT: br [[DOTPREHEADER_US_US:label %.*]] ; CHECK: [[_PREHEADER_US_US:.*:]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-schedule-for-subvector.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-schedule-for-subvector.ll index 7ed5f33c9dc6c..07fdc9d8dd2fa 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-schedule-for-subvector.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-schedule-for-subvector.ll @@ -17,7 +17,8 @@ define void @test() { ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> , i32 [[CALL]], i32 3 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i32> [[TMP2]], zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i1> [[TMP3]], <4 x i1> poison, <8 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i1> @llvm.vector.insert.v8i1.v4i1(<8 x i1> [[TMP4]], <4 x i1> [[TMP1]], i64 4) +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <8 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> [[TMP5]], <8 x i32> ; CHECK-NEXT: ret void ; bb: diff --git 
a/llvm/test/Transforms/SLPVectorizer/X86/cast-operand-extracted.ll b/llvm/test/Transforms/SLPVectorizer/X86/cast-operand-extracted.ll index fa46bd3d83249..c8748f316f024 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/cast-operand-extracted.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/cast-operand-extracted.ll @@ -24,7 +24,8 @@ define void @test(ptr %0, i32 %add651) { ; CHECK-NEXT: [[TMP10:%.*]] = add <2 x i32> [[TMP8]], [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[ADD651]], i32 0 ; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP14:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP13]], <2 x i32> [[TMP10]], i64 2) +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <2 x i32> [[TMP10]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP13]], <4 x i32> [[TMP19]], <4 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = lshr <4 x i32> [[TMP14]], splat (i32 1) ; CHECK-NEXT: [[SHR685:%.*]] = lshr i32 [[TMP2]], 1 ; CHECK-NEXT: [[TMP16:%.*]] = trunc <4 x i32> [[TMP15]] to <4 x i16> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/dot-product.ll b/llvm/test/Transforms/SLPVectorizer/X86/dot-product.ll index f16c879c451c2..4a8af6d03da06 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/dot-product.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/dot-product.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7 -passes=slp-vectorizer -S | FileCheck %s -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s +; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,SSE2 +; RUN: opt < %s 
-mtriple=x86_64-unknown -mcpu=corei7 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,SSE4 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX ; ; dot4(ptr x, ptr y) - ((xptr y[0])+(xptr y[1])+(xptr y[2])+(xptr y[3])) @@ -347,14 +347,30 @@ define float @dot2f32(ptr dereferenceable(16) %ptrx, ptr dereferenceable(16) %pt } define double @dot2f64_fast(ptr dereferenceable(16) %ptrx, ptr dereferenceable(16) %ptry) { -; CHECK-LABEL: @dot2f64_fast( -; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[PTRX:%.*]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[PTRY:%.*]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP3]], i32 1 -; CHECK-NEXT: [[DOT01:%.*]] = fadd fast double [[TMP4]], [[TMP5]] -; CHECK-NEXT: ret double [[DOT01]] +; SSE2-LABEL: @dot2f64_fast( +; SSE2-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[PTRX:%.*]], align 4 +; SSE2-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[PTRY:%.*]], align 4 +; SSE2-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP1]], [[TMP2]] +; SSE2-NEXT: [[TMP4:%.*]] = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> [[TMP3]]) +; SSE2-NEXT: ret double [[TMP4]] +; +; SSE4-LABEL: @dot2f64_fast( +; SSE4-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[PTRX:%.*]], align 4 +; SSE4-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[PTRY:%.*]], align 4 +; SSE4-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP1]], [[TMP2]] +; SSE4-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0 +; SSE4-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP3]], i32 1 +; SSE4-NEXT: [[DOT01:%.*]] = fadd fast double [[TMP4]], [[TMP5]] +; SSE4-NEXT: 
ret double [[DOT01]] +; +; AVX-LABEL: @dot2f64_fast( +; AVX-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[PTRX:%.*]], align 4 +; AVX-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[PTRY:%.*]], align 4 +; AVX-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP1]], [[TMP2]] +; AVX-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0 +; AVX-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP3]], i32 1 +; AVX-NEXT: [[DOT01:%.*]] = fadd fast double [[TMP4]], [[TMP5]] +; AVX-NEXT: ret double [[DOT01]] ; %ptrx1 = getelementptr inbounds double, ptr %ptrx, i64 1 %ptry1 = getelementptr inbounds double, ptr %ptry, i64 1 @@ -369,14 +385,30 @@ define double @dot2f64_fast(ptr dereferenceable(16) %ptrx, ptr dereferenceable(1 } define float @dot2f32_fast(ptr dereferenceable(16) %ptrx, ptr dereferenceable(16) %ptry) { -; CHECK-LABEL: @dot2f32_fast( -; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[PTRX:%.*]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[PTRY:%.*]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x float> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1 -; CHECK-NEXT: [[DOT01:%.*]] = fadd fast float [[TMP4]], [[TMP5]] -; CHECK-NEXT: ret float [[DOT01]] +; SSE2-LABEL: @dot2f32_fast( +; SSE2-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[PTRX:%.*]], align 4 +; SSE2-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[PTRY:%.*]], align 4 +; SSE2-NEXT: [[TMP3:%.*]] = fmul <2 x float> [[TMP1]], [[TMP2]] +; SSE2-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> [[TMP3]]) +; SSE2-NEXT: ret float [[TMP4]] +; +; SSE4-LABEL: @dot2f32_fast( +; SSE4-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[PTRX:%.*]], align 4 +; SSE4-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[PTRY:%.*]], align 4 +; SSE4-NEXT: [[TMP3:%.*]] = fmul <2 x float> [[TMP1]], [[TMP2]] +; SSE4-NEXT: [[TMP4:%.*]] = extractelement <2 x 
float> [[TMP3]], i32 0 +; SSE4-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1 +; SSE4-NEXT: [[DOT01:%.*]] = fadd fast float [[TMP4]], [[TMP5]] +; SSE4-NEXT: ret float [[DOT01]] +; +; AVX-LABEL: @dot2f32_fast( +; AVX-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[PTRX:%.*]], align 4 +; AVX-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[PTRY:%.*]], align 4 +; AVX-NEXT: [[TMP3:%.*]] = fmul <2 x float> [[TMP1]], [[TMP2]] +; AVX-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0 +; AVX-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1 +; AVX-NEXT: [[DOT01:%.*]] = fadd fast float [[TMP4]], [[TMP5]] +; AVX-NEXT: ret float [[DOT01]] ; %ptrx1 = getelementptr inbounds float, ptr %ptrx, i64 1 %ptry1 = getelementptr inbounds float, ptr %ptry, i64 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/entry-no-bundle-but-extra-use-on-vec.ll b/llvm/test/Transforms/SLPVectorizer/X86/entry-no-bundle-but-extra-use-on-vec.ll index 9d48e7f8a787a..bfb623ac5a9b9 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/entry-no-bundle-but-extra-use-on-vec.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/entry-no-bundle-but-extra-use-on-vec.ll @@ -34,7 +34,8 @@ define void @test(ptr %nExp, float %0, i1 %cmp, float %1) { ; CHECK-NEXT: [[TMP23:%.*]] = fmul <4 x float> [[TMP19]], zeroinitializer ; CHECK-NEXT: [[TMP24:%.*]] = fadd <4 x float> [[TMP19]], zeroinitializer ; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x float> [[TMP23]], <4 x float> [[TMP24]], <4 x i32> -; CHECK-NEXT: [[TMP26:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> , <2 x float> [[TMP22]], i64 2) +; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <2 x float> [[TMP22]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <4 x float> , <4 x float> [[TMP28]], <4 x i32> ; CHECK-NEXT: [[TMP27:%.*]] = fadd <4 x float> [[TMP25]], [[TMP26]] ; CHECK-NEXT: store <4 x float> [[TMP27]], ptr [[CALL25]], align 4 ; CHECK-NEXT: ret void diff --git 
a/llvm/test/Transforms/SLPVectorizer/X86/ext-used-scalar-different-bitwidth.ll b/llvm/test/Transforms/SLPVectorizer/X86/ext-used-scalar-different-bitwidth.ll index 55fe7d6ed52e5..77585965d68e9 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/ext-used-scalar-different-bitwidth.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/ext-used-scalar-different-bitwidth.ll @@ -16,7 +16,8 @@ define i32 @test() { ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> , <2 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt <2 x i32> [[TMP7]], -; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i1> @llvm.vector.insert.v8i1.v2i1(<8 x i1> , <2 x i1> [[TMP8]], i64 0) +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i1> [[TMP8]], <2 x i1> poison, <8 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x i1> , <8 x i1> [[TMP14]], <8 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = select <8 x i1> [[TMP9]], <8 x i32> zeroinitializer, <8 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = shl <8 x i32> [[TMP5]], [[TMP10]] ; CHECK-NEXT: [[TMP11:%.*]] = trunc <8 x i32> [[TMP13]] to <8 x i8> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/external-reduced-value-vectorized.ll b/llvm/test/Transforms/SLPVectorizer/X86/external-reduced-value-vectorized.ll index 20d7ba99fd515..3bf73034a1718 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/external-reduced-value-vectorized.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/external-reduced-value-vectorized.ll @@ -17,7 +17,8 @@ define i32 @test(ptr %c, i16 %a, i16 %0) { ; CHECK-NEXT: [[TMP9:%.*]] = icmp ult i16 [[A]], -2 ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i1> [[TMP3]], <4 x i1> poison, <8 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <8 x i1> [[TMP10]], i1 [[TMP9]], i32 7 -; CHECK-NEXT: [[TMP12:%.*]] = call <8 x i1> @llvm.vector.insert.v8i1.v4i1(<8 x i1> [[TMP11]], <4 x i1> [[TMP8]], i64 0) +; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x i1> [[TMP8]], <4 x 
i1> poison, <8 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <8 x i1> [[TMP11]], <8 x i1> [[TMP17]], <8 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = freeze <8 x i1> [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP13]]) ; CHECK-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractelemets-extended-by-poison.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractelemets-extended-by-poison.ll index 0e08ef4d74308..18e03df0fbcc9 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/extractelemets-extended-by-poison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extractelemets-extended-by-poison.ll @@ -10,7 +10,7 @@ define i32 @test() { ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[TMP0]], <4 x i64> poison, <8 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i64> [[TMP0]], <4 x i64> poison, <8 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i64> [[TMP3]], <8 x i64> , <8 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v4i64(<8 x i64> [[TMP4]], <4 x i64> [[TMP0]], i64 0) +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x i64> [[TMP4]], <8 x i64> [[TMP3]], <8 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = trunc <8 x i64> [[TMP5]] to <8 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[TMP0]], <4 x i64> poison, <8 x i32> @@ -19,9 +19,10 @@ define i32 @test() { ; CHECK-NEXT: [[TMP8:%.*]] = add <16 x i32> [[TMP7]], zeroinitializer ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP0]], i32 0 ; CHECK-NEXT: [[INC_3_3_I_1:%.*]] = or i64 [[TMP9]], 0 -; CHECK-NEXT: [[TMP16:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.v16i32(<16 x i32> [[TMP8]], i64 0) +; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <16 x i32> [[TMP8]], <16 x i32> poison, <8 x i32> ; CHECK-NEXT: [[RDX_OP:%.*]] = or <8 x i32> [[TMP16]], [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = 
call <16 x i32> @llvm.vector.insert.v16i32.v8i32(<16 x i32> [[TMP8]], <8 x i32> [[RDX_OP]], i64 0) +; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <8 x i32> [[RDX_OP]], <8 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <16 x i32> [[TMP8]], <16 x i32> [[TMP18]], <16 x i32> ; CHECK-NEXT: [[OP_RDX:%.*]] = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> [[TMP17]]) ; CHECK-NEXT: ret i32 [[OP_RDX]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/full-match-with-poison-scalar.ll b/llvm/test/Transforms/SLPVectorizer/X86/full-match-with-poison-scalar.ll index 992909fb3e87f..15ba98f90f0b8 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/full-match-with-poison-scalar.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/full-match-with-poison-scalar.ll @@ -15,8 +15,9 @@ define i32 @test() { ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x i32> poison, i32 [[G_228_PROMOTED166_I1105_I]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i32> [[TMP7]], <16 x i32> [[TMP9]], <16 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v12i32(<16 x i32> poison, <12 x i32> [[TMP3]], i64 0) -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x i32> [[TMP6]], <16 x i32> [[TMP8]], <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <12 x i32> [[TMP3]], <12 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x i32> [[TMP17]], <16 x i32> [[TMP8]], <16 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = icmp ugt <16 x i32> [[TMP11]], zeroinitializer ; CHECK-NEXT: [[TMP13:%.*]] = icmp ult <16 x i32> [[TMP11]], zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <16 x i1> [[TMP12]], <16 x i1> [[TMP13]], <16 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/full-matched-bv-with-subvectors.ll 
b/llvm/test/Transforms/SLPVectorizer/X86/full-matched-bv-with-subvectors.ll index 2a54ae9a1e749..ce65f532e0b3b 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/full-matched-bv-with-subvectors.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/full-matched-bv-with-subvectors.ll @@ -11,6 +11,7 @@ define i32 @test(i64 %l.549) { ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> [[TMP3]], i64 0, i32 1 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i64> poison, i64 [[L_549]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP8]], <4 x i64> poison, <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> poison, <4 x i32> ; CHECK-NEXT: br label %[[IF_THEN19:.*]] ; CHECK: [[P:.*]]: ; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x i64> [ zeroinitializer, %[[IF_END29:.*]] ], [ [[TMP13:%.*]], %[[IF_END25:.*]] ] @@ -18,20 +19,21 @@ define i32 @test(i64 %l.549) { ; CHECK-NEXT: br i1 false, label %[[S:.*]], label %[[Q:.*]] ; CHECK: [[Q]]: ; CHECK-NEXT: [[XOR39:%.*]] = phi i64 [ 0, %[[P]] ], [ 0, %[[LAND_LHS_TRUE:.*]] ] -; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x i64> [ zeroinitializer, %[[P]] ], [ zeroinitializer, %[[LAND_LHS_TRUE]] ] +; CHECK-NEXT: [[TMP16:%.*]] = phi <2 x i64> [ zeroinitializer, %[[P]] ], [ zeroinitializer, %[[LAND_LHS_TRUE]] ] ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i64> [[TMP0]], i64 [[XOR39]], i32 2 -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i64> @llvm.vector.insert.v4i64.v2i64(<4 x i64> [[TMP4]], <2 x i64> [[TMP6]], i64 0) +; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <2 x i64> [[TMP16]], <2 x i64> poison, <4 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP18]], <4 x i32> ; CHECK-NEXT: br i1 false, label %[[LOR_LHS_FALSE:.*]], label %[[R:.*]] ; CHECK: [[LOR_LHS_FALSE]]: ; CHECK-NEXT: br i1 false, label %[[LAND_LHS_TRUE]], label %[[S]] ; CHECK: [[R]]: -; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i64> [ [[TMP7]], %[[Q]] ], [ [[TMP16:%.*]], %[[IF_THEN19]] ] +; CHECK-NEXT: [[TMP21:%.*]] = phi 
<4 x i64> [ [[TMP19]], %[[Q]] ], [ [[TMP20:%.*]], %[[IF_THEN19]] ] ; CHECK-NEXT: br i1 false, label %[[S]], label %[[LAND_LHS_TRUE]] ; CHECK: [[LAND_LHS_TRUE]]: -; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i64> [ [[TMP18]], %[[R]] ], [ zeroinitializer, %[[LOR_LHS_FALSE]] ] +; CHECK-NEXT: [[TMP22:%.*]] = phi <4 x i64> [ [[TMP21]], %[[R]] ], [ zeroinitializer, %[[LOR_LHS_FALSE]] ] ; CHECK-NEXT: br i1 false, label %[[Q]], label %[[S]] ; CHECK: [[S]]: -; CHECK-NEXT: [[TMP10:%.*]] = phi <4 x i64> [ [[TMP19]], %[[LAND_LHS_TRUE]] ], [ [[TMP18]], %[[R]] ], [ [[TMP7]], %[[LOR_LHS_FALSE]] ], [ [[TMP17]], %[[P]] ] +; CHECK-NEXT: [[TMP10:%.*]] = phi <4 x i64> [ [[TMP22]], %[[LAND_LHS_TRUE]] ], [ [[TMP21]], %[[R]] ], [ [[TMP19]], %[[LOR_LHS_FALSE]] ], [ [[TMP17]], %[[P]] ] ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP10]], <4 x i64> poison, <2 x i32> ; CHECK-NEXT: br label %[[IF_THEN19]] ; CHECK: [[IF_THEN19]]: @@ -39,7 +41,7 @@ define i32 @test(i64 %l.549) { ; CHECK-NEXT: [[TMP13]] = shufflevector <2 x i64> [[TMP12]], <2 x i64> poison, <2 x i32> ; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i64> [[TMP12]], <2 x i64> poison, <4 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x i64> [[TMP14]], <4 x i64> [[TMP9]], <4 x i32> -; CHECK-NEXT: [[TMP16]] = call <4 x i64> @llvm.vector.insert.v4i64.v2i64(<4 x i64> [[TMP15]], <2 x i64> [[TMP2]], i64 2) +; CHECK-NEXT: [[TMP20]] = shufflevector <4 x i64> [[TMP15]], <4 x i64> [[TMP6]], <4 x i32> ; CHECK-NEXT: br i1 false, label %[[R]], label %[[IF_END25]] ; CHECK: [[IF_END25]]: ; CHECK-NEXT: br i1 false, label %[[IF_END29]], label %[[P]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gather-node-same-as-vect-but-order.ll b/llvm/test/Transforms/SLPVectorizer/X86/gather-node-same-as-vect-but-order.ll index 19c29be1ef384..4f62a8d24387f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/gather-node-same-as-vect-but-order.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/gather-node-same-as-vect-but-order.ll @@ -18,7 +18,8 
@@ define void @foo(ptr %i7, i32 %0, i1 %tobool62.not) { ; CHECK-NEXT: [[TMP8:%.*]] = load <2 x float>, ptr [[RC21]], align 4 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x float> poison, float [[TMP6]], i32 2 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x float> [[TMP10]], float [[TMP7]], i32 3 -; CHECK-NEXT: [[TMP13:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP9]], <2 x float> [[TMP8]], i64 0) +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x float> [[TMP9]], <4 x float> [[TMP11]], <4 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = fcmp olt <4 x float> [[TMP13]], zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = fcmp olt <4 x float> [[TMP5]], zeroinitializer ; CHECK-NEXT: [[TMP15:%.*]] = select <4 x i1> [[TMP14]], <4 x float> [[TMP5]], <4 x float> zeroinitializer diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll index 2484a2d2193fc..eaa77d74f8df1 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll @@ -605,9 +605,10 @@ define float @loadadd31(ptr nocapture readonly %x) { ; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[ARRAYIDX_28]], align 4 ; CHECK-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, ptr [[X]], i64 30 ; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[ARRAYIDX_29]], align 4 -; CHECK-NEXT: [[RDX_OP2:%.*]] = call fast <4 x float> @llvm.vector.extract.v4f32.v24f32(<24 x float> [[TMP0]], i64 0) +; CHECK-NEXT: [[RDX_OP2:%.*]] = shufflevector <24 x float> [[TMP0]], <24 x float> poison, <4 x i32> ; CHECK-NEXT: [[RDX_OP3:%.*]] = fadd fast <4 x float> [[RDX_OP2]], [[TMP2]] -; CHECK-NEXT: [[TMP5:%.*]] = call fast <24 x float> @llvm.vector.insert.v24f32.v4f32(<24 x float> [[TMP0]], <4 x float> [[RDX_OP3]], i64 0) +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[RDX_OP3]], <4 x 
float> poison, <24 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <24 x float> [[TMP0]], <24 x float> [[TMP6]], <24 x i32> ; CHECK-NEXT: [[OP_RDX1:%.*]] = call fast float @llvm.vector.reduce.fadd.v24f32(float 0.000000e+00, <24 x float> [[TMP5]]) ; CHECK-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[OP_RDX1]], [[TMP3]] ; CHECK-NEXT: [[OP_RDX3:%.*]] = fadd fast float [[OP_RDX2]], [[TMP4]] @@ -623,9 +624,10 @@ define float @loadadd31(ptr nocapture readonly %x) { ; THRESHOLD-NEXT: [[TMP3:%.*]] = load float, ptr [[ARRAYIDX_28]], align 4 ; THRESHOLD-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, ptr [[X]], i64 30 ; THRESHOLD-NEXT: [[TMP4:%.*]] = load float, ptr [[ARRAYIDX_29]], align 4 -; THRESHOLD-NEXT: [[RDX_OP2:%.*]] = call fast <4 x float> @llvm.vector.extract.v4f32.v24f32(<24 x float> [[TMP0]], i64 0) +; THRESHOLD-NEXT: [[RDX_OP2:%.*]] = shufflevector <24 x float> [[TMP0]], <24 x float> poison, <4 x i32> ; THRESHOLD-NEXT: [[RDX_OP3:%.*]] = fadd fast <4 x float> [[RDX_OP2]], [[TMP2]] -; THRESHOLD-NEXT: [[TMP5:%.*]] = call fast <24 x float> @llvm.vector.insert.v24f32.v4f32(<24 x float> [[TMP0]], <4 x float> [[RDX_OP3]], i64 0) +; THRESHOLD-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[RDX_OP3]], <4 x float> poison, <24 x i32> +; THRESHOLD-NEXT: [[TMP5:%.*]] = shufflevector <24 x float> [[TMP0]], <24 x float> [[TMP6]], <24 x i32> ; THRESHOLD-NEXT: [[OP_RDX1:%.*]] = call fast float @llvm.vector.reduce.fadd.v24f32(float 0.000000e+00, <24 x float> [[TMP5]]) ; THRESHOLD-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[OP_RDX1]], [[TMP3]] ; THRESHOLD-NEXT: [[OP_RDX3:%.*]] = fadd fast float [[OP_RDX2]], [[TMP4]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll index ca662b838938f..b7bd3e41b0d29 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll @@ -977,9 +977,12 @@ define i32 
@maxi8_wrong_parent(i32) { ; SSE4: pp: ; SSE4-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8 ; SSE4-NEXT: [[TMP8:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8 -; SSE4-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> poison, <4 x i32> [[TMP4]], i64 0) -; SSE4-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> [[TMP5]], <2 x i32> [[TMP8]], i64 4) -; SSE4-NEXT: [[TMP7:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> [[TMP6]], <2 x i32> [[TMP2]], i64 6) +; SSE4-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <8 x i32> +; SSE4-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> poison, <8 x i32> +; SSE4-NEXT: [[TMP10:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> poison, <4 x i32> +; SSE4-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP10]], <8 x i32> +; SSE4-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> +; SSE4-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP11]], <8 x i32> [[TMP9]], <8 x i32> ; SSE4-NEXT: [[OP_RDX7:%.*]] = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> [[TMP7]]) ; SSE4-NEXT: ret i32 [[OP_RDX7]] ; @@ -989,8 +992,9 @@ define i32 @maxi8_wrong_parent(i32) { ; AVX: pp: ; AVX-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8 ; AVX-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8 -; AVX-NEXT: [[TMP5:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> poison, <2 x i32> [[TMP7]], i64 0) -; AVX-NEXT: [[TMP6:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP5]], <2 x i32> [[TMP2]], i64 2) +; AVX-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <4 x i32> +; AVX-NEXT: [[TMP8:%.*]] = shufflevector <2 x 
i32> [[TMP2]], <2 x i32> poison, <4 x i32> +; AVX-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> [[TMP2]], <4 x i32> ; AVX-NEXT: [[RDX_OP:%.*]] = icmp sgt <4 x i32> [[TMP4]], [[TMP6]] ; AVX-NEXT: [[RDX_OP1:%.*]] = select <4 x i1> [[RDX_OP]], <4 x i32> [[TMP4]], <4 x i32> [[TMP6]] ; AVX-NEXT: [[OP_RDX7:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[RDX_OP1]]) @@ -1002,9 +1006,12 @@ define i32 @maxi8_wrong_parent(i32) { ; THRESH: pp: ; THRESH-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8 ; THRESH-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8 -; THRESH-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> poison, <4 x i32> [[TMP3]], i64 0) -; THRESH-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> [[TMP5]], <2 x i32> [[TMP4]], i64 4) -; THRESH-NEXT: [[TMP7:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> [[TMP6]], <2 x i32> [[TMP2]], i64 6) +; THRESH-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <8 x i32> +; THRESH-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <8 x i32> +; THRESH-NEXT: [[TMP10:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <4 x i32> +; THRESH-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP10]], <8 x i32> +; THRESH-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> +; THRESH-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP11]], <8 x i32> [[TMP9]], <8 x i32> ; THRESH-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> [[TMP7]]) ; THRESH-NEXT: ret i32 [[TMP8]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/identity-match-splat-less-defined.ll b/llvm/test/Transforms/SLPVectorizer/X86/identity-match-splat-less-defined.ll index d6f0b7692bdd9..f07424f0d2934 100644 --- 
a/llvm/test/Transforms/SLPVectorizer/X86/identity-match-splat-less-defined.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/identity-match-splat-less-defined.ll @@ -20,8 +20,10 @@ define i32 @test() { ; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <64 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <64 x i32> ; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <64 x i32> [[TMP13]], <64 x i32> [[TMP15]], <64 x i32> -; CHECK-NEXT: [[TMP17:%.*]] = call <64 x i32> @llvm.vector.insert.v64i32.v24i32(<64 x i32> [[TMP16]], <24 x i32> [[TMP6]], i64 24) -; CHECK-NEXT: [[TMP18:%.*]] = call <64 x i32> @llvm.vector.insert.v64i32.v16i32(<64 x i32> [[TMP17]], <16 x i32> [[TMP4]], i64 16) +; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <24 x i32> [[TMP6]], <24 x i32> poison, <64 x i32> +; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <64 x i32> [[TMP16]], <64 x i32> [[TMP15]], <64 x i32> +; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> poison, <64 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <64 x i32> [[TMP27]], <64 x i32> [[TMP28]], <64 x i32> ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq <64 x i32> zeroinitializer, [[TMP18]] ; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <64 x i32> zeroinitializer, [[TMP18]] ; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <64 x i1> [[TMP19]], <64 x i1> [[TMP20]], <64 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-subvector.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-subvector.ll index 80b62c3cfffac..0fddb7322e9b3 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-subvector.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-subvector.ll @@ -15,14 +15,17 @@ define <16 x double> @test(ptr %x, double %v, double %a) { ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> poison, double [[V]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = 
shufflevector <2 x double> [[TMP8]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = call <16 x double> @llvm.vector.insert.v16f64.v6f64(<16 x double> poison, <6 x double> [[TMP1]], i64 0) +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <6 x double> [[TMP1]], <6 x double> poison, <16 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <16 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x double> [[TMP10]], <16 x double> [[TMP11]], <16 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = call <16 x double> @llvm.vector.insert.v16f64.v2f64(<16 x double> [[TMP12]], <2 x double> [[TMP6]], i64 6) -; CHECK-NEXT: [[TMP14:%.*]] = call <16 x double> @llvm.vector.insert.v16f64.v2f64(<16 x double> [[TMP13]], <2 x double> [[TMP7]], i64 8) -; CHECK-NEXT: [[TMP15:%.*]] = call <16 x double> @llvm.vector.insert.v16f64.v2f64(<16 x double> [[TMP14]], <2 x double> [[TMP9]], i64 10) -; CHECK-NEXT: [[TMP16:%.*]] = call <16 x double> @llvm.vector.insert.v16f64.v2f64(<16 x double> [[TMP15]], <2 x double> [[TMP9]], i64 12) -; CHECK-NEXT: [[TMP17:%.*]] = call <16 x double> @llvm.vector.insert.v16f64.v2f64(<16 x double> [[TMP16]], <2 x double> [[TMP9]], i64 14) +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x double> [[TMP10]], <16 x double> [[TMP11]], <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <16 x double> [[TMP12]], <16 x double> [[TMP13]], <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> poison, <16 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <16 x double> [[TMP14]], <16 x double> [[TMP15]], <16 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> poison, <16 x i32> +; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <16 x double> [[TMP16]], <16 x double> [[TMP20]], <16 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <16 x double> 
[[TMP21]], <16 x double> [[TMP20]], <16 x i32> +; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <16 x double> [[TMP19]], <16 x double> [[TMP20]], <16 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = fadd <16 x double> [[TMP5]], [[TMP17]] ; CHECK-NEXT: ret <16 x double> [[TMP18]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/inst_size_bug.ll b/llvm/test/Transforms/SLPVectorizer/X86/inst_size_bug.ll index 54c950a078502..48b657e8bf6e5 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/inst_size_bug.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/inst_size_bug.ll @@ -11,7 +11,8 @@ define void @inst_size(ptr %a, <2 x i64> %b) { ; CHECK-NEXT: [[TMPL4:%.*]] = load i64, ptr [[PTR4]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[B:%.*]], <2 x i64> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i64> [[TMP1]], i64 [[TMPL1]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.vector.insert.v4i64.v2i64(<4 x i64> [[TMP2]], <2 x i64> [[TMP0]], i64 2) +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> [[TMP6]], <4 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i64> zeroinitializer, [[TMP3]] ; CHECK-NEXT: [[T45:%.*]] = icmp sgt i64 0, [[TMPL4]] ; CHECK-NEXT: br label [[BLOCK:%.*]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/landing-pad-for-split-node.ll b/llvm/test/Transforms/SLPVectorizer/X86/landing-pad-for-split-node.ll index d6552adbd4abf..6c729d17c1a9b 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/landing-pad-for-split-node.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/landing-pad-for-split-node.ll @@ -29,14 +29,15 @@ define void @test(i32 %arg) personality ptr null { ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[PHI6]], i32 2 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[PHI7]], i32 3 ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, 
<8 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP8]], <4 x i32> [[TMP7]], i64 4) +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP7]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> [[TMP9]], <8 x i32> ; CHECK-NEXT: br label %[[BB11:.*]] ; CHECK: [[BB9:.*]]: ; CHECK-NEXT: [[LANDINGPAD10:%.*]] = landingpad { ptr, i32 } ; CHECK-NEXT: cleanup ; CHECK-NEXT: br label %[[BB11]] ; CHECK: [[BB11]]: -; CHECK-NEXT: [[TMP10:%.*]] = phi <8 x i32> [ poison, %[[BB9]] ], [ [[TMP9]], %[[BB5]] ] +; CHECK-NEXT: [[TMP11:%.*]] = phi <8 x i32> [ poison, %[[BB9]] ], [ [[TMP10]], %[[BB5]] ] ; CHECK-NEXT: ret void ; bb: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/malformed_phis.ll b/llvm/test/Transforms/SLPVectorizer/X86/malformed_phis.ll index ccb7e9b514cf1..842bd6c6bec37 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/malformed_phis.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/malformed_phis.ll @@ -108,9 +108,10 @@ define i64 @test_3() #0 { ; CHECK-NEXT: [[VAL4:%.*]] = extractelement <28 x i32> [[TMP3]], i32 0 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <32 x i32> poison, i32 [[VAL4]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i32> [[TMP0]], <32 x i32> poison, <32 x i32> zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = call <28 x i32> @llvm.vector.extract.v28i32.v32i32(<32 x i32> [[TMP1]], i64 0) +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <32 x i32> [[TMP1]], <32 x i32> poison, <28 x i32> ; CHECK-NEXT: [[RDX_OP:%.*]] = mul <28 x i32> [[TMP5]], [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v28i32(<32 x i32> [[TMP1]], <28 x i32> [[RDX_OP]], i64 0) +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <28 x i32> [[RDX_OP]], <28 x i32> poison, <32 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <32 x i32> [[TMP1]], <32 x i32> [[TMP7]], <32 x i32> ; CHECK-NEXT: [[OP_RDX27:%.*]] = call i32 
@llvm.vector.reduce.mul.v32i32(<32 x i32> [[TMP6]]) ; CHECK-NEXT: [[VAL64:%.*]] = add i32 3, [[OP_RDX27]] ; CHECK-NEXT: [[VAL65:%.*]] = sext i32 [[VAL64]] to i64 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/matched-nodes-updated.ll b/llvm/test/Transforms/SLPVectorizer/X86/matched-nodes-updated.ll index 289c6002851d7..f56af934f19f5 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/matched-nodes-updated.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/matched-nodes-updated.ll @@ -22,9 +22,12 @@ define i32 @test(i32 %s.0) { ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP22]], <2 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP22]], <2 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i32> , <8 x i32> [[TMP7]], <8 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> [[TMP8]], <2 x i32> [[TMP2]], i64 2) -; CHECK-NEXT: [[TMP10:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> [[TMP9]], <2 x i32> [[TMP3]], i64 4) -; CHECK-NEXT: [[TMP11:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> [[TMP10]], <2 x i32> [[TMP5]], i64 6) +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> [[TMP9]], <8 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <8 x i32> [[TMP10]], <8 x i32> [[TMP11]], <8 x i32> +; CHECK-NEXT: [[TMP30:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP31:%.*]] = shufflevector <8 x i32> [[TMP27]], <8 x i32> [[TMP30]], <8 x i32> ; CHECK-NEXT: br i1 false, label %[[IF_END24:.*]], label %[[IF_THEN11:.*]] ; CHECK: [[IF_THEN11]]: ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <8 x i32> @@ -37,11 +40,11 @@ define i32 @test(i32 %s.0) { ; CHECK: 
[[IF_THEN18:.*]]: ; CHECK-NEXT: br label %[[T]] ; CHECK: [[T]]: -; CHECK-NEXT: [[TMP30:%.*]] = phi <8 x i32> [ [[TMP27:%.*]], %[[O]] ], [ poison, %[[IF_THEN18]] ] +; CHECK-NEXT: [[TMP34:%.*]] = phi <8 x i32> [ [[TMP33:%.*]], %[[O]] ], [ poison, %[[IF_THEN18]] ] ; CHECK-NEXT: [[TMP17]] = extractelement <4 x i32> [[TMP23:%.*]], i32 0 ; CHECK-NEXT: br i1 false, label %[[IF_END24]], label %[[K]] ; CHECK: [[IF_END24]]: -; CHECK-NEXT: [[TMP18:%.*]] = phi <8 x i32> [ [[TMP29]], %[[IF_THEN11]] ], [ [[TMP11]], %[[IF_END6]] ], [ [[TMP30]], %[[T]] ] +; CHECK-NEXT: [[TMP18:%.*]] = phi <8 x i32> [ [[TMP29]], %[[IF_THEN11]] ], [ [[TMP31]], %[[IF_END6]] ], [ [[TMP34]], %[[T]] ] ; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x i32> [[TMP18]], <8 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <8 x i32> [[TMP18]], <8 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <8 x i32> [[TMP18]], <8 x i32> poison, <4 x i32> @@ -52,7 +55,8 @@ define i32 @test(i32 %s.0) { ; CHECK-NEXT: [[TMP24:%.*]] = phi <4 x i32> [ zeroinitializer, %[[K]] ], [ [[TMP21]], %[[IF_END24]] ] ; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x i32> [[TMP23]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <8 x i32> [[TMP25]], <8 x i32> , <8 x i32> -; CHECK-NEXT: [[TMP27]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP26]], <4 x i32> [[TMP23]], i64 4) +; CHECK-NEXT: [[TMP32:%.*]] = shufflevector <4 x i32> [[TMP23]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP33]] = shufflevector <8 x i32> [[TMP26]], <8 x i32> [[TMP32]], <8 x i32> ; CHECK-NEXT: [[TMP28]] = extractelement <4 x i32> [[TMP24]], i32 3 ; CHECK-NEXT: br i1 false, label %[[T]], label %[[IF_END6]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll b/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll index ea497c95d4114..1abc8102dc332 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll +++ 
b/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll @@ -22,7 +22,8 @@ define i32 @bar() local_unnamed_addr { ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> , <8 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = sub nsw <8 x i32> [[TMP7]], [[TMP9]] ; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v8i32(<16 x i32> [[TMP18]], <8 x i32> [[TMP10]], i64 8) +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x i32> [[TMP10]], <8 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x i32> [[TMP18]], <16 x i32> [[TMP19]], <16 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = lshr <16 x i32> [[TMP11]], splat (i32 15) ; CHECK-NEXT: [[TMP13:%.*]] = and <16 x i32> [[TMP12]], splat (i32 65537) ; CHECK-NEXT: [[TMP14:%.*]] = mul nuw <16 x i32> [[TMP13]], splat (i32 65535) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-transformed-operand.ll b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-transformed-operand.ll index 76104efc1bb78..6da0ecef5cd96 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-transformed-operand.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-transformed-operand.ll @@ -8,7 +8,7 @@ define void @test(i64 %d.promoted.i) { ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> , i64 [[D_PROMOTED_I]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> zeroinitializer, [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i1> -; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i1> @llvm.vector.insert.v16i1.v2i1(<16 x i1> poison, <2 x i1> [[TMP3]], i64 0) +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i1> [[TMP3]], <2 x i1> poison, <16 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x i1> [[TMP4]], <16 x i1> , <16 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = mul <16 x i1> [[TMP5]], zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> 
[[TMP6]]) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/multi-node-reuse-in-bv.ll b/llvm/test/Transforms/SLPVectorizer/X86/multi-node-reuse-in-bv.ll index f7d78be4f13ca..a9f2ed61d9ee4 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/multi-node-reuse-in-bv.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/multi-node-reuse-in-bv.ll @@ -18,8 +18,8 @@ define i64 @test() { ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <32 x i32> , i32 0, i32 6 -; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v8i32(<32 x i32> [[TMP0]], <8 x i32> zeroinitializer, i64 8) -; CHECK-NEXT: [[TMP2:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> [[TMP1]], <4 x i32> , i64 24) +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i32> [[TMP0]], <32 x i32> , <32 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <32 x i32> [[TMP1]], <32 x i32> , <32 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = sub <32 x i32> zeroinitializer, [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> [[TMP3]]) ; CHECK-NEXT: [[OP_RDX:%.*]] = add i32 [[TMP4]], 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-load-reduced-as-part-of-bv.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-load-reduced-as-part-of-bv.ll index e9a65bf6d6f0d..7df97492b874b 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/non-load-reduced-as-part-of-bv.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/non-load-reduced-as-part-of-bv.ll @@ -10,7 +10,8 @@ define i1 @foo() { ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i32> zeroinitializer, [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i1> [[TMP1]], <2 x i1> poison, <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i1> [[TMP2]], <4 x i1> , <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.vector.insert.v8i1.v4i1(<8 x i1> , <4 x i1> [[TMP6]], i64 4) +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i1> [[TMP6]], <4 x i1> poison, <8 x i32> 
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i1> , <8 x i1> [[TMP7]], <8 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = freeze <8 x i1> [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP4]]) ; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 false, i1 [[TMP5]], i1 false diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-power-2-num-elems-reused.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-power-2-num-elems-reused.ll index 4ad02d47fb385..f1bd3384f0488 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/non-power-2-num-elems-reused.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/non-power-2-num-elems-reused.ll @@ -6,7 +6,7 @@ define i64 @test() { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[OR54_I_I_6:%.*]] = or i32 0, 0 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <16 x i32> poison, i32 [[OR54_I_I_6]], i32 8 -; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v8i32(<16 x i32> [[TMP0]], <8 x i32> zeroinitializer, i64 0) +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> , <16 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = zext <16 x i32> [[TMP2]] to <16 x i64> ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> [[TMP3]]) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-order-detection.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-order-detection.ll index 355f5306ee4db..04359eb6fcd7c 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-order-detection.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-order-detection.ll @@ -16,8 +16,10 @@ define void @e(ptr %c, i64 %0) { ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x ptr> [[TMP5]], <2 x ptr> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <6 x ptr> poison, ptr [[TMP2]], i32 2 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <6 x ptr> [[TMP7]], ptr [[TMP1]], 
i32 3 -; CHECK-NEXT: [[TMP9:%.*]] = call <6 x ptr> @llvm.vector.insert.v6p0.v2p0(<6 x ptr> [[TMP8]], <2 x ptr> [[TMP4]], i64 0) -; CHECK-NEXT: [[TMP10:%.*]] = call <6 x ptr> @llvm.vector.insert.v6p0.v2p0(<6 x ptr> [[TMP9]], <2 x ptr> [[TMP6]], i64 4) +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <2 x ptr> [[TMP4]], <2 x ptr> poison, <6 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <6 x ptr> [[TMP8]], <6 x ptr> [[TMP19]], <6 x i32> +; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <2 x ptr> [[TMP6]], <2 x ptr> poison, <6 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <6 x ptr> [[TMP20]], <6 x ptr> [[TMP21]], <6 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint <6 x ptr> [[TMP10]] to <6 x i64> ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <6 x i64> [[TMP11]], <6 x i64> poison, <32 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = insertelement <32 x i64> poison, i64 [[TMP0]], i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-subvectors-insert.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-subvectors-insert.ll index 70b7f14a3a2c9..1fedde4cc9fd7 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-subvectors-insert.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-subvectors-insert.ll @@ -7,7 +7,7 @@ define void @test() { ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <12 x i64> , i64 [[XOR108_I_I_I]], i32 10 ; CHECK-NEXT: [[TMP2:%.*]] = lshr <12 x i64> [[TMP1]], zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i64> poison, i64 [[XOR108_I_I_I]], i32 3 -; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i64> @llvm.vector.insert.v16i64.v12i64(<16 x i64> poison, <12 x i64> [[TMP2]], i64 0) +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <12 x i64> [[TMP2]], <12 x i64> poison, <16 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x i64> [[TMP5]], <16 x i64> [[TMP3]], <16 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i64> [[TMP6]], <16 x i64> poison, <16 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = trunc <16 x i64> [[TMP7]] 
to <16 x i1> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-instructions-become-schedulable.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-instructions-become-schedulable.ll index 382d6ae0e0a6f..652abef14771d 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-instructions-become-schedulable.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-instructions-become-schedulable.ll @@ -12,11 +12,11 @@ define void @test() { ; CHECK-NEXT: [[TMP0:%.*]] = zext i1 false to i64 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> , i64 [[TMP0]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i64> zeroinitializer, [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.vector.insert.v4i64.v2i64(<4 x i64> , <2 x i64> [[TMP2]], i64 2) -; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i64> @llvm.vector.insert.v4i64.v2i64(<4 x i64> , <2 x i64> [[TMP2]], i64 2) +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> poison, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> , <4 x i64> [[TMP3]], <4 x i32> ; CHECK-NEXT: br i1 false, label %[[BB5]], label %[[BB2:.*]] ; CHECK: [[BB5]]: -; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x i64> [ [[TMP3]], %[[BB1]] ], [ poison, %[[IF_THEN_I_I]] ] +; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x i64> [ [[TMP4]], %[[BB1]] ], [ poison, %[[IF_THEN_I_I]] ] ; CHECK-NEXT: br label %[[BB2]] ; CHECK: [[BB2]]: ; CHECK-NEXT: [[TMP7:%.*]] = phi <4 x i64> [ [[TMP6]], %[[BB5]] ], [ [[TMP4]], %[[BB1]] ] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-split-node.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-split-node.ll index eaf7bb2c9fdce..98ea4db6f6492 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-split-node.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-split-node.ll @@ -17,7 +17,8 @@ define i64 @test(i256 %0, { i32, i1 } %1) { ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP7]], i32 2 ; 
CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP14:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP13]], <4 x i32> [[TMP12]], i64 4) +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <4 x i32> [[TMP12]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <8 x i32> [[TMP13]], <8 x i32> [[TMP19]], <8 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = icmp ne <8 x i32> [[TMP14]], zeroinitializer ; CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x i1> [[TMP15]] to i8 ; CHECK-NEXT: [[TMP17:%.*]] = call i8 @llvm.ctpop.i8(i8 [[TMP16]]) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi-node-reshuffled-part.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi-node-reshuffled-part.ll index 232e458504188..7206293444d55 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/phi-node-reshuffled-part.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/phi-node-reshuffled-part.ll @@ -11,7 +11,8 @@ define void @test() { ; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i1> [ poison, %[[CONT221_THREAD781]] ], [ zeroinitializer, %[[ENTRY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[TMP0]], <2 x i1> poison, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> zeroinitializer, <4 x i1> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i1> @llvm.vector.insert.v8i1.v4i1(<8 x i1> , <4 x i1> [[TMP3]], i64 0) +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i1> [[TMP3]], <4 x i1> poison, <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i1> , <8 x i1> [[TMP7]], <8 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = select <8 x i1> [[TMP4]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> [[TMP5]]) ; CHECK-NEXT: [[OP_RDX:%.*]] = or i64 0, [[TMP6]] diff --git 
a/llvm/test/Transforms/SLPVectorizer/X86/phi-nodes-incoming-same-blocks.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi-nodes-incoming-same-blocks.ll index 048d2814b9abb..d62623047763f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/phi-nodes-incoming-same-blocks.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/phi-nodes-incoming-same-blocks.ll @@ -8,19 +8,20 @@ define void @test(ptr %0, i1 %1, i1 %2) { ; CHECK: [[BB4]]: ; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x i32> [ [[TMP12:%.*]], %[[TMP7:.*]] ], [ zeroinitializer, [[TMP3:%.*]] ] ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <4 x i32> -; CHECK-NEXT: br i1 [[TMP1]], label %[[TMP7]], label %[[BB14:.*]] +; CHECK-NEXT: br i1 [[TMP1]], label %[[TMP7]], label %[[BB15:.*]] ; CHECK: [[TMP7]]: ; CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP0]], align 8 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP8]], i64 16 ; CHECK-NEXT: [[TMP10:%.*]] = load <2 x i32>, ptr [[TMP9]], align 1 ; CHECK-NEXT: [[TMP11:%.*]] = or <2 x i32> [[TMP10]], splat (i32 1) ; CHECK-NEXT: [[TMP12]] = shufflevector <2 x i32> [[TMP11]], <2 x i32> , <2 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> , <2 x i32> [[TMP11]], i64 2) -; CHECK-NEXT: br i1 [[TMP2]], label %[[BB15:.*]], label %[[BB4]] -; CHECK: [[BB14]]: -; CHECK-NEXT: br label %[[BB15]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[TMP11]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> , <4 x i32> [[TMP13]], <4 x i32> +; CHECK-NEXT: br i1 [[TMP2]], label %[[BB16:.*]], label %[[BB4]] ; CHECK: [[BB15]]: -; CHECK-NEXT: [[TMP16:%.*]] = phi <4 x i32> [ [[TMP6]], %[[BB14]] ], [ [[TMP13]], %[[TMP7]] ] +; CHECK-NEXT: br label %[[BB16]] +; CHECK: [[BB16]]: +; CHECK-NEXT: [[TMP16:%.*]] = phi <4 x i32> [ [[TMP6]], %[[BB15]] ], [ [[TMP14]], %[[TMP7]] ] ; CHECK-NEXT: [[TMP17:%.*]] = load volatile ptr, ptr null, align 8 ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr 
[[TMP17]], i64 176 ; CHECK-NEXT: store <4 x i32> [[TMP16]], ptr [[TMP18]], align 8 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduced-value-vectorized-later.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduced-value-vectorized-later.ll index 5baa5f3cdcdae..e35491823cc55 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduced-value-vectorized-later.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduced-value-vectorized-later.ll @@ -4,10 +4,7 @@ define i16 @test() { ; CHECK-LABEL: define i16 @test() { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i16> @llvm.vector.extract.v4i16.v8i16(<8 x i16> zeroinitializer, i64 0) -; CHECK-NEXT: [[RDX_OP:%.*]] = or <4 x i16> [[TMP0]], zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.vector.insert.v8i16.v4i16(<8 x i16> zeroinitializer, <4 x i16> [[RDX_OP]], i64 0) -; CHECK-NEXT: [[OP_RDX:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP1]]) +; CHECK-NEXT: [[OP_RDX:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> zeroinitializer) ; CHECK-NEXT: [[OP_RDX1:%.*]] = or i16 [[OP_RDX]], 0 ; CHECK-NEXT: ret i16 [[OP_RDX1]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll index 81da11dc42e88..1904540c23146 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll @@ -210,7 +210,8 @@ define i1 @logical_and_icmp_clamp(<4 x i32> %x) { ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], splat (i32 42) ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[X]], splat (i32 17) ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i1> [[TMP2]], <4 x i1> poison, <8 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i1> @llvm.vector.insert.v8i1.v4i1(<8 x i1> [[TMP3]], <4 x i1> [[TMP1]], i64 4) +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = 
shufflevector <8 x i1> [[TMP3]], <8 x i1> [[TMP7]], <8 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = freeze <8 x i1> [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP5]]) ; CHECK-NEXT: ret i1 [[TMP6]] @@ -244,7 +245,8 @@ define i1 @logical_and_icmp_clamp_extra_use_cmp(<4 x i32> %x) { ; CHECK-NEXT: call void @use1(i1 [[TMP5]]) ; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i32> [[X]], splat (i32 17) ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i1> [[TMP3]], <4 x i1> poison, <8 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i1> @llvm.vector.insert.v8i1.v4i1(<8 x i1> [[TMP8]], <4 x i1> [[TMP1]], i64 4) +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i1> [[TMP8]], <8 x i1> [[TMP9]], <8 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = freeze <8 x i1> [[TMP4]] ; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP6]]) ; CHECK-NEXT: ret i1 [[TMP7]] @@ -316,7 +318,8 @@ define i1 @logical_and_icmp_clamp_v8i32(<8 x i32> %x, <8 x i32> %y) { ; CHECK-LABEL: @logical_and_icmp_clamp_v8i32( ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[X:%.*]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[Y:%.*]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> , <4 x i32> [[TMP2]], i64 4) +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> , <8 x i32> [[TMP7]], <8 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <8 x i32> [[TMP1]], [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = freeze <8 x i1> [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP5]]) @@ -392,7 +395,7 @@ define i1 @logical_and_icmp_clamp_pred_diff(<4 x i32> %x) { ; CHECK-LABEL: @logical_and_icmp_clamp_pred_diff( ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> 
[[X:%.*]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> , <8 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP2]], <4 x i32> [[X]], i64 0) +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> [[TMP1]], <8 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> , <8 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt <8 x i32> [[TMP3]], [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = icmp ult <8 x i32> [[TMP3]], [[TMP4]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-with-removed-extracts.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-with-removed-extracts.ll index 799533824c5aa..fe5f4deecb8b3 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-with-removed-extracts.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-with-removed-extracts.ll @@ -9,19 +9,16 @@ define i32 @test(i32 %arg) { ; CHECK-NEXT: br label %[[BB1:.*]] ; CHECK: [[BB1]]: ; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[OP_RDX:%.*]], %[[BB1]] ] -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i64> @llvm.vector.insert.v4i64.v2i64(<4 x i64> , <2 x i64> zeroinitializer, i64 2) -; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i64> zeroinitializer, [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = trunc <4 x i64> [[TMP2]] to <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = or <4 x i32> zeroinitializer, [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i32> [[TMP0]], [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i32> [[TMP0]], zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = or <4 x i32> [[TMP5]], zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = mul <2 x i32> zeroinitializer, [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> poison, <4 x i32> -; CHECK-NEXT: 
[[TMP11:%.*]] = call <4 x i32> @llvm.vector.extract.v4i32.v8i32(<8 x i32> [[TMP7]], i64 0) +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <8 x i32> [[TMP7]], <8 x i32> poison, <4 x i32> ; CHECK-NEXT: [[RDX_OP:%.*]] = mul <4 x i32> [[TMP11]], [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP7]], <4 x i32> [[RDX_OP]], i64 0) +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[RDX_OP]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <8 x i32> [[TMP7]], <8 x i32> [[TMP14]], <8 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> [[TMP12]]) ; CHECK-NEXT: [[OP_RDX]] = mul i32 0, [[TMP13]] ; CHECK-NEXT: br label %[[BB1]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark-partial-loads-vectorize.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark-partial-loads-vectorize.ll index 8aaa71ef47a8c..c258c7d54df82 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/remark-partial-loads-vectorize.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/remark-partial-loads-vectorize.ll @@ -20,7 +20,8 @@ define <4 x float> @test(ptr %x, float %v, float %a) { ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> poison, float [[V]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP5]], <2 x float> [[TMP1]], i64 2) +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP6]], <4 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x float> [[TMP3]], [[TMP7]] ; CHECK-NEXT: ret <4 x float> [[TMP8]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder-possible-strided-node.ll 
b/llvm/test/Transforms/SLPVectorizer/X86/reorder-possible-strided-node.ll index 02058b1fe8578..19ce11c457f63 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reorder-possible-strided-node.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder-possible-strided-node.ll @@ -15,7 +15,8 @@ define void @test() { ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[ARRAYIDX22]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i32 2 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP9]], i32 3 -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP6]], <2 x i32> [[TMP10]], i64 0) +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP10]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP12]], <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[TMP1]], [[TMP7]] ; CHECK-NEXT: [[TMP4:%.*]] = ashr <4 x i32> [[TMP3]], zeroinitializer ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> @@ -75,7 +76,8 @@ define void @test1() { ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[ARRAYIDX22]], align 4 ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i32 2 ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP9]], i32 3 -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP12]], <2 x i32> [[TMP10]], i64 0) +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i32> [[TMP10]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP12]], <4 x i32> [[TMP14]], <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[TMP1]], [[TMP7]] ; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i32> [[TMP3]] to <4 x i64> ; CHECK-NEXT: [[TMP5:%.*]] = lshr <4 x i64> [[TMP4]], zeroinitializer @@ -137,7 +139,8 @@ define void @test_div() { ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[ARRAYIDX22]], align 
4 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i32 2 ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP4]], i32 3 -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP11]], <2 x i32> [[TMP3]], i64 0) +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = mul <4 x i32> [[TMP1]], [[TMP7]] ; CHECK-NEXT: [[TMP10:%.*]] = udiv <4 x i32> [[TMP9]], ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <4 x i32> @@ -197,7 +200,8 @@ define void @test_rem() { ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[ARRAYIDX22]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i32 2 ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP4]], i32 3 -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP11]], <2 x i32> [[TMP3]], i64 0) +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = mul <4 x i32> [[TMP1]], [[TMP7]] ; CHECK-NEXT: [[TMP10:%.*]] = urem <4 x i32> [[TMP9]], ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <4 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-subvector.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-subvector.ll index af9d808f45fa1..3f6ec8ccad4ee 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-subvector.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-subvector.ll @@ -56,7 +56,8 @@ define void @test(i32 %j.6, i32 %m.4, i8 %v.5, ptr %a, i1 %tobool14.not) { ; CHECK-NEXT: [[TMP25:%.*]] = insertelement <4 x i32> [[TMP24]], i32 
[[SUB13]], i32 1 ; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <4 x i32> [[TMP25]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <4 x i32> [[TMP26]], <4 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP28:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP27]], <4 x i32> [[TMP23]], i64 4) +; CHECK-NEXT: [[TMP34:%.*]] = shufflevector <4 x i32> [[TMP23]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <8 x i32> [[TMP27]], <8 x i32> [[TMP34]], <8 x i32> ; CHECK-NEXT: [[TMP29:%.*]] = shufflevector <8 x i32> [[TMP28]], <8 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP30:%.*]] = shufflevector <2 x i32> [[TMP29]], <2 x i32> , <2 x i32> ; CHECK-NEXT: br i1 [[TOBOOL14_NOT]], label %[[IF_END18]], label %[[Q]] @@ -68,11 +69,13 @@ define void @test(i32 %j.6, i32 %m.4, i8 %v.5, ptr %a, i1 %tobool14.not) { ; CHECK-NEXT: [[CONV17:%.*]] = sext i8 [[V_44]] to i32 ; CHECK-NEXT: [[REM:%.*]] = mul i32 [[U_4]], [[CONV17]] ; CHECK-NEXT: [[TMP33:%.*]] = insertelement <8 x i32> , i32 [[REM]], i32 5 -; CHECK-NEXT: [[TMP34:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP33]], <4 x i32> [[TMP32]], i64 0) +; CHECK-NEXT: [[TMP38:%.*]] = shufflevector <4 x i32> [[TMP32]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP39:%.*]] = shufflevector <2 x i32> [[TMP31]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP40:%.*]] = shufflevector <8 x i32> [[TMP33]], <8 x i32> [[TMP39]], <8 x i32> ; CHECK-NEXT: br label %[[IF_END18]] ; CHECK: [[IF_END18]]: ; CHECK-NEXT: [[L_4]] = phi i8 [ 0, %[[Q]] ], [ [[L_3_PH]], %[[O]] ] -; CHECK-NEXT: [[TMP35]] = phi <8 x i32> [ [[TMP34]], %[[Q]] ], [ [[TMP28]], %[[O]] ] +; CHECK-NEXT: [[TMP35]] = phi <8 x i32> [ [[TMP40]], %[[Q]] ], [ [[TMP28]], %[[O]] ] ; CHECK-NEXT: [[TMP36:%.*]] = shufflevector <8 x i32> [[TMP35]], <8 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP37]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP36]], <4 x i32> ; CHECK-NEXT: br i1 [[TOBOOL14_NOT]], label 
%[[N]], label %[[P]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/replaced-external-in-reduction.ll b/llvm/test/Transforms/SLPVectorizer/X86/replaced-external-in-reduction.ll index 52e13de8118d7..61294089fd4cb 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/replaced-external-in-reduction.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/replaced-external-in-reduction.ll @@ -16,9 +16,10 @@ define void @test(i32 %0, ptr %p) { ; CHECK: exit: ; CHECK-NEXT: [[TMP9:%.*]] = phi <8 x i32> [ [[TMP8]], [[ENTRY:%.*]] ], [ [[TMP6]], [[PH]] ] ; CHECK-NEXT: [[TMP7:%.*]] = phi <4 x i32> [ [[TMP5]], [[ENTRY]] ], [ zeroinitializer, [[PH]] ] -; CHECK-NEXT: [[TMP10:%.*]] = call <4 x i32> @llvm.vector.extract.v4i32.v8i32(<8 x i32> [[TMP9]], i64 0) +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x i32> [[TMP9]], <8 x i32> poison, <4 x i32> ; CHECK-NEXT: [[RDX_OP:%.*]] = or <4 x i32> [[TMP10]], [[TMP7]] -; CHECK-NEXT: [[TMP11:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP9]], <4 x i32> [[RDX_OP]], i64 0) +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[RDX_OP]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <8 x i32> [[TMP9]], <8 x i32> [[TMP12]], <8 x i32> ; CHECK-NEXT: [[OP_RDX5:%.*]] = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> [[TMP11]]) ; CHECK-NEXT: [[OP_RDX2:%.*]] = or i32 [[OP_RDX5]], [[OP_RDX]] ; CHECK-NEXT: store i32 [[OP_RDX2]], ptr [[P]], align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll index ef1149a108e29..20a42777cf8e4 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll @@ -7,7 +7,7 @@ define fastcc void @_ZN12_GLOBAL__N_127PolynomialMultiplyRecognize9recognizeEv(i1 %arg) unnamed_addr #0 align 2 { ; CHECK-LABEL: @_ZN12_GLOBAL__N_127PolynomialMultiplyRecognize9recognizeEv( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 %arg, label [[IF_END50_I:%.*]], label 
[[IF_THEN22_I:%.*]] +; CHECK-NEXT: br i1 [[ARG:%.*]], label [[IF_END50_I:%.*]], label [[IF_THEN22_I:%.*]] ; CHECK: if.then22.i: ; CHECK-NEXT: [[SUB_I:%.*]] = add nsw i32 undef, -1 ; CHECK-NEXT: [[CONV31_I:%.*]] = and i32 undef, [[SUB_I]] @@ -24,11 +24,14 @@ define fastcc void @_ZN12_GLOBAL__N_127PolynomialMultiplyRecognize9recognizeEv(i ; CHECK-NEXT: [[TMP9:%.*]] = trunc i32 [[SHR_I_I]] to i8 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x i8> [[TMP8]], i8 [[TMP9]], i32 1 ; CHECK-NEXT: [[TMP11:%.*]] = trunc <8 x i32> [[TMP6]] to <8 x i8> -; CHECK-NEXT: [[TMP12:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> [[TMP10]], <8 x i8> [[TMP11]], i64 8) +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <8 x i8> [[TMP11]], <8 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <16 x i8> [[TMP10]], <16 x i8> [[TMP12]], <16 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = trunc <4 x i32> [[TMP4]] to <4 x i8> -; CHECK-NEXT: [[TMP14:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP12]], <4 x i8> [[TMP13]], i64 4) +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <4 x i8> [[TMP13]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <16 x i8> [[TMP14]], <16 x i8> [[TMP19]], <16 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = trunc <2 x i32> [[TMP2]] to <2 x i8> -; CHECK-NEXT: [[TMP16:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v2i8(<16 x i8> [[TMP14]], <2 x i8> [[TMP15]], i64 2) +; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <2 x i8> [[TMP15]], <2 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <16 x i8> [[TMP20]], <16 x i8> [[TMP18]], <16 x i32> ; CHECK-NEXT: [[TMP17:%.*]] = and <16 x i8> [[TMP16]], splat (i8 1) ; CHECK-NEXT: store <16 x i8> [[TMP17]], ptr undef, align 1 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/resized-bv-values-non-power-of2-node.ll b/llvm/test/Transforms/SLPVectorizer/X86/resized-bv-values-non-power-of2-node.ll index e56131b4681e3..92a1e289044d7 100644 --- 
a/llvm/test/Transforms/SLPVectorizer/X86/resized-bv-values-non-power-of2-node.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/resized-bv-values-non-power-of2-node.ll @@ -32,14 +32,12 @@ define <16 x half> @test(i32 %0, float %1, i32 %2) { ; CHECK-NEXT: [[TMP29:%.*]] = sitofp <16 x i32> [[TMP28]] to <16 x float> ; CHECK-NEXT: [[TMP30:%.*]] = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> [[TMP29]], <16 x float> zeroinitializer, <16 x float> zeroinitializer) ; CHECK-NEXT: [[TMP31:%.*]] = fadd <16 x float> [[TMP30]], zeroinitializer -; CHECK-NEXT: [[TMP32:%.*]] = call <12 x i1> @llvm.vector.insert.v12i1.v2i1(<12 x i1> poison, <2 x i1> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP33:%.*]] = shufflevector <12 x i1> [[TMP32]], <12 x i1> , <12 x i32> -; CHECK-NEXT: [[TMP34:%.*]] = shufflevector <12 x i1> [[TMP33]], <12 x i1> poison, <16 x i32> -; CHECK-NEXT: [[TMP35:%.*]] = select <16 x i1> [[TMP34]], <16 x float> zeroinitializer, <16 x float> [[TMP31]] +; CHECK-NEXT: [[TMP35:%.*]] = select <16 x i1> zeroinitializer, <16 x float> zeroinitializer, <16 x float> [[TMP31]] ; CHECK-NEXT: [[TMP36:%.*]] = bitcast <16 x float> [[TMP35]] to <16 x i32> ; CHECK-NEXT: [[TMP37:%.*]] = and <16 x i32> [[TMP36]], zeroinitializer ; CHECK-NEXT: [[TMP38:%.*]] = bitcast <16 x i32> [[TMP37]] to <16 x float> -; CHECK-NEXT: [[TMP39:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v2f32(<16 x float> , <2 x float> [[TMP6]], i64 14) +; CHECK-NEXT: [[TMP53:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP39:%.*]] = shufflevector <16 x float> , <16 x float> [[TMP53]], <16 x i32> ; CHECK-NEXT: [[TMP40:%.*]] = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> zeroinitializer, <16 x float> [[TMP38]], <16 x float> [[TMP39]]) ; CHECK-NEXT: [[TMP41:%.*]] = extractelement <16 x float> [[TMP29]], i32 0 ; CHECK-NEXT: [[TMP42:%.*]] = fcmp olt float [[TMP41]], 0.000000e+00 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/revec-SplitVectorize.ll 
b/llvm/test/Transforms/SLPVectorizer/X86/revec-SplitVectorize.ll index 976de7cc8c21f..f98ed81b087b5 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/revec-SplitVectorize.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/revec-SplitVectorize.ll @@ -4,30 +4,25 @@ define void @test() { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> poison, <4 x i32> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> [[TMP0]], <4 x i32> zeroinitializer, i64 4) -; CHECK-NEXT: [[TMP2:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> [[TMP1]], <4 x i32> zeroinitializer, i64 8) -; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> [[TMP2]], <4 x i32> zeroinitializer, i64 12) -; CHECK-NEXT: [[TMP4:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> [[TMP3]], <4 x i32> zeroinitializer, i64 16) -; CHECK-NEXT: [[TMP5:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> [[TMP4]], <4 x i32> zeroinitializer, i64 20) -; CHECK-NEXT: [[TMP6:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> [[TMP5]], <4 x i32> zeroinitializer, i64 24) -; CHECK-NEXT: [[TMP7:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> [[TMP6]], <4 x i32> zeroinitializer, i64 28) -; CHECK-NEXT: [[TMP8:%.*]] = trunc <32 x i32> [[TMP7]] to <32 x i1> ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[TMP13:%.*]] = phi <32 x i1> [ [[TMP8]], [[ENTRY:%.*]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP13:%.*]] = phi <32 x i1> [ zeroinitializer, [[ENTRY:%.*]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[NARROW:%.*]] = select <4 x i1> zeroinitializer, <4 x i1> zeroinitializer, <4 x i1> zeroinitializer ; CHECK-NEXT: [[NARROW66:%.*]] = select <4 x i1> zeroinitializer, <4 x i1> zeroinitializer, <4 x i1> zeroinitializer ; 
CHECK-NEXT: [[NARROW67:%.*]] = select <4 x i1> zeroinitializer, <4 x i1> zeroinitializer, <4 x i1> zeroinitializer ; CHECK-NEXT: [[NARROW68:%.*]] = select <4 x i1> zeroinitializer, <4 x i1> zeroinitializer, <4 x i1> zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = call <32 x i1> @llvm.vector.insert.v32i1.v4i1(<32 x i1> poison, <4 x i1> [[NARROW]], i64 0) -; CHECK-NEXT: [[TMP11:%.*]] = call <32 x i1> @llvm.vector.insert.v32i1.v4i1(<32 x i1> [[TMP10]], <4 x i1> [[NARROW66]], i64 4) -; CHECK-NEXT: [[TMP12:%.*]] = call <32 x i1> @llvm.vector.insert.v32i1.v4i1(<32 x i1> [[TMP11]], <4 x i1> [[NARROW67]], i64 8) -; CHECK-NEXT: [[TMP19:%.*]] = call <32 x i1> @llvm.vector.insert.v32i1.v4i1(<32 x i1> [[TMP12]], <4 x i1> [[NARROW68]], i64 12) -; CHECK-NEXT: [[TMP14:%.*]] = call <32 x i1> @llvm.vector.insert.v32i1.v4i1(<32 x i1> [[TMP19]], <4 x i1> zeroinitializer, i64 16) -; CHECK-NEXT: [[TMP15:%.*]] = call <32 x i1> @llvm.vector.insert.v32i1.v4i1(<32 x i1> [[TMP14]], <4 x i1> zeroinitializer, i64 20) -; CHECK-NEXT: [[TMP16:%.*]] = call <32 x i1> @llvm.vector.insert.v32i1.v4i1(<32 x i1> [[TMP15]], <4 x i1> zeroinitializer, i64 24) -; CHECK-NEXT: [[TMP17:%.*]] = call <32 x i1> @llvm.vector.insert.v32i1.v4i1(<32 x i1> [[TMP16]], <4 x i1> zeroinitializer, i64 28) +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i1> [[NARROW]], <4 x i1> poison, <32 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i1> [[NARROW66]], <4 x i1> poison, <32 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <32 x i1> [[TMP1]], <32 x i1> [[TMP2]], <32 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i1> [[NARROW67]], <4 x i1> poison, <32 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <32 x i1> [[TMP3]], <32 x i1> [[TMP4]], <32 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i1> [[NARROW68]], <4 x i1> poison, <32 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <32 x i1> [[TMP5]], <32 x i1> [[TMP6]], <32 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i1> 
zeroinitializer, <4 x i1> poison, <32 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <32 x i1> [[TMP7]], <32 x i1> [[TMP8]], <32 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <32 x i1> [[TMP9]], <32 x i1> [[TMP8]], <32 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <32 x i1> [[TMP10]], <32 x i1> [[TMP8]], <32 x i32> +; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <32 x i1> [[TMP11]], <32 x i1> [[TMP8]], <32 x i32> ; CHECK-NEXT: [[TMP18]] = or <32 x i1> [[TMP13]], [[TMP17]] ; CHECK-NEXT: br label [[VECTOR_BODY]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/revec-getStoreMinimumVF.ll b/llvm/test/Transforms/SLPVectorizer/X86/revec-getStoreMinimumVF.ll index 3aea112e9edfe..14bdcd062edf8 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/revec-getStoreMinimumVF.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/revec-getStoreMinimumVF.ll @@ -4,9 +4,7 @@ define void @test() { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call <8 x i8> @llvm.vector.insert.v8i8.v4i8(<8 x i8> poison, <4 x i8> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i8> @llvm.vector.insert.v8i8.v4i8(<8 x i8> [[TMP0]], <4 x i8> zeroinitializer, i64 4) -; CHECK-NEXT: store <8 x i8> [[TMP1]], ptr null, align 1 +; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr null, align 1 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/revec-load-compress.ll b/llvm/test/Transforms/SLPVectorizer/X86/revec-load-compress.ll index 069274df396d7..4990fe102564a 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/revec-load-compress.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/revec-load-compress.ll @@ -8,10 +8,7 @@ define void @test(ptr %in) { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[IN]], i64 64 ; CHECK-NEXT: [[TMP2:%.*]] = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr [[TMP1]], i32 2, <32 x i1> , <32 x i16> poison) ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <32 x i16> [[TMP2]], <32 x i16> poison, <16 x i32> 
-; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v8i32(<16 x i32> poison, <8 x i32> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v8i32(<16 x i32> [[TMP4]], <8 x i32> zeroinitializer, i64 8) -; CHECK-NEXT: [[TMP6:%.*]] = trunc <16 x i32> [[TMP5]] to <16 x i16> -; CHECK-NEXT: [[TMP7:%.*]] = or <16 x i16> [[TMP6]], [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = or <16 x i16> zeroinitializer, [[TMP3]] ; CHECK-NEXT: store <16 x i16> [[TMP7]], ptr [[TMP0]], align 2 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/revec-reduced-value-vectorized-later.ll b/llvm/test/Transforms/SLPVectorizer/X86/revec-reduced-value-vectorized-later.ll index 3d0e6be661fd1..8f6a53c03ac68 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/revec-reduced-value-vectorized-later.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/revec-reduced-value-vectorized-later.ll @@ -4,31 +4,17 @@ define <4 x i16> @test() { ; CHECK-LABEL: define <4 x i16> @test() { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.vector.insert.v8i16.v4i16(<8 x i16> poison, <4 x i16> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.vector.insert.v8i16.v4i16(<8 x i16> [[TMP0]], <4 x i16> zeroinitializer, i64 4) -; CHECK-NEXT: [[TMP2:%.*]] = add <8 x i16> [[TMP1]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> poison, <16 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> poison, <4 x i16> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[TMP4]], <16 x i16> poison, <16 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> [[TMP4]], <4 x i16> zeroinitializer, i64 4) -; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> [[TMP6]], <4 x i16> zeroinitializer, i64 8) -; CHECK-NEXT: [[TMP8:%.*]] = 
call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> [[TMP7]], <4 x i16> zeroinitializer, i64 12) -; CHECK-NEXT: [[TMP9:%.*]] = add <16 x i16> [[TMP5]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = add <16 x i16> [[TMP8]], [[TMP8]] -; CHECK-NEXT: [[TMP11:%.*]] = add <16 x i16> [[TMP3]], [[TMP8]] -; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <16 x i16> [[TMP10]], <16 x i16> poison, <4 x i32> -; CHECK-NEXT: [[TMP25:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP24]]) +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> zeroinitializer, <4 x i16> poison, <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = add <16 x i16> [[TMP0]], zeroinitializer +; CHECK-NEXT: [[TMP25:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> zeroinitializer) ; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x i16> poison, i16 [[TMP25]], i64 0 -; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <16 x i16> [[TMP10]], <16 x i16> poison, <4 x i32> -; CHECK-NEXT: [[TMP28:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP27]]) +; CHECK-NEXT: [[TMP28:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> zeroinitializer) ; CHECK-NEXT: [[TMP29:%.*]] = insertelement <4 x i16> [[TMP26]], i16 [[TMP28]], i64 1 -; CHECK-NEXT: [[TMP30:%.*]] = shufflevector <16 x i16> [[TMP10]], <16 x i16> poison, <4 x i32> -; CHECK-NEXT: [[TMP31:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP30]]) +; CHECK-NEXT: [[TMP31:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> zeroinitializer) ; CHECK-NEXT: [[TMP32:%.*]] = insertelement <4 x i16> [[TMP29]], i16 [[TMP31]], i64 2 -; CHECK-NEXT: [[TMP33:%.*]] = shufflevector <16 x i16> [[TMP10]], <16 x i16> poison, <4 x i32> -; CHECK-NEXT: [[TMP34:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP33]]) +; CHECK-NEXT: [[TMP34:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> zeroinitializer) ; CHECK-NEXT: [[TMP35:%.*]] = insertelement <4 x i16> [[TMP32]], i16 [[TMP34]], i64 3 -; CHECK-NEXT: [[RDX_OP:%.*]] = or <16 x i16> [[TMP11]], 
[[TMP9]] +; CHECK-NEXT: [[RDX_OP:%.*]] = or <16 x i16> zeroinitializer, [[TMP1]] ; CHECK-NEXT: [[TMP36:%.*]] = shufflevector <16 x i16> [[RDX_OP]], <16 x i16> poison, <4 x i32> ; CHECK-NEXT: [[TMP37:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP36]]) ; CHECK-NEXT: [[TMP38:%.*]] = insertelement <4 x i16> poison, i16 [[TMP37]], i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/same-values-sub-node-with-poisons.ll b/llvm/test/Transforms/SLPVectorizer/X86/same-values-sub-node-with-poisons.ll index 3b9222b7d5ed1..9c0f65ec27165 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/same-values-sub-node-with-poisons.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/same-values-sub-node-with-poisons.ll @@ -35,13 +35,15 @@ define i32 @test(ptr %f, i1 %tobool.i.4, i32 %retval.0.i.219) { ; CHECK-NEXT: [[TMP16:%.*]] = add <4 x i32> [[TMP15]], [[TMP13]] ; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <4 x i32> [[TMP14]], <4 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP22:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP21]], <4 x i32> [[TMP10]], i64 4) +; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <8 x i32> [[TMP21]], <8 x i32> [[TMP24]], <8 x i32> ; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <8 x i32> [[TMP23]], <8 x i32> , <8 x i32> ; CHECK-NEXT: [[TMP19:%.*]] = add <8 x i32> [[TMP18]], [[TMP22]] -; CHECK-NEXT: [[TMP20:%.*]] = call <4 x i32> @llvm.vector.extract.v4i32.v8i32(<8 x i32> [[TMP19]], i64 0) +; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <8 x i32> [[TMP19]], <8 x i32> poison, <4 x i32> ; CHECK-NEXT: [[RDX_OP:%.*]] = or <4 x i32> [[TMP20]], [[TMP16]] -; CHECK-NEXT: [[TMP12:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP19]], <4 x i32> 
[[RDX_OP]], i64 0) +; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x i32> [[RDX_OP]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <8 x i32> [[TMP19]], <8 x i32> [[TMP25]], <8 x i32> ; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> [[TMP12]]) ; CHECK-NEXT: ret i32 [[TMP17]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/split-load8_2-unord.ll b/llvm/test/Transforms/SLPVectorizer/X86/split-load8_2-unord.ll index 4cf2f99e60aeb..8dc8db9b444dc 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/split-load8_2-unord.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/split-load8_2-unord.ll @@ -89,8 +89,9 @@ define dso_local void @test_unordered_splits(ptr nocapture %p) local_unnamed_add ; CHECK-NEXT: [[G20:%.*]] = getelementptr inbounds [16 x i32], ptr [[P2]], i32 0, i64 12 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[G10]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[G20]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> poison, <4 x i32> [[TMP0]], i64 0) -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP2]], <4 x i32> [[TMP1]], i64 4) +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <8 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: store <8 x i32> [[TMP4]], ptr [[P:%.*]], align 4 ; CHECK-NEXT: ret void @@ -147,10 +148,13 @@ define dso_local void @test_cost_splits(ptr nocapture %p) local_unnamed_addr { ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[G12]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[G20]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr [[G22]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] 
= call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> poison, <2 x i32> [[TMP0]], i64 0) -; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> [[TMP4]], <2 x i32> [[TMP1]], i64 2) -; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> [[TMP5]], <2 x i32> [[TMP2]], i64 4) -; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> [[TMP6]], <2 x i32> [[TMP3]], i64 6) +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> [[TMP1]], <8 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> [[TMP10]], <8 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> [[TMP9]], <8 x i32> ; CHECK-NEXT: store <8 x i32> [[TMP7]], ptr [[P:%.*]], align 4 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/split-node-full-match.ll b/llvm/test/Transforms/SLPVectorizer/X86/split-node-full-match.ll index 10e73b042f19b..f6bf138944749 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/split-node-full-match.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/split-node-full-match.ll @@ -18,20 +18,21 @@ define void @test(double %0) { ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x double> poison, double [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> [[TMP9]], double [[TMP1]], i32 1 ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x double> [[TMP8]], <4 x double> poison, <6 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = call <6 x double> @llvm.vector.insert.v6f64.v2f64(<6 x double> [[TMP11]], <2 x double> [[TMP10]], i64 4) +; 
CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP10]], <2 x double> poison, <6 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <6 x double> [[TMP11]], <6 x double> [[TMP13]], <6 x i32> ; CHECK-NEXT: br i1 false, label %[[DOTLR_PH272_PREHEADER:.*]], [[DOT_CRIT_EDGE:label %.*]] ; CHECK: [[_LR_PH272_PREHEADER:.*:]] -; CHECK-NEXT: br i1 false, [[DOT_CRIT_EDGE]], label %[[BB13:.*]] -; CHECK: [[BB13]]: +; CHECK-NEXT: br i1 false, [[DOT_CRIT_EDGE]], label %[[BB14:.*]] +; CHECK: [[BB14]]: ; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <6 x double> [[TMP12]], <6 x double> poison, <4 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x double> [[TMP14]], <4 x double> poison, <6 x i32> -; CHECK-NEXT: [[TMP16:%.*]] = call <6 x double> @llvm.vector.insert.v6f64.v2f64(<6 x double> [[TMP15]], <2 x double> splat (double 0x7FF8000000000000), i64 4) -; CHECK-NEXT: br i1 false, label %[[BB17:.*]], [[DOT_CRIT_EDGE]] -; CHECK: [[BB17]]: +; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <6 x double> [[TMP15]], <6 x double> , <6 x i32> +; CHECK-NEXT: br i1 false, label %[[BB18:.*]], [[DOT_CRIT_EDGE]] +; CHECK: [[BB18]]: ; CHECK-NEXT: [[TMP18:%.*]] = insertelement <6 x double> , double [[TMP0]], i32 3 ; CHECK-NEXT: br [[DOT_CRIT_EDGE]] ; CHECK: [[__CRIT_EDGE:.*:]] -; CHECK-NEXT: [[TMP19:%.*]] = phi <6 x double> [ [[TMP12]], %[[BB7]] ], [ [[TMP18]], %[[BB17]] ], [ [[TMP16]], %[[BB13]] ], [ [[TMP12]], %[[DOTLR_PH272_PREHEADER]] ] +; CHECK-NEXT: [[TMP20:%.*]] = phi <6 x double> [ [[TMP12]], %[[BB7]] ], [ [[TMP18]], %[[BB18]] ], [ [[TMP17]], %[[BB14]] ], [ [[TMP12]], %[[DOTLR_PH272_PREHEADER]] ] ; CHECK-NEXT: ret void ; .thread: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/split-node-no-reorder-copy.ll b/llvm/test/Transforms/SLPVectorizer/X86/split-node-no-reorder-copy.ll index 9abb994db1e73..680f950fae975 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/split-node-no-reorder-copy.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/split-node-no-reorder-copy.ll @@ -15,7 +15,8 
@@ define i1 @test(ptr %0, ptr %1, <2 x float> %2, <2 x float> %3, <2 x float> %4) ; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP3]], <8 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x float> [[TMP14]], float [[TMP9]], i32 7 ; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <8 x float> [[TMP13]], <8 x float> poison, <16 x i32> -; CHECK-NEXT: [[TMP18:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP16]], <8 x float> [[TMP15]], i64 8) +; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <16 x float> [[TMP16]], <16 x float> [[TMP23]], <16 x i32> ; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x float> [[TMP14]], <8 x float> [[TMP12]], <16 x i32> ; CHECK-NEXT: [[TMP17:%.*]] = insertelement <16 x float> [[TMP19]], float [[TMP9]], i32 15 ; CHECK-NEXT: [[TMP20:%.*]] = fmul <16 x float> [[TMP18]], [[TMP17]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/split-node-parent-operands-in-spill.ll b/llvm/test/Transforms/SLPVectorizer/X86/split-node-parent-operands-in-spill.ll index 5491e8ea7e0f8..cd3663e28eb75 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/split-node-parent-operands-in-spill.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/split-node-parent-operands-in-spill.ll @@ -28,10 +28,11 @@ define void @test(i32 %arg) { ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[LOAD3]], i32 2 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[LOAD2]], i32 3 ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP5]], <4 x i32> [[TMP4]], i64 4) +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> [[TMP6]], <8 x i32> ; CHECK-NEXT: br label 
%[[BB12]] ; CHECK: [[BB12]]: -; CHECK-NEXT: [[TMP7:%.*]] = phi <8 x i32> [ [[TMP6]], %[[BB8]] ], [ poison, %[[BB6]] ] +; CHECK-NEXT: [[TMP8:%.*]] = phi <8 x i32> [ [[TMP7]], %[[BB8]] ], [ poison, %[[BB6]] ] ; CHECK-NEXT: ret void ; CHECK: [[BB21]]: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/split-node-reorder-node-with-ops.ll b/llvm/test/Transforms/SLPVectorizer/X86/split-node-reorder-node-with-ops.ll index 5bfbd69330564..8e09847e9264e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/split-node-reorder-node-with-ops.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/split-node-reorder-node-with-ops.ll @@ -27,7 +27,8 @@ define void @test(i32 %0, i8 %1, i64 %2, float %3) { ; CHECK-NEXT: [[TMP25:%.*]] = sitofp <2 x i32> [[TMP24]] to <2 x float> ; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <2 x float> [[TMP25]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <4 x float> [[TMP26]], <4 x float> poison, <8 x i32> -; CHECK-NEXT: [[TMP28:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP27]], <4 x float> [[TMP16]], i64 4) +; CHECK-NEXT: [[TMP51:%.*]] = shufflevector <4 x float> [[TMP16]], <4 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <8 x float> [[TMP27]], <8 x float> [[TMP51]], <8 x i32> ; CHECK-NEXT: [[TMP29:%.*]] = fdiv <8 x float> zeroinitializer, [[TMP28]] ; CHECK-NEXT: [[TMP30:%.*]] = call <8 x float> @llvm.fabs.v8f32(<8 x float> [[TMP29]]) ; CHECK-NEXT: [[TMP31:%.*]] = bitcast <8 x float> [[TMP30]] to <8 x i32> @@ -50,19 +51,21 @@ define void @test(i32 %0, i8 %1, i64 %2, float %3) { ; CHECK-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], i64 0, i64 8388608 ; CHECK-NEXT: [[TMP49:%.*]] = extractelement <8 x i1> [[TMP32]], i32 1 ; CHECK-NEXT: [[TMP50:%.*]] = select i1 [[TMP49]], i64 0, i64 32768 -; CHECK-NEXT: br label %[[BB52:.*]] -; CHECK: [[BB51:.*]]: -; CHECK-NEXT: unreachable -; CHECK: [[BB52]]: ; CHECK-NEXT: br label %[[BB53:.*]] +; CHECK: [[BB52:.*]]: +; 
CHECK-NEXT: unreachable ; CHECK: [[BB53]]: +; CHECK-NEXT: br label %[[BB54:.*]] +; CHECK: [[BB54]]: ; CHECK-NEXT: [[TMP54:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[TMP17]]) ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 0, ptr null) ; CHECK-NEXT: [[TMP55:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[TMP21]]) ; CHECK-NEXT: [[TMP56:%.*]] = insertelement <8 x float> poison, float [[TMP3]], i32 0 ; CHECK-NEXT: [[TMP57:%.*]] = shufflevector <8 x float> [[TMP56]], <8 x float> poison, <8 x i32> -; CHECK-NEXT: [[TMP58:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v2f32(<8 x float> [[TMP57]], <2 x float> [[TMP55]], i64 0) -; CHECK-NEXT: [[TMP59:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v2f32(<8 x float> [[TMP58]], <2 x float> [[TMP54]], i64 6) +; CHECK-NEXT: [[TMP87:%.*]] = shufflevector <2 x float> [[TMP55]], <2 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP88:%.*]] = shufflevector <8 x float> [[TMP57]], <8 x float> [[TMP87]], <8 x i32> +; CHECK-NEXT: [[TMP89:%.*]] = shufflevector <2 x float> [[TMP54]], <2 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP59:%.*]] = shufflevector <8 x float> [[TMP88]], <8 x float> [[TMP89]], <8 x i32> ; CHECK-NEXT: [[TMP60:%.*]] = bitcast <8 x float> [[TMP59]] to <8 x i32> ; CHECK-NEXT: [[TMP61:%.*]] = icmp ult <8 x i32> [[TMP60]], splat (i32 1325400064) ; CHECK-NEXT: [[TMP62:%.*]] = extractelement <8 x i1> [[TMP61]], i32 5 @@ -94,7 +97,7 @@ define void @test(i32 %0, i8 %1, i64 %2, float %3) { ; CHECK-NEXT: [[TMP85:%.*]] = or i64 [[TMP84]], [[TMP48]] ; CHECK-NEXT: [[TMP86:%.*]] = or i64 [[TMP85]], [[TMP81]] ; CHECK-NEXT: store i64 [[TMP86]], ptr null, align 1 -; CHECK-NEXT: br label %[[BB51]] +; CHECK-NEXT: br label %[[BB52]] ; %5 = and i64 %2, 255 %6 = and i64 %2, -65536 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/split-vector-operand-with-reuses.ll b/llvm/test/Transforms/SLPVectorizer/X86/split-vector-operand-with-reuses.ll index dd804663ff121..972a58cecc822 100644 --- 
a/llvm/test/Transforms/SLPVectorizer/X86/split-vector-operand-with-reuses.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/split-vector-operand-with-reuses.ll @@ -10,11 +10,13 @@ define void @test(ptr %p) { ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[P]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[ARRAYIDX7_US_I_1261]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v4i32(<16 x i32> , <4 x i32> [[TMP2]], i64 4) +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> , <16 x i32> [[TMP19]], <16 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX7_US_I_841]], align 4 ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> poison, <12 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = call <12 x i32> @llvm.vector.insert.v12i32.v4i32(<12 x i32> [[TMP6]], <4 x i32> [[TMP5]], i64 8) +; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <12 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <12 x i32> [[TMP6]], <12 x i32> [[TMP20]], <12 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <16 x i32> , <16 x i32> [[TMP9]], <16 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/subvector-minbitwidth-unsigned-value.ll b/llvm/test/Transforms/SLPVectorizer/X86/subvector-minbitwidth-unsigned-value.ll index d07353798edc9..3bafc3c6552f2 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/subvector-minbitwidth-unsigned-value.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/subvector-minbitwidth-unsigned-value.ll @@ -15,12 +15,14 
@@ define i1 @test(i64 %v1, ptr %v2, i32 %v3, i1 %v4) { ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <2 x i32> [[TMP9]], zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> poison, i32 [[V3]], i32 0 ; CHECK-NEXT: [[TMP30:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP30]], <2 x i32> [[TMP5]], i64 0) +; CHECK-NEXT: [[TMP31:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP30]], <4 x i32> [[TMP31]], <4 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = uitofp <4 x i32> [[TMP10]] to <4 x float> ; CHECK-NEXT: [[TMP12:%.*]] = fdiv <4 x float> zeroinitializer, [[TMP11]] ; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i1> poison, i1 [[V4]], i32 0 ; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i1> [[TMP13]], <4 x i1> poison, <4 x i32> -; CHECK-NEXT: [[TMP15:%.*]] = call <4 x i1> @llvm.vector.insert.v4i1.v2i1(<4 x i1> [[TMP14]], <2 x i1> [[TMP6]], i64 0) +; CHECK-NEXT: [[TMP32:%.*]] = shufflevector <2 x i1> [[TMP6]], <2 x i1> poison, <4 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x i1> [[TMP14]], <4 x i1> [[TMP32]], <4 x i32> ; CHECK-NEXT: [[TMP16:%.*]] = select <4 x i1> [[TMP15]], <4 x float> zeroinitializer, <4 x float> [[TMP12]] ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x float> [[TMP16]], i32 3 ; CHECK-NEXT: [[CONV_I_I1743_3:%.*]] = fptoui float [[TMP17]] to i32 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll b/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll index 3eabed5882e58..6073a264b9b12 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll @@ -160,7 +160,8 @@ define void @tiny_tree_not_fully_vectorizable2(ptr noalias nocapture %dst, ptr n ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[ARRAYIDX4]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> 
poison, float [[TMP0]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP4]], <2 x float> [[TMP2]], i64 2) +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP6]], <4 x i32> ; CHECK-NEXT: store <4 x float> [[TMP5]], ptr [[DST_ADDR_022]], align 4 ; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds float, ptr [[SRC_ADDR_021]], i64 [[I_023]] ; CHECK-NEXT: [[ADD_PTR8]] = getelementptr inbounds float, ptr [[DST_ADDR_022]], i64 [[I_023]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/trunc-node-reused.ll b/llvm/test/Transforms/SLPVectorizer/X86/trunc-node-reused.ll index 4b62ef688ca44..4c295355617e4 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/trunc-node-reused.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/trunc-node-reused.ll @@ -4,16 +4,7 @@ define i16 @test() { ; CHECK-LABEL: define i16 @test() { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i1> @llvm.vector.insert.v4i1.v2i1(<4 x i1> , <2 x i1> zeroinitializer, i64 2) -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> zeroinitializer, [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> zeroinitializer, [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP2]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i1> [[TMP3]] to <4 x i64> -; CHECK-NEXT: [[TMP5:%.*]] = icmp slt <4 x i64> [[TMP4]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = or <4 x i1> zeroinitializer, [[TMP1]] -; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP5]], <4 x i1> zeroinitializer, <4 x i1> [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = sext <4 x i1> [[TMP7]] to <4 x i16> -; CHECK-NEXT: [[TMP9:%.*]] = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> [[TMP8]]) +; CHECK-NEXT: [[TMP9:%.*]] = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> zeroinitializer) ; 
CHECK-NEXT: ret i16 [[TMP9]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec3-calls.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec3-calls.ll index a821362a883a1..fd3c1a57aff34 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vec3-calls.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vec3-calls.ll @@ -7,7 +7,8 @@ define void @vec3_vectorize_call(ptr %Colour, float %0) { ; NON-POW2-NEXT: entry: ; NON-POW2-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[COLOUR:%.*]], align 4 ; NON-POW2-NEXT: [[TMP2:%.*]] = insertelement <3 x float> poison, float [[TMP0:%.*]], i32 2 -; NON-POW2-NEXT: [[TMP4:%.*]] = call <3 x float> @llvm.vector.insert.v3f32.v2f32(<3 x float> [[TMP2]], <2 x float> [[TMP1]], i64 0) +; NON-POW2-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <3 x i32> +; NON-POW2-NEXT: [[TMP4:%.*]] = shufflevector <3 x float> [[TMP2]], <3 x float> [[TMP3]], <3 x i32> ; NON-POW2-NEXT: [[TMP5:%.*]] = call <3 x float> @llvm.fmuladd.v3f32(<3 x float> [[TMP4]], <3 x float> zeroinitializer, <3 x float> zeroinitializer) ; NON-POW2-NEXT: store <3 x float> [[TMP5]], ptr [[COLOUR]], align 4 ; NON-POW2-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/extract-many-users-buildvector.ll b/llvm/test/Transforms/SLPVectorizer/extract-many-users-buildvector.ll index c30f94159916a..32e59697486a7 100644 --- a/llvm/test/Transforms/SLPVectorizer/extract-many-users-buildvector.ll +++ b/llvm/test/Transforms/SLPVectorizer/extract-many-users-buildvector.ll @@ -16,17 +16,19 @@ define i1 @test(float %0, double %1) { ; X86-NEXT: [[TMP11:%.*]] = shufflevector <6 x double> [[TMP7]], <6 x double> poison, <4 x i32> ; X86-NEXT: [[TMP12:%.*]] = shufflevector <4 x double> [[TMP11]], <4 x double> , <4 x i32> ; X86-NEXT: [[TMP13:%.*]] = fmul <4 x double> [[TMP10]], [[TMP12]] -; X86-NEXT: [[TMP14:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v4f64(<8 x double> , <4 x double> [[TMP13]], i64 0) -; X86-NEXT: [[TMP15:%.*]] = call <8 x double> 
@llvm.vector.insert.v8f64.v6f64(<8 x double> , <6 x double> [[TMP8]], i64 0) -; X86-NEXT: [[TMP16:%.*]] = fsub <8 x double> [[TMP14]], [[TMP15]] -; X86-NEXT: [[TMP17:%.*]] = fmul <8 x double> [[TMP14]], [[TMP15]] -; X86-NEXT: [[TMP18:%.*]] = shufflevector <8 x double> [[TMP16]], <8 x double> [[TMP17]], <8 x i32> -; X86-NEXT: [[TMP19:%.*]] = fptrunc <8 x double> [[TMP18]] to <8 x float> -; X86-NEXT: [[TMP20:%.*]] = fmul <8 x float> [[TMP19]], zeroinitializer -; X86-NEXT: [[TMP21:%.*]] = fcmp oeq <8 x float> [[TMP20]], zeroinitializer -; X86-NEXT: [[TMP22:%.*]] = freeze <8 x i1> [[TMP21]] -; X86-NEXT: [[TMP23:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP22]]) -; X86-NEXT: ret i1 [[TMP23]] +; X86-NEXT: [[TMP14:%.*]] = shufflevector <4 x double> [[TMP13]], <4 x double> poison, <8 x i32> +; X86-NEXT: [[TMP15:%.*]] = shufflevector <8 x double> , <8 x double> [[TMP14]], <8 x i32> +; X86-NEXT: [[TMP16:%.*]] = shufflevector <6 x double> [[TMP8]], <6 x double> poison, <8 x i32> +; X86-NEXT: [[TMP17:%.*]] = shufflevector <8 x double> , <8 x double> [[TMP16]], <8 x i32> +; X86-NEXT: [[TMP18:%.*]] = fsub <8 x double> [[TMP15]], [[TMP17]] +; X86-NEXT: [[TMP19:%.*]] = fmul <8 x double> [[TMP15]], [[TMP17]] +; X86-NEXT: [[TMP20:%.*]] = shufflevector <8 x double> [[TMP18]], <8 x double> [[TMP19]], <8 x i32> +; X86-NEXT: [[TMP21:%.*]] = fptrunc <8 x double> [[TMP20]] to <8 x float> +; X86-NEXT: [[TMP22:%.*]] = fmul <8 x float> [[TMP21]], zeroinitializer +; X86-NEXT: [[TMP23:%.*]] = fcmp oeq <8 x float> [[TMP22]], zeroinitializer +; X86-NEXT: [[TMP24:%.*]] = freeze <8 x i1> [[TMP23]] +; X86-NEXT: [[TMP25:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP24]]) +; X86-NEXT: ret i1 [[TMP25]] ; ; AARCH64-LABEL: define i1 @test ; AARCH64-SAME: (float [[TMP0:%.*]], double [[TMP1:%.*]]) { @@ -42,17 +44,19 @@ define i1 @test(float %0, double %1) { ; AARCH64-NEXT: [[TMP12:%.*]] = shufflevector <4 x double> [[TMP11]], <4 x double> , <4 x i32> ; AARCH64-NEXT: 
[[TMP13:%.*]] = shufflevector <4 x double> [[TMP12]], <4 x double> [[TMP4]], <4 x i32> ; AARCH64-NEXT: [[TMP14:%.*]] = fmul <4 x double> [[TMP10]], [[TMP13]] -; AARCH64-NEXT: [[TMP15:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v4f64(<8 x double> , <4 x double> [[TMP14]], i64 0) -; AARCH64-NEXT: [[TMP16:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v6f64(<8 x double> , <6 x double> [[TMP8]], i64 0) -; AARCH64-NEXT: [[TMP17:%.*]] = fsub <8 x double> [[TMP15]], [[TMP16]] -; AARCH64-NEXT: [[TMP18:%.*]] = fmul <8 x double> [[TMP15]], [[TMP16]] -; AARCH64-NEXT: [[TMP19:%.*]] = shufflevector <8 x double> [[TMP17]], <8 x double> [[TMP18]], <8 x i32> -; AARCH64-NEXT: [[TMP20:%.*]] = fptrunc <8 x double> [[TMP19]] to <8 x float> -; AARCH64-NEXT: [[TMP21:%.*]] = fmul <8 x float> [[TMP20]], zeroinitializer -; AARCH64-NEXT: [[TMP22:%.*]] = fcmp oeq <8 x float> [[TMP21]], zeroinitializer -; AARCH64-NEXT: [[TMP23:%.*]] = freeze <8 x i1> [[TMP22]] -; AARCH64-NEXT: [[TMP24:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP23]]) -; AARCH64-NEXT: ret i1 [[TMP24]] +; AARCH64-NEXT: [[TMP15:%.*]] = shufflevector <4 x double> [[TMP14]], <4 x double> poison, <8 x i32> +; AARCH64-NEXT: [[TMP16:%.*]] = shufflevector <8 x double> , <8 x double> [[TMP15]], <8 x i32> +; AARCH64-NEXT: [[TMP17:%.*]] = shufflevector <6 x double> [[TMP8]], <6 x double> poison, <8 x i32> +; AARCH64-NEXT: [[TMP18:%.*]] = shufflevector <8 x double> , <8 x double> [[TMP17]], <8 x i32> +; AARCH64-NEXT: [[TMP19:%.*]] = fsub <8 x double> [[TMP16]], [[TMP18]] +; AARCH64-NEXT: [[TMP20:%.*]] = fmul <8 x double> [[TMP16]], [[TMP18]] +; AARCH64-NEXT: [[TMP21:%.*]] = shufflevector <8 x double> [[TMP19]], <8 x double> [[TMP20]], <8 x i32> +; AARCH64-NEXT: [[TMP22:%.*]] = fptrunc <8 x double> [[TMP21]] to <8 x float> +; AARCH64-NEXT: [[TMP23:%.*]] = fmul <8 x float> [[TMP22]], zeroinitializer +; AARCH64-NEXT: [[TMP24:%.*]] = fcmp oeq <8 x float> [[TMP23]], zeroinitializer +; AARCH64-NEXT: [[TMP25:%.*]] = 
freeze <8 x i1> [[TMP24]] +; AARCH64-NEXT: [[TMP26:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP25]]) +; AARCH64-NEXT: ret i1 [[TMP26]] ; %3 = fpext float %0 to double %4 = fpext float 0.000000e+00 to double diff --git a/llvm/test/Transforms/SLPVectorizer/extracts-with-undefs.ll b/llvm/test/Transforms/SLPVectorizer/extracts-with-undefs.ll index dca34b681032c..a64075db37ba1 100644 --- a/llvm/test/Transforms/SLPVectorizer/extracts-with-undefs.ll +++ b/llvm/test/Transforms/SLPVectorizer/extracts-with-undefs.ll @@ -9,9 +9,9 @@ define void @test() { ; CHECK: body: ; CHECK-NEXT: [[PHI1:%.*]] = phi double [ 0.000000e+00, [[ENTRY:%.*]] ], [ 0.000000e+00, [[BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x double> [ zeroinitializer, [[ENTRY]] ], [ zeroinitializer, [[BODY]] ] -; CHECK-NEXT: [[MUL_I478_I:%.*]] = fmul fast double [[PHI1]], 0.000000e+00 -; CHECK-NEXT: [[MUL7_I485_I:%.*]] = fmul fast double undef, 0.000000e+00 -; CHECK-NEXT: [[ADD8_I_I:%.*]] = fadd fast double [[MUL_I478_I]], [[MUL7_I485_I]] +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> , double [[PHI1]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = fmul fast <2 x double> , [[TMP8]] +; CHECK-NEXT: [[ADD8_I_I:%.*]] = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> [[TMP9]]) ; CHECK-NEXT: [[CMP42_I:%.*]] = fcmp fast ole double [[ADD8_I_I]], 0.000000e+00 ; CHECK-NEXT: br i1 false, label [[BODY]], label [[EXIT:%.*]] ; CHECK: exit: diff --git a/llvm/test/Transforms/SLPVectorizer/gathered-consecutive-loads-different-types.ll b/llvm/test/Transforms/SLPVectorizer/gathered-consecutive-loads-different-types.ll index a42c8f2c650ae..fff988a0a746e 100644 --- a/llvm/test/Transforms/SLPVectorizer/gathered-consecutive-loads-different-types.ll +++ b/llvm/test/Transforms/SLPVectorizer/gathered-consecutive-loads-different-types.ll @@ -28,10 +28,14 @@ define i32 @test(i8 %0) { ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <8 x i8> [[TMP18]], i8 [[TMP13]], i32 1 ; CHECK-NEXT: 
[[TMP20:%.*]] = icmp eq <8 x i8> [[TMP17]], [[TMP19]] ; CHECK-NEXT: [[TMP21:%.*]] = insertelement <64 x i1> , i1 [[CMP13_NOT_5]], i32 0 -; CHECK-NEXT: [[TMP22:%.*]] = call <64 x i1> @llvm.vector.insert.v64i1.v8i1(<64 x i1> [[TMP21]], <8 x i1> [[TMP8]], i64 8) -; CHECK-NEXT: [[TMP23:%.*]] = call <64 x i1> @llvm.vector.insert.v64i1.v8i1(<64 x i1> [[TMP22]], <8 x i1> [[TMP20]], i64 56) -; CHECK-NEXT: [[TMP24:%.*]] = call <64 x i1> @llvm.vector.insert.v64i1.v4i1(<64 x i1> [[TMP23]], <4 x i1> [[TMP11]], i64 32) -; CHECK-NEXT: [[TMP25:%.*]] = call <64 x i1> @llvm.vector.insert.v64i1.v2i1(<64 x i1> [[TMP24]], <2 x i1> [[TMP3]], i64 6) +; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <8 x i1> [[TMP8]], <8 x i1> poison, <64 x i32> +; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <64 x i1> [[TMP21]], <64 x i1> [[TMP22]], <64 x i32> +; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <8 x i1> [[TMP20]], <8 x i1> poison, <64 x i32> +; CHECK-NEXT: [[TMP29:%.*]] = shufflevector <64 x i1> [[TMP23]], <64 x i1> [[TMP24]], <64 x i32> +; CHECK-NEXT: [[TMP30:%.*]] = shufflevector <4 x i1> [[TMP11]], <4 x i1> poison, <64 x i32> +; CHECK-NEXT: [[TMP31:%.*]] = shufflevector <64 x i1> [[TMP29]], <64 x i1> [[TMP30]], <64 x i32> +; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <2 x i1> [[TMP3]], <2 x i1> poison, <64 x i32> +; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <64 x i1> [[TMP31]], <64 x i1> [[TMP28]], <64 x i32> ; CHECK-NEXT: [[TMP26:%.*]] = select <64 x i1> [[TMP25]], <64 x i32> zeroinitializer, <64 x i32> zeroinitializer ; CHECK-NEXT: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.or.v64i32(<64 x i32> [[TMP26]]) ; CHECK-NEXT: ret i32 [[TMP27]] diff --git a/llvm/test/Transforms/SLPVectorizer/partial-register-extract.ll b/llvm/test/Transforms/SLPVectorizer/partial-register-extract.ll index f8a6c4dab3d51..c0a0318efd19e 100644 --- a/llvm/test/Transforms/SLPVectorizer/partial-register-extract.ll +++ b/llvm/test/Transforms/SLPVectorizer/partial-register-extract.ll @@ -24,9 +24,10 @@ define i32 @test(i32 %v, 
ptr %p) { ; CHECK-NEXT: [[OP_RDX2:%.*]] = or i64 [[OP_RDX1]], [[I9_I_I]] ; CHECK-NEXT: [[TMP10:%.*]] = freeze <16 x i1> [[TMP4]] ; CHECK-NEXT: [[TMP12:%.*]] = freeze <4 x i1> [[TMP2]] -; CHECK-NEXT: [[TMP14:%.*]] = call <4 x i1> @llvm.vector.extract.v4i1.v16i1(<16 x i1> [[TMP10]], i64 0) +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <16 x i1> [[TMP10]], <16 x i1> poison, <4 x i32> ; CHECK-NEXT: [[RDX_OP:%.*]] = select <4 x i1> [[TMP14]], <4 x i1> splat (i1 true), <4 x i1> [[TMP12]] -; CHECK-NEXT: [[TMP13:%.*]] = call <16 x i1> @llvm.vector.insert.v16i1.v4i1(<16 x i1> [[TMP10]], <4 x i1> [[RDX_OP]], i64 0) +; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x i1> [[RDX_OP]], <4 x i1> poison, <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x i1> [[TMP10]], <16 x i1> [[TMP15]], <16 x i32> ; CHECK-NEXT: [[OP_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP13]]) ; CHECK-NEXT: [[AND252_US_I_24_I_I:%.*]] = select i1 [[OP_RDX]], i32 0, i32 0 ; CHECK-NEXT: br label %[[INC]] diff --git a/llvm/test/Transforms/SLPVectorizer/reduction-whole-regs-loads.ll b/llvm/test/Transforms/SLPVectorizer/reduction-whole-regs-loads.ll index 3ef0de177b478..304af88b6d134 100644 --- a/llvm/test/Transforms/SLPVectorizer/reduction-whole-regs-loads.ll +++ b/llvm/test/Transforms/SLPVectorizer/reduction-whole-regs-loads.ll @@ -10,9 +10,10 @@ define i64 @test(ptr %p) { ; RISCV-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 4 ; RISCV-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr [[P]], align 4 ; RISCV-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[ARRAYIDX_4]], align 4 -; RISCV-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[TMP0]], <4 x i64> poison, <8 x i32> -; RISCV-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v4i64(<8 x i64> [[TMP2]], <4 x i64> [[TMP0]], i64 0) -; RISCV-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v2i64(<8 x i64> [[TMP3]], <2 x i64> [[TMP1]], i64 4) +; RISCV-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> 
[[TMP0]], <4 x i64> poison, <8 x i32> +; RISCV-NEXT: [[TMP3:%.*]] = shufflevector <8 x i64> [[TMP2]], <8 x i64> [[TMP2]], <8 x i32> +; RISCV-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <8 x i32> +; RISCV-NEXT: [[TMP4:%.*]] = shufflevector <8 x i64> [[TMP3]], <8 x i64> [[TMP7]], <8 x i32> ; RISCV-NEXT: [[TMP5:%.*]] = mul <8 x i64> [[TMP4]], splat (i64 42) ; RISCV-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP5]]) ; RISCV-NEXT: ret i64 [[TMP6]] diff --git a/llvm/test/Transforms/SLPVectorizer/resized-alt-shuffle-after-minbw.ll b/llvm/test/Transforms/SLPVectorizer/resized-alt-shuffle-after-minbw.ll index caca410f056c1..8e71f884b3bb4 100644 --- a/llvm/test/Transforms/SLPVectorizer/resized-alt-shuffle-after-minbw.ll +++ b/llvm/test/Transforms/SLPVectorizer/resized-alt-shuffle-after-minbw.ll @@ -14,10 +14,10 @@ define void @func(i32 %0) { ; CHECK-NEXT: [[TMP11:%.*]] = trunc i64 [[TMP9]] to i32 ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <32 x i32> , i32 [[TMP11]], i32 30 ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <32 x i32> [[TMP12]], <32 x i32> poison, <32 x i32> -; CHECK-NEXT: [[TMP14:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v8i32(<32 x i32> [[TMP13]], <8 x i32> zeroinitializer, i64 16) -; CHECK-NEXT: [[TMP15:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> [[TMP14]], <4 x i32> zeroinitializer, i64 24) -; CHECK-NEXT: [[TMP16:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v2i32(<32 x i32> [[TMP15]], <2 x i32> zeroinitializer, i64 14) -; CHECK-NEXT: [[TMP17:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v2i32(<32 x i32> [[TMP16]], <2 x i32> zeroinitializer, i64 28) +; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <32 x i32> [[TMP13]], <32 x i32> , <32 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <32 x i32> [[TMP15]], <32 x i32> , <32 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <32 x i32> [[TMP16]], <32 x i32> , <32 x i32> +; CHECK-NEXT: [[TMP17:%.*]] = shufflevector 
<32 x i32> [[TMP14]], <32 x i32> , <32 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i32> [[TMP8]], [[TMP17]] ; CHECK-NEXT: [[TMP19:%.*]] = sext <32 x i32> [[TMP18]] to <32 x i64> ; CHECK-NEXT: [[TMP20:%.*]] = icmp slt <32 x i64> [[TMP19]], zeroinitializer diff --git a/llvm/test/Transforms/SLPVectorizer/revec-insertelement.ll b/llvm/test/Transforms/SLPVectorizer/revec-insertelement.ll index 9dbaadeca1f41..1572b6ba3307d 100644 --- a/llvm/test/Transforms/SLPVectorizer/revec-insertelement.ll +++ b/llvm/test/Transforms/SLPVectorizer/revec-insertelement.ll @@ -42,7 +42,7 @@ define void @test_missing_lanes_1_3(ptr %ptr, i32 %val0, i32 %val1) { ; CHECK-NEXT: [[GETELEMENTPTR0:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 0 ; CHECK-NEXT: store <4 x i32> [[TMP0]], ptr [[GETELEMENTPTR0]], align 4 ; CHECK-NEXT: [[GETELEMENTPTR1:%.*]] = getelementptr i32, ptr [[PTR]], i64 4 -; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> poison, <4 x i32> [[TMP1]], i64 4) +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[GETELEMENTPTR1]], align 4 ; CHECK-NEXT: [[GETELEMENTPTR3:%.*]] = getelementptr i32, ptr [[PTR]], i64 12 ; CHECK-NEXT: store <4 x i32> poison, ptr [[GETELEMENTPTR3]], align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/revec-reduction-logical.ll b/llvm/test/Transforms/SLPVectorizer/revec-reduction-logical.ll index 250c60a61fea1..5611fda2c0223 100644 --- a/llvm/test/Transforms/SLPVectorizer/revec-reduction-logical.ll +++ b/llvm/test/Transforms/SLPVectorizer/revec-reduction-logical.ll @@ -32,7 +32,8 @@ define i1 @logical_and_icmp_clamp(<4 x i32> %x) { ; X86-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], splat (i32 42) ; X86-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[X]], splat (i32 17) ; X86-NEXT: [[TMP3:%.*]] = shufflevector <4 x i1> [[TMP2]], <4 x i1> poison, <8 x i32> -; X86-NEXT: [[TMP4:%.*]] = call <8 x i1> @llvm.vector.insert.v8i1.v4i1(<8 x 
i1> [[TMP3]], <4 x i1> [[TMP1]], i64 4) +; X86-NEXT: [[TMP7:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <8 x i32> +; X86-NEXT: [[TMP4:%.*]] = shufflevector <8 x i1> [[TMP3]], <8 x i1> [[TMP7]], <8 x i32> ; X86-NEXT: [[TMP5:%.*]] = freeze <8 x i1> [[TMP4]] ; X86-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP5]]) ; X86-NEXT: ret i1 [[TMP6]] diff --git a/llvm/test/Transforms/SLPVectorizer/revec-shufflevector.ll b/llvm/test/Transforms/SLPVectorizer/revec-shufflevector.ll index cec99c694391b..b738d25b39be1 100644 --- a/llvm/test/Transforms/SLPVectorizer/revec-shufflevector.ll +++ b/llvm/test/Transforms/SLPVectorizer/revec-shufflevector.ll @@ -7,9 +7,8 @@ define void @test1(ptr %in, ptr %out) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i32>, ptr [[IN:%.*]], align 1 ; CHECK-NEXT: [[OUT:%.*]] = getelementptr inbounds i64, ptr [[OUT1:%.*]], i64 0 -; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v8i32(<16 x i32> poison, <8 x i32> [[TMP0]], i64 0) -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = zext <8 x i32> [[TMP4]] to <8 x i64> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i64> [[TMP5]], <8 x i64> poison, <16 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[TMP5]], <8 x i64> poison, <8 x i32> @@ -20,9 +19,8 @@ define void @test1(ptr %in, ptr %out) { ; COMBINE-NEXT: entry: ; COMBINE-NEXT: [[TMP0:%.*]] = load <8 x i32>, ptr [[IN:%.*]], align 1 ; COMBINE-NEXT: [[OUT:%.*]] = getelementptr inbounds i64, ptr [[OUT1:%.*]], i64 0 -; COMBINE-NEXT: [[TMP2:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v8i32(<16 x i32> poison, <8 x i32> 
[[TMP0]], i64 0) -; COMBINE-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> poison, <16 x i32> -; COMBINE-NEXT: [[TMP4:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> poison, <8 x i32> +; COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <16 x i32> +; COMBINE-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <8 x i32> ; COMBINE-NEXT: [[TMP5:%.*]] = zext <8 x i32> [[TMP4]] to <8 x i64> ; COMBINE-NEXT: [[TMP6:%.*]] = shufflevector <8 x i64> [[TMP5]], <8 x i64> poison, <16 x i32> ; COMBINE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[TMP5]], <8 x i64> poison, <8 x i32> @@ -55,9 +53,8 @@ define void @test2(ptr %in, ptr %out) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i32>, ptr [[IN:%.*]], align 1 ; CHECK-NEXT: [[OUT:%.*]] = getelementptr inbounds i64, ptr [[OUT1:%.*]], i64 0 -; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v8i32(<16 x i32> poison, <8 x i32> [[TMP0]], i64 0) -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = zext <8 x i32> [[TMP4]] to <8 x i64> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> poison, <16 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> poison, <8 x i32> @@ -68,9 +65,8 @@ define void @test2(ptr %in, ptr %out) { ; COMBINE-NEXT: entry: ; COMBINE-NEXT: [[TMP0:%.*]] = load <8 x i32>, ptr [[IN:%.*]], align 1 ; COMBINE-NEXT: [[OUT:%.*]] = getelementptr inbounds i64, ptr [[OUT1:%.*]], i64 0 -; COMBINE-NEXT: [[TMP5:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v8i32(<16 x i32> poison, <8 x i32> [[TMP0]], i64 0) -; 
COMBINE-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> poison, <16 x i32> -; COMBINE-NEXT: [[TMP4:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> poison, <8 x i32> +; COMBINE-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <16 x i32> +; COMBINE-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <8 x i32> ; COMBINE-NEXT: [[TMP1:%.*]] = zext <8 x i32> [[TMP4]] to <8 x i64> ; COMBINE-NEXT: [[TMP6:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> poison, <16 x i32> ; COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> poison, <8 x i32> @@ -101,18 +97,16 @@ entry: define void @test3(<16 x i32> %0, ptr %out) { ; CHECK-LABEL: @test3( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP3:%.*]] = call <64 x i32> @llvm.vector.insert.v64i32.v16i32(<64 x i32> poison, <16 x i32> [[TMP0:%.*]], i64 0) -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <64 x i32> [[TMP3]], <64 x i32> poison, <64 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i32> [[TMP3]], <64 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[TMP0:%.*]], <16 x i32> poison, <64 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[OUT:%.*]] = getelementptr inbounds i32, ptr [[OUT1:%.*]], i64 0 ; CHECK-NEXT: store <16 x i32> [[TMP1]], ptr [[OUT]], align 4 ; CHECK-NEXT: ret void ; ; COMBINE-LABEL: @test3( ; COMBINE-NEXT: entry: -; COMBINE-NEXT: [[TMP3:%.*]] = call <64 x i32> @llvm.vector.insert.v64i32.v16i32(<64 x i32> poison, <16 x i32> [[TMP0:%.*]], i64 0) -; COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <64 x i32> [[TMP3]], <64 x i32> poison, <64 x i32> -; COMBINE-NEXT: [[TMP1:%.*]] = shufflevector <64 x i32> [[TMP3]], <64 x i32> poison, <16 x i32> +; COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[TMP0:%.*]], <16 x i32> poison, <64 x i32> +; COMBINE-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <16 
x i32> ; COMBINE-NEXT: [[OUT:%.*]] = getelementptr inbounds i32, ptr [[OUT1:%.*]], i64 0 ; COMBINE-NEXT: store <16 x i32> [[TMP1]], ptr [[OUT]], align 4 ; COMBINE-NEXT: ret void @@ -138,9 +132,8 @@ define void @test4(ptr %in, ptr %out) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i32>, ptr [[IN:%.*]], align 4 ; CHECK-NEXT: [[OUT:%.*]] = getelementptr inbounds i32, ptr [[OUT1:%.*]], i64 0 -; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v8i32(<16 x i32> poison, <8 x i32> [[TMP0]], i64 0) -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> poison, <16 x i32> ; CHECK-NEXT: store <16 x i32> [[TMP1]], ptr [[OUT]], align 4 ; CHECK-NEXT: ret void @@ -149,9 +142,8 @@ define void @test4(ptr %in, ptr %out) { ; COMBINE-NEXT: entry: ; COMBINE-NEXT: [[TMP0:%.*]] = load <8 x i32>, ptr [[IN:%.*]], align 4 ; COMBINE-NEXT: [[OUT:%.*]] = getelementptr inbounds i32, ptr [[OUT1:%.*]], i64 0 -; COMBINE-NEXT: [[TMP2:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v8i32(<16 x i32> poison, <8 x i32> [[TMP0]], i64 0) -; COMBINE-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> poison, <16 x i32> -; COMBINE-NEXT: [[TMP4:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> poison, <8 x i32> +; COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <16 x i32> +; COMBINE-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <8 x i32> ; COMBINE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> poison, <16 x i32> ; COMBINE-NEXT: store <16 x i32> [[TMP1]], ptr [[OUT]], align 4 ; 
COMBINE-NEXT: ret void @@ -174,20 +166,14 @@ entry: define void @test5(ptr %out) { ; CHECK-LABEL: @test5( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v8i32(<16 x i32> poison, <8 x i32> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v8i32(<16 x i32> [[TMP0]], <8 x i32> zeroinitializer, i64 8) -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 0 -; CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[TMP3]], align 4 +; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr [[TMP3]], align 4 ; CHECK-NEXT: ret void ; ; COMBINE-LABEL: @test5( ; COMBINE-NEXT: entry: -; COMBINE-NEXT: [[TMP0:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v8i32(<16 x i32> poison, <8 x i32> zeroinitializer, i64 0) -; COMBINE-NEXT: [[TMP1:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v8i32(<16 x i32> [[TMP0]], <8 x i32> zeroinitializer, i64 8) -; COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> poison, <8 x i32> ; COMBINE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 0 -; COMBINE-NEXT: store <8 x i32> [[TMP2]], ptr [[TMP3]], align 4 +; COMBINE-NEXT: store <8 x i32> zeroinitializer, ptr [[TMP3]], align 4 ; COMBINE-NEXT: ret void ; entry: @@ -214,7 +200,8 @@ define void @test6(ptr %in0, ptr %in1, ptr %in2) { ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <32 x i16> [[TMP9]], <32 x i16> poison, <32 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = uitofp <32 x i16> [[TMP10]] to <32 x float> ; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <16 x i32> -; CHECK-NEXT: [[TMP16:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> [[TMP14]], <4 x float> [[LOAD2]], i64 8) +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <4 x float> [[LOAD2]], <4 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = 
shufflevector <16 x float> [[TMP14]], <16 x float> [[TMP19]], <16 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x float> [[TMP16]], <16 x float> poison, <32 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = fmul <32 x float> [[TMP3]], [[TMP2]] ; CHECK-NEXT: [[GEP10:%.*]] = getelementptr inbounds i8, ptr [[IN1]], i64 32 @@ -222,18 +209,18 @@ define void @test6(ptr %in0, ptr %in1, ptr %in2) { ; CHECK-NEXT: [[TMP17:%.*]] = load <8 x float>, ptr [[IN0]], align 16 ; CHECK-NEXT: store <32 x float> [[TMP4]], ptr [[IN2]], align 16 ; CHECK-NEXT: [[LOAD5:%.*]] = load <16 x i8>, ptr [[GEP10]], align 1 -; CHECK-NEXT: [[TMP19:%.*]] = call <32 x i8> @llvm.vector.insert.v32i8.v16i8(<32 x i8> poison, <16 x i8> [[LOAD5]], i64 0) -; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <32 x i8> [[TMP19]], <32 x i8> poison, <32 x i32> -; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <32 x i8> [[TMP19]], <32 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <16 x i8> [[LOAD5]], <16 x i8> poison, <32 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <16 x i8> [[LOAD5]], <16 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP25:%.*]] = zext <16 x i8> [[TMP15]] to <16 x i16> ; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <16 x i16> [[TMP25]], <16 x i16> poison, <32 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <16 x i16> [[TMP25]], <16 x i16> poison, <16 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = uitofp <16 x i16> [[TMP18]] to <16 x float> -; CHECK-NEXT: [[TMP20:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> poison, <4 x float> [[LOAD2]], i64 0) -; CHECK-NEXT: [[TMP21:%.*]] = call <4 x float> @llvm.vector.extract.v4f32.v8f32(<8 x float> [[TMP17]], i64 0) -; CHECK-NEXT: [[TMP22:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> [[TMP20]], <4 x float> [[TMP21]], i64 4) -; CHECK-NEXT: [[TMP23:%.*]] = call <4 x float> @llvm.vector.extract.v4f32.v8f32(<8 x float> [[TMP17]], i64 4) -; CHECK-NEXT: [[TMP11:%.*]] = call <16 x float> 
@llvm.vector.insert.v16f32.v4f32(<16 x float> [[TMP22]], <4 x float> [[TMP23]], i64 8) +; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <8 x float> [[TMP17]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <4 x float> [[TMP20]], <4 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <16 x float> [[TMP19]], <16 x float> [[TMP21]], <16 x i32> +; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <8 x float> [[TMP17]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <4 x float> [[TMP23]], <4 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x float> [[TMP22]], <16 x float> [[TMP24]], <16 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x float> [[TMP11]], <16 x float> poison, <16 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = fmul <16 x float> [[TMP12]], [[TMP6]] ; CHECK-NEXT: store <16 x float> [[TMP13]], ptr [[GEP11]], align 16 @@ -252,7 +239,8 @@ define void @test6(ptr %in0, ptr %in1, ptr %in2) { ; COMBINE-NEXT: [[TMP19:%.*]] = shufflevector <32 x i16> [[TMP4]], <32 x i16> poison, <32 x i32> ; COMBINE-NEXT: [[TMP2:%.*]] = uitofp <32 x i16> [[TMP19]] to <32 x float> ; COMBINE-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <16 x i32> -; COMBINE-NEXT: [[TMP5:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> [[TMP3]], <4 x float> [[LOAD2]], i64 8) +; COMBINE-NEXT: [[TMP13:%.*]] = shufflevector <4 x float> [[LOAD2]], <4 x float> poison, <16 x i32> +; COMBINE-NEXT: [[TMP5:%.*]] = shufflevector <16 x float> [[TMP3]], <16 x float> [[TMP13]], <16 x i32> ; COMBINE-NEXT: [[TMP6:%.*]] = shufflevector <16 x float> [[TMP5]], <16 x float> poison, <32 x i32> ; COMBINE-NEXT: [[TMP7:%.*]] = fmul <32 x float> [[TMP6]], [[TMP2]] ; COMBINE-NEXT: [[GEP10:%.*]] = getelementptr inbounds i8, ptr [[IN1]], i64 32 @@ -260,18 +248,18 @@ define void @test6(ptr %in0, ptr %in1, ptr %in2) { ; COMBINE-NEXT: [[TMP8:%.*]] = load <8 x float>, ptr [[IN0]], align 16 ; 
COMBINE-NEXT: store <32 x float> [[TMP7]], ptr [[IN2]], align 16 ; COMBINE-NEXT: [[LOAD5:%.*]] = load <16 x i8>, ptr [[GEP10]], align 1 -; COMBINE-NEXT: [[TMP13:%.*]] = call <32 x i8> @llvm.vector.insert.v32i8.v16i8(<32 x i8> poison, <16 x i8> [[LOAD5]], i64 0) -; COMBINE-NEXT: [[TMP14:%.*]] = shufflevector <32 x i8> [[TMP13]], <32 x i8> poison, <32 x i32> -; COMBINE-NEXT: [[TMP24:%.*]] = shufflevector <32 x i8> [[TMP13]], <32 x i8> poison, <16 x i32> +; COMBINE-NEXT: [[TMP14:%.*]] = shufflevector <16 x i8> [[LOAD5]], <16 x i8> poison, <32 x i32> +; COMBINE-NEXT: [[TMP24:%.*]] = shufflevector <16 x i8> [[LOAD5]], <16 x i8> poison, <16 x i32> ; COMBINE-NEXT: [[TMP25:%.*]] = zext <16 x i8> [[TMP24]] to <16 x i16> ; COMBINE-NEXT: [[TMP26:%.*]] = shufflevector <16 x i16> [[TMP25]], <16 x i16> poison, <32 x i32> ; COMBINE-NEXT: [[TMP18:%.*]] = shufflevector <16 x i16> [[TMP25]], <16 x i16> poison, <16 x i32> ; COMBINE-NEXT: [[TMP9:%.*]] = uitofp <16 x i16> [[TMP18]] to <16 x float> -; COMBINE-NEXT: [[TMP20:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> poison, <4 x float> [[LOAD2]], i64 0) -; COMBINE-NEXT: [[TMP21:%.*]] = call <4 x float> @llvm.vector.extract.v4f32.v8f32(<8 x float> [[TMP8]], i64 0) -; COMBINE-NEXT: [[TMP22:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> [[TMP20]], <4 x float> [[TMP21]], i64 4) -; COMBINE-NEXT: [[TMP23:%.*]] = call <4 x float> @llvm.vector.extract.v4f32.v8f32(<8 x float> [[TMP8]], i64 4) -; COMBINE-NEXT: [[TMP15:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> [[TMP22]], <4 x float> [[TMP23]], i64 8) +; COMBINE-NEXT: [[TMP20:%.*]] = shufflevector <8 x float> [[TMP8]], <8 x float> poison, <4 x i32> +; COMBINE-NEXT: [[TMP21:%.*]] = shufflevector <4 x float> [[TMP20]], <4 x float> poison, <16 x i32> +; COMBINE-NEXT: [[TMP22:%.*]] = shufflevector <16 x float> [[TMP13]], <16 x float> [[TMP21]], <16 x i32> +; COMBINE-NEXT: [[TMP23:%.*]] = shufflevector <8 x float> 
[[TMP8]], <8 x float> poison, <4 x i32> +; COMBINE-NEXT: [[TMP27:%.*]] = shufflevector <4 x float> [[TMP23]], <4 x float> poison, <16 x i32> +; COMBINE-NEXT: [[TMP15:%.*]] = shufflevector <16 x float> [[TMP22]], <16 x float> [[TMP27]], <16 x i32> ; COMBINE-NEXT: [[TMP16:%.*]] = shufflevector <16 x float> [[TMP15]], <16 x float> poison, <16 x i32> ; COMBINE-NEXT: [[TMP17:%.*]] = fmul <16 x float> [[TMP16]], [[TMP9]] ; COMBINE-NEXT: store <16 x float> [[TMP17]], ptr [[GEP11]], align 16 @@ -365,40 +353,12 @@ entry: define i32 @test7() { ; CHECK-LABEL: @test7( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP0]], <8 x float> zeroinitializer, i64 8) -; CHECK-NEXT: [[TMP2:%.*]] = fsub <16 x float> [[TMP1]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = fadd <16 x float> [[TMP1]], [[TMP1]] -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> [[TMP3]], <32 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> [[TMP3]], <16 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> poison, <4 x float> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> [[TMP6]], <4 x float> zeroinitializer, i64 4) -; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> [[TMP7]], <4 x float> zeroinitializer, i64 8) -; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> [[TMP8]], <4 x float> zeroinitializer, i64 12) -; CHECK-NEXT: [[TMP10:%.*]] = fadd <16 x float> [[TMP9]], [[TMP5]] -; CHECK-NEXT: [[TMP11:%.*]] = fsub <16 x float> [[TMP9]], [[TMP5]] -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x float> [[TMP10]], <16 x float> [[TMP11]], <16 x i32> -; 
CHECK-NEXT: [[TMP13:%.*]] = fadd <16 x float> [[TMP9]], [[TMP12]] -; CHECK-NEXT: store <16 x float> [[TMP13]], ptr null, align 16 +; CHECK-NEXT: store <16 x float> zeroinitializer, ptr null, align 16 ; CHECK-NEXT: ret i32 0 ; ; COMBINE-LABEL: @test7( ; COMBINE-NEXT: entry: -; COMBINE-NEXT: [[TMP0:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> zeroinitializer, i64 0) -; COMBINE-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP0]], <8 x float> zeroinitializer, i64 8) -; COMBINE-NEXT: [[TMP2:%.*]] = fsub <16 x float> [[TMP1]], [[TMP1]] -; COMBINE-NEXT: [[TMP3:%.*]] = fadd <16 x float> [[TMP1]], [[TMP1]] -; COMBINE-NEXT: [[TMP4:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> [[TMP3]], <32 x i32> -; COMBINE-NEXT: [[TMP5:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> [[TMP3]], <16 x i32> -; COMBINE-NEXT: [[TMP6:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> poison, <4 x float> zeroinitializer, i64 0) -; COMBINE-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> [[TMP6]], <4 x float> zeroinitializer, i64 4) -; COMBINE-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> [[TMP7]], <4 x float> zeroinitializer, i64 8) -; COMBINE-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> [[TMP8]], <4 x float> zeroinitializer, i64 12) -; COMBINE-NEXT: [[TMP10:%.*]] = fadd <16 x float> [[TMP9]], [[TMP5]] -; COMBINE-NEXT: [[TMP11:%.*]] = fsub <16 x float> [[TMP9]], [[TMP5]] -; COMBINE-NEXT: [[TMP12:%.*]] = shufflevector <16 x float> [[TMP10]], <16 x float> [[TMP11]], <16 x i32> -; COMBINE-NEXT: [[TMP13:%.*]] = fadd <16 x float> [[TMP9]], [[TMP12]] -; COMBINE-NEXT: store <16 x float> [[TMP13]], ptr null, align 16 +; COMBINE-NEXT: store <16 x float> zeroinitializer, ptr null, align 16 ; COMBINE-NEXT: ret i32 0 ; entry: diff --git 
a/llvm/test/Transforms/SLPVectorizer/revec.ll b/llvm/test/Transforms/SLPVectorizer/revec.ll index afe92f89ac0d1..ac8b10a0087d0 100644 --- a/llvm/test/Transforms/SLPVectorizer/revec.ll +++ b/llvm/test/Transforms/SLPVectorizer/revec.ll @@ -93,19 +93,15 @@ define void @test4(ptr %in, ptr %out) { ; CHECK-LABEL: @test4( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x float>, ptr [[IN:%.*]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> [[TMP0]], i64 0) -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> poison, <16 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP5:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP4]], <8 x float> zeroinitializer, i64 8) -; CHECK-NEXT: [[TMP6:%.*]] = fmul <16 x float> [[TMP3]], [[TMP5]] -; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> zeroinitializer, i64 8) -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> [[TMP8]], <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = fmul <16 x float> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x float> [[TMP1]], <16 x float> , <16 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = fadd <16 x float> [[TMP9]], [[TMP6]] -; CHECK-NEXT: [[TMP11:%.*]] = fcmp ogt <16 x float> [[TMP10]], [[TMP5]] +; CHECK-NEXT: [[TMP5:%.*]] = fcmp ogt <16 x float> [[TMP10]], zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i1, ptr [[OUT:%.*]], i64 8 -; CHECK-NEXT: [[TMP13:%.*]] = call <8 x i1> @llvm.vector.extract.v8i1.v16i1(<16 x i1> [[TMP11]], i64 8) +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x i1> [[TMP5]], <16 x i1> poison, <8 x i32> ; CHECK-NEXT: store <8 x 
i1> [[TMP13]], ptr [[OUT]], align 1 -; CHECK-NEXT: [[TMP14:%.*]] = call <8 x i1> @llvm.vector.extract.v8i1.v16i1(<16 x i1> [[TMP11]], i64 0) +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <16 x i1> [[TMP5]], <16 x i1> poison, <8 x i32> ; CHECK-NEXT: store <8 x i1> [[TMP14]], ptr [[TMP12]], align 1 ; CHECK-NEXT: ret void ; @@ -151,22 +147,14 @@ define <4 x i1> @test6(ptr %in1, ptr %in2) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[IN1:%.*]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[IN2:%.*]], align 2 -; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v4i32(<16 x i32> poison, <4 x i32> [[TMP0]], i64 0) -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v4i32(<16 x i32> poison, <4 x i32> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v4i32(<16 x i32> [[TMP7]], <4 x i32> zeroinitializer, i64 4) -; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v4i32(<16 x i32> [[TMP8]], <4 x i32> zeroinitializer, i64 8) -; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v4i32(<16 x i32> [[TMP9]], <4 x i32> zeroinitializer, i64 12) -; CHECK-NEXT: [[TMP11:%.*]] = icmp ugt <16 x i32> [[TMP6]], [[TMP10]] -; CHECK-NEXT: [[TMP15:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> poison, <4 x i16> [[TMP1]], i64 0) -; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <16 x i16> [[TMP15]], <16 x i16> poison, <16 x i32> -; CHECK-NEXT: [[TMP17:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> poison, <4 x i16> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP18:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> [[TMP17]], <4 x i16> zeroinitializer, i64 4) -; CHECK-NEXT: [[TMP19:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> [[TMP18]], <4 x i16> zeroinitializer, 
i64 8) -; CHECK-NEXT: [[TMP20:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> [[TMP19]], <4 x i16> zeroinitializer, i64 12) -; CHECK-NEXT: [[TMP21:%.*]] = icmp eq <16 x i16> [[TMP16]], [[TMP20]] +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> poison, <16 x i32> +; CHECK-NEXT: [[TMP21:%.*]] = icmp eq <16 x i16> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> poison, <32 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt <32 x i32> [[TMP5]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <32 x i1> [[TMP6]], <32 x i1> poison, <16 x i32> ; CHECK-NEXT: [[TMP22:%.*]] = and <16 x i1> [[TMP11]], [[TMP21]] -; CHECK-NEXT: [[TMP23:%.*]] = icmp ugt <16 x i32> [[TMP6]], [[TMP10]] +; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <32 x i1> [[TMP6]], <32 x i1> poison, <16 x i32> ; CHECK-NEXT: [[TMP24:%.*]] = and <16 x i1> [[TMP22]], [[TMP23]] ; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <16 x i1> [[TMP24]], <16 x i1> poison, <4 x i32> ; CHECK-NEXT: [[TMP26:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP25]]) @@ -217,10 +205,7 @@ entry: define void @test7() { ; CHECK-LABEL: @test7( -; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i64> @llvm.vector.insert.v16i64.v8i64(<16 x i64> poison, <8 x i64> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i64> @llvm.vector.insert.v16i64.v8i64(<16 x i64> [[TMP1]], <8 x i64> zeroinitializer, i64 8) -; CHECK-NEXT: [[TMP3:%.*]] = trunc <16 x i64> [[TMP2]] to <16 x i16> -; CHECK-NEXT: store <16 x i16> [[TMP3]], ptr null, align 2 +; CHECK-NEXT: store <16 x i16> zeroinitializer, ptr null, align 2 ; CHECK-NEXT: ret void ; %1 = getelementptr i8, ptr null, i64 16 @@ -234,18 +219,12 @@ define void @test7() { define void @test8() { ; CHECK-LABEL: @test8( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call <8 x float> 
@llvm.vector.insert.v8f32.v2f32(<8 x float> poison, <2 x float> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v2f32(<8 x float> [[TMP0]], <2 x float> zeroinitializer, i64 2) -; CHECK-NEXT: [[TMP2:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v2f32(<8 x float> [[TMP1]], <2 x float> zeroinitializer, i64 4) -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v2f32(<8 x float> [[TMP2]], <2 x float> zeroinitializer, i64 6) -; CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> poison, <2 x float> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP4]], <2 x float> zeroinitializer, i64 2) ; CHECK-NEXT: br i1 false, label [[FOR0:%.*]], label [[FOR_BODY:%.*]] ; CHECK: for0: -; CHECK-NEXT: [[TMP6:%.*]] = phi <8 x float> [ [[TMP3]], [[ENTRY:%.*]] ], [ [[TMP8:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi <8 x float> [ zeroinitializer, [[ENTRY:%.*]] ], [ [[TMP8:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: ret void ; CHECK: for.body: -; CHECK-NEXT: [[TMP7:%.*]] = phi <4 x float> [ [[TMP7]], [[FOR_BODY]] ], [ [[TMP5]], [[ENTRY]] ] +; CHECK-NEXT: [[TMP7:%.*]] = phi <4 x float> [ [[TMP7]], [[FOR_BODY]] ], [ zeroinitializer, [[ENTRY]] ] ; CHECK-NEXT: [[TMP8]] = shufflevector <4 x float> [[TMP7]], <4 x float> poison, <8 x i32> ; CHECK-NEXT: br i1 false, label [[FOR0]], label [[FOR_BODY]] ; @@ -268,13 +247,9 @@ for.body: define void @test9() { ; CHECK-LABEL: @test9( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.vector.insert.v8i16.v4i16(<8 x i16> poison, <4 x i16> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.vector.insert.v8i16.v4i16(<8 x i16> [[TMP0]], <4 x i16> zeroinitializer, i64 4) ; CHECK-NEXT: br label [[FOR_BODY13:%.*]] ; CHECK: for.body13: -; CHECK-NEXT: [[TMP2:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = zext 
<8 x i1> [[TMP2]] to <8 x i32> -; CHECK-NEXT: store <8 x i32> [[TMP3]], ptr null, align 4 +; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr null, align 4 ; CHECK-NEXT: br label [[FOR_BODY13]] ; entry: @@ -293,9 +268,8 @@ define void @test10() { ; CHECK-LABEL: @test10( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr null, align 1 -; CHECK-NEXT: [[TMP2:%.*]] = call <32 x i8> @llvm.vector.insert.v32i8.v16i8(<32 x i8> poison, <16 x i8> [[TMP0]], i64 0) -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <32 x i8> [[TMP2]], <32 x i8> poison, <32 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <32 x i8> [[TMP2]], <32 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> poison, <32 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = sext <16 x i8> [[TMP4]] to <16 x i16> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x i16> [[TMP5]], <16 x i16> poison, <32 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i16> [[TMP5]], <16 x i16> poison, <16 x i32> @@ -334,14 +308,13 @@ define void @test11(<2 x i64> %0, i64 %1, <2 x i64> %2) { ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> [[TMP0:%.*]], i64 [[TMP1:%.*]], i32 1 ; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i64> , [[TMP2:%.*]] ; CHECK-NEXT: [[TMP5:%.*]] = trunc <2 x i64> [[TMP4]] to <2 x i16> -; CHECK-NEXT: [[TMP6:%.*]] = call <4 x i16> @llvm.vector.insert.v4i16.v2i16(<4 x i16> poison, <2 x i16> [[TMP5]], i64 0) +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i16> [[TMP5]], <2 x i16> poison, <4 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = trunc <2 x i64> [[TMP3]] to <2 x i16> -; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i16> @llvm.vector.insert.v4i16.v2i16(<4 x i16> [[TMP6]], <2 x i16> [[TMP7]], i64 2) +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i16> [[TMP7]], <2 x i16> poison, <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> [[TMP10]], <4 x i32> ; 
CHECK-NEXT: [[TMP9:%.*]] = trunc <4 x i16> [[TMP8]] to <4 x i8> -; CHECK-NEXT: [[TMP10:%.*]] = call <4 x i8> @llvm.vector.insert.v4i8.v2i8(<4 x i8> poison, <2 x i8> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP11:%.*]] = call <4 x i8> @llvm.vector.insert.v4i8.v2i8(<4 x i8> [[TMP10]], <2 x i8> zeroinitializer, i64 2) -; CHECK-NEXT: [[TMP12:%.*]] = urem <4 x i8> [[TMP9]], [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = icmp ne <4 x i8> [[TMP12]], [[TMP11]] +; CHECK-NEXT: [[TMP11:%.*]] = urem <4 x i8> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <4 x i8> [[TMP11]], zeroinitializer ; CHECK-NEXT: ret void ; entry: @@ -365,21 +338,15 @@ define void @test12() { ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x float>, ptr [[TMP1]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = load <8 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = load <16 x float>, ptr [[TMP0]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> poison, <8 x float> [[TMP4]], i64 0) -; CHECK-NEXT: [[TMP7:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> [[TMP6]], <8 x float> [[TMP3]], i64 8) -; CHECK-NEXT: [[TMP8:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v16f32(<32 x float> [[TMP7]], <16 x float> [[TMP5]], i64 16) +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x float> [[TMP4]], <8 x float> poison, <32 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <32 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <32 x float> [[TMP6]], <32 x float> [[TMP7]], <32 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x float> [[TMP5]], <16 x float> poison, <32 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <32 x float> [[TMP10]], <32 x float> [[TMP11]], <32 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = fpext <32 x float> [[TMP8]] to <32 x double> -; CHECK-NEXT: [[TMP10:%.*]] = call <32 x double> @llvm.vector.insert.v32f64.v8f64(<32 x double> poison, <8 x double> zeroinitializer, i64 0) 
-; CHECK-NEXT: [[TMP11:%.*]] = call <32 x double> @llvm.vector.insert.v32f64.v8f64(<32 x double> [[TMP10]], <8 x double> zeroinitializer, i64 8) -; CHECK-NEXT: [[TMP12:%.*]] = call <32 x double> @llvm.vector.insert.v32f64.v8f64(<32 x double> [[TMP11]], <8 x double> zeroinitializer, i64 16) -; CHECK-NEXT: [[TMP13:%.*]] = call <32 x double> @llvm.vector.insert.v32f64.v8f64(<32 x double> [[TMP12]], <8 x double> zeroinitializer, i64 24) -; CHECK-NEXT: [[TMP14:%.*]] = fadd <32 x double> [[TMP13]], [[TMP9]] +; CHECK-NEXT: [[TMP14:%.*]] = fadd <32 x double> zeroinitializer, [[TMP9]] ; CHECK-NEXT: [[TMP15:%.*]] = fptrunc <32 x double> [[TMP14]] to <32 x float> -; CHECK-NEXT: [[TMP16:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> poison, <8 x float> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP17:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> [[TMP16]], <8 x float> zeroinitializer, i64 8) -; CHECK-NEXT: [[TMP18:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> [[TMP17]], <8 x float> zeroinitializer, i64 16) -; CHECK-NEXT: [[TMP19:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> [[TMP18]], <8 x float> zeroinitializer, i64 24) -; CHECK-NEXT: [[TMP20:%.*]] = fcmp ogt <32 x float> [[TMP19]], [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = fcmp ogt <32 x float> zeroinitializer, [[TMP15]] ; CHECK-NEXT: ret void ; entry: @@ -413,22 +380,17 @@ entry: define void @test13(<8 x i32> %0, ptr %out0, ptr %out1, ptr %out2) { ; CHECK-LABEL: @test13( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v8i32(<32 x i32> poison, <8 x i32> [[TMP0:%.*]], i64 0) -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <32 x i32> [[TMP1]], <32 x i32> poison, <32 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v8i32(<32 x i32> poison, <8 x i32> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP10:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v8i32(<32 x 
i32> [[TMP9]], <8 x i32> zeroinitializer, i64 8) -; CHECK-NEXT: [[TMP5:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v8i32(<32 x i32> [[TMP10]], <8 x i32> zeroinitializer, i64 16) -; CHECK-NEXT: [[TMP11:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v8i32(<32 x i32> [[TMP5]], <8 x i32> zeroinitializer, i64 24) -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <32 x i32> [[TMP1]], <32 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[TMP0:%.*]], <8 x i32> poison, <32 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <16 x i32> ; CHECK-NEXT: br label [[FOR_END_LOOPEXIT:%.*]] ; CHECK: for.end.loopexit: ; CHECK-NEXT: [[TMP4:%.*]] = phi <16 x i32> [ [[TMP3]], [[ENTRY:%.*]] ] -; CHECK-NEXT: [[TMP6:%.*]] = call <4 x i32> @llvm.vector.extract.v4i32.v16i32(<16 x i32> [[TMP4]], i64 4) +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> poison, <4 x i32> ; CHECK-NEXT: [[OR0:%.*]] = or <4 x i32> [[TMP6]], zeroinitializer ; CHECK-NEXT: store <4 x i32> [[OR0]], ptr [[OUT0:%.*]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.vector.extract.v4i32.v16i32(<16 x i32> [[TMP3]], i64 0) +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i32> [[TMP3]], <16 x i32> poison, <4 x i32> ; CHECK-NEXT: store <4 x i32> [[TMP7]], ptr [[OUT1:%.*]], align 4 -; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i32> @llvm.vector.extract.v4i32.v16i32(<16 x i32> [[TMP3]], i64 12) +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i32> [[TMP3]], <16 x i32> poison, <4 x i32> ; CHECK-NEXT: store <4 x i32> [[TMP8]], ptr [[OUT2:%.*]], align 4 ; CHECK-NEXT: ret void ; @@ -454,19 +416,14 @@ for.end.loopexit: define void @test14(<8 x i1> %0) { ; CHECK-LABEL: @test14( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.vector.insert.v16i1.v8i1(<16 x i1> poison, <8 x i1> [[TMP0:%.*]], i64 0) -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i1> [[TMP1]], <16 x i1> poison, <16 x i32> +; CHECK-NEXT: 
[[TMP2:%.*]] = shufflevector <8 x i1> [[TMP0:%.*]], <8 x i1> poison, <16 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = sext <16 x i1> [[TMP2]] to <16 x i16> ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[TMP3]], <16 x i16> poison, <32 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.vector.insert.v32i16.v8i16(<32 x i16> poison, <8 x i16> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP10:%.*]] = call <32 x i16> @llvm.vector.insert.v32i16.v8i16(<32 x i16> [[TMP9]], <8 x i16> zeroinitializer, i64 8) -; CHECK-NEXT: [[TMP7:%.*]] = call <32 x i16> @llvm.vector.insert.v32i16.v8i16(<32 x i16> [[TMP10]], <8 x i16> zeroinitializer, i64 16) -; CHECK-NEXT: [[TMP11:%.*]] = call <32 x i16> @llvm.vector.insert.v32i16.v8i16(<32 x i16> [[TMP7]], <8 x i16> zeroinitializer, i64 24) ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[TMP3]], <16 x i16> poison, <16 x i32> ; CHECK-NEXT: br label [[FOR_END_LOOPEXIT:%.*]] ; CHECK: for.end.loopexit: ; CHECK-NEXT: [[TMP6:%.*]] = phi <16 x i16> [ [[TMP5]], [[ENTRY:%.*]] ] -; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i16> @llvm.vector.extract.v4i16.v16i16(<16 x i16> [[TMP6]], i64 4) +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i16> [[TMP6]], <16 x i16> poison, <4 x i32> ; CHECK-NEXT: [[OR0:%.*]] = or <4 x i16> [[TMP8]], zeroinitializer ; CHECK-NEXT: ret void ; @@ -496,15 +453,9 @@ define i32 @test15() { ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x float>, ptr [[TMP1]], align 16 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[TMP1]], align 16 ; CHECK-NEXT: store <4 x float> [[TMP3]], ptr null, align 16 -; CHECK-NEXT: [[TMP4:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> poison, <4 x float> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP5:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> [[TMP4]], <4 x float> zeroinitializer, i64 4) -; CHECK-NEXT: [[TMP6:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> [[TMP5]], <4 x float> zeroinitializer, i64 8) -; CHECK-NEXT: 
[[TMP7:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> [[TMP6]], <4 x float> zeroinitializer, i64 12) -; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> poison, <4 x float> zeroinitializer, i64 8) -; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> [[TMP8]], <4 x float> zeroinitializer, i64 12) ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> poison, <16 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x float> [[TMP10]], <16 x float> [[TMP9]], <16 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = fadd <16 x float> [[TMP7]], [[TMP11]] +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x float> [[TMP10]], <16 x float> , <16 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = fadd <16 x float> zeroinitializer, [[TMP5]] ; CHECK-NEXT: store <16 x float> [[TMP12]], ptr [[TMP0]], align 16 ; CHECK-NEXT: ret i32 0 ; diff --git a/llvm/test/Transforms/StructurizeCFG/hoist-zerocost.ll b/llvm/test/Transforms/StructurizeCFG/hoist-zerocost.ll deleted file mode 100644 index 10d4fa2be0a70..0000000000000 --- a/llvm/test/Transforms/StructurizeCFG/hoist-zerocost.ll +++ /dev/null @@ -1,161 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -S -passes=structurizecfg < %s | FileCheck %s - - -%pair = type { i32, i32 } -define void @test_if_then_else(ptr %ptr, i1 %cond) { -; CHECK-LABEL: define void @test_if_then_else( -; CHECK-SAME: ptr [[PTR:%.*]], i1 [[COND:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: [[COND_INV:%.*]] = xor i1 [[COND]], true -; CHECK-NEXT: [[LOAD_THEN:%.*]] = load [[PAIR:%.*]], ptr [[PTR]], align 4 -; CHECK-NEXT: [[A_THEN:%.*]] = extractvalue [[PAIR]] [[LOAD_THEN]], 0 -; CHECK-NEXT: br i1 [[COND_INV]], label %[[ELSE:.*]], label %[[FLOW:.*]] -; CHECK: [[FLOW]]: -; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[SUM_ELSE:%.*]], %[[ELSE]] ], [ [[A_THEN]], %[[ENTRY]] ] -; CHECK-NEXT: 
[[TMP1:%.*]] = phi i1 [ false, %[[ELSE]] ], [ true, %[[ENTRY]] ] -; CHECK-NEXT: br i1 [[TMP1]], label %[[THEN:.*]], label %[[MERGE:.*]] -; CHECK: [[THEN]]: -; CHECK-NEXT: br label %[[MERGE]] -; CHECK: [[ELSE]]: -; CHECK-NEXT: [[A_ELSE:%.*]] = extractvalue [[PAIR]] [[LOAD_THEN]], 0 -; CHECK-NEXT: [[SUM_ELSE]] = add i32 [[A_ELSE]], 1 -; CHECK-NEXT: br label %[[FLOW]] -; CHECK: [[MERGE]]: -; CHECK-NEXT: store i32 [[TMP0]], ptr [[PTR]], align 4 -; CHECK-NEXT: ret void -; -entry: - %load_then = load %pair, ptr %ptr - br i1 %cond, label %then, label %else - -then: - %a_then = extractvalue %pair %load_then, 0 - br label %merge - -else: - %a_else = extractvalue %pair %load_then, 0 - %sum_else = add i32 %a_else, 1 - br label %merge - -merge: - %phi = phi i32 [ %a_then, %then ], [ %sum_else, %else ] - store i32 %phi, ptr %ptr - ret void -} - -define void @test_if_else_then(ptr %ptr, i1 %cond) { -; CHECK-LABEL: define void @test_if_else_then( -; CHECK-SAME: ptr [[PTR:%.*]], i1 [[COND:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: [[COND_INV:%.*]] = xor i1 [[COND]], true -; CHECK-NEXT: [[LOAD_THEN:%.*]] = load [[PAIR:%.*]], ptr [[PTR]], align 4 -; CHECK-NEXT: br i1 [[COND_INV]], label %[[THEN:.*]], label %[[FLOW:.*]] -; CHECK: [[THEN]]: -; CHECK-NEXT: [[A_THEN:%.*]] = extractvalue [[PAIR]] [[LOAD_THEN]], 0 -; CHECK-NEXT: br label %[[FLOW]] -; CHECK: [[FLOW]]: -; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[A_THEN]], %[[THEN]] ], [ poison, %[[ENTRY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, %[[THEN]] ], [ true, %[[ENTRY]] ] -; CHECK-NEXT: br i1 [[TMP1]], label %[[ELSE:.*]], label %[[MERGE:.*]] -; CHECK: [[ELSE]]: -; CHECK-NEXT: [[A_ELSE:%.*]] = extractvalue [[PAIR]] [[LOAD_THEN]], 0 -; CHECK-NEXT: [[SUM_ELSE:%.*]] = add i32 [[A_ELSE]], 1 -; CHECK-NEXT: br label %[[MERGE]] -; CHECK: [[MERGE]]: -; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[TMP0]], %[[FLOW]] ], [ [[SUM_ELSE]], %[[ELSE]] ] -; CHECK-NEXT: store i32 [[PHI]], ptr [[PTR]], align 4 -; CHECK-NEXT: ret void -; -entry: - 
%load_then = load %pair, ptr %ptr - br i1 %cond, label %else, label %then - -then: - %a_then = extractvalue %pair %load_then, 0 - br label %merge - -else: - %a_else = extractvalue %pair %load_then, 0 - %sum_else = add i32 %a_else, 1 - br label %merge - -merge: - %phi = phi i32 [ %a_then, %then ], [ %sum_else, %else ] - store i32 %phi, ptr %ptr - ret void -} - -define amdgpu_kernel void @test_loop_with_if( ptr %ptr, i1 %cond) #0 { -; CHECK-LABEL: define amdgpu_kernel void @test_loop_with_if( -; CHECK-SAME: ptr [[PTR:%.*]], i1 [[COND:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: [[COND_INV:%.*]] = xor i1 [[COND]], true -; CHECK-NEXT: [[I:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[I3:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I15:%.*]], %[[LATCH:.*]] ] -; CHECK-NEXT: [[LOAD:%.*]] = load [[PAIR:%.*]], ptr [[PTR]], align 4 -; CHECK-NEXT: [[A_THEN:%.*]] = extractvalue [[PAIR]] [[LOAD]], 0 -; CHECK-NEXT: br i1 [[COND]], label %[[IF:.*]], label %[[FLOW:.*]] -; CHECK: [[IF]]: -; CHECK-NEXT: [[I9:%.*]] = icmp sle i32 [[I3]], 10 -; CHECK-NEXT: br label %[[FLOW]] -; CHECK: [[FLOW1:.*]]: -; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[Y:%.*]], %[[ELSE:.*]] ], [ [[A_THEN]], %[[FLOW]] ] -; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, %[[ELSE]] ], [ [[TMP2:%.*]], %[[FLOW]] ] -; CHECK-NEXT: br i1 [[TMP1]], label %[[THEN:.*]], label %[[LATCH]] -; CHECK: [[THEN]]: -; CHECK-NEXT: store i32 [[I]], ptr [[PTR]], align 4 -; CHECK-NEXT: br label %[[LATCH]] -; CHECK: [[FLOW]]: -; CHECK-NEXT: [[TMP2]] = phi i1 [ true, %[[IF]] ], [ false, %[[LOOP]] ] -; CHECK-NEXT: [[TMP3:%.*]] = phi i1 [ [[I9]], %[[IF]] ], [ [[COND_INV]], %[[LOOP]] ] -; CHECK-NEXT: br i1 [[TMP3]], label %[[ELSE]], label %[[FLOW1]] -; CHECK: [[ELSE]]: -; CHECK-NEXT: [[I2:%.*]] = extractvalue [[PAIR]] [[LOAD]], 1 -; CHECK-NEXT: [[A_ELSE:%.*]] = extractvalue [[PAIR]] [[LOAD]], 0 -; CHECK-NEXT: [[Y]] = add i32 [[A_ELSE]], [[I2]] -; CHECK-NEXT: br label 
%[[FLOW1]] -; CHECK: [[LATCH]]: -; CHECK-NEXT: store i32 [[TMP0]], ptr [[PTR]], align 4 -; CHECK-NEXT: [[I15]] = add nsw i32 [[TMP0]], 20 -; CHECK-NEXT: [[I16:%.*]] = icmp sge i32 [[I15]], 255 -; CHECK-NEXT: br i1 [[I16]], label %[[END:.*]], label %[[LOOP]] -; CHECK: [[END]]: -; CHECK-NEXT: ret void -; -entry: - %a = tail call i32 @llvm.amdgcn.workitem.id.x() - br label %loop - -loop: - %entry_phi = phi i32 [ 0, %entry ], [ %a15, %latch ] - %load = load %pair, ptr %ptr - br i1 %cond, label %if, label %else - -if: - %cmp = icmp sgt i32 %entry_phi, 10 - br i1 %cmp, label %then, label %else - -then: - %a_then = extractvalue %pair %load, 0 - store i32 %a, ptr %ptr, align 4 - br label %latch - -else: - %a2 = extractvalue %pair %load, 1 - %y = extractvalue %pair %load, 0 - %a_else = add i32 %y, %a2 - br label %latch - -latch: - %a_test = phi i32 [ %a_then, %then ], [ %a_else, %else ] - store i32 %a_test, ptr %ptr - %a15 = add nsw i32 %a_test, 20 - %a16 = icmp slt i32 %a15, 255 - br i1 %a16, label %loop, label %end - -end: - ret void -} diff --git a/llvm/test/tools/dxil-dis/lifetimes.ll b/llvm/test/tools/dxil-dis/lifetimes.ll new file mode 100644 index 0000000000000..cb3e6291c7bc0 --- /dev/null +++ b/llvm/test/tools/dxil-dis/lifetimes.ll @@ -0,0 +1,38 @@ +; RUN: llc --filetype=obj %s -o - | dxil-dis -o - | FileCheck %s +target triple = "dxil-unknown-shadermodel6.7-library" + +define void @test_lifetimes() { +; CHECK-LABEL: test_lifetimes +; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [2 x i32], align 4 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr [2 x i32], [2 x i32]* [[ALLOCA]], i32 0, i32 0 +; CHECK-NEXT: [[BITCAST:%.*]] = bitcast [2 x i32]* [[ALLOCA]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start(i64 4, i8* nonnull [[BITCAST]]) +; CHECK-NEXT: store i32 0, i32* [[GEP]], align 4 +; CHECK-NEXT: [[BITCAST:%.*]] = bitcast [2 x i32]* [[ALLOCA]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.end(i64 4, i8* nonnull [[BITCAST]]) +; CHECK-NEXT: ret void +; + %a = alloca [2 x i32], 
align 4 + %gep = getelementptr [2 x i32], ptr %a, i32 0, i32 0 + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %a) + store i32 0, ptr %gep, align 4 + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %a) + ret void +} + +; CHECK-DAG: attributes [[LIFETIME_ATTRS:#.*]] = { nounwind } + +; CHECK-DAG: ; Function Attrs: nounwind +; CHECK-DAG: declare void @llvm.lifetime.start(i64, i8* nocapture) [[LIFETIME_ATTRS]] + +; CHECK-DAG: ; Function Attrs: nounwind +; CHECK-DAG: declare void @llvm.lifetime.end(i64, i8* nocapture) [[LIFETIME_ATTRS]] + +; Function Attrs: nounwind memory(argmem: readwrite) +declare void @llvm.lifetime.end.p0(i64, ptr) #0 + +; Function Attrs: nounwind memory(argmem: readwrite) +declare void @llvm.lifetime.start.p0(i64, ptr) #0 + +attributes #0 = { nounwind memory(argmem: readwrite) } + diff --git a/llvm/tools/llvm-c-test/debuginfo.c b/llvm/tools/llvm-c-test/debuginfo.c index 8c6f6436782e9..e73f69743805c 100644 --- a/llvm/tools/llvm-c-test/debuginfo.c +++ b/llvm/tools/llvm-c-test/debuginfo.c @@ -235,6 +235,53 @@ int llvm_test_dibuilder(void) { M, "LargeEnumTest", LLVMMetadataAsValue(LLVMGetModuleContext(M), LargeEnumTest)); + LLVMValueRef FooVal3 = LLVMConstInt(LLVMInt64Type(), 8, false); + LLVMValueRef FooVal4 = LLVMConstInt(LLVMInt64Type(), 4, false); + LLVMMetadataRef lo = LLVMValueAsMetadata(FooVal1); + LLVMMetadataRef hi = LLVMValueAsMetadata(FooVal2); + LLVMMetadataRef strd = LLVMValueAsMetadata(FooVal3); + LLVMMetadataRef bias = LLVMValueAsMetadata(FooVal4); + LLVMMetadataRef SubrangeMetadataTy = LLVMDIBuilderCreateSubrangeType( + DIB, File, "foo", 3, 42, File, 64, 0, 0, Int64Ty, lo, hi, strd, bias); + LLVMAddNamedMetadataOperand( + M, "SubrangeType", + LLVMMetadataAsValue(LLVMGetModuleContext(M), SubrangeMetadataTy)); + + LLVMMetadataRef SetMetadataTy1 = LLVMDIBuilderCreateSetType( + DIB, File, "enumset", 7, File, 42, 64, 0, EnumTest); + LLVMMetadataRef SetMetadataTy2 = LLVMDIBuilderCreateSetType( + DIB, File, "subrangeset", 11, File, 
42, 64, 0, SubrangeMetadataTy); + LLVMAddNamedMetadataOperand( + M, "SetType1", + LLVMMetadataAsValue(LLVMGetModuleContext(M), SetMetadataTy1)); + LLVMAddNamedMetadataOperand( + M, "SetType2", + LLVMMetadataAsValue(LLVMGetModuleContext(M), SetMetadataTy2)); + + LLVMMetadataRef DynSubscripts[] = { + LLVMDIBuilderGetOrCreateSubrange(DIB, 0, 10), + }; + LLVMMetadataRef Loc = LLVMDIBuilderCreateExpression(DIB, NULL, 0); + LLVMMetadataRef Rank = LLVMDIBuilderCreateExpression(DIB, NULL, 0); + LLVMMetadataRef DynamicArrayMetadataTy = LLVMDIBuilderCreateDynamicArrayType( + DIB, File, "foo", 3, 42, File, 64 * 10, 0, Int64Ty, DynSubscripts, 1, Loc, + FooVar1, NULL, Rank, NULL); + LLVMAddNamedMetadataOperand( + M, "DynType", + LLVMMetadataAsValue(LLVMGetModuleContext(M), DynamicArrayMetadataTy)); + + LLVMMetadataRef StructPTy = LLVMDIBuilderCreateForwardDecl( + DIB, 2 /*DW_TAG_class_type*/, "Class1", 5, NameSpace, File, 0, 0, 192, 0, + "FooClass", 8); + + LLVMMetadataRef Int32Ty = + LLVMDIBuilderCreateBasicType(DIB, "Int32", 5, 32, 0, LLVMDIFlagZero); + LLVMMetadataRef StructElts[] = {Int64Ty, Int64Ty, Int32Ty}; + LLVMMetadataRef ClassArr = LLVMDIBuilderGetOrCreateArray(DIB, StructElts, 3); + LLVMReplaceArrays(DIB, &StructPTy, &ClassArr, 1); + LLVMAddNamedMetadataOperand( + M, "ClassType", LLVMMetadataAsValue(LLVMGetModuleContext(M), StructPTy)); + // Using the new debug format, debug records get attached to instructions. 
// Insert a `br` and `ret` now to absorb the debug records which are // currently "trailing", meaning that they're associated with a block diff --git a/llvm/tools/llvm-dwarfdump/Statistics.cpp b/llvm/tools/llvm-dwarfdump/Statistics.cpp index f92b2b55fa8dc..a705e7d51d874 100644 --- a/llvm/tools/llvm-dwarfdump/Statistics.cpp +++ b/llvm/tools/llvm-dwarfdump/Statistics.cpp @@ -878,7 +878,7 @@ bool dwarfdump::collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx, DenseSet UniqueLines; DenseSet UniqueNonZeroLines; - for (const auto &CU : static_cast(&DICtx)->compile_units()) { + for (const auto &CU : DICtx.compile_units()) { if (DWARFDie CUDie = CU->getNonSkeletonUnitDIE(false)) { // This variable holds variable information for functions with // abstract_origin, but just for the current CU. diff --git a/llvm/tools/llvm-readobj/ObjDumper.cpp b/llvm/tools/llvm-readobj/ObjDumper.cpp index d3c613ee823ba..1a535ede07096 100644 --- a/llvm/tools/llvm-readobj/ObjDumper.cpp +++ b/llvm/tools/llvm-readobj/ObjDumper.cpp @@ -203,7 +203,7 @@ void ObjDumper::printSectionsAsHex(const object::ObjectFile &Obj, W.getOStream() << ' '; for (i = 0; TmpSecPtr < SecEnd && i < 4; ++i) { for (k = 0; TmpSecPtr < SecEnd && k < 4; k++, TmpSecPtr++) { - uint8_t Val = *(reinterpret_cast(TmpSecPtr)); + uint8_t Val = *TmpSecPtr; W.getOStream() << format_hex_no_prefix(Val, 2); } W.getOStream() << ' '; diff --git a/llvm/unittests/ADT/DenseMapTest.cpp b/llvm/unittests/ADT/DenseMapTest.cpp index c95f96c4bb3c6..bdfbc8557859a 100644 --- a/llvm/unittests/ADT/DenseMapTest.cpp +++ b/llvm/unittests/ADT/DenseMapTest.cpp @@ -15,6 +15,7 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" #include +#include #include #include #include @@ -86,6 +87,10 @@ struct CtorTesterMapInfo { CtorTester getTestKey(int i, CtorTester *) { return CtorTester(i); } CtorTester getTestValue(int i, CtorTester *) { return CtorTester(42 + i); } +std::optional getTestKey(int i, std::optional *) { + return i; +} + // Test fixture, 
with helper functions implemented by forwarding to global // function overloads selected by component types of the type parameter. This // allows all of the map implementations to be tested with shared @@ -117,11 +122,13 @@ typedef ::testing::Types, DenseMap, DenseMap, DenseMap, + DenseMap, uint32_t>, SmallDenseMap, SmallDenseMap, SmallDenseMap, - SmallDenseMap + SmallDenseMap, + SmallDenseMap, uint32_t> > DenseMapTestTypes; // clang-format on diff --git a/llvm/unittests/Analysis/ScalarEvolutionTest.cpp b/llvm/unittests/Analysis/ScalarEvolutionTest.cpp index 9b88e423e802b..678960418d7d7 100644 --- a/llvm/unittests/Analysis/ScalarEvolutionTest.cpp +++ b/llvm/unittests/Analysis/ScalarEvolutionTest.cpp @@ -1738,4 +1738,34 @@ TEST_F(ScalarEvolutionsTest, ComplexityComparatorIsStrictWeakOrdering2) { SE.getAddExpr(Ops); } +TEST_F(ScalarEvolutionsTest, ComplexityComparatorIsStrictWeakOrdering3) { + Type *Int64Ty = Type::getInt64Ty(Context); + Constant *Init = Constant::getNullValue(Int64Ty); + Type *PtrTy = PointerType::get(Context, 0); + Constant *Null = Constant::getNullValue(PtrTy); + FunctionType *FTy = FunctionType::get(Type::getVoidTy(Context), {}, false); + + Value *V0 = new GlobalVariable(M, Int64Ty, false, + GlobalValue::ExternalLinkage, Init, "V0"); + Value *V1 = new GlobalVariable(M, Int64Ty, false, + GlobalValue::ExternalLinkage, Init, "V1"); + Value *V2 = new GlobalVariable(M, Int64Ty, false, + GlobalValue::InternalLinkage, Init, "V2"); + Function *F = Function::Create(FTy, Function::ExternalLinkage, "f", M); + BasicBlock *BB = BasicBlock::Create(Context, "entry", F); + Value *C0 = ICmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_EQ, V0, Null, + "c0", BB); + Value *C1 = ICmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_EQ, V1, Null, + "c1", BB); + Value *C2 = ICmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_EQ, V2, Null, + "c2", BB); + Value *Or0 = BinaryOperator::CreateOr(C0, C1, "or0", BB); + Value *Or1 = BinaryOperator::CreateOr(Or0, C2, "or1", BB); + 
ReturnInst::Create(Context, nullptr, BB); + ScalarEvolution SE = buildSE(*F); + // When _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_DEBUG, this will + // crash if the comparator is inconsistent about global variable linkage. + SE.getSCEV(Or1); +} + } // end namespace llvm diff --git a/llvm/unittests/Support/EndianTest.cpp b/llvm/unittests/Support/EndianTest.cpp index bba1a56168f70..59281c0ed5444 100644 --- a/llvm/unittests/Support/EndianTest.cpp +++ b/llvm/unittests/Support/EndianTest.cpp @@ -237,6 +237,7 @@ TEST(Endian, PackedEndianSpecificIntegral) { reinterpret_cast(little + 1); EXPECT_EQ(*big_val, *little_val); + EXPECT_EQ(big_val->value(), little_val->value()); } TEST(Endian, PacketEndianSpecificIntegralAsEnum) { diff --git a/llvm/utils/TableGen/SearchableTableEmitter.cpp b/llvm/utils/TableGen/SearchableTableEmitter.cpp index 7d57297eb7c0b..d17d90b452bd7 100644 --- a/llvm/utils/TableGen/SearchableTableEmitter.cpp +++ b/llvm/utils/TableGen/SearchableTableEmitter.cpp @@ -32,9 +32,9 @@ using namespace llvm; #define DEBUG_TYPE "searchable-table-emitter" static int64_t getAsInt(const Init *B) { - if (const BitsInit *BI = dyn_cast(B)) + if (const auto *BI = dyn_cast(B)) return *BI->convertInitializerToInt(); - if (const IntInit *II = dyn_cast(B)) + if (const auto *II = dyn_cast(B)) return II->getValue(); llvm_unreachable("Unexpected initializer"); } @@ -126,20 +126,21 @@ class SearchableTableEmitter { std::string primaryRepresentation(SMLoc Loc, const GenericField &Field, const Init *I) { - if (const StringInit *SI = dyn_cast(I)) { + if (const auto *SI = dyn_cast(I)) { if (Field.IsCode || SI->hasCodeFormat()) return SI->getValue().str(); else return SI->getAsString(); - } else if (const BitsInit *BI = dyn_cast(I)) + } + if (const auto *BI = dyn_cast(I)) return "0x" + utohexstr(getAsInt(BI)); - else if (const BitInit *BI = dyn_cast(I)) + if (const auto *BI = dyn_cast(I)) return BI->getValue() ? 
"true" : "false"; - else if (Field.IsIntrinsic) + if (Field.IsIntrinsic) return "Intrinsic::" + getIntrinsic(I).EnumName.str(); - else if (Field.IsInstruction) + if (Field.IsInstruction) return I->getAsString(); - else if (Field.Enum) { + if (Field.Enum) { const GenericEnum::Entry *Entry = Field.Enum->getEntry(cast(I)->getDef()); if (!Entry) @@ -152,7 +153,7 @@ class SearchableTableEmitter { } bool isIntrinsic(const Init *I) { - if (const DefInit *DI = dyn_cast(I)) + if (const auto *DI = dyn_cast(I)) return DI->getDef()->isSubClassOf("Intrinsic"); return false; } @@ -174,7 +175,8 @@ class SearchableTableEmitter { if (Ctx == TypeInTempStruct) return "std::string"; return "StringRef"; - } else if (const BitsRecTy *BI = dyn_cast(Field.RecType)) { + } + if (const auto *BI = dyn_cast(Field.RecType)) { unsigned NumBits = BI->getNumBits(); if (NumBits <= 8) return "uint8_t"; @@ -188,9 +190,10 @@ class SearchableTableEmitter { "' lookup method '" + Index.Name + "', key field '" + Field.Name + "' of type bits is too large"); - } else if (isa(Field.RecType)) { + } + if (isa(Field.RecType)) return "bool"; - } else if (Field.Enum || Field.IsIntrinsic || Field.IsInstruction) + if (Field.Enum || Field.IsIntrinsic || Field.IsInstruction) return "unsigned"; PrintFatalError(Index.Loc, Twine("In table '") + Table.Name + "' lookup method '" + @@ -244,67 +247,81 @@ int64_t SearchableTableEmitter::getNumericKey(const SearchIndex &Index, /// key of \p Index. bool SearchableTableEmitter::compareBy(const Record *LHS, const Record *RHS, const SearchIndex &Index) { - for (const auto &Field : Index.Fields) { - const Init *LHSI = LHS->getValueInit(Field.Name); - const Init *RHSI = RHS->getValueInit(Field.Name); + // Compare two values and return: + // * -1 if LHS < RHS. + // * 1 if LHS > RHS. + // * 0 if LHS == RHS. 
+ auto CmpLTValue = [](const auto &LHS, const auto &RHS) -> int { + if (LHS < RHS) + return -1; + if (LHS > RHS) + return 1; + return 0; + }; + + // Specialized form of `CmpLTValue` for string-like types that uses compare() + // to do the comparison of the 2 strings once (instead if 2 comparisons if we + // use `CmpLTValue`). + auto CmpLTString = [](StringRef LHS, StringRef RHS) -> int { + return LHS.compare(RHS); + }; + // Compare two fields and returns: + // - true if LHS < RHS. + // - false if LHS > RHS. + // - std::nullopt if LHS == RHS. + auto CmpLTField = [this, &Index, &CmpLTValue, + &CmpLTString](const Init *LHSI, const Init *RHSI, + const GenericField &Field) -> int { if (isa(Field.RecType) || isa(Field.RecType)) { int64_t LHSi = getAsInt(LHSI); int64_t RHSi = getAsInt(RHSI); - if (LHSi < RHSi) - return true; - if (LHSi > RHSi) - return false; - } else if (Field.IsIntrinsic) { + return CmpLTValue(LHSi, RHSi); + } + + if (Field.IsIntrinsic) { const CodeGenIntrinsic &LHSi = getIntrinsic(LHSI); const CodeGenIntrinsic &RHSi = getIntrinsic(RHSI); - if (std::tie(LHSi.TargetPrefix, LHSi.Name) < - std::tie(RHSi.TargetPrefix, RHSi.Name)) - return true; - if (std::tie(LHSi.TargetPrefix, LHSi.Name) > - std::tie(RHSi.TargetPrefix, RHSi.Name)) - return false; - } else if (Field.IsInstruction) { + if (int Cmp = CmpLTString(LHSi.TargetPrefix, RHSi.TargetPrefix)) + return Cmp; + return CmpLTString(LHSi.Name, RHSi.Name); + } + + if (Field.IsInstruction) { // This does not correctly compare the predefined instructions! const Record *LHSr = cast(LHSI)->getDef(); const Record *RHSr = cast(RHSI)->getDef(); - bool LHSpseudo = LHSr->getValueAsBit("isPseudo"); - bool RHSpseudo = RHSr->getValueAsBit("isPseudo"); - if (LHSpseudo && !RHSpseudo) - return true; - if (!LHSpseudo && RHSpseudo) - return false; + // Order pseudo instructions before non-pseudo ones. 
+ bool LHSNotPseudo = !LHSr->getValueAsBit("isPseudo"); + bool RHSNotPseudo = !RHSr->getValueAsBit("isPseudo"); + if (int Cmp = CmpLTValue(LHSNotPseudo, RHSNotPseudo)) + return Cmp; + return CmpLTString(LHSr->getName(), RHSr->getName()); + } - int comp = LHSr->getName().compare(RHSr->getName()); - if (comp < 0) - return true; - if (comp > 0) - return false; - } else if (Field.Enum) { - auto LHSr = cast(LHSI)->getDef(); - auto RHSr = cast(RHSI)->getDef(); + if (Field.Enum) { + const Record *LHSr = cast(LHSI)->getDef(); + const Record *RHSr = cast(RHSI)->getDef(); int64_t LHSv = Field.Enum->getEntry(LHSr)->Value; int64_t RHSv = Field.Enum->getEntry(RHSr)->Value; - if (LHSv < RHSv) - return true; - if (LHSv > RHSv) - return false; - } else { - std::string LHSs = primaryRepresentation(Index.Loc, Field, LHSI); - std::string RHSs = primaryRepresentation(Index.Loc, Field, RHSI); - - if (isa(Field.RecType)) { - LHSs = StringRef(LHSs).upper(); - RHSs = StringRef(RHSs).upper(); - } + return CmpLTValue(LHSv, RHSv); + } - int comp = LHSs.compare(RHSs); - if (comp < 0) - return true; - if (comp > 0) - return false; + std::string LHSs = primaryRepresentation(Index.Loc, Field, LHSI); + std::string RHSs = primaryRepresentation(Index.Loc, Field, RHSI); + if (isa(Field.RecType)) { + LHSs = StringRef(LHSs).upper(); + RHSs = StringRef(RHSs).upper(); } + return CmpLTString(LHSs, RHSs); + }; + + for (const GenericField &Field : Index.Fields) { + const Init *LHSI = LHS->getValueInit(Field.Name); + const Init *RHSI = RHS->getValueInit(Field.Name); + if (int Cmp = CmpLTField(LHSI, RHSI, Field)) + return Cmp < 0; } return false; } @@ -359,8 +376,8 @@ void SearchableTableEmitter::emitLookupFunction(const GenericTable &Table, std::vector> Entries; Entries.reserve(Table.Entries.size()); - for (unsigned i = 0; i < Table.Entries.size(); ++i) - Entries.emplace_back(Table.Entries[i], i); + for (auto [Idx, TblEntry] : enumerate(Table.Entries)) + Entries.emplace_back(TblEntry, Idx); 
llvm::stable_sort(Entries, [&](const std::pair &LHS, @@ -369,19 +386,19 @@ void SearchableTableEmitter::emitLookupFunction(const GenericTable &Table, }); IndexRowsStorage.reserve(Entries.size()); - for (const auto &Entry : Entries) { - IndexRowsStorage.push_back(Entry.first); + for (const auto &[EntryRec, EntryIndex] : Entries) { + IndexRowsStorage.push_back(EntryRec); OS << " { "; ListSeparator LS; for (const auto &Field : Index.Fields) { std::string Repr = primaryRepresentation( - Index.Loc, Field, Entry.first->getValueInit(Field.Name)); + Index.Loc, Field, EntryRec->getValueInit(Field.Name)); if (isa(Field.RecType)) Repr = StringRef(Repr).upper(); OS << LS << Repr; } - OS << ", " << Entry.second << " },\n"; + OS << ", " << EntryIndex << " },\n"; } OS << " };\n\n"; @@ -398,8 +415,8 @@ void SearchableTableEmitter::emitLookupFunction(const GenericTable &Table, Index.Fields[0].IsInstruction)) { int64_t FirstKeyVal = getNumericKey(Index, IndexRows[0]); IsContiguous = true; - for (unsigned i = 0; i < IndexRows.size(); ++i) { - if (getNumericKey(Index, IndexRows[i]) != (FirstKeyVal + i)) { + for (const auto &[Idx, IndexRow] : enumerate(IndexRows)) { + if (getNumericKey(Index, IndexRow) != FirstKeyVal + (int64_t)Idx) { IsContiguous = false; break; } @@ -509,9 +526,9 @@ void SearchableTableEmitter::emitLookupFunction(const GenericTable &Table, OS << " ||\n Key." << Field.Name << " != Idx->" << Field.Name; } - if (ShouldReturnRange) + if (ShouldReturnRange) { OS << " return llvm::make_range(It.first, It.second);\n"; - else if (IsPrimary) { + } else if (IsPrimary) { OS << ")\n return nullptr;\n\n"; OS << " return &*Idx;\n"; } else { @@ -557,8 +574,7 @@ void SearchableTableEmitter::emitGenericTable(const GenericTable &Table, // The primary data table contains all the fields defined for this map. 
OS << "constexpr " << Table.CppTypeName << " " << Table.Name << "[] = {\n"; - for (unsigned i = 0; i < Table.Entries.size(); ++i) { - const Record *Entry = Table.Entries[i]; + for (const auto &[Idx, Entry] : enumerate(Table.Entries)) { OS << " { "; ListSeparator LS; @@ -567,7 +583,7 @@ void SearchableTableEmitter::emitGenericTable(const GenericTable &Table, << primaryRepresentation(Table.Locs[0], Field, Entry->getValueInit(Field.Name)); - OS << " }, // " << i << "\n"; + OS << " }, // " << Idx << "\n"; } OS << " };\n"; diff --git a/llvm/utils/gn/secondary/clang/lib/Analysis/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Analysis/BUILD.gn index eb8aef259bfd2..b8c8585a33a9b 100644 --- a/llvm/utils/gn/secondary/clang/lib/Analysis/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/Analysis/BUILD.gn @@ -27,6 +27,7 @@ static_library("Analysis") { "FixitUtil.cpp", "IntervalPartition.cpp", "IssueHash.cpp", + "LifetimeSafety.cpp", "LiveVariables.cpp", "MacroExpansionContext.cpp", "ObjCNoReturn.cpp", diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn index 2e7aa45f38e3e..44f5fdc20837c 100644 --- a/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn @@ -78,14 +78,12 @@ static_library("builtins") { cflags += [ "-fomit-frame-pointer" ] } cflags_c = [ "-std=c11" ] - cflags_cc = [ "-nostdinc++" ] } defines = builtins_defines sources = builtins_sources deps = lse_targets - include_dirs = [ "//third-party/siphash/include" ] } # Currently unused but necessary to make sync_source_lists_from_cmake.py happy. 
diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/builtins/sources.gni b/llvm/utils/gn/secondary/compiler-rt/lib/builtins/sources.gni index bba2a4e891aa6..ba151075c0f9d 100644 --- a/llvm/utils/gn/secondary/compiler-rt/lib/builtins/sources.gni +++ b/llvm/utils/gn/secondary/compiler-rt/lib/builtins/sources.gni @@ -429,7 +429,6 @@ if (current_cpu == "arm") { if (current_cpu == "arm64") { builtins_sources -= [ "fp_mode.c" ] builtins_sources += [ - "aarch64/emupac.cpp", "aarch64/fp_mode.c", "cpu_model/aarch64.c", ] diff --git a/llvm/utils/gn/secondary/compiler-rt/test/builtins/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/test/builtins/BUILD.gn index 97e4fdf61ec2d..87848075a804e 100644 --- a/llvm/utils/gn/secondary/compiler-rt/test/builtins/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/test/builtins/BUILD.gn @@ -46,7 +46,6 @@ if (current_toolchain != host_toolchain) { "//compiler-rt/include($host_toolchain)", "//compiler-rt/lib/builtins", "//compiler-rt/test:lit_common_configured", - "//llvm/utils/not($host_toolchain)", ] } } diff --git a/llvm/utils/gn/secondary/lldb/source/Plugins/Language/CPlusPlus/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/Plugins/Language/CPlusPlus/BUILD.gn index 4ad599820ac57..3a7508ab7187e 100644 --- a/llvm/utils/gn/secondary/lldb/source/Plugins/Language/CPlusPlus/BUILD.gn +++ b/llvm/utils/gn/secondary/lldb/source/Plugins/Language/CPlusPlus/BUILD.gn @@ -62,5 +62,6 @@ static_library("CPlusPlus") { "LibStdcppUniquePointer.cpp", "MSVCUndecoratedNameParser.cpp", "MsvcStl.cpp", + "MsvcStlSmartPointer.cpp", ] } diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn index ad73f51e57eaf..306e4d3f9f6b8 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn @@ -140,6 +140,7 @@ static_library("LLVMRISCVCodeGen") { "RISCVInsertVSETVLI.cpp", "RISCVInsertWriteVXRM.cpp", "RISCVInstrInfo.cpp", + 
"RISCVInterleavedAccess.cpp", "RISCVLandingPadSetup.cpp", "RISCVLateBranchOpt.cpp", "RISCVLoadStoreOptimizer.cpp", diff --git a/mlir/cmake/modules/AddMLIR.cmake b/mlir/cmake/modules/AddMLIR.cmake index 353e64b3d013e..ff4269ed7acd2 100644 --- a/mlir/cmake/modules/AddMLIR.cmake +++ b/mlir/cmake/modules/AddMLIR.cmake @@ -1,3 +1,4 @@ +include(TableGen) include(GNUInstallDirs) include(LLVMDistributionSupport) diff --git a/mlir/include/mlir/Conversion/Passes.h b/mlir/include/mlir/Conversion/Passes.h index c9d2a54433736..8a5976e547169 100644 --- a/mlir/include/mlir/Conversion/Passes.h +++ b/mlir/include/mlir/Conversion/Passes.h @@ -80,6 +80,7 @@ #include "mlir/Conversion/VectorToSCF/VectorToSCF.h" #include "mlir/Conversion/VectorToSPIRV/VectorToSPIRVPass.h" #include "mlir/Conversion/VectorToXeGPU/VectorToXeGPU.h" +#include "mlir/Conversion/XeVMToLLVM/XeVMToLLVM.h" namespace mlir { diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td index 5a864865adffc..50c67da91a4af 100644 --- a/mlir/include/mlir/Conversion/Passes.td +++ b/mlir/include/mlir/Conversion/Passes.td @@ -1495,4 +1495,13 @@ def ConvertVectorToXeGPU : Pass<"convert-vector-to-xegpu"> { ]; } +//===----------------------------------------------------------------------===// +// XeVMToLLVM +//===----------------------------------------------------------------------===// + +def ConvertXeVMToLLVMPass : Pass<"convert-xevm-to-llvm"> { + let summary = "Convert XeVM to LLVM dialect"; + let dependentDialects = ["LLVM::LLVMDialect"]; +} + #endif // MLIR_CONVERSION_PASSES diff --git a/mlir/include/mlir/Conversion/XeVMToLLVM/XeVMToLLVM.h b/mlir/include/mlir/Conversion/XeVMToLLVM/XeVMToLLVM.h new file mode 100644 index 0000000000000..7ffdbd4307f9e --- /dev/null +++ b/mlir/include/mlir/Conversion/XeVMToLLVM/XeVMToLLVM.h @@ -0,0 +1,27 @@ +//===-- XeVMToLLVM.h - Convert XeVM to LLVM dialect -------------*- C++ -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM 
Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#ifndef MLIR_CONVERSION_XEVMTOLLVM_XEVMTOLLVMPASS_H_ +#define MLIR_CONVERSION_XEVMTOLLVM_XEVMTOLLVMPASS_H_ + +#include + +namespace mlir { +class DialectRegistry; +class LLVMTypeConverter; +class RewritePatternSet; +class Pass; + +#define GEN_PASS_DECL_CONVERTXEVMTOLLVMPASS +#include "mlir/Conversion/Passes.h.inc" + +void populateXeVMToLLVMConversionPatterns(RewritePatternSet &patterns); + +void registerConvertXeVMToLLVMInterface(DialectRegistry ®istry); +} // namespace mlir + +#endif // MLIR_CONVERSION_XEVMTOLLVM_XEVMTOLLVMPASS_H_ diff --git a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td index 3766eb16e9429..187ac9aa18aac 100644 --- a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td @@ -258,4 +258,38 @@ def GpuSPIRVAttachTarget: Pass<"spirv-attach-target", ""> { ]; } +def GpuXeVMAttachTarget : Pass<"xevm-attach-target", ""> { + let summary = "Attaches a XeVM target attribute to a GPU Module."; + let description = [{ + This pass searches for all GPU Modules in the immediate regions and attaches + a XeVM target if the module matches the name specified by the `module` argument. 
+ + Example: + ``` + // File: in.mlir: + gpu.module @nvvm_module_1 {...} + gpu.module @rocdl_module_2 {...} + gpu.module @xevm_module_3 {...} + // mlir-opt --xevm-attach-target="module=xevm.* chip=pvc" in.mlir + gpu.module @nvvm_module_1 {...} + gpu.module @rocdl_module_2 {...} + gpu.module @xevm_module_3 [#xevm.target] {...} + ``` + }]; + let options = + [Option<"moduleMatcher", "module", "std::string", + /*default=*/[{""}], + "Regex used to identify the modules to attach the target to.">, + Option<"triple", "triple", "std::string", + /*default=*/"\"spirv64-unknown-unknown\"", "Target triple.">, + Option<"chip", "chip", "std::string", + /*default=*/"\"bmg\"", "Target chip.">, + Option<"optLevel", "O", "unsigned", + /*default=*/"2", "Optimization level.">, + ListOption<"linkLibs", "l", "std::string", + "Extra bitcode libraries paths to link to.">, + Option<"cmdOptions", "cmd-options", "std::string", + /*default=*/[{""}], + "Command line options passed to downstream compiler">]; +} #endif // MLIR_DIALECT_GPU_PASSES diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index 6895e946b8a45..45a8904375e2b 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -153,14 +153,20 @@ class NVVM_IntrOp traits = [], // NVVM special register op definitions //===----------------------------------------------------------------------===// -class NVVM_SpecialRegisterOp traits = []> : +class NVVM_PureSpecialRegisterOp traits = []> : NVVM_IntrOp { let arguments = (ins); let assemblyFormat = "attr-dict `:` type($res)"; } -class NVVM_SpecialRangeableRegisterOp traits = []> : - NVVM_SpecialRegisterOp traits = []> : + NVVM_IntrOp { + let arguments = (ins); + let assemblyFormat = "attr-dict `:` type($res)"; +} + +class NVVM_PureSpecialRangeableRegisterOp traits = []> : + NVVM_PureSpecialRegisterOp])> { let arguments = (ins OptionalAttr:$range); @@ -189,63 +195,63 @@ class 
NVVM_SpecialRangeableRegisterOp traits = []> //===----------------------------------------------------------------------===// // Lane, Warp, SM, Grid index and range -def NVVM_LaneIdOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.laneid">; -def NVVM_WarpSizeOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.warpsize">; -def NVVM_WarpIdOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.warpid">; -def NVVM_WarpDimOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.nwarpid">; -def NVVM_SmIdOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.smid">; -def NVVM_SmDimOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.nsmid">; -def NVVM_GridIdOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.gridid">; +def NVVM_LaneIdOp : NVVM_PureSpecialRangeableRegisterOp<"read.ptx.sreg.laneid">; +def NVVM_WarpSizeOp : NVVM_PureSpecialRangeableRegisterOp<"read.ptx.sreg.warpsize">; +def NVVM_WarpIdOp : NVVM_PureSpecialRangeableRegisterOp<"read.ptx.sreg.warpid">; +def NVVM_WarpDimOp : NVVM_PureSpecialRangeableRegisterOp<"read.ptx.sreg.nwarpid">; +def NVVM_SmIdOp : NVVM_PureSpecialRangeableRegisterOp<"read.ptx.sreg.smid">; +def NVVM_SmDimOp : NVVM_PureSpecialRangeableRegisterOp<"read.ptx.sreg.nsmid">; +def NVVM_GridIdOp : NVVM_PureSpecialRangeableRegisterOp<"read.ptx.sreg.gridid">; //===----------------------------------------------------------------------===// // Lane Mask Comparison Ops -def NVVM_LaneMaskEqOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.lanemask.eq">; -def NVVM_LaneMaskLeOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.lanemask.le">; -def NVVM_LaneMaskLtOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.lanemask.lt">; -def NVVM_LaneMaskGeOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.lanemask.ge">; -def NVVM_LaneMaskGtOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.lanemask.gt">; +def NVVM_LaneMaskEqOp : NVVM_PureSpecialRegisterOp<"read.ptx.sreg.lanemask.eq">; +def NVVM_LaneMaskLeOp : NVVM_PureSpecialRegisterOp<"read.ptx.sreg.lanemask.le">; +def NVVM_LaneMaskLtOp : 
NVVM_PureSpecialRegisterOp<"read.ptx.sreg.lanemask.lt">; +def NVVM_LaneMaskGeOp : NVVM_PureSpecialRegisterOp<"read.ptx.sreg.lanemask.ge">; +def NVVM_LaneMaskGtOp : NVVM_PureSpecialRegisterOp<"read.ptx.sreg.lanemask.gt">; //===----------------------------------------------------------------------===// // Thread index and range -def NVVM_ThreadIdXOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.tid.x">; -def NVVM_ThreadIdYOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.tid.y">; -def NVVM_ThreadIdZOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.tid.z">; -def NVVM_BlockDimXOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.ntid.x">; -def NVVM_BlockDimYOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.ntid.y">; -def NVVM_BlockDimZOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.ntid.z">; +def NVVM_ThreadIdXOp : NVVM_PureSpecialRangeableRegisterOp<"read.ptx.sreg.tid.x">; +def NVVM_ThreadIdYOp : NVVM_PureSpecialRangeableRegisterOp<"read.ptx.sreg.tid.y">; +def NVVM_ThreadIdZOp : NVVM_PureSpecialRangeableRegisterOp<"read.ptx.sreg.tid.z">; +def NVVM_BlockDimXOp : NVVM_PureSpecialRangeableRegisterOp<"read.ptx.sreg.ntid.x">; +def NVVM_BlockDimYOp : NVVM_PureSpecialRangeableRegisterOp<"read.ptx.sreg.ntid.y">; +def NVVM_BlockDimZOp : NVVM_PureSpecialRangeableRegisterOp<"read.ptx.sreg.ntid.z">; //===----------------------------------------------------------------------===// // Block index and range -def NVVM_BlockIdXOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.ctaid.x">; -def NVVM_BlockIdYOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.ctaid.y">; -def NVVM_BlockIdZOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.ctaid.z">; -def NVVM_GridDimXOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.nctaid.x">; -def NVVM_GridDimYOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.nctaid.y">; -def NVVM_GridDimZOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.nctaid.z">; +def NVVM_BlockIdXOp : NVVM_PureSpecialRangeableRegisterOp<"read.ptx.sreg.ctaid.x">; 
+def NVVM_BlockIdYOp : NVVM_PureSpecialRangeableRegisterOp<"read.ptx.sreg.ctaid.y">; +def NVVM_BlockIdZOp : NVVM_PureSpecialRangeableRegisterOp<"read.ptx.sreg.ctaid.z">; +def NVVM_GridDimXOp : NVVM_PureSpecialRangeableRegisterOp<"read.ptx.sreg.nctaid.x">; +def NVVM_GridDimYOp : NVVM_PureSpecialRangeableRegisterOp<"read.ptx.sreg.nctaid.y">; +def NVVM_GridDimZOp : NVVM_PureSpecialRangeableRegisterOp<"read.ptx.sreg.nctaid.z">; //===----------------------------------------------------------------------===// // CTA Cluster index and range -def NVVM_ClusterIdXOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.clusterid.x", [NVVMRequiresSM<90>]>; -def NVVM_ClusterIdYOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.clusterid.y">; -def NVVM_ClusterIdZOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.clusterid.z">; -def NVVM_ClusterDimXOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.nclusterid.x">; -def NVVM_ClusterDimYOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.nclusterid.y">; -def NVVM_ClusterDimZOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.nclusterid.z">; +def NVVM_ClusterIdXOp : NVVM_PureSpecialRangeableRegisterOp<"read.ptx.sreg.clusterid.x", [NVVMRequiresSM<90>]>; +def NVVM_ClusterIdYOp : NVVM_PureSpecialRangeableRegisterOp<"read.ptx.sreg.clusterid.y">; +def NVVM_ClusterIdZOp : NVVM_PureSpecialRangeableRegisterOp<"read.ptx.sreg.clusterid.z">; +def NVVM_ClusterDimXOp : NVVM_PureSpecialRangeableRegisterOp<"read.ptx.sreg.nclusterid.x">; +def NVVM_ClusterDimYOp : NVVM_PureSpecialRangeableRegisterOp<"read.ptx.sreg.nclusterid.y">; +def NVVM_ClusterDimZOp : NVVM_PureSpecialRangeableRegisterOp<"read.ptx.sreg.nclusterid.z">; //===----------------------------------------------------------------------===// // CTA index and range within Cluster -def NVVM_BlockInClusterIdXOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.cluster.ctaid.x", [NVVMRequiresSM<90>]>; -def NVVM_BlockInClusterIdYOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.cluster.ctaid.y", 
[NVVMRequiresSM<90>]>; -def NVVM_BlockInClusterIdZOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.cluster.ctaid.z", [NVVMRequiresSM<90>]>; -def NVVM_ClusterDimBlocksXOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.cluster.nctaid.x", [NVVMRequiresSM<90>]>; -def NVVM_ClusterDimBlocksYOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.cluster.nctaid.y", [NVVMRequiresSM<90>]>; -def NVVM_ClusterDimBlocksZOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.cluster.nctaid.z">; +def NVVM_BlockInClusterIdXOp : NVVM_PureSpecialRangeableRegisterOp<"read.ptx.sreg.cluster.ctaid.x", [NVVMRequiresSM<90>]>; +def NVVM_BlockInClusterIdYOp : NVVM_PureSpecialRangeableRegisterOp<"read.ptx.sreg.cluster.ctaid.y", [NVVMRequiresSM<90>]>; +def NVVM_BlockInClusterIdZOp : NVVM_PureSpecialRangeableRegisterOp<"read.ptx.sreg.cluster.ctaid.z", [NVVMRequiresSM<90>]>; +def NVVM_ClusterDimBlocksXOp : NVVM_PureSpecialRangeableRegisterOp<"read.ptx.sreg.cluster.nctaid.x", [NVVMRequiresSM<90>]>; +def NVVM_ClusterDimBlocksYOp : NVVM_PureSpecialRangeableRegisterOp<"read.ptx.sreg.cluster.nctaid.y", [NVVMRequiresSM<90>]>; +def NVVM_ClusterDimBlocksZOp : NVVM_PureSpecialRangeableRegisterOp<"read.ptx.sreg.cluster.nctaid.z">; //===----------------------------------------------------------------------===// // CTA index and across Cluster dimensions -def NVVM_ClusterId : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.cluster.ctarank", [NVVMRequiresSM<90>]>; -def NVVM_ClusterDim : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.cluster.nctarank">; +def NVVM_ClusterId : NVVM_PureSpecialRangeableRegisterOp<"read.ptx.sreg.cluster.ctarank", [NVVMRequiresSM<90>]>; +def NVVM_ClusterDim : NVVM_PureSpecialRangeableRegisterOp<"read.ptx.sreg.cluster.nctarank">; //===----------------------------------------------------------------------===// // Clock registers @@ -256,7 +262,7 @@ def NVVM_GlobalTimerOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.globaltimer">; 
//===----------------------------------------------------------------------===// // envreg registers foreach index = !range(0, 32) in { - def NVVM_EnvReg # index # Op : NVVM_SpecialRegisterOp<"read.ptx.sreg.envreg" # index>; + def NVVM_EnvReg # index # Op : NVVM_PureSpecialRegisterOp<"read.ptx.sreg.envreg" # index>; } //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/LLVMIR/XeVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/XeVMOps.td index b5e81d595d74c..f457f47d56219 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/XeVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/XeVMOps.td @@ -201,25 +201,25 @@ def XeVM_BlockLoad2dOp let description = [{ The `xevm.blockload2d` operation loads a two dimensional matrix tile from a base matrix residing in global memory. The parameters are: - $ptr - the base address of the base matrix containing the tile to load - $base_width - the width of the base matrix in number of bytes. - $base_height - the number of rows in the base matrix - $base_pitch - the physical stride between the first columns of the current - row and the subsequent row in number of bytes. - $x, $y, $tile_width, $tile_height - the starting offsets and shape of - the tile to load in number of elements. - $elem_size_in_bits - the size in bits of the matrix element type + * `ptr` - the base address of the base matrix containing the tile to load + * `base_width` - the width of the base matrix in number of bytes. + * `base_height` - the number of rows in the base matrix + * `base_pitch` - the physical stride between the first columns of the current + row and the subsequent row in number of bytes. + * `x`, `y`, `tile_width`, `tile_height` - the starting offsets and shape of + the tile to load in number of elements. 
+ * `elem_size_in_bits` - the size in bits of the matrix element type - 32 for f32, tf32 - 16 for f16, int16, bf16 - 8 for int8 - $v_blocks - number of consecutive tiles in innermost dimension direction to load - $transpose - transpose the tile in registers (useful for 32 bit element type) - $pack_register - pack element types narrower than register bit width. + * `v_blocks` - number of consecutive tiles in innermost dimension direction to load + * `transpose` - transpose the tile in registers (useful for 32 bit element type) + * `pack_register` - pack element types narrower than register bit width. [M, N] => [M/factor, N, factor] where factor is register_size_in_bits / elem_size_in_bits - $cache_control - an enumerator that sets the cache behaviour + * `cache_control` - an enumerator that sets the cache behaviour Notes: - - the $transpose and $pack_register parameters are mutual exclusive + - the `transpose` and `pack_register` parameters are mutual exclusive - transposing the tile loaded is used for A matrix in backward path or used for the B matrix operand (D = C + A * B), where A has row-major layout and B should have column-major layout in memory. - if the tile loaded contains out of bound elements of the matrix, they are filled with 0. @@ -262,19 +262,19 @@ def XeVM_BlockStore2dOp let description = [{ The `xevm.blockstore2d` operation stores a two dimensional tile into a larger matrix residing in global memory. The parameters are: - $ptr - the base address of the target matrix where to store the tile - $base_width - the width of the base matrix in number of bytes. - $base_height - the number of rows in the base matrix - $base_pitch - the physical stride between the first columns of the current - row and the subsequent row in number of bytes. 
- $x, $y, $tile_width, $tile_height - the starting offsets and shape of the tile to store + * `ptr` - the base address of the target matrix where to store the tile + * `base_width` - the width of the base matrix in number of bytes. + * `base_height` - the number of rows in the base matrix + * `base_pitch` - the physical stride between the first columns of the current + row and the subsequent row in number of bytes. + * `x`, `y`, `tile_width`, `tile_height` - the starting offsets and shape of the tile to store in number of elements. - $elem_size_in_bits - the size in bits of the matrix element + * `elem_size_in_bits` - the size in bits of the matrix element - 32 for f32, tf32 - 16 for f16, int16, bf16 - 8 for int8 - $cache_control - an enumerator that sets the cache behaviour - $stored_val - the tile to store + * `cache_control` - an enumerator that sets the cache behaviour + * `stored_val` - the tile to store Example: ```mlir @@ -351,10 +351,10 @@ def XeVM_MemfenceOp This operation ensures that all prior memory accesses of this work-item to `addrspace` are visible to all other work-items in `scope`. Parameters description: - $scope - specify the memory scope at which all other work-items should observe - memory operations prior to the fence. - $addrspace - specify the address space of work-item's memory accesses - to be affected by the fence. + * `scope` - specify the memory scope at which all other work-items should observe + memory operations prior to the fence. + * `addrspace` - specify the address space of work-item's memory accesses + to be affected by the fence. }]; let assemblyFormat = [{prop-dict attr-dict}]; @@ -370,9 +370,9 @@ def XeVM_PrefetchOp let summary = "Prefetch data into a cache subsystem."; let description = [{ Work-item issues a prefetch from global memory to cache: - $ptr - LLVM pointer with address space. Address space must be 1 (global) - or 4 (generic) - $cache_control - specify caching options + * `ptr` - LLVM pointer with address space. 
Address space must be 1 (global) + or 4 (generic) + * `cache_control` - specify caching options }]; let assemblyFormat = [{ operands prop-dict attr-dict `:` `(` type(operands) `)` @@ -395,19 +395,19 @@ def XeVM_BlockPrefetch2dOp let description = [{ The `xevm.blockprefetch2d` operation prefetches a two dimensional tile from a larger base matrix residing in global memory. The parameters are: - $ptr - the base address of the base matrix containing the tile to prefetch - $base_width - the width of the base matrix in number of bytes. - $base_height - the number of rows in the base matrix - $base_pitch - the physical stride between the first columns of the current - row and the subsequent row in number of bytes. - $x, $y, $tile_width, $tile_height - the starting offsets and shape of tile - to prefetch in number of elements. - $elem_size_in_bits - the size in bits of the matrix element - - 32 for f32, bf32 - - 16 for f16, int16, bf16 - - 8 for int8, int4, int2 - $v_blocks - number of tiles in innermost dimension direction to prefetch - $cache_control - an enumerator that sets the cache behaviour + * `ptr` - the base address of the base matrix containing the tile to prefetch + * `base_width` - the width of the base matrix in number of bytes. + * `base_height` - the number of rows in the base matrix + * `base_pitch` - the physical stride between the first columns of the current + row and the subsequent row in number of bytes. + * `x`, `y`, `tile_width`, `tile_height` - the starting offsets and shape of tile + to prefetch in number of elements. 
+ * `elem_size_in_bits` - the size in bits of the matrix element + - 32 for f32, bf32 + - 16 for f16, int16, bf16 + - 8 for int8, int4, int2 + * `v_blocks` - number of tiles in innermost dimension direction to prefetch + * `cache_control` - an enumerator that sets the cache behaviour Example: ```mlir @@ -452,9 +452,9 @@ def XeVM_ElemTypeAttr : I32EnumAttr<"ElemType", "XeVM element type", def XeVM_MMAShapeAttr : XeVM_Attr<"MMAShape", "mma_shape"> { let description = [{ MMA operation is represented as D=AxB+C, where - A has the shape MxK. - B has the shape KxN. - D and C have the shape MxN. + - A has the shape MxK. + - B has the shape KxN. + - D and C have the shape MxN. This attribute encodes the shape of all matrices that participate in MMA. }]; let parameters = (ins "int":$m, "int":$n, "int":$k); @@ -484,17 +484,17 @@ def XeVM_MMAOp D = C + A x B where the A, B, C input matrices and the result D have shapes: - D : MxN - C : MxN - A : MxK - B : KxN + - D : MxN + - C : MxN + - A : MxK + - B : KxN Parameters: - `a` - vector of matrix A elements. - `b` - vector of matrix B elements. - `c` - (optional) vector of matrix C elements. - `shape` - the shape of the matrices, specified as `M`, `N`, and `K` values. - `types` - the data types of the matrices, specified as `D`, `A`, `B`, and optionally `C`. + * `a` - vector of matrix A elements. + * `b` - vector of matrix B elements. + * `c` - (optional) vector of matrix C elements. + * `shape` - the shape of the matrices, specified as `M`, `N`, and `K` values. + * `types` - the data types of the matrices, specified as `D`, `A`, `B`, and optionally `C`. 
Example: ```mlir diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCTypeInterfaces.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCTypeInterfaces.td index 9ff2507629856..9123ac34af67d 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACCTypeInterfaces.td +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCTypeInterfaces.td @@ -180,6 +180,36 @@ def OpenACC_MappableTypeInterface : TypeInterface<"MappableType"> { return ::mlir::acc::VariableTypeCategory::uncategorized; }] >, + InterfaceMethod< + /*description=*/[{ + Generates the operations that would be normally placed in a recipe's + init region. It inserts at the builder's current location. + It can be used either to directly "inline" the init region + or if the caller sets the insertion point to inside a recipe body, + it fills it in. This does not generate the `acc.yield` that normally + would terminate a recipe. + + The `extents` are optional and can be empty - it is only when a + slice of the private variable needs allocation. + The `initVal` can be empty - it is primarily needed for reductions + to ensure the variable is also initialized with appropriate value. + + If the return value is empty, it means that recipe body was not + successfully generated. 
+ }], + /*retTy=*/"::mlir::Value", + /*methodName=*/"generatePrivateInit", + /*args=*/(ins "::mlir::OpBuilder &":$builder, + "::mlir::Location":$loc, + "::mlir::TypedValue<::mlir::acc::MappableType>":$var, + "::llvm::StringRef":$varName, + "::mlir::ValueRange":$extents, + "::mlir::Value":$initVal), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + return {}; + }] + >, ]; } diff --git a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVArithmeticOps.td b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVArithmeticOps.td index 46a705eefc262..65771b602e0d0 100644 --- a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVArithmeticOps.td +++ b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVArithmeticOps.td @@ -462,16 +462,19 @@ def SPIRV_DotOp : SPIRV_Op<"Dot", }]; let arguments = (ins - SPIRV_VectorOf:$vector1, - SPIRV_VectorOf:$vector2 + SPIRV_VectorOf:$vector1, + SPIRV_VectorOf:$vector2 ); let results = (outs - SPIRV_Float:$result + SPIRV_AnyFloat:$result ); let assemblyFormat = "operands attr-dict `:` type($vector1) `->` type($result)"; + // Require dynamic availability specification based on operand/result type. 
+ bit autogenAvailability = 0; + let hasVerifier = 0; } diff --git a/mlir/include/mlir/Dialect/X86Vector/X86Vector.td b/mlir/include/mlir/Dialect/X86Vector/X86Vector.td index 3bf0be0a716aa..73f6877c12fab 100644 --- a/mlir/include/mlir/Dialect/X86Vector/X86Vector.td +++ b/mlir/include/mlir/Dialect/X86Vector/X86Vector.td @@ -420,6 +420,62 @@ def DotOp : AVX_LowOp<"dot", [Pure, }]; } +//----------------------------------------------------------------------------// +// AVX Int8 Dot +//----------------------------------------------------------------------------// + +def DotInt8Op : AVX_Op<"dot.i8", [Pure, + X86IntrinsicOpInterface, + AllTypesMatch<["a", "b"]>, + AllTypesMatch<["w", "dst"]>, + TypesMatchWith<"`a` has four times elements as `w`", + "w", "a", + "VectorType::get({::llvm::cast($_self).getShape()[0] * 4}, " + "IntegerType::get($_self.getContext(), 8))"> + ]> { + let summary = "Dot Int8 op"; + let description = [{ + The `dot` op is an AVX2-Int8 specific op that can lower to the proper + LLVMAVX2-INT8 operation `llvm.vpdpbssd` depending on the width of MLIR + vectors it is applied to. + + #### From the Intel Intrinsics Guide: + + Multiply groups of 4 adjacent pairs of signed 8-bit integers in `a` with + corresponding signed 8-bit integers in `b`, producing 4 intermediate signed 16-bit + results. Sum these 4 results with the corresponding 32-bit integer in `w`, and + store the packed 32-bit results in `dst`. 
+ + Example: + ```mlir + %dst = x86vector.avx.dot.i8 %w, %a, %b : vector<32xi8> -> vector<8xi32> + ``` + }]; + let arguments = (ins VectorOfLengthAndType<[4, 8], [I32]>:$w, + VectorOfLengthAndType<[16, 32], [I8]>:$a, + VectorOfLengthAndType<[16, 32], [I8]>:$b + ); + let results = (outs VectorOfLengthAndType<[4, 8], [I32]>:$dst); + let assemblyFormat = + "$w `,` $a `,` $b attr-dict `:` type($a) `->` type($w)"; + + let extraClassDeclaration = [{ + std::string getIntrinsicName() { + std::string intr = "llvm.x86.avx2.vpdpbssd"; + VectorType vecType = getW().getType(); + unsigned elemBitWidth = vecType.getElementTypeBitWidth(); + unsigned opBitWidth = vecType.getShape()[0] * elemBitWidth; + intr += "." + std::to_string(opBitWidth); + return intr; + } + + SmallVector getIntrinsicOperands( + ::mlir::ArrayRef operands, + const ::mlir::LLVMTypeConverter &typeConverter, + ::mlir::RewriterBase &rewriter); + }]; +} + //----------------------------------------------------------------------------// // AVX: Convert BF16/F16 to F32 and broadcast into packed F32 //----------------------------------------------------------------------------// diff --git a/mlir/include/mlir/InitAllExtensions.h b/mlir/include/mlir/InitAllExtensions.h index 0f2d0e45008cc..d5a9a2c3aeba7 100644 --- a/mlir/include/mlir/InitAllExtensions.h +++ b/mlir/include/mlir/InitAllExtensions.h @@ -32,6 +32,7 @@ #include "mlir/Conversion/SCFToEmitC/SCFToEmitC.h" #include "mlir/Conversion/UBToLLVM/UBToLLVM.h" #include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h" +#include "mlir/Conversion/XeVMToLLVM/XeVMToLLVM.h" #include "mlir/Dialect/AMX/Transforms.h" #include "mlir/Dialect/Affine/TransformOps/AffineTransformOps.h" #include "mlir/Dialect/ArmNeon/TransformOps/ArmNeonVectorTransformOps.h" @@ -91,6 +92,7 @@ inline void registerAllExtensions(DialectRegistry ®istry) { gpu::registerConvertGpuToLLVMInterface(registry); NVVM::registerConvertGpuToNVVMInterface(registry); 
vector::registerConvertVectorToLLVMInterface(registry); + registerConvertXeVMToLLVMInterface(registry); // Register all transform dialect extensions. affine::registerTransformDialectExtension(registry); diff --git a/mlir/include/mlir/TableGen/Class.h b/mlir/include/mlir/TableGen/Class.h index f750a34a3b2ba..349ea54954feb 100644 --- a/mlir/include/mlir/TableGen/Class.h +++ b/mlir/include/mlir/TableGen/Class.h @@ -332,13 +332,23 @@ class Method : public ClassDeclarationBase { : properties(properties), methodSignature(std::forward(retType), std::forward(name), std::forward(args)...), - methodBody(properties & Declaration) {} + methodBody(properties & Declaration) { + if (!methodPropertiesAreCompatible(properties)) { + llvm::report_fatal_error( + "Invalid combination of method properties specified"); + } + } /// Create a method with a return type, a name, method properties, and a list /// of parameters. Method(StringRef retType, StringRef name, Properties properties, std::initializer_list params) : properties(properties), methodSignature(retType, name, params), - methodBody(properties & Declaration) {} + methodBody(properties & Declaration) { + if (!methodPropertiesAreCompatible(properties)) { + llvm::report_fatal_error( + "Invalid combination of method properties specified"); + } + } // Define move constructor and assignment operator to prevent copying. Method(Method &&) = default; @@ -402,6 +412,10 @@ class Method : public ClassDeclarationBase { MethodBody methodBody; /// Deprecation message if the method is deprecated. std::optional deprecationMessage; + + /// Utility method to verify method properties correctness. + [[maybe_unused]] static bool + methodPropertiesAreCompatible(Properties properties); }; /// This enum describes C++ inheritance visibility. 
diff --git a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h index 79e8bb6add0da..5d52cf3f04b6a 100644 --- a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h +++ b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h @@ -15,6 +15,7 @@ #define MLIR_TARGET_LLVMIR_MODULETRANSLATION_H #include "mlir/Dialect/LLVMIR/LLVMInterfaces.h" +#include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "mlir/IR/Operation.h" #include "mlir/IR/SymbolTable.h" #include "mlir/IR/Value.h" @@ -24,6 +25,7 @@ #include "mlir/Target/LLVMIR/TypeToLLVM.h" #include "llvm/ADT/SetVector.h" +#include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/IR/FPEnv.h" namespace llvm { @@ -108,6 +110,41 @@ class ModuleTranslation { return blockMapping.lookup(block); } + /// Find the LLVM-IR loop that represents an MLIR loop. + llvm::CanonicalLoopInfo *lookupOMPLoop(omp::NewCliOp mlir) const { + llvm::CanonicalLoopInfo *result = loopMapping.lookup(mlir); + assert(result && "attempt to get non-existing loop"); + return result; + } + + /// Find the LLVM-IR loop that represents an MLIR loop. + llvm::CanonicalLoopInfo *lookupOMPLoop(Value mlir) const { + return lookupOMPLoop(mlir.getDefiningOp()); + } + + /// Mark an OpenMP loop as having been consumed. + void invalidateOmpLoop(omp::NewCliOp mlir) { loopMapping.erase(mlir); } + + /// Mark an OpenMP loop as having been consumed. 
+ void invalidateOmpLoop(Value mlir) { + invalidateOmpLoop(mlir.getDefiningOp()); + } + + /// Map an MLIR OpenMP dialect CanonicalLoopInfo to its lowered LLVM-IR + /// OpenMPIRBuilder CanonicalLoopInfo + void mapOmpLoop(omp::NewCliOp mlir, llvm::CanonicalLoopInfo *llvm) { + assert(llvm && "argument must be non-null"); + llvm::CanonicalLoopInfo *&cur = loopMapping[mlir]; + assert(cur == nullptr && "attempting to map a loop that is already mapped"); + cur = llvm; + } + + /// Map an MLIR OpenMP dialect CanonicalLoopInfo to its lowered LLVM-IR + /// OpenMPIRBuilder CanonicalLoopInfo + void mapOmpLoop(Value mlir, llvm::CanonicalLoopInfo *llvm) { + mapOmpLoop(mlir.getDefiningOp(), llvm); + } + /// Stores the mapping between an MLIR operation with successors and a /// corresponding LLVM IR instruction. void mapBranch(Operation *mlir, llvm::Instruction *llvm) { @@ -381,6 +418,12 @@ class ModuleTranslation { DenseMap valueMapping; DenseMap blockMapping; + /// List of not yet consumed MLIR loop handles (represented by an omp.new_cli + /// operation which creates a value of type CanonicalLoopInfoType) and their + /// LLVM-IR representation as CanonicalLoopInfo which is managed by the + /// OpenMPIRBuilder. + DenseMap loopMapping; + /// A mapping between MLIR LLVM dialect terminators and LLVM IR terminators /// they are converted to. This allows for connecting PHI nodes to the source /// values after all operations are converted. 
diff --git a/mlir/lib/Conversion/CMakeLists.txt b/mlir/lib/Conversion/CMakeLists.txt index e4b4974600577..24a48993ad80c 100644 --- a/mlir/lib/Conversion/CMakeLists.txt +++ b/mlir/lib/Conversion/CMakeLists.txt @@ -73,3 +73,4 @@ add_subdirectory(VectorToLLVM) add_subdirectory(VectorToSCF) add_subdirectory(VectorToSPIRV) add_subdirectory(VectorToXeGPU) +add_subdirectory(XeVMToLLVM) diff --git a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp index 77a2708653576..7ac9687c4eeda 100644 --- a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp +++ b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp @@ -41,6 +41,16 @@ template struct OpenMPOpConversion : public ConvertOpToLLVMPattern { using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; + OpenMPOpConversion(LLVMTypeConverter &typeConverter, + PatternBenefit benefit = 1) + : ConvertOpToLLVMPattern(typeConverter, benefit) { + // Operations using CanonicalLoopInfoType are lowered only by + // mlir::translateModuleToLLVMIR() using the OpenMPIRBuilder. Until then, + // the type and operations using it must be preserved. + typeConverter.addConversion( + [&](::mlir::omp::CanonicalLoopInfoType type) { return type; }); + } + LogicalResult matchAndRewrite(T op, typename T::Adaptor adaptor, ConversionPatternRewriter &rewriter) const override { diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp index b89fde4fbc17e..c1f40dcbd5ca0 100644 --- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp +++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp @@ -807,6 +807,7 @@ class MaxPool2dConverter : public OpConversionPattern { ValueRange{paddedInput, fakeWindowDims}, filledEmptyTensor, strideAttr, dilationAttr); + rewriter.setInsertionPointAfter(op); rewriter.replaceOp(op, resultOp); // NaN propagation has no meaning for non floating point types. 
diff --git a/mlir/lib/Conversion/XeVMToLLVM/CMakeLists.txt b/mlir/lib/Conversion/XeVMToLLVM/CMakeLists.txt new file mode 100644 index 0000000000000..4ac60d8d43472 --- /dev/null +++ b/mlir/lib/Conversion/XeVMToLLVM/CMakeLists.txt @@ -0,0 +1,21 @@ +add_mlir_conversion_library(MLIRXeVMToLLVM + XeVMToLLVM.cpp + + ADDITIONAL_HEADER_DIRS + ${MLIR_MAIN_INCLUDE_DIR}/mlir/Conversion/XeVMToLLVM + + DEPENDS + MLIRConversionPassIncGen + + LINK_COMPONENTS + Core + + LINK_LIBS PUBLIC + MLIRFuncDialect + MLIRGPUDialect + MLIRLLVMCommonConversion + MLIRLLVMDialect + MLIRXeVMDialect + MLIRPass + MLIRTransforms +) diff --git a/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp b/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp new file mode 100644 index 0000000000000..a8380b9669f0f --- /dev/null +++ b/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp @@ -0,0 +1,636 @@ +//===-- XeVMToLLVM.cpp - XeVM to LLVM dialect conversion --------*- C++ -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Conversion/XeVMToLLVM/XeVMToLLVM.h" + +#include "mlir/Conversion/ConvertToLLVM/ToLLVMInterface.h" +#include "mlir/Conversion/LLVMCommon/Pattern.h" +#include "mlir/Dialect/GPU/IR/GPUDialect.h" +#include "mlir/Dialect/LLVMIR/FunctionCallUtils.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/Dialect/LLVMIR/XeVMDialect.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Support/LLVM.h" +#include "llvm/Support/FormatVariadic.h" + +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/Types.h" + +#include "llvm/ADT/TypeSwitch.h" + +namespace mlir { +#define GEN_PASS_DEF_CONVERTXEVMTOLLVMPASS +#include "mlir/Conversion/Passes.h.inc" +} // namespace mlir + +using namespace mlir; +using namespace xevm; + +namespace { + +struct LLVMFuncAttributeOptions { + bool isConvergent = false; + bool isNoUnwind = false; + bool isWillReturn = false; + LLVM::MemoryEffectsAttr memEffectsAttr{}; +}; +static constexpr LLVMFuncAttributeOptions noUnwindAttrs = { + false, true, false, {}}; +static constexpr LLVMFuncAttributeOptions noUnwindWillReturnAttrs = { + false, true, true, {}}; +static constexpr LLVMFuncAttributeOptions convergentNoUnwindWillReturnAttrs = { + true, true, true, {}}; + +std::string getTypeMangling(Type ty, bool isUnsigned = false) { + return TypeSwitch(ty) + .Case([isUnsigned](VectorType ty) -> std::string { + return "Dv" + std::to_string(ty.getNumElements()) + "_" + + getTypeMangling(ty.getElementType(), isUnsigned); + }) + .Case([](Float16Type) -> std::string { return "Dh"; }) + .Case([](Float32Type) -> std::string { return "f"; }) + .Case([](Float64Type) -> std::string { return "d"; }) + .Case([isUnsigned](IntegerType ty) -> std::string { + switch (ty.getWidth()) { + case 8: + return isUnsigned ? "h" : "c"; + case 16: + return isUnsigned ? "t" : "s"; + case 32: + return isUnsigned ? 
"j" : "i"; + case 64: + return isUnsigned ? "m" : "l"; + default: + llvm_unreachable("unhandled integer type"); + } + }) + .Default([](Type) -> std::string { + llvm_unreachable("unhandled type for mangling"); + }); +} + +std::string mangle(StringRef baseName, ArrayRef types, + ArrayRef isUnsigned = {}) { + assert((isUnsigned.empty() || isUnsigned.size() == types.size()) && + "Signedness info doesn't match"); + std::string s; + llvm::raw_string_ostream os(s); + llvm::SmallDenseMap substitutions; + os << "_Z" << baseName.size() << baseName; + for (auto [idx, type] : llvm::enumerate(types)) { + auto it = substitutions.find(type); + if (it != substitutions.end()) { + os << "S"; + // First substitution is `S_`, second is `S0_`, and so on. + if (unsigned firstIdx = it->getSecond(); firstIdx > 0) + os << firstIdx - 1; + os << "_"; + } else { + if (!type.isIntOrFloat()) + substitutions[type] = substitutions.size(); + os << getTypeMangling(type, isUnsigned.empty() ? false : isUnsigned[idx]); + } + } + return os.str(); +} + +template +int32_t getL1CacheControl(OpType op) { + int32_t control = 0; + if constexpr (isLoad) { + switch (*op.getCacheControl()) { + case LoadCacheControl::L1UC_L2UC_L3UC: + case LoadCacheControl::L1UC_L2UC_L3C: + case LoadCacheControl::L1UC_L2C_L3UC: + case LoadCacheControl::L1UC_L2C_L3C: + control = 1; + break; + case LoadCacheControl::L1C_L2UC_L3UC: + case LoadCacheControl::L1C_L2UC_L3C: + case LoadCacheControl::L1C_L2C_L3UC: + case LoadCacheControl::L1C_L2C_L3C: + control = 2; + break; + case LoadCacheControl::L1S_L2UC_L3UC: + case LoadCacheControl::L1S_L2UC_L3C: + case LoadCacheControl::L1S_L2C_L3UC: + case LoadCacheControl::L1S_L2C_L3C: + control = 3; + break; + case LoadCacheControl::INVALIDATE_READ: + control = 4; + break; + } + } else { + switch (*op.getCacheControl()) { + case StoreCacheControl::L1UC_L2UC_L3UC: + case StoreCacheControl::L1UC_L2UC_L3WB: + case StoreCacheControl::L1UC_L2WB_L3UC: + case StoreCacheControl::L1UC_L2WB_L3WB: + 
control = 1; + break; + case StoreCacheControl::L1WT_L2UC_L3UC: + case StoreCacheControl::L1WT_L2UC_L3WB: + case StoreCacheControl::L1WT_L2WB_L3UC: + case StoreCacheControl::L1WT_L2WB_L3WB: + control = 2; + break; + case StoreCacheControl::L1S_L2UC_L3UC: + case StoreCacheControl::L1S_L2UC_L3WB: + case StoreCacheControl::L1S_L2WB_L3UC: + case StoreCacheControl::L1S_L2WB_L3WB: + control = 3; + break; + case StoreCacheControl::L1WB_L2UC_L3UC: + case StoreCacheControl::L1WB_L2WB_L3UC: + case StoreCacheControl::L1WB_L2UC_L3WB: + control = 4; + break; + } + } + return control; +} + +template +int32_t getL3CacheControl(OpType op) { + int32_t control = 0; + if constexpr (isLoad) { + switch (*op.getCacheControl()) { + case LoadCacheControl::L1UC_L2UC_L3UC: + case LoadCacheControl::L1UC_L2C_L3UC: + case LoadCacheControl::L1C_L2UC_L3UC: + case LoadCacheControl::L1C_L2C_L3UC: + case LoadCacheControl::L1S_L2UC_L3UC: + case LoadCacheControl::L1S_L2C_L3UC: + control = 1; + break; + case LoadCacheControl::L1UC_L2UC_L3C: + case LoadCacheControl::L1UC_L2C_L3C: + case LoadCacheControl::L1C_L2UC_L3C: + case LoadCacheControl::L1C_L2C_L3C: + case LoadCacheControl::L1S_L2UC_L3C: + case LoadCacheControl::L1S_L2C_L3C: + control = 2; + break; + case LoadCacheControl::INVALIDATE_READ: + control = 4; + break; + } + } else { + switch (*op.getCacheControl()) { + case StoreCacheControl::L1UC_L2UC_L3UC: + case StoreCacheControl::L1UC_L2WB_L3UC: + case StoreCacheControl::L1WT_L2UC_L3UC: + case StoreCacheControl::L1WT_L2WB_L3UC: + case StoreCacheControl::L1S_L2UC_L3UC: + case StoreCacheControl::L1S_L2WB_L3UC: + case StoreCacheControl::L1WB_L2UC_L3UC: + case StoreCacheControl::L1WB_L2WB_L3UC: + control = 1; + break; + case StoreCacheControl::L1UC_L2UC_L3WB: + case StoreCacheControl::L1UC_L2WB_L3WB: + case StoreCacheControl::L1WT_L2UC_L3WB: + case StoreCacheControl::L1WT_L2WB_L3WB: + case StoreCacheControl::L1S_L2UC_L3WB: + case StoreCacheControl::L1S_L2WB_L3WB: + case 
StoreCacheControl::L1WB_L2UC_L3WB: + control = 2; + break; + } + } + return control; +} + +template +static std::optional +getCacheControlMetadata(ConversionPatternRewriter &rewriter, OpType op) { + if (!op.getCacheControl()) + return {}; + constexpr int32_t decorationCacheControlArity{4}; + constexpr int32_t loadCacheControlKey{6442}; + constexpr int32_t storeCacheControlKey{6443}; + const int32_t controlKey{isLoad ? loadCacheControlKey : storeCacheControlKey}; + SmallVector decorationsL1{ + controlKey, 0, getL1CacheControl(op), 0}; + SmallVector decorationsL3{ + controlKey, 1, getL3CacheControl(op), 0}; + auto arrayAttrL1 = rewriter.getI32ArrayAttr(decorationsL1); + auto arrayAttrL3 = rewriter.getI32ArrayAttr(decorationsL3); + + SmallVector combinedAttrs = {arrayAttrL1, arrayAttrL3}; + return rewriter.getArrayAttr(combinedAttrs); +} + +static LLVM::CallOp createDeviceFunctionCall( + ConversionPatternRewriter &rewriter, StringRef funcName, Type retType, + ArrayRef argTypes, ArrayRef args, + mlir::ArrayRef> paramAttrs, + LLVMFuncAttributeOptions funcAttributeOptions, Operation *op) { + auto moduleOp = op->getParentWithTrait(); + assert(moduleOp && "Expecting module"); + Location loc = op->getLoc(); + + auto funcOpRes = + LLVM::lookupOrCreateFn(rewriter, moduleOp, funcName, argTypes, retType); + assert(!failed(funcOpRes)); + LLVM::LLVMFuncOp funcOp = funcOpRes.value(); + funcOp.setCConv(LLVM::cconv::CConv::SPIR_FUNC); + funcOp.setConvergent(funcAttributeOptions.isConvergent); + funcOp.setNoUnwind(funcAttributeOptions.isNoUnwind); + funcOp.setWillReturn(funcAttributeOptions.isWillReturn); + + if (funcAttributeOptions.memEffectsAttr) + funcOp.setMemoryEffectsAttr(funcAttributeOptions.memEffectsAttr); + + for (auto [idx, attrName] : paramAttrs) + funcOp.setArgAttr(idx, attrName, rewriter.getUnitAttr()); + + auto callOp = rewriter.create(loc, funcOp, args); + callOp->setAttrs(funcOp->getAttrs()); + + return callOp; +} + +class MMAToOCLPattern : public 
OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + LogicalResult + matchAndRewrite(xevm::MMAOp op, xevm::MMAOp::Adaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + if (!op.getC()) { + return rewriter.notifyMatchFailure(op, "OCL requires C operand"); + } + auto precisionA = op.getTypes().getA(); + auto precisionB = op.getTypes().getB(); + auto precisionC = op.getTypes().getC(); + auto precisionD = op.getTypes().getD(); + if (precisionC != precisionD) { + return rewriter.notifyMatchFailure(op, "type of C and D need to match"); + } + if (precisionC != xevm::ElemType::S32 && + precisionC != xevm::ElemType::F32 && + precisionC != xevm::ElemType::F16 && + precisionC != xevm::ElemType::BF16) { + return rewriter.notifyMatchFailure( + op, "type of C and D must be S32, F32, F16 or BF16"); + } + if (precisionA == xevm::ElemType::S32 || + precisionA == xevm::ElemType::F32) { + return rewriter.notifyMatchFailure(op, "type of A cannot be S32 or F32"); + } + if (precisionB == xevm::ElemType::S32 || + precisionB == xevm::ElemType::F32) { + return rewriter.notifyMatchFailure(op, "type of B cannot be S32 or F32"); + } + constexpr uint32_t bitWidthPackedA{16}; + constexpr uint32_t bitWidthPackedB{32}; + auto loc = op.getLoc(); + + auto castIfNeeded = [&](Value val, Type packedType) -> Value { + VectorType origTy = cast(val.getType()); + const uint32_t vecBitSize = + origTy.getNumElements() * + origTy.getElementType().getIntOrFloatBitWidth(); + VectorType newTy = VectorType::get( + vecBitSize / packedType.getIntOrFloatBitWidth(), packedType); + if (origTy != newTy) + val = rewriter.create(loc, newTy, val); + return val; + }; + + Value a = op.getA(); + Type packedAType = (op.getTypes().getA() == xevm::ElemType::TF32) + ? cast(rewriter.getF32Type()) + : rewriter.getIntegerType(bitWidthPackedA); + a = castIfNeeded(a, packedAType); + + Value b = op.getB(); + Type packedBType = (op.getTypes().getB() == xevm::ElemType::TF32) + ? 
cast(rewriter.getF32Type()) + : rewriter.getIntegerType(bitWidthPackedB); + b = castIfNeeded(b, packedBType); + + Value c = op.getC(); + VectorType cOrigTy = cast(c.getType()); + VectorType resOrigTy = cast(op->getResultTypes()[0]); + assert(cOrigTy == resOrigTy && "Accumulator and result type mismatch"); + // OCL builtins encode bfloat16 as int16 + VectorType cTy = + cOrigTy.getElementType().isBF16() + ? VectorType::get(cOrigTy.getShape(), rewriter.getIntegerType(16)) + : cOrigTy; + VectorType resTy = cTy; + if (cOrigTy != cTy) + c = rewriter.create(loc, cTy, c); + + constexpr int32_t systolicDepth{8}; + std::string fnName = + llvm::formatv("intel_sub_group_{0}_{1}_matrix_mad_k{2}", + stringifyElemType(op.getTypes().getA()).str(), + stringifyElemType(op.getTypes().getB()).str(), + systolicDepth * + getNumOperandsPerDword(op.getTypes().getA())) + .str(); + SmallVector argTypes{a.getType(), b.getType(), cTy}; + fnName = mangle(fnName, argTypes); + SmallVector args{a, b, c}; + + auto memAttr = rewriter.getAttr( + /*other=*/LLVM::ModRefInfo::NoModRef, + /*argMem=*/LLVM::ModRefInfo::NoModRef, + /*inaccessibleMem=*/LLVM::ModRefInfo::NoModRef); + auto funcAttrs = convergentNoUnwindWillReturnAttrs; + funcAttrs.memEffectsAttr = memAttr; + Value result = + createDeviceFunctionCall(rewriter, fnName, resTy, argTypes, args, {}, + funcAttrs, op.getOperation()) + ->getResult(0); + + if (resOrigTy != resTy) + result = rewriter.create(loc, resOrigTy, result); + + rewriter.replaceOp(op, result); + return success(); + } + +private: + static unsigned getNumOperandsPerDword(xevm::ElemType pTy) { + switch (pTy) { + case xevm::ElemType::TF32: + return 1; + case xevm::ElemType::BF16: + case xevm::ElemType::F16: + return 2; + case xevm::ElemType::U8: + case xevm::ElemType::S8: + return 4; + default: + llvm_unreachable("unsupported xevm::ElemType"); + } + } +}; + +class PrefetchToOCLPattern : public OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + LogicalResult + 
matchAndRewrite(PrefetchOp op, PrefetchOp::Adaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + auto loc = op.getLoc(); + const std::string fnName{"_Z8prefetchPU3AS1Kcm"}; + Value one = + rewriter.create(loc, rewriter.getI64Type(), 1); + SmallVector args{op.getPtr(), one}; + SmallVector argTypes; + for (auto arg : args) + argTypes.push_back(arg.getType()); + auto funcAttr = noUnwindAttrs; + auto memAttr = rewriter.getAttr( + /*other=*/LLVM::ModRefInfo::NoModRef, + /*argMem=*/LLVM::ModRefInfo::Ref, + /*inaccessibleMem=*/LLVM::ModRefInfo::NoModRef); + funcAttr.memEffectsAttr = memAttr; + + LLVM::CallOp call = createDeviceFunctionCall( + rewriter, fnName, LLVM::LLVMVoidType::get(rewriter.getContext()), + argTypes, args, {}, funcAttr, op.getOperation()); + if (std::optional optCacheControls = + getCacheControlMetadata(rewriter, op)) + call->setAttr(XeVMDialect::getCacheControlsAttrName(), *optCacheControls); + rewriter.eraseOp(op); + return success(); + } +}; + +class MemfenceToOCLPattern : public OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + LogicalResult + matchAndRewrite(MemfenceOp op, MemfenceOp::Adaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + auto loc = op.getLoc(); + const std::string fnName{"atomic_work_item_fence"}; + int memScope, addrSpace; + switch (op.getAddrspace()) { + case xevm::AddrSpace::SHARED: + addrSpace = 1; // CLK_LOCAL_MEM_FENCE + break; + case xevm::AddrSpace::GLOBAL: + addrSpace = 2; // CLK_GLOBAL_MEM_FENCE + break; + default: + // GENERIC is not supported in OpenCL + return rewriter.notifyMatchFailure( + op, "Fence only supports global and shared address spaces."); + } + switch (op.getScope()) { + case xevm::MemScope::WORKGROUP: + memScope = 1; + break; + case xevm::MemScope::DEVICE: + memScope = 2; + break; + default: + // CLUSTER and SYSTEM are not supported in OpenCL + return rewriter.notifyMatchFailure( + op, "Fence only supports workgroup and device memory 
scopes."); + } + Type i32Type = rewriter.getI32Type(); + Value acqRel = rewriter.create(loc, i32Type, 4); + Value memScopeConst = + rewriter.create(loc, i32Type, memScope); + Value addrSpaceConst = + rewriter.create(loc, i32Type, addrSpace); + SmallVector args{addrSpaceConst, acqRel, memScopeConst}; + SmallVector argTypes{3, i32Type}; + createDeviceFunctionCall(rewriter, mangle(fnName, argTypes), + LLVM::LLVMVoidType::get(rewriter.getContext()), + argTypes, args, {}, noUnwindAttrs, + op.getOperation()); + rewriter.eraseOp(op); + return success(); + } +}; +template +class LoadStorePrefetchToOCLPattern : public OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + LogicalResult + matchAndRewrite(OpType op, typename OpType::Adaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + constexpr bool isLoad = std::is_same_v; + constexpr bool isPrefetch = std::is_same_v; + + auto loc = op.getLoc(); + VectorType vecType; + bool packReg = false; + bool transpose = false; + if constexpr (isLoad) { + vecType = op.getRes().getType(); + packReg = op.getPackRegister(); + transpose = op.getTranspose(); + } else if constexpr (!isPrefetch) { + vecType = op.getStoredVal().getType(); + } + + auto i32Type = rewriter.getI32Type(); + Value byteCoord = + rewriter.create(loc, VectorType::get(2, i32Type)); + Value zero = rewriter.create(loc, i32Type, 0); + Value one = rewriter.create(loc, i32Type, 1); + byteCoord = rewriter.create( + loc, VectorType::get(2, i32Type), byteCoord, op.getX(), zero); + byteCoord = rewriter.create( + loc, VectorType::get(2, i32Type), byteCoord, op.getY(), one); + SmallVector args{op.getPtr(), op.getBaseWidth(), op.getBaseHeight(), + op.getBasePitch(), byteCoord}; + SmallVector retTypes; + Value spvLoadDstPtr; + std::string funcName{"intel_sub_group_2d_block_"}; + std::string bitWidthId; + LLVMFuncAttributeOptions funcAttr{noUnwindWillReturnAttrs}; + SmallVector, 4> paramAttrs; + if constexpr (isPrefetch) { // Prefetch + 
funcName += "prefetch"; + paramAttrs = {std::make_pair(0, LLVM::LLVMDialect::getNonNullAttrName())}; + auto memAttr = rewriter.getAttr( + /*other=*/LLVM::ModRefInfo::NoModRef, + /*argMem=*/LLVM::ModRefInfo::Ref, + /*inaccessibleMem=*/LLVM::ModRefInfo::NoModRef); + funcAttr = noUnwindAttrs; + funcAttr.memEffectsAttr = memAttr; + } else { + auto vecElemType = vecType.getElementType(); + auto vecElemBitWidth = vecElemType.getIntOrFloatBitWidth(); + Value numElems = rewriter.create( + loc, i32Type, vecType.getNumElements()); + auto dstOrSrcPtr = rewriter.create( + loc, LLVM::LLVMPointerType::get(rewriter.getContext()), vecElemType, + numElems); + args.push_back(dstOrSrcPtr); + if constexpr (isLoad) { // Load + funcName += "read"; + bitWidthId = getTypeMangling(vecElemType, /*isUnsigned=*/true); + if (packReg) + funcName += "_transform"; + else if (transpose) + funcName += "_transpose"; + spvLoadDstPtr = dstOrSrcPtr; + retTypes.push_back(vecType); + paramAttrs = { + std::make_pair(0, LLVM::LLVMDialect::getNonNullAttrName()), + std::make_pair(0, LLVM::LLVMDialect::getReadonlyAttrName()), + std::make_pair(5, LLVM::LLVMDialect::getNonNullAttrName()), + std::make_pair(5, LLVM::LLVMDialect::getWriteOnlyAttrName()), + }; + } else { // Store + funcName += "write"; + bitWidthId = (vecElemBitWidth == 32) + ? "j" + : ((vecElemBitWidth == 16) ? 
"t" : "h"); + rewriter.create(loc, op.getStoredVal(), dstOrSrcPtr); + paramAttrs = { + std::make_pair(0, LLVM::LLVMDialect::getNonNullAttrName()), + std::make_pair(0, LLVM::LLVMDialect::getWriteOnlyAttrName()), + std::make_pair(5, LLVM::LLVMDialect::getNonNullAttrName()), + std::make_pair(5, LLVM::LLVMDialect::getReadonlyAttrName()), + }; + } + } + + funcName = + llvm::formatv("{0}_{1}b_{2}r{3}x{4}c", funcName, op.getElemSizeInBits(), + op.getTileHeight(), op.getTileWidth(), op.getVBlocks()) + .str(); + std::string prefetchCode(""); + if (!isPrefetch) + prefetchCode += "P"; + funcName = llvm::formatv("_Z{0}{1}PU3AS1viiiDv2_i{2}{3}", funcName.size(), + funcName, prefetchCode, bitWidthId) + .str(); + SmallVector argTypes; + for (auto arg : args) { + argTypes.push_back(arg.getType()); + } + LLVM::CallOp call = createDeviceFunctionCall( + rewriter, funcName, LLVM::LLVMVoidType::get(rewriter.getContext()), + argTypes, args, paramAttrs, funcAttr, op.getOperation()); + if (std::optional optCacheControls = + getCacheControlMetadata < isLoad || isPrefetch > (rewriter, op)) { + call->setAttr(XeVMDialect::getCacheControlsAttrName(), *optCacheControls); + } + if constexpr (isLoad) + rewriter.replaceOp( + op, rewriter.create(loc, vecType, spvLoadDstPtr)); + else + rewriter.eraseOp(op); + return success(); + } +}; + +//===----------------------------------------------------------------------===// +// Pass Definition +//===----------------------------------------------------------------------===// + +struct ConvertXeVMToLLVMPass + : public impl::ConvertXeVMToLLVMPassBase { + using Base::Base; + + void getDependentDialects(DialectRegistry ®istry) const override { + registry.insert(); + } + + void runOnOperation() override { + ConversionTarget target(getContext()); + target.addLegalDialect(); + target.addIllegalDialect(); + RewritePatternSet patterns(&getContext()); + populateXeVMToLLVMConversionPatterns(patterns); + if (failed(applyPartialConversion(getOperation(), target, + 
std::move(patterns)))) + signalPassFailure(); + } +}; +} // namespace + +//===----------------------------------------------------------------------===// +// ConvertToLLVMPatternInterface implementation +//===----------------------------------------------------------------------===// + +namespace { +/// Implement the interface to convert XeVM to LLVM. +struct XeVMToLLVMDialectInterface : public ConvertToLLVMPatternInterface { + using ConvertToLLVMPatternInterface::ConvertToLLVMPatternInterface; + void loadDependentDialects(MLIRContext *context) const final { + context->loadDialect(); + } + + /// Hook for derived dialect interface to provide conversion patterns + /// and mark dialect legal for the conversion target. + void populateConvertToLLVMConversionPatterns( + ConversionTarget &target, LLVMTypeConverter &typeConverter, + RewritePatternSet &patterns) const final { + populateXeVMToLLVMConversionPatterns(patterns); + } +}; +} // namespace + +//===----------------------------------------------------------------------===// +// Pattern Population +//===----------------------------------------------------------------------===// + +void ::mlir::populateXeVMToLLVMConversionPatterns(RewritePatternSet &patterns) { + patterns.add, + LoadStorePrefetchToOCLPattern, + LoadStorePrefetchToOCLPattern, + MMAToOCLPattern, MemfenceToOCLPattern, PrefetchToOCLPattern>( + patterns.getContext()); +} + +void ::mlir::registerConvertXeVMToLLVMInterface(DialectRegistry ®istry) { + registry.addExtension(+[](MLIRContext *ctx, XeVMDialect *dialect) { + dialect->addInterfaces(); + }); +} diff --git a/mlir/lib/Dialect/GPU/CMakeLists.txt b/mlir/lib/Dialect/GPU/CMakeLists.txt index 4862d1f722785..f2f010a771b77 100644 --- a/mlir/lib/Dialect/GPU/CMakeLists.txt +++ b/mlir/lib/Dialect/GPU/CMakeLists.txt @@ -44,6 +44,7 @@ add_mlir_dialect_library(MLIRGPUTransforms Transforms/ShuffleRewriter.cpp Transforms/SubgroupIdRewriter.cpp Transforms/SubgroupReduceLowering.cpp + Transforms/XeVMAttachTarget.cpp 
OBJECT @@ -78,6 +79,7 @@ add_mlir_dialect_library(MLIRGPUTransforms MLIRSupport MLIRTransformUtils MLIRVectorDialect + MLIRXeVMDialect ) add_subdirectory(TransformOps) diff --git a/mlir/lib/Dialect/GPU/Transforms/XeVMAttachTarget.cpp b/mlir/lib/Dialect/GPU/Transforms/XeVMAttachTarget.cpp new file mode 100644 index 0000000000000..e9cf4939a13b8 --- /dev/null +++ b/mlir/lib/Dialect/GPU/Transforms/XeVMAttachTarget.cpp @@ -0,0 +1,92 @@ +//===-- XeVMAttachTarget.cpp - Attach an XeVM target ----------------------===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the `GpuXeVMAttachTarget` pass, attaching `#xevm.target` +// attributes to GPU modules. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/GPU/Transforms/Passes.h" + +#include "mlir/Dialect/GPU/IR/GPUDialect.h" +#include "mlir/Dialect/LLVMIR/XeVMDialect.h" +#include "mlir/IR/Builders.h" +#include "mlir/Pass/Pass.h" +#include "llvm/Support/Regex.h" + +namespace mlir { +#define GEN_PASS_DEF_GPUXEVMATTACHTARGET +#include "mlir/Dialect/GPU/Transforms/Passes.h.inc" +} // namespace mlir + +using namespace mlir; +using namespace mlir::xevm; + +namespace { +struct XeVMAttachTarget + : public mlir::impl::GpuXeVMAttachTargetBase { + using Base::Base; + + DictionaryAttr getFlags(OpBuilder &builder) const; + + void runOnOperation() override; + + void getDependentDialects(DialectRegistry ®istry) const override { + registry.insert(); + } +}; +} // namespace + +DictionaryAttr XeVMAttachTarget::getFlags(OpBuilder &builder) const { + SmallVector flags; + // Tokenize and set the optional command line options. 
+ if (!cmdOptions.empty()) { + std::pair> options = + gpu::TargetOptions::tokenizeCmdOptions(cmdOptions); + if (!options.second.empty()) { + llvm::SmallVector xevmOptionAttrs; + for (const char *opt : options.second) { + xevmOptionAttrs.emplace_back( + mlir::StringAttr::get(builder.getContext(), StringRef(opt))); + } + flags.push_back(builder.getNamedAttr( + "cmd-options", + mlir::ArrayAttr::get(builder.getContext(), xevmOptionAttrs))); + } + } + + if (!flags.empty()) + return builder.getDictionaryAttr(flags); + return nullptr; +} + +void XeVMAttachTarget::runOnOperation() { + OpBuilder builder(&getContext()); + ArrayRef libs(linkLibs); + SmallVector filesToLink(libs); + auto target = builder.getAttr( + optLevel, triple, chip, getFlags(builder), + filesToLink.empty() ? nullptr : builder.getStrArrayAttr(filesToLink)); + llvm::Regex matcher(moduleMatcher); + for (Region ®ion : getOperation()->getRegions()) + for (Block &block : region.getBlocks()) + for (auto module : block.getOps()) { + // Check if the name of the module matches. + if (!moduleMatcher.empty() && !matcher.match(module.getName())) + continue; + // Create the target array. + SmallVector targets; + if (std::optional attrs = module.getTargets()) + targets.append(attrs->getValue().begin(), attrs->getValue().end()); + targets.push_back(target); + // Remove any duplicate targets. + targets.erase(llvm::unique(targets), targets.end()); + // Update the target attribute array. 
+ module.setTargetsAttr(builder.getArrayAttr(targets)); + } +} diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp index 80c807e774a7e..f2eab62b286af 100644 --- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp +++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp @@ -293,22 +293,15 @@ static LogicalResult checkVarAndVarType(Op op) { if (!op.getVar()) return op.emitError("must have var operand"); - if (mlir::isa(op.getVar().getType()) && - mlir::isa(op.getVar().getType())) { - // TODO: If a type implements both interfaces (mappable and pointer-like), - // it is unclear which semantics to apply without additional info which - // would need captured in the data operation. For now restrict this case - // unless a compelling reason to support disambiguating between the two. - return op.emitError("var must be mappable or pointer-like (not both)"); - } - + // A variable must have a type that is either pointer-like or mappable. if (!mlir::isa(op.getVar().getType()) && !mlir::isa(op.getVar().getType())) return op.emitError("var must be mappable or pointer-like"); - if (mlir::isa(op.getVar().getType()) && - op.getVarType() != op.getVar().getType()) - return op.emitError("varType must match when var is mappable"); + // When it is a pointer-like type, the varType must capture the target type. 
+ if (mlir::isa(op.getVar().getType()) && + op.getVarType() == op.getVar().getType()) + return op.emitError("varType must capture the element type of var"); return success(); } diff --git a/mlir/lib/Dialect/SPIRV/IR/CMakeLists.txt b/mlir/lib/Dialect/SPIRV/IR/CMakeLists.txt index 1a8f30dd39871..b9aa7b7491abf 100644 --- a/mlir/lib/Dialect/SPIRV/IR/CMakeLists.txt +++ b/mlir/lib/Dialect/SPIRV/IR/CMakeLists.txt @@ -7,9 +7,9 @@ add_mlir_dialect_library(MLIRSPIRVDialect CastOps.cpp ControlFlowOps.cpp CooperativeMatrixOps.cpp + DotProductOps.cpp GroupOps.cpp ImageOps.cpp - IntegerDotProductOps.cpp MemoryOps.cpp MeshOps.cpp SPIRVAttributes.cpp diff --git a/mlir/lib/Dialect/SPIRV/IR/IntegerDotProductOps.cpp b/mlir/lib/Dialect/SPIRV/IR/DotProductOps.cpp similarity index 83% rename from mlir/lib/Dialect/SPIRV/IR/IntegerDotProductOps.cpp rename to mlir/lib/Dialect/SPIRV/IR/DotProductOps.cpp index f5676f36a0f5f..01ef1bdc42515 100644 --- a/mlir/lib/Dialect/SPIRV/IR/IntegerDotProductOps.cpp +++ b/mlir/lib/Dialect/SPIRV/IR/DotProductOps.cpp @@ -1,4 +1,4 @@ -//===- IntegerDotProductOps.cpp - MLIR SPIR-V Integer Dot Product Ops ----===// +//===- DotProductOps.cpp - MLIR SPIR-V Dot Product Ops -------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// Defines the Integer Dot Product operations in the SPIR-V dialect. +// Defines the Dot Product operations in the SPIR-V dialect. 
// //===----------------------------------------------------------------------===// @@ -21,6 +21,44 @@ using namespace mlir::spirv::AttrNames; namespace mlir::spirv { +//===----------------------------------------------------------------------===// +// Dot Product ops +//===----------------------------------------------------------------------===// + +static std::optional getDotProductMinVersion() { + return spirv::Version::V_1_0; // Available in SPIR-V >= 1.0. +} + +static std::optional getDotProductMaxVersion() { + return spirv::Version::V_1_6; // Available in SPIR-V <= 1.6. +} + +SmallVector, 1> DotOp::getExtensions() { + if (isa(getType())) { + static const auto extension = spirv::Extension::SPV_KHR_bfloat16; + return {extension}; + } + + return {}; +} + +SmallVector, 1> DotOp::getCapabilities() { + if (isa(getType())) { + static const auto capability = spirv::Capability::BFloat16DotProductKHR; + return {capability}; + } + + return {}; +} + +std::optional DotOp::getMinVersion() { + return getDotProductMinVersion(); +} + +std::optional DotOp::getMaxVersion() { + return getDotProductMaxVersion(); +} + //===----------------------------------------------------------------------===// // Integer Dot Product ops //===----------------------------------------------------------------------===// @@ -71,14 +109,6 @@ static LogicalResult verifyIntegerDotProduct(Operation *op) { return success(); } -static std::optional getIntegerDotProductMinVersion() { - return spirv::Version::V_1_0; // Available in SPIR-V >= 1.0. -} - -static std::optional getIntegerDotProductMaxVersion() { - return spirv::Version::V_1_6; // Available in SPIR-V <= 1.6. 
-} - static SmallVector, 1> getIntegerDotProductExtensions() { // Requires the SPV_KHR_integer_dot_product extension, specified either @@ -136,10 +166,10 @@ getIntegerDotProductCapabilities(Operation *op) { return getIntegerDotProductCapabilities(*this); \ } \ std::optional OpName::getMinVersion() { \ - return getIntegerDotProductMinVersion(); \ + return getDotProductMinVersion(); \ } \ std::optional OpName::getMaxVersion() { \ - return getIntegerDotProductMaxVersion(); \ + return getDotProductMaxVersion(); \ } SPIRV_IMPL_INTEGER_DOT_PRODUCT_OP(SDotOp) diff --git a/mlir/lib/Dialect/Tensor/Extensions/MeshShardingExtensions.cpp b/mlir/lib/Dialect/Tensor/Extensions/MeshShardingExtensions.cpp index fc93f1c1c9220..26406ceef082c 100644 --- a/mlir/lib/Dialect/Tensor/Extensions/MeshShardingExtensions.cpp +++ b/mlir/lib/Dialect/Tensor/Extensions/MeshShardingExtensions.cpp @@ -11,10 +11,6 @@ #include "mlir/Dialect/Tensor/IR/ShardingInterfaceImpl.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/IR/DialectRegistry.h" -#include "llvm/Support/Debug.h" - -#define DEBUG_TYPE "tensor-sharding-impl" -#define DBGS() (llvm::dbgs() << "[" DEBUG_TYPE << "]: ") using namespace mlir; using namespace mlir::tensor; diff --git a/mlir/lib/Dialect/Tensor/IR/TensorDialect.cpp b/mlir/lib/Dialect/Tensor/IR/TensorDialect.cpp index 5b65e47bc937b..2c9cd87f14af2 100644 --- a/mlir/lib/Dialect/Tensor/IR/TensorDialect.cpp +++ b/mlir/lib/Dialect/Tensor/IR/TensorDialect.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "mlir/Dialect/Affine/IR/AffineOps.h" -#include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h" #include "mlir/Dialect/Complex/IR/Complex.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" diff --git a/mlir/lib/Dialect/Tensor/IR/TensorInferTypeOpInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/IR/TensorInferTypeOpInterfaceImpl.cpp index 3e3422162a8da..4ec13e189f621 100644 --- 
a/mlir/lib/Dialect/Tensor/IR/TensorInferTypeOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Tensor/IR/TensorInferTypeOpInterfaceImpl.cpp @@ -10,7 +10,6 @@ #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Arith/Utils/Utils.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" -#include "mlir/Dialect/Utils/StaticValueUtils.h" #include "mlir/Interfaces/InferTypeOpInterface.h" using namespace mlir; diff --git a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp index a3e863254405c..b035a53692dcf 100644 --- a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp +++ b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp @@ -35,11 +35,8 @@ #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/LogicalResult.h" #include "llvm/Support/MathExtras.h" -#include #include -#include using namespace mlir; using namespace mlir::tensor; diff --git a/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp index 92540bd56ecbc..437bc5d00faa8 100644 --- a/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp @@ -10,15 +10,11 @@ #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Affine/Utils.h" #include "mlir/Dialect/Arith/Utils/Utils.h" -#include "mlir/Dialect/Linalg/IR/Linalg.h" #include "mlir/Dialect/Linalg/Utils/Utils.h" #include "mlir/Dialect/SCF/IR/SCF.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" -#include "mlir/Dialect/Tensor/Utils/Utils.h" -#include "mlir/Dialect/Utils/IndexingUtils.h" #include "mlir/Interfaces/InferTypeOpInterface.h" #include "mlir/Interfaces/TilingInterface.h" -#include "mlir/Interfaces/ValueBoundsOpInterface.h" using namespace mlir; using namespace mlir::tensor; diff --git a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp index 
829b2ab92ac24..47b41efbed83b 100644 --- a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp @@ -15,7 +15,6 @@ #include "mlir/Dialect/Bufferization/IR/DstBufferizableOpInterfaceImpl.h" #include "mlir/Dialect/Linalg/IR/Linalg.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" -#include "mlir/Dialect/SCF/IR/SCF.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/Dialect/Tensor/Transforms/SubsetInsertionOpInterfaceImpl.h" #include "mlir/Dialect/Utils/StaticValueUtils.h" diff --git a/mlir/lib/Dialect/Tensor/Transforms/ConcatOpPatterns.cpp b/mlir/lib/Dialect/Tensor/Transforms/ConcatOpPatterns.cpp index a2a860fcb38ab..20bed05ecc11d 100644 --- a/mlir/lib/Dialect/Tensor/Transforms/ConcatOpPatterns.cpp +++ b/mlir/lib/Dialect/Tensor/Transforms/ConcatOpPatterns.cpp @@ -6,9 +6,6 @@ // //===----------------------------------------------------------------------===// -#include "mlir/Dialect/Affine/IR/AffineOps.h" -#include "mlir/Dialect/Arith/IR/Arith.h" -#include "mlir/Dialect/Arith/Utils/Utils.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/Dialect/Tensor/Transforms/Transforms.h" #include "mlir/IR/PatternMatch.h" diff --git a/mlir/lib/Dialect/Tensor/Transforms/EmptyOpPatterns.cpp b/mlir/lib/Dialect/Tensor/Transforms/EmptyOpPatterns.cpp index fa748cf01977f..3c2b0ab42f7a6 100644 --- a/mlir/lib/Dialect/Tensor/Transforms/EmptyOpPatterns.cpp +++ b/mlir/lib/Dialect/Tensor/Transforms/EmptyOpPatterns.cpp @@ -9,7 +9,6 @@ #include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/Dialect/Tensor/Transforms/Transforms.h" #include "mlir/IR/PatternMatch.h" -#include "llvm/Support/Debug.h" using namespace mlir; using namespace mlir::tensor; diff --git a/mlir/lib/Dialect/Tensor/Transforms/ExtractSliceFromReshapeUtils.cpp b/mlir/lib/Dialect/Tensor/Transforms/ExtractSliceFromReshapeUtils.cpp index e0acaee9f6626..dd50ae54d17cc 100644 --- 
a/mlir/lib/Dialect/Tensor/Transforms/ExtractSliceFromReshapeUtils.cpp +++ b/mlir/lib/Dialect/Tensor/Transforms/ExtractSliceFromReshapeUtils.cpp @@ -11,11 +11,9 @@ // //===----------------------------------------------------------------------===// #include "mlir/Dialect/Affine/IR/AffineOps.h" -#include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/Arith/Utils/Utils.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/Dialect/Tensor/Transforms/TransformUtils.h" -#include "mlir/Dialect/Tensor/Transforms/Transforms.h" #include "mlir/Dialect/Utils/ReshapeOpsUtils.h" #include "mlir/Dialect/Utils/StaticValueUtils.h" #include "mlir/IR/BuiltinTypes.h" diff --git a/mlir/lib/Dialect/Tensor/Transforms/FoldTensorSubsetOps.cpp b/mlir/lib/Dialect/Tensor/Transforms/FoldTensorSubsetOps.cpp index a787b485f7162..13de55b0672a5 100644 --- a/mlir/lib/Dialect/Tensor/Transforms/FoldTensorSubsetOps.cpp +++ b/mlir/lib/Dialect/Tensor/Transforms/FoldTensorSubsetOps.cpp @@ -16,14 +16,11 @@ #include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/Dialect/Tensor/Transforms/Passes.h" #include "mlir/Dialect/Tensor/Transforms/Transforms.h" -#include "mlir/Dialect/Utils/IndexingUtils.h" #include "mlir/Dialect/Vector/IR/VectorOps.h" #include "mlir/Dialect/Vector/Utils/VectorUtils.h" #include "mlir/IR/AffineMap.h" #include "mlir/IR/BuiltinAttributes.h" -#include "mlir/Interfaces/ValueBoundsOpInterface.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" -#include "llvm/ADT/TypeSwitch.h" #include namespace mlir { diff --git a/mlir/lib/Dialect/Tensor/Transforms/IndependenceTransforms.cpp b/mlir/lib/Dialect/Tensor/Transforms/IndependenceTransforms.cpp index 4655fa3cf0d23..bad56d4111dca 100644 --- a/mlir/lib/Dialect/Tensor/Transforms/IndependenceTransforms.cpp +++ b/mlir/lib/Dialect/Tensor/Transforms/IndependenceTransforms.cpp @@ -11,7 +11,6 @@ #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Affine/Transforms/Transforms.h" #include 
"mlir/Dialect/Tensor/IR/Tensor.h" -#include "mlir/Dialect/Utils/StaticValueUtils.h" #include "mlir/Interfaces/ValueBoundsOpInterface.h" using namespace mlir; diff --git a/mlir/lib/Dialect/Tensor/Transforms/ReshapePatterns.cpp b/mlir/lib/Dialect/Tensor/Transforms/ReshapePatterns.cpp index 657624b817af2..20bb4d1caf019 100644 --- a/mlir/lib/Dialect/Tensor/Transforms/ReshapePatterns.cpp +++ b/mlir/lib/Dialect/Tensor/Transforms/ReshapePatterns.cpp @@ -13,7 +13,6 @@ #include "mlir/IR/PatternMatch.h" #include "mlir/Interfaces/ValueBoundsOpInterface.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/LogicalResult.h" using namespace mlir; diff --git a/mlir/lib/Dialect/Tensor/Transforms/RuntimeOpVerification.cpp b/mlir/lib/Dialect/Tensor/Transforms/RuntimeOpVerification.cpp index 6138821ee8c61..6e3285abffbfc 100644 --- a/mlir/lib/Dialect/Tensor/Transforms/RuntimeOpVerification.cpp +++ b/mlir/lib/Dialect/Tensor/Transforms/RuntimeOpVerification.cpp @@ -13,7 +13,6 @@ #include "mlir/Dialect/ControlFlow/IR/ControlFlow.h" #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" -#include "mlir/Dialect/Utils/IndexingUtils.h" #include "mlir/Interfaces/RuntimeVerifiableOpInterface.h" using namespace mlir; diff --git a/mlir/lib/Dialect/Tensor/Utils/Utils.cpp b/mlir/lib/Dialect/Tensor/Utils/Utils.cpp index 289296a07d9d3..3f6258b5e4d43 100644 --- a/mlir/lib/Dialect/Tensor/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Tensor/Utils/Utils.cpp @@ -13,10 +13,8 @@ #include "mlir/Dialect/Tensor/Utils/Utils.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" -#include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/Arith/Utils/Utils.h" #include "mlir/Dialect/Utils/IndexingUtils.h" -#include "mlir/Dialect/Vector/IR/VectorOps.h" #include "mlir/Interfaces/ValueBoundsOpInterface.h" using namespace mlir; diff --git a/mlir/lib/Dialect/Tosa/IR/ShardingInterfaceImpl.cpp b/mlir/lib/Dialect/Tosa/IR/ShardingInterfaceImpl.cpp 
index be29298a35aeb..d3a5f44798106 100644 --- a/mlir/lib/Dialect/Tosa/IR/ShardingInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Tosa/IR/ShardingInterfaceImpl.cpp @@ -13,7 +13,6 @@ #include "mlir/Dialect/Tosa/IR/TosaOps.h" #include "mlir/IR/AffineMap.h" #include "mlir/IR/DialectRegistry.h" -#include "llvm/Support/Debug.h" #define DEBUG_TYPE "tosa-sharding-impl" #define DBGS() (llvm::dbgs() << "[" DEBUG_TYPE << "]: ") diff --git a/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp b/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp index 1d21096e8920b..2dd45d27157cb 100644 --- a/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp +++ b/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp @@ -15,20 +15,14 @@ #include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/Dialect/Tosa/IR/TosaOps.h" #include "mlir/Dialect/Tosa/Utils/ConversionUtils.h" -#include "mlir/Dialect/Tosa/Utils/QuantUtils.h" -#include "mlir/Dialect/Tosa/Utils/ShapeUtils.h" #include "mlir/IR/BuiltinTypeInterfaces.h" #include "mlir/IR/BuiltinTypes.h" -#include "mlir/IR/DialectImplementation.h" #include "mlir/IR/Matchers.h" #include "mlir/IR/PatternMatch.h" #include "mlir/Transforms/FoldUtils.h" #include "mlir/Transforms/InliningUtils.h" -#include "mlir/Transforms/RegionUtils.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/TypeSwitch.h" #include diff --git a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp index 5170a11523845..4a952ac062cad 100644 --- a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp +++ b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp @@ -22,12 +22,10 @@ #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/DialectImplementation.h" #include "mlir/IR/Matchers.h" -#include "mlir/IR/PatternMatch.h" #include "mlir/IR/TypeUtilities.h" #include "mlir/Interfaces/InferTypeOpInterface.h" #include "mlir/Transforms/InliningUtils.h" #include "llvm/ADT/APFloat.h" -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/TypeSwitch.h" #include 
diff --git a/mlir/lib/Dialect/Tosa/Transforms/TosaDecomposeDepthwise.cpp b/mlir/lib/Dialect/Tosa/Transforms/TosaDecomposeDepthwise.cpp index 9b4cf85c480d3..f6caa2a985a4d 100644 --- a/mlir/lib/Dialect/Tosa/Transforms/TosaDecomposeDepthwise.cpp +++ b/mlir/lib/Dialect/Tosa/Transforms/TosaDecomposeDepthwise.cpp @@ -15,7 +15,6 @@ #include "mlir/Dialect/Tosa/Transforms/Passes.h" #include "mlir/Dialect/Tosa/Utils/ConversionUtils.h" #include "mlir/IR/BuiltinTypes.h" -#include "mlir/Pass/Pass.h" using namespace mlir; using namespace mlir::tosa; diff --git a/mlir/lib/Dialect/Tosa/Transforms/TosaDecomposeTransposeConv.cpp b/mlir/lib/Dialect/Tosa/Transforms/TosaDecomposeTransposeConv.cpp index ea6ac981b53cc..df6d52615478e 100644 --- a/mlir/lib/Dialect/Tosa/Transforms/TosaDecomposeTransposeConv.cpp +++ b/mlir/lib/Dialect/Tosa/Transforms/TosaDecomposeTransposeConv.cpp @@ -18,8 +18,6 @@ #include "mlir/Dialect/Tosa/IR/TosaOps.h" #include "mlir/Dialect/Tosa/Transforms/Passes.h" #include "mlir/Dialect/Tosa/Utils/ConversionUtils.h" -#include "mlir/Dialect/Tosa/Utils/ShapeUtils.h" -#include "mlir/Pass/Pass.h" using namespace mlir; using namespace mlir::tosa; diff --git a/mlir/lib/Dialect/Tosa/Transforms/TosaFolders.cpp b/mlir/lib/Dialect/Tosa/Transforms/TosaFolders.cpp index 9c6658c9a5bf8..d33ebe397cd35 100644 --- a/mlir/lib/Dialect/Tosa/Transforms/TosaFolders.cpp +++ b/mlir/lib/Dialect/Tosa/Transforms/TosaFolders.cpp @@ -20,9 +20,6 @@ #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/DialectResourceBlobManager.h" #include "mlir/IR/Matchers.h" -#include "mlir/Pass/Pass.h" -#include "llvm/ADT/APFloat.h" -#include "llvm/ADT/FloatingPointMode.h" #include "llvm/ADT/SmallVector.h" using namespace mlir; diff --git a/mlir/lib/Dialect/Tosa/Transforms/TosaInferShapes.cpp b/mlir/lib/Dialect/Tosa/Transforms/TosaInferShapes.cpp index 29ec9f8db2615..a9e98c8908e15 100644 --- a/mlir/lib/Dialect/Tosa/Transforms/TosaInferShapes.cpp +++ b/mlir/lib/Dialect/Tosa/Transforms/TosaInferShapes.cpp @@ -18,9 
+18,7 @@ #include "mlir/Dialect/Tosa/IR/TosaOps.h" #include "mlir/Dialect/Tosa/Utils/ShapeUtils.h" #include "mlir/IR/Builders.h" -#include "mlir/IR/ImplicitLocOpBuilder.h" #include "mlir/Interfaces/InferTypeOpInterface.h" -#include "mlir/Pass/Pass.h" #include "mlir/Transforms/DialectConversion.h" namespace mlir { diff --git a/mlir/lib/Dialect/Tosa/Transforms/TosaLayerwiseConstantFoldPass.cpp b/mlir/lib/Dialect/Tosa/Transforms/TosaLayerwiseConstantFoldPass.cpp index f4ce950828646..aae1ba359e859 100644 --- a/mlir/lib/Dialect/Tosa/Transforms/TosaLayerwiseConstantFoldPass.cpp +++ b/mlir/lib/Dialect/Tosa/Transforms/TosaLayerwiseConstantFoldPass.cpp @@ -13,8 +13,6 @@ #include "mlir/Dialect/Tosa/Transforms/Passes.h" #include "mlir/Dialect/Func/IR/FuncOps.h" -#include "mlir/Dialect/Tosa/IR/TosaOps.h" -#include "mlir/Pass/Pass.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" namespace mlir { diff --git a/mlir/lib/Dialect/Tosa/Transforms/TosaMakeBroadcastable.cpp b/mlir/lib/Dialect/Tosa/Transforms/TosaMakeBroadcastable.cpp index 7997753469527..8f96fc1b80abe 100644 --- a/mlir/lib/Dialect/Tosa/Transforms/TosaMakeBroadcastable.cpp +++ b/mlir/lib/Dialect/Tosa/Transforms/TosaMakeBroadcastable.cpp @@ -11,12 +11,9 @@ //===----------------------------------------------------------------------===// #include "mlir/Dialect/Func/IR/FuncOps.h" -#include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/Dialect/Tosa/IR/TosaOps.h" #include "mlir/Dialect/Tosa/Transforms/Passes.h" #include "mlir/Dialect/Tosa/Utils/ConversionUtils.h" -#include "mlir/Dialect/Tosa/Utils/QuantUtils.h" -#include "mlir/Pass/Pass.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" namespace mlir { diff --git a/mlir/lib/Dialect/Tosa/Transforms/TosaOptionalDecompositions.cpp b/mlir/lib/Dialect/Tosa/Transforms/TosaOptionalDecompositions.cpp index 2092379e65368..ec1865a3bede1 100644 --- a/mlir/lib/Dialect/Tosa/Transforms/TosaOptionalDecompositions.cpp +++ 
b/mlir/lib/Dialect/Tosa/Transforms/TosaOptionalDecompositions.cpp @@ -15,8 +15,6 @@ #include "mlir/Dialect/Tosa/Transforms/Passes.h" #include "mlir/Dialect/Func/IR/FuncOps.h" -#include "mlir/Dialect/Tosa/IR/TosaOps.h" -#include "mlir/Pass/Pass.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" namespace mlir { diff --git a/mlir/lib/Dialect/Tosa/Transforms/TosaReduceTransposes.cpp b/mlir/lib/Dialect/Tosa/Transforms/TosaReduceTransposes.cpp index 7f85cd52f6bde..8ebbbc94eb6a2 100644 --- a/mlir/lib/Dialect/Tosa/Transforms/TosaReduceTransposes.cpp +++ b/mlir/lib/Dialect/Tosa/Transforms/TosaReduceTransposes.cpp @@ -72,9 +72,7 @@ #include "mlir/Dialect/Tosa/Transforms/Passes.h" #include "mlir/Dialect/Tosa/Utils/ConversionUtils.h" #include "mlir/IR/Iterators.h" -#include "mlir/IR/Matchers.h" #include "llvm/ADT/TypeSwitch.h" -#include #include #include diff --git a/mlir/lib/Dialect/Transform/DebugExtension/DebugExtensionOps.cpp b/mlir/lib/Dialect/Transform/DebugExtension/DebugExtensionOps.cpp index 12257da878a40..a963b3f063a8a 100644 --- a/mlir/lib/Dialect/Transform/DebugExtension/DebugExtensionOps.cpp +++ b/mlir/lib/Dialect/Transform/DebugExtension/DebugExtensionOps.cpp @@ -8,9 +8,7 @@ #include "mlir/Dialect/Transform/DebugExtension/DebugExtensionOps.h" -#include "mlir/Dialect/Transform/IR/TransformDialect.h" #include "mlir/Dialect/Transform/IR/TransformTypes.h" -#include "mlir/IR/OpImplementation.h" #include "llvm/Support/InterleavedRange.h" using namespace mlir; diff --git a/mlir/lib/Dialect/Transform/IR/TransformDialect.cpp b/mlir/lib/Dialect/Transform/IR/TransformDialect.cpp index 4a95fe7459e8c..a500228d68c77 100644 --- a/mlir/lib/Dialect/Transform/IR/TransformDialect.cpp +++ b/mlir/lib/Dialect/Transform/IR/TransformDialect.cpp @@ -8,7 +8,6 @@ #include "mlir/Dialect/Transform/IR/TransformDialect.h" #include "mlir/Analysis/CallGraph.h" -#include "mlir/Dialect/Transform/IR/TransformAttrs.h" #include "mlir/Dialect/Transform/IR/TransformOps.h" #include 
"mlir/Dialect/Transform/IR/TransformTypes.h" #include "mlir/Dialect/Transform/IR/Utils.h" diff --git a/mlir/lib/Dialect/Transform/IR/TransformOps.cpp b/mlir/lib/Dialect/Transform/IR/TransformOps.cpp index 0db0317461c03..9266a63a0038a 100644 --- a/mlir/lib/Dialect/Transform/IR/TransformOps.cpp +++ b/mlir/lib/Dialect/Transform/IR/TransformOps.cpp @@ -9,7 +9,6 @@ #include "mlir/Dialect/Transform/IR/TransformOps.h" #include "mlir/Conversion/ConvertToLLVM/ToLLVMInterface.h" -#include "mlir/Conversion/LLVMCommon/ConversionTarget.h" #include "mlir/Conversion/LLVMCommon/TypeConverter.h" #include "mlir/Dialect/Transform/IR/TransformAttrs.h" #include "mlir/Dialect/Transform/IR/TransformDialect.h" @@ -23,11 +22,9 @@ #include "mlir/IR/OperationSupport.h" #include "mlir/IR/PatternMatch.h" #include "mlir/IR/Verifier.h" -#include "mlir/Interfaces/CallInterfaces.h" #include "mlir/Interfaces/ControlFlowInterfaces.h" #include "mlir/Interfaces/FunctionImplementation.h" #include "mlir/Interfaces/FunctionInterfaces.h" -#include "mlir/Pass/Pass.h" #include "mlir/Pass/PassManager.h" #include "mlir/Pass/PassRegistry.h" #include "mlir/Transforms/CSE.h" diff --git a/mlir/lib/Dialect/Transform/LoopExtension/LoopExtensionOps.cpp b/mlir/lib/Dialect/Transform/LoopExtension/LoopExtensionOps.cpp index 34d6221d15fb0..95870e8ef87be 100644 --- a/mlir/lib/Dialect/Transform/LoopExtension/LoopExtensionOps.cpp +++ b/mlir/lib/Dialect/Transform/LoopExtension/LoopExtensionOps.cpp @@ -8,8 +8,6 @@ #include "mlir/Dialect/Transform/LoopExtension/LoopExtensionOps.h" -#include "mlir/IR/OpImplementation.h" -#include "mlir/IR/PatternMatch.h" #include "mlir/Transforms/LoopInvariantCodeMotionUtils.h" using namespace mlir; diff --git a/mlir/lib/Dialect/Transform/PDLExtension/PDLExtensionOps.cpp b/mlir/lib/Dialect/Transform/PDLExtension/PDLExtensionOps.cpp index 85f61245eb734..41955c8a278f2 100644 --- a/mlir/lib/Dialect/Transform/PDLExtension/PDLExtensionOps.cpp +++ 
b/mlir/lib/Dialect/Transform/PDLExtension/PDLExtensionOps.cpp @@ -9,7 +9,6 @@ #include "mlir/Dialect/Transform/PDLExtension/PDLExtensionOps.h" #include "mlir/Dialect/PDL/IR/PDLOps.h" #include "mlir/IR/Builders.h" -#include "mlir/IR/OpImplementation.h" #include "mlir/Rewrite/FrozenRewritePatternSet.h" #include "mlir/Rewrite/PatternApplicator.h" #include "llvm/ADT/ScopeExit.h" diff --git a/mlir/lib/Dialect/Transform/Transforms/CheckUses.cpp b/mlir/lib/Dialect/Transform/Transforms/CheckUses.cpp index bfe1d9682177d..18dfd504203a9 100644 --- a/mlir/lib/Dialect/Transform/Transforms/CheckUses.cpp +++ b/mlir/lib/Dialect/Transform/Transforms/CheckUses.cpp @@ -15,7 +15,6 @@ #include "mlir/Dialect/Transform/Interfaces/TransformInterfaces.h" #include "mlir/Interfaces/SideEffectInterfaces.h" -#include "mlir/Pass/Pass.h" #include "llvm/ADT/SetOperations.h" namespace mlir { diff --git a/mlir/lib/Dialect/Transform/Transforms/InferEffects.cpp b/mlir/lib/Dialect/Transform/Transforms/InferEffects.cpp index 20db09ca9e8d5..364453431db7e 100644 --- a/mlir/lib/Dialect/Transform/Transforms/InferEffects.cpp +++ b/mlir/lib/Dialect/Transform/Transforms/InferEffects.cpp @@ -12,7 +12,6 @@ #include "mlir/Dialect/Transform/Interfaces/TransformInterfaces.h" #include "mlir/IR/Visitors.h" #include "mlir/Interfaces/FunctionInterfaces.h" -#include "mlir/Interfaces/SideEffectInterfaces.h" #include "llvm/ADT/DenseSet.h" using namespace mlir; diff --git a/mlir/lib/Dialect/Transform/Transforms/TransformInterpreterUtils.cpp b/mlir/lib/Dialect/Transform/Transforms/TransformInterpreterUtils.cpp index 44d82714b894b..35ace1b2e0c3a 100644 --- a/mlir/lib/Dialect/Transform/Transforms/TransformInterpreterUtils.cpp +++ b/mlir/lib/Dialect/Transform/Transforms/TransformInterpreterUtils.cpp @@ -18,11 +18,9 @@ #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/Verifier.h" #include "mlir/IR/Visitors.h" -#include "mlir/Interfaces/FunctionInterfaces.h" #include "mlir/Parser/Parser.h" #include 
"mlir/Support/FileUtilities.h" #include "llvm/ADT/StringRef.h" -#include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/SourceMgr.h" diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp index bcaea1c79471f..fe2707629d82e 100644 --- a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp +++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp @@ -1088,6 +1088,12 @@ class ExtractOpFromElementwise final if (!llvm::all_equal(eltwise->getOperandTypes())) return rewriter.notifyMatchFailure(op, "operand types are different"); + // Dynamic position can cause dominance issues, so conservatively fail for + // now. + if (!op.getDynamicPosition().empty()) + return rewriter.notifyMatchFailure( + op, "dynamic position not yet implemented"); + Type dstType = op.getType(); OpBuilder::InsertionGuard g(rewriter); diff --git a/mlir/lib/Dialect/X86Vector/IR/X86VectorDialect.cpp b/mlir/lib/Dialect/X86Vector/IR/X86VectorDialect.cpp index cc7ab7f3f3895..179e6ee8784e6 100644 --- a/mlir/lib/Dialect/X86Vector/IR/X86VectorDialect.cpp +++ b/mlir/lib/Dialect/X86Vector/IR/X86VectorDialect.cpp @@ -86,6 +86,29 @@ x86vector::DotOp::getIntrinsicOperands(ArrayRef operands, return intrinsicOperands; } +SmallVector x86vector::DotInt8Op::getIntrinsicOperands( + ArrayRef operands, const LLVMTypeConverter &typeConverter, + RewriterBase &rewriter) { + SmallVector intrinsicOprnds; + Adaptor adaptor(operands, *this); + intrinsicOprnds.push_back(adaptor.getW()); + // Bitcast `a` and `b` to i32 + Value bitcast_a = rewriter.create( + getLoc(), + VectorType::get((getA().getType().getShape()[0] / 4), + rewriter.getIntegerType(32)), + adaptor.getA()); + intrinsicOprnds.push_back(bitcast_a); + Value bitcast_b = rewriter.create( + getLoc(), + VectorType::get((getB().getType().getShape()[0] / 4), + rewriter.getIntegerType(32)), + adaptor.getB()); + 
intrinsicOprnds.push_back(bitcast_b); + + return intrinsicOprnds; +} + SmallVector x86vector::BcstToPackedF32Op::getIntrinsicOperands( ArrayRef operands, const LLVMTypeConverter &typeConverter, RewriterBase &rewriter) { diff --git a/mlir/lib/TableGen/Class.cpp b/mlir/lib/TableGen/Class.cpp index c65f67d50a47d..81f1aee73a7f0 100644 --- a/mlir/lib/TableGen/Class.cpp +++ b/mlir/lib/TableGen/Class.cpp @@ -159,6 +159,38 @@ void Method::writeDefTo(raw_indented_ostream &os, StringRef namePrefix) const { os << "}\n\n"; } +bool Method::methodPropertiesAreCompatible(Properties properties) { + const bool isStatic = (properties & Method::Static); + const bool isConstructor = (properties & Method::Constructor); + // const bool isPrivate = (properties & Method::Private); + const bool isDeclaration = (properties & Method::Declaration); + const bool isInline = (properties & Method::Inline); + const bool isConstexprValue = (properties & Method::ConstexprValue); + const bool isConst = (properties & Method::Const); + + // Note: assert to immediately fail and thus simplify debugging. 
+ if (isStatic && isConstructor) { + assert(false && "constructor cannot be static"); + return false; + } + if (isConstructor && isConst) { // albeit constexpr is fine + assert(false && "constructor cannot be const"); + return false; + } + if (isDeclaration && isInline) { + assert(false && + "declaration implies no definition and thus cannot be inline"); + return false; + } + if (isDeclaration && isConstexprValue) { + assert(false && + "declaration implies no definition and thus cannot be constexpr"); + return false; + } + + return true; +} + //===----------------------------------------------------------------------===// // Constructor definitions //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 336f71ccd06a3..68a8d1758e434 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -3095,6 +3095,67 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, return success(); } +/// Convert an omp.canonical_loop to LLVM-IR +static LogicalResult +convertOmpCanonicalLoopOp(omp::CanonicalLoopOp op, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { + llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); + + llvm::OpenMPIRBuilder::LocationDescription loopLoc(builder); + Value loopIV = op.getInductionVar(); + Value loopTC = op.getTripCount(); + + llvm::Value *llvmTC = moduleTranslation.lookupValue(loopTC); + + llvm::Expected llvmOrError = + ompBuilder->createCanonicalLoop( + loopLoc, + [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *llvmIV) { + // Register the mapping of MLIR induction variable to LLVM-IR + // induction variable + moduleTranslation.mapValue(loopIV, llvmIV); + + builder.restoreIP(ip); + llvm::Expected 
bodyGenStatus = + convertOmpOpRegions(op.getRegion(), "omp.loop.region", builder, + moduleTranslation); + + return bodyGenStatus.takeError(); + }, + llvmTC, "omp.loop"); + if (!llvmOrError) + return op.emitError(llvm::toString(llvmOrError.takeError())); + + llvm::CanonicalLoopInfo *llvmCLI = *llvmOrError; + llvm::IRBuilderBase::InsertPoint afterIP = llvmCLI->getAfterIP(); + builder.restoreIP(afterIP); + + // Register the mapping of MLIR loop to LLVM-IR OpenMPIRBuilder loop + if (Value cli = op.getCli()) + moduleTranslation.mapOmpLoop(cli, llvmCLI); + + return success(); +} + +/// Apply a `#pragma omp unroll` / "!$omp unroll" transformation using the +/// OpenMPIRBuilder. +static LogicalResult +applyUnrollHeuristic(omp::UnrollHeuristicOp op, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { + llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); + + Value applyee = op.getApplyee(); + assert(applyee && "Loop to apply unrolling on required"); + + llvm::CanonicalLoopInfo *consBuilderCLI = + moduleTranslation.lookupOMPLoop(applyee); + llvm::OpenMPIRBuilder::LocationDescription loc(builder); + ompBuilder->unrollLoopHeuristic(loc.DL, consBuilderCLI); + + moduleTranslation.invalidateOmpLoop(applyee); + return success(); +} + /// Convert an Atomic Ordering attribute to llvm::AtomicOrdering. static llvm::AtomicOrdering convertAtomicOrdering(std::optional ao) { @@ -5989,6 +6050,23 @@ convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder, // etc. and then discarded return success(); }) + .Case([&](omp::NewCliOp op) { + // Meta-operation: Doesn't do anything by itself, but used to + // identify a loop. + return success(); + }) + .Case([&](omp::CanonicalLoopOp op) { + return convertOmpCanonicalLoopOp(op, builder, moduleTranslation); + }) + .Case([&](omp::UnrollHeuristicOp op) { + // FIXME: Handling omp.unroll_heuristic as an executable requires + // that the generator (e.g. 
omp.canonical_loop) has been seen first. + // For construct that require all codegen to occur inside a callback + // (e.g. OpenMPIRBilder::createParallel), all codegen of that + // contained region including their transformations must occur at + // the omp.canonical_loop. + return applyUnrollHeuristic(op, builder, moduleTranslation); + }) .Default([&](Operation *inst) { return inst->emitError() << "not yet implemented: " << inst->getName(); diff --git a/mlir/python/mlir/dialects/TransformTuneExtensionOps.td b/mlir/python/mlir/dialects/TransformTuneExtensionOps.td index ff3047592ab12..c622c31e2c736 100644 --- a/mlir/python/mlir/dialects/TransformTuneExtensionOps.td +++ b/mlir/python/mlir/dialects/TransformTuneExtensionOps.td @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// -#ifndef PYTHON_BINDINGS_TRANSFORM_DEBUG_EXTENSION_OPS -#define PYTHON_BINDINGS_TRANSFORM_DEBUG_EXTENSION_OPS +#ifndef PYTHON_BINDINGS_TRANSFORM_TUNE_EXTENSION_OPS +#define PYTHON_BINDINGS_TRANSFORM_TUNE_EXTENSION_OPS include "mlir/Dialect/Transform/TuneExtension/TuneExtensionOps.td" -#endif // PYTHON_BINDINGS_TRANSFORM_DEBUG_EXTENSION_OPS +#endif // PYTHON_BINDINGS_TRANSFORM_TUNE_EXTENSION_OPS diff --git a/mlir/test/Conversion/VectorToSPIRV/vector-to-spirv.mlir b/mlir/test/Conversion/VectorToSPIRV/vector-to-spirv.mlir index 99ab0e1dc4eef..27fd74e12d36e 100644 --- a/mlir/test/Conversion/VectorToSPIRV/vector-to-spirv.mlir +++ b/mlir/test/Conversion/VectorToSPIRV/vector-to-spirv.mlir @@ -967,6 +967,22 @@ func.func @reduction_minui(%v : vector<3xi32>, %s: i32) -> i32 { // ----- +module attributes { spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>> } { + +// CHECK-LABEL: func @reduction_bf16_addf_mulf +// CHECK-SAME: (%[[ARG0:.+]]: vector<4xbf16>, %[[ARG1:.+]]: vector<4xbf16>) +// CHECK: %[[DOT:.+]] = spirv.Dot %[[ARG0]], %[[ARG1]] : vector<4xbf16> -> bf16 +// CHECK: return %[[DOT]] : bf16 +func.func 
@reduction_bf16_addf_mulf(%arg0: vector<4xbf16>, %arg1: vector<4xbf16>) -> bf16 { + %mul = arith.mulf %arg0, %arg1 : vector<4xbf16> + %red = vector.reduction , %mul : vector<4xbf16> into bf16 + return %red : bf16 +} + +} // end module + +// ----- + // CHECK-LABEL: @shape_cast_same_type // CHECK-SAME: (%[[ARG0:.*]]: vector<2xf32>) // CHECK: return %[[ARG0]] diff --git a/mlir/test/Conversion/XeVMToLLVM/xevm-to-llvm.mlir b/mlir/test/Conversion/XeVMToLLVM/xevm-to-llvm.mlir new file mode 100644 index 0000000000000..bdbb12bbe0cbb --- /dev/null +++ b/mlir/test/Conversion/XeVMToLLVM/xevm-to-llvm.mlir @@ -0,0 +1,244 @@ +// RUN: mlir-opt --convert-xevm-to-llvm --split-input-file %s | FileCheck %s + +// Same below, but using the `ConvertToLLVMPatternInterface` entry point +// and the generic `convert-to-llvm` pass. +// RUN: mlir-opt --convert-to-llvm --split-input-file %s | FileCheck %s + +// CHECK-LABEL: llvm.func spir_funccc @_Z41intel_sub_group_2d_block_read_16b_8r16x1cPU3AS1viiiDv2_iPt( +// CHECK-SAME: !llvm.ptr<1> {llvm.nonnull, llvm.readonly}, i32, i32, i32, vector<2xi32>, +// CHECK-SAME: !llvm.ptr {llvm.nonnull, llvm.writeonly}) attributes {no_unwind, will_return} +// CHECK: llvm.func @blockload2d(%[[ARG0:.*]]: !llvm.ptr<1>, +// CHECK-SAME: %[[ARG1:.*]]: i32, %[[ARG2:.*]]: i32, %[[ARG3:.*]]: i32, %[[ARG4:.*]]: i32, %[[ARG5:.*]]: i32) +llvm.func @blockload2d(%a: !llvm.ptr<1>, %base_width_a: i32, %base_height_a: i32, %base_pitch_a: i32, %x: i32, %y: i32) -> vector<8xi16> { + // CHECK: %[[VAR0:.*]] = llvm.mlir.undef : vector<2xi32> + // CHECK: %[[VAR1:.*]] = llvm.mlir.constant(0 : i32) : i32 + // CHECK: %[[VAR2:.*]] = llvm.mlir.constant(1 : i32) : i32 + // CHECK: %[[VAR3:.*]] = llvm.insertelement %[[ARG4]], %[[VAR0]][%[[VAR1]] : i32] : vector<2xi32> + // CHECK: %[[VAR4:.*]] = llvm.insertelement %[[ARG5]], %[[VAR3]][%[[VAR2]] : i32] : vector<2xi32> + // CHECK: %[[VAR5:.*]] = llvm.mlir.constant(8 : i32) : i32 + // CHECK: %[[VAR6:.*]] = llvm.alloca %[[VAR5]] x i16 : (i32) -> 
!llvm.ptr + // CHECK: llvm.call spir_funccc @_Z41intel_sub_group_2d_block_read_16b_8r16x1cPU3AS1viiiDv2_iPt( + // CHECK-SAME: %[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[VAR4]], %[[VAR6]]) + // CHECK-SAME: {function_type = !llvm.func, i32, i32, i32, vector<2xi32>, ptr)>, + // CHECK-SAME: linkage = #llvm.linkage, no_unwind, sym_name = + // CHECK-SAME: "_Z41intel_sub_group_2d_block_read_16b_8r16x1cPU3AS1viiiDv2_iPt", visibility_ = 0 : i64, + // CHECK-SAME: will_return} : + // CHECK-SAME: (!llvm.ptr<1> {llvm.nonnull, llvm.readonly}, i32, i32, i32, vector<2xi32>, + // CHECK-SAME: !llvm.ptr {llvm.nonnull, llvm.writeonly}) -> () + // CHECK: %[[VAR7:.*]] = llvm.load %[[VAR6]] : !llvm.ptr -> vector<8xi16> + %loaded_a = xevm.blockload2d %a, %base_width_a, %base_height_a, %base_pitch_a, %x, %y + <{elem_size_in_bits=16 : i32, tile_width=16 : i32, tile_height=8 : i32, v_blocks=1 : i32, transpose=false, + pack_register=false}> : (!llvm.ptr<1>, i32, i32, i32, i32, i32) -> vector<8xi16> + llvm.return %loaded_a : vector<8xi16> +} + +// ----- +// CHECK-LABEL: llvm.func spir_funccc @_Z41intel_sub_group_2d_block_read_16b_8r16x1cPU3AS1viiiDv2_iPt( +llvm.func @blockload2d_cache_control(%a: !llvm.ptr<1>, %base_width_a: i32, %base_height_a: i32, %base_pitch_a: i32, %x: i32, %y: i32) -> vector<8xi16> { + // CHECK: xevm.DecorationCacheControl = + // CHECK-SAME: 6442 : i32, 0 : i32, 1 : i32, 0 : i32 + // CHECK-SAME: 6442 : i32, 1 : i32, 1 : i32, 0 : i32 + %loaded_a = xevm.blockload2d %a, %base_width_a, %base_height_a, %base_pitch_a, %x, %y + <{elem_size_in_bits=16 : i32, tile_width=16 : i32, tile_height=8 : i32, v_blocks=1 : i32, transpose=false, + pack_register=false, cache_control=#xevm.load_cache_control}> : (!llvm.ptr<1>, i32, i32, i32, i32, i32) -> vector<8xi16> + llvm.return %loaded_a : vector<8xi16> +} + +// ----- +// CHECK-LABEL: llvm.func spir_funccc @_Z41intel_sub_group_2d_block_read_16b_8r16x2cPU3AS1viiiDv2_iPt( +// CHECK-SAME: !llvm.ptr<1> {llvm.nonnull, llvm.readonly}, i32, 
i32, i32, vector<2xi32>, +// CHECK-SAME: !llvm.ptr {llvm.nonnull, llvm.writeonly}) attributes {no_unwind, will_return} +// CHECK: llvm.func @blockload2d_v_blocks(%[[ARG0:.*]]: !llvm.ptr<1>, +// CHECK-SAME: %[[ARG1:.*]]: i32, %[[ARG2:.*]]: i32, %[[ARG3:.*]]: i32, %[[ARG4:.*]]: i32, %[[ARG5:.*]]: i32) +llvm.func @blockload2d_v_blocks(%a: !llvm.ptr<1>, %base_width_a: i32, %base_height_a: i32, %base_pitch_a: i32, %x: i32, %y: i32) -> vector<16xi16> { + // CHECK: %[[VAR0:.*]] = llvm.mlir.undef : vector<2xi32> + // CHECK: %[[VAR1:.*]] = llvm.mlir.constant(0 : i32) : i32 + // CHECK: %[[VAR2:.*]] = llvm.mlir.constant(1 : i32) : i32 + // CHECK: %[[VAR3:.*]] = llvm.insertelement %[[ARG4]], %[[VAR0]][%[[VAR1]] : i32] : vector<2xi32> + // CHECK: %[[VAR4:.*]] = llvm.insertelement %[[ARG5]], %[[VAR3]][%[[VAR2]] : i32] : vector<2xi32> + // CHECK: %[[VAR5:.*]] = llvm.mlir.constant(16 : i32) : i32 + // CHECK: %[[VAR6:.*]] = llvm.alloca %[[VAR5]] x i16 : (i32) -> !llvm.ptr + // CHECK: llvm.call spir_funccc @_Z41intel_sub_group_2d_block_read_16b_8r16x2cPU3AS1viiiDv2_iPt( + // CHECK-SAME: %[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[VAR4]], %[[VAR6]]) + // CHECK-SAME: {function_type = !llvm.func, i32, i32, i32, vector<2xi32>, ptr)>, + // CHECK-SAME: linkage = #llvm.linkage, no_unwind, sym_name = + // CHECK-SAME: "_Z41intel_sub_group_2d_block_read_16b_8r16x2cPU3AS1viiiDv2_iPt", visibility_ = 0 : i64, + // CHECK-SAME: will_return} + // CHECK-SAME: (!llvm.ptr<1> {llvm.nonnull, llvm.readonly}, i32, i32, i32, vector<2xi32>, + // CHECK-SAME: !llvm.ptr {llvm.nonnull, llvm.writeonly}) -> () + // CHECK: %[[VAR7:.*]] = llvm.load %[[VAR6]] : !llvm.ptr -> vector<16xi16> + %loaded_a = xevm.blockload2d %a, %base_width_a, %base_height_a, %base_pitch_a, %x, %y + <{elem_size_in_bits=16 : i32, tile_width=16 : i32, tile_height=8 : i32, v_blocks=2 : i32, transpose=false, + pack_register=false}> : (!llvm.ptr<1>, i32, i32, i32, i32, i32) -> vector<16xi16> + llvm.return %loaded_a : vector<16xi16> +} + +// 
----- +// CHECK-LABEL: llvm.func spir_funccc @_Z52intel_sub_group_2d_block_read_transform_16b_16r16x1cPU3AS1viiiDv2_iPj( +// CHECK-SAME: !llvm.ptr<1> {llvm.nonnull, llvm.readonly}, i32, i32, i32, vector<2xi32>, +// CHECK-SAME: !llvm.ptr {llvm.nonnull, llvm.writeonly}) attributes {no_unwind, will_return} +// CHECK: llvm.func @blockload2d_pack_register(%[[ARG0:.*]]: !llvm.ptr<1>, +// CHECK-SAME: %[[ARG1:.*]]: i32, %[[ARG2:.*]]: i32, %[[ARG3:.*]]: i32, %[[ARG4:.*]]: i32, %[[ARG5:.*]]: i32) +llvm.func @blockload2d_pack_register(%a: !llvm.ptr<1>, %base_width_a: i32, %base_height_a: i32, %base_pitch_a: i32, %x: i32, %y: i32) -> vector<8xi32> { + // CHECK: %[[VAR0:.*]] = llvm.mlir.undef : vector<2xi32> + // CHECK: %[[VAR1:.*]] = llvm.mlir.constant(0 : i32) : i32 + // CHECK: %[[VAR2:.*]] = llvm.mlir.constant(1 : i32) : i32 + // CHECK: %[[VAR3:.*]] = llvm.insertelement %[[ARG4]], %[[VAR0]][%[[VAR1]] : i32] : vector<2xi32> + // CHECK: %[[VAR4:.*]] = llvm.insertelement %[[ARG5]], %[[VAR3]][%[[VAR2]] : i32] : vector<2xi32> + // CHECK: %[[VAR5:.*]] = llvm.mlir.constant(8 : i32) : i32 + // CHECK: %[[VAR6:.*]] = llvm.alloca %[[VAR5]] x i32 : (i32) -> !llvm.ptr + // CHECK: llvm.call spir_funccc @_Z52intel_sub_group_2d_block_read_transform_16b_16r16x1cPU3AS1viiiDv2_iPj( + // CHECK-SAME: %[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[VAR4]], %[[VAR6]]) + // CHECK-SAME: {function_type = !llvm.func, i32, i32, i32, vector<2xi32>, ptr)>, + // CHECK-SAME: linkage = #llvm.linkage, no_unwind, sym_name = + // CHECK-SAME: "_Z52intel_sub_group_2d_block_read_transform_16b_16r16x1cPU3AS1viiiDv2_iPj", visibility_ = 0 : i64, + // CHECK-SAME: will_return} : + // CHECK-SAME: (!llvm.ptr<1> {llvm.nonnull, llvm.readonly}, i32, i32, i32, vector<2xi32>, + // CHECK-SAME: !llvm.ptr {llvm.nonnull, llvm.writeonly}) -> () + // CHECK: %[[VAR7:.*]] = llvm.load %[[VAR6]] : !llvm.ptr -> vector<8xi32> + %loaded_a = xevm.blockload2d %a, %base_width_a, %base_height_a, %base_pitch_a, %x, %y + <{elem_size_in_bits=16 
: i32, tile_width=16 : i32, tile_height=16 : i32, v_blocks=1 : i32, transpose=false, + pack_register=true}> : (!llvm.ptr<1>, i32, i32, i32, i32, i32) -> vector<8xi32> + llvm.return %loaded_a : vector<8xi32> +} + +// ----- +// CHECK-LABEL: llvm.func spir_funccc @_Z51intel_sub_group_2d_block_read_transpose_32b_16r8x1cPU3AS1viiiDv2_iPj( +// CHECK-SAME: !llvm.ptr<1> {llvm.nonnull, llvm.readonly}, i32, i32, i32, vector<2xi32>, +// CHECK-SAME: !llvm.ptr {llvm.nonnull, llvm.writeonly}) attributes {no_unwind, will_return} +// CHECK: llvm.func @blockload2d_transpose(%[[ARG0:.*]]: !llvm.ptr<1>, +// CHECK-SAME: %[[ARG1:.*]]: i32, %[[ARG2:.*]]: i32, %[[ARG3:.*]]: i32, %[[ARG4:.*]]: i32, %[[ARG5:.*]]: i32) +llvm.func @blockload2d_transpose(%a: !llvm.ptr<1>, %base_width_a: i32, %base_height_a: i32, %base_pitch_a: i32, %x: i32, %y: i32) -> vector<8xi32> { + // CHECK: %[[VAR0:.*]] = llvm.mlir.undef : vector<2xi32> + // CHECK: %[[VAR1:.*]] = llvm.mlir.constant(0 : i32) : i32 + // CHECK: %[[VAR2:.*]] = llvm.mlir.constant(1 : i32) : i32 + // CHECK: %[[VAR3:.*]] = llvm.insertelement %[[ARG4]], %[[VAR0]][%[[VAR1]] : i32] : vector<2xi32> + // CHECK: %[[VAR4:.*]] = llvm.insertelement %[[ARG5]], %[[VAR3]][%[[VAR2]] : i32] : vector<2xi32> + // CHECK: %[[VAR5:.*]] = llvm.mlir.constant(8 : i32) : i32 + // CHECK: %[[VAR6:.*]] = llvm.alloca %[[VAR5]] x i32 : (i32) -> !llvm.ptr + // CHECK: llvm.call spir_funccc @_Z51intel_sub_group_2d_block_read_transpose_32b_16r8x1cPU3AS1viiiDv2_iPj( + // CHECK-SAME: %[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[VAR4]], %[[VAR6]]) + // CHECK-SAME: {function_type = !llvm.func, i32, i32, i32, vector<2xi32>, ptr)>, + // CHECK-SAME: linkage = #llvm.linkage, no_unwind, sym_name = + // CHECK-SAME: "_Z51intel_sub_group_2d_block_read_transpose_32b_16r8x1cPU3AS1viiiDv2_iPj", visibility_ = 0 : i64, + // CHECK-SAME: will_return} + // CHECK-SAME: (!llvm.ptr<1> {llvm.nonnull, llvm.readonly}, i32, i32, i32, vector<2xi32>, + // CHECK-SAME: !llvm.ptr {llvm.nonnull, 
llvm.writeonly}) -> () + // CHECK: %[[VAR7:.*]] = llvm.load %[[VAR6]] : !llvm.ptr -> vector<8xi32> + %loaded_a = xevm.blockload2d %a, %base_width_a, %base_height_a, %base_pitch_a, %x, %y + <{elem_size_in_bits=32 : i32, tile_width=8 : i32, tile_height=16 : i32, v_blocks=1 : i32, transpose=true, + pack_register=false}> : (!llvm.ptr<1>, i32, i32, i32, i32, i32) -> vector<8xi32> + llvm.return %loaded_a : vector<8xi32> +} + +// ----- +// CHECK-LABEL: llvm.func spir_funccc @_Z42intel_sub_group_2d_block_write_32b_8r16x1cPU3AS1viiiDv2_iPj( +// CHECK-SAME: !llvm.ptr<1> {llvm.nonnull, llvm.writeonly}, i32, i32, i32, vector<2xi32>, +// CHECK-SAME: !llvm.ptr {llvm.nonnull, llvm.readonly}) attributes {no_unwind, will_return} +// CHECK: llvm.func @blockstore2d(%[[ARG0:.*]]: !llvm.ptr<1>, +// CHECK-SAME: %[[ARG1:.*]]: i32, %[[ARG2:.*]]: i32, %[[ARG3:.*]]: i32, %[[ARG4:.*]]: i32, %[[ARG5:.*]]: i32, %[[ARG6:.*]]: vector<8xi32>) { +llvm.func @blockstore2d(%c: !llvm.ptr<1>, %base_width_c: i32, %base_height_c: i32, %base_pitch_c: i32, %x: i32, %y: i32, %c_result_casted: vector<8xi32>) { + // CHECK: %[[VAR0:.*]] = llvm.mlir.undef : vector<2xi32> + // CHECK: %[[VAR1:.*]] = llvm.mlir.constant(0 : i32) : i32 + // CHECK: %[[VAR2:.*]] = llvm.mlir.constant(1 : i32) : i32 + // CHECK: %[[VAR3:.*]] = llvm.insertelement %[[ARG4]], %[[VAR0]][%[[VAR1]] : i32] : vector<2xi32> + // CHECK: %[[VAR4:.*]] = llvm.insertelement %[[ARG5]], %[[VAR3]][%[[VAR2]] : i32] : vector<2xi32> + // CHECK: %[[VAR5:.*]] = llvm.mlir.constant(8 : i32) : i32 + // CHECK: %[[VAR6:.*]] = llvm.alloca %[[VAR5]] x i32 : (i32) -> !llvm.ptr + // CHECK: llvm.store %[[ARG6]], %[[VAR6]] : vector<8xi32>, !llvm.ptr + // CHECK: llvm.call spir_funccc @_Z42intel_sub_group_2d_block_write_32b_8r16x1cPU3AS1viiiDv2_iPj( + // CHECK-SAME: %[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[VAR4]], %[[VAR6]]) + // CHECK-SAME: {function_type = !llvm.func, i32, i32, i32, vector<2xi32>, ptr)>, + // CHECK-SAME: linkage = #llvm.linkage, no_unwind, 
sym_name = + // CHECK-SAME: "_Z42intel_sub_group_2d_block_write_32b_8r16x1cPU3AS1viiiDv2_iPj", visibility_ = 0 : i64, + // CHECK-SAME: will_return} + // CHECK-SAME: : (!llvm.ptr<1> {llvm.nonnull, llvm.writeonly}, i32, i32, i32, vector<2xi32>, + // CHECK-SAME: !llvm.ptr {llvm.nonnull, llvm.readonly}) -> () + xevm.blockstore2d %c, %base_width_c, %base_height_c, %base_pitch_c, %x, %y, %c_result_casted + <{elem_size_in_bits=32 : i32, tile_width=16 : i32, tile_height=8 : i32}> + : (!llvm.ptr<1>, i32, i32, i32, i32, i32, vector<8xi32>) + llvm.return +} + +// ----- +// CHECK-LABEL: llvm.func spir_funccc @_Z42intel_sub_group_2d_block_write_32b_8r16x1cPU3AS1viiiDv2_iPj( +llvm.func @blockstore2d_cache_control(%c: !llvm.ptr<1>, %base_width_c: i32, %base_height_c: i32, %base_pitch_c: i32, %x: i32, %y: i32, %c_result_casted: vector<8xi32>) { + // CHECK: xevm.DecorationCacheControl = + // CHECK-SAME: 6443 : i32, 0 : i32, 2 : i32, 0 : i32 + // CHECK-SAME: 6443 : i32, 1 : i32, 2 : i32, 0 : i32 + xevm.blockstore2d %c, %base_width_c, %base_height_c, %base_pitch_c, %x, %y, %c_result_casted + <{elem_size_in_bits=32 : i32, tile_width=16 : i32, tile_height=8 : i32, cache_control = #xevm.store_cache_control}> + : (!llvm.ptr<1>, i32, i32, i32, i32, i32, vector<8xi32>) + llvm.return +} + +// ----- +// CHECK-LABEL: llvm.func spir_funccc @_Z44intel_sub_group_2d_block_prefetch_8b_8r32x1cPU3AS1viiiDv2_i( +// CHECK-SAME: !llvm.ptr<1> {llvm.nonnull}, i32, i32, i32, vector<2xi32>) attributes +// CHECK-SAME: {memory_effects = #llvm.memory_effects, no_unwind} +// CHECK: llvm.func @blockprefetch2d(%[[ARG0:.*]]: !llvm.ptr<1>, +// CHECK-SAME: %[[ARG1:.*]]: i32, %[[ARG2:.*]]: i32, %[[ARG3:.*]]: i32, %[[ARG4:.*]]: i32, %[[ARG5:.*]]: i32) { +llvm.func @blockprefetch2d(%ptr: !llvm.ptr<1>, %base_width: i32, %base_height: i32, %base_pitch: i32, %x: i32, %y: i32) { + // CHECK: %[[VAR0:.*]] = llvm.mlir.undef : vector<2xi32> + // CHECK: %[[VAR1:.*]] = llvm.mlir.constant(0 : i32) : i32 + // CHECK: %[[VAR2:.*]] 
= llvm.mlir.constant(1 : i32) : i32 + // CHECK: %[[VAR3:.*]] = llvm.insertelement %[[ARG4]], %[[VAR0]][%[[VAR1]] : i32] : vector<2xi32> + // CHECK: %[[VAR4:.*]] = llvm.insertelement %[[ARG5]], %[[VAR3]][%[[VAR2]] : i32] : vector<2xi32> + // CHECK: llvm.call spir_funccc @_Z44intel_sub_group_2d_block_prefetch_8b_8r32x1cPU3AS1viiiDv2_i( + // CHECK-SAME: %[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[VAR4]]) + // CHECK-SAME: {function_type = !llvm.func, i32, i32, i32, vector<2xi32>)>, linkage = #llvm.linkage, + // CHECK-SAME: memory_effects = #llvm.memory_effects, no_unwind, + // CHECK-SAME: sym_name = "_Z44intel_sub_group_2d_block_prefetch_8b_8r32x1cPU3AS1viiiDv2_i", visibility_ = 0 : i64 + xevm.blockprefetch2d %ptr, %base_width, %base_height, %base_pitch, %x, %y + <{elem_size_in_bits=8 : i32, tile_width=32 : i32, tile_height=8 : i32, v_blocks=1 : i32, + cache_control=#xevm.load_cache_control}> + : (!llvm.ptr<1>, i32, i32, i32, i32, i32) + llvm.return +} + +// ----- +// CHECK-LABEL: llvm.func spir_funccc @_Z38intel_sub_group_f16_f16_matrix_mad_k16Dv8_sDv8_iDv8_f( +// CHECK-SAME: vector<8xi16>, vector<8xi32>, vector<8xf32>) -> vector<8xf32> attributes +// CHECK-SAME: {convergent, memory_effects = #llvm.memory_effects, no_unwind, will_return} +// CHECK: llvm.func @mma(%[[ARG0:.*]]: vector<8xf32>, %[[ARG1:.*]]: vector<8xi16>, %[[ARG2:.*]]: vector<8xi32>) -> vector<8xf32> { +llvm.func @mma(%loaded_c_casted: vector<8xf32>, %loaded_a: vector<8xi16>, %loaded_b_casted: vector<8xi32>) -> vector<8xf32> { + // CHECK: %[[VAR0:.*]] = llvm.call spir_funccc @_Z38intel_sub_group_f16_f16_matrix_mad_k16Dv8_sDv8_iDv8_f( + // CHECK-SAME: %[[ARG1]], %[[ARG2]], %[[ARG0]]) {convergent, function_type = + // CHECK-SAME: !llvm.func (vector<8xi16>, vector<8xi32>, vector<8xf32>)>, linkage = #llvm.linkage, + // CHECK-SAME: memory_effects = #llvm.memory_effects, no_unwind, + // CHECK-SAME: sym_name = "_Z38intel_sub_group_f16_f16_matrix_mad_k16Dv8_sDv8_iDv8_f", visibility_ = 0 : i64, will_return} + 
// CHECK-SAME: : (vector<8xi16>, vector<8xi32>, vector<8xf32>) -> vector<8xf32> + %c_result = xevm.mma %loaded_a, %loaded_b_casted, %loaded_c_casted + { shape=, types= } + : (vector<8xi16>, vector<8xi32>, vector<8xf32>) -> vector<8xf32> + llvm.return %c_result : vector<8xf32> +} + +// ----- +// CHECK-LABEL: llvm.func spir_funccc @_Z22atomic_work_item_fenceiii(i32, i32, i32) attributes {no_unwind} +llvm.func @memfence() { + // CHECK: %[[VAR0:.*]] = llvm.mlir.constant(4 : i32) : i32 + // CHECK: %[[VAR1:.*]] = llvm.mlir.constant(1 : i32) : i32 + // CHECK: %[[VAR2:.*]] = llvm.mlir.constant(2 : i32) : i32 + // CHECK: llvm.call spir_funccc @_Z22atomic_work_item_fenceiii(%[[VAR2]], %[[VAR0]], %[[VAR1]]) + // CHECK-SAME: {function_type = !llvm.func, linkage = #llvm.linkage, no_unwind, + // CHECK-SAME: sym_name = "_Z22atomic_work_item_fenceiii", visibility_ = 0 : i64} : (i32, i32, i32) -> () + xevm.memfence <{addrspace=#xevm.addr_space, scope=#xevm.mem_scope}> + llvm.return +} + +// ----- +// CHECK-LABEL: llvm.func spir_funccc @_Z8prefetchPU3AS1Kcm(!llvm.ptr<1>, i64) attributes +// CHECK-SAME: {memory_effects = #llvm.memory_effects, no_unwind} +// CHECK: llvm.func @prefetch(%[[ARG0:.*]]: !llvm.ptr<1>) { +llvm.func @prefetch(%ptr: !llvm.ptr<1>) { + // CHECK: %[[VAR0:.*]] = llvm.mlir.constant(1 : i64) : i64 + // CHECK: llvm.call spir_funccc @_Z8prefetchPU3AS1Kcm(%[[ARG0]], %[[VAR0]]) + // CHECK-SAME: {function_type = !llvm.func, i64)>, linkage = #llvm.linkage, + // CHECK-SAME: memory_effects = #llvm.memory_effects, + // CHECK-SAME: no_unwind, sym_name = "_Z8prefetchPU3AS1Kcm", visibility_ = 0 : i64 + xevm.prefetch %ptr <{cache_control = #xevm.load_cache_control}> : (!llvm.ptr<1>) + llvm.return +} + diff --git a/mlir/test/Dialect/LLVMIR/attach-targets.mlir b/mlir/test/Dialect/LLVMIR/attach-targets.mlir index 83733db400798..d1112f7411aae 100644 --- a/mlir/test/Dialect/LLVMIR/attach-targets.mlir +++ b/mlir/test/Dialect/LLVMIR/attach-targets.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt 
%s --nvvm-attach-target='module=nvvm.* O=3 chip=sm_90' --rocdl-attach-target='module=rocdl.* O=3 chip=gfx90a' | FileCheck %s -// RUN: mlir-opt %s --nvvm-attach-target='module=options.* O=1 chip=sm_70 fast=true ftz=true' --rocdl-attach-target='module=options.* l=file1.bc,file2.bc wave64=false finite-only=true' | FileCheck %s --check-prefix=CHECK_OPTS +// RUN: mlir-opt %s --nvvm-attach-target='module=nvvm.* O=3 chip=sm_90' --rocdl-attach-target='module=rocdl.* O=3 chip=gfx90a' --xevm-attach-target='module=xevm.* O=3 chip=pvc' | FileCheck %s +// RUN: mlir-opt %s --nvvm-attach-target='module=options.* O=1 chip=sm_70 fast=true ftz=true' --rocdl-attach-target='module=options.* l=file1.bc,file2.bc wave64=false finite-only=true' --xevm-attach-target='module=options.* O=1 chip=pvc' | FileCheck %s --check-prefix=CHECK_OPTS module attributes {gpu.container_module} { // Verify the target is appended. @@ -18,12 +18,21 @@ gpu.module @nvvm_module_3 [#nvvm.target] { // CHECK: @rocdl_module [#rocdl.target] { gpu.module @rocdl_module { } +// Verify that other targets are not added as they fail to match the regex, but XeVM does get appended. +// CHECK: @xevm_module [#xevm.target] { +gpu.module @xevm_module { +} // Check the options were added. -// CHECK_OPTS: @options_module_1 [#nvvm.target, #rocdl.target] { +// CHECK_OPTS: @options_module_1 [#nvvm.target, +// CHECK_OPTS-SAME: #rocdl.target, +// CHECK_OPTS-SAME: #xevm.target] { gpu.module @options_module_1 { } // Check the options were added and that the first target was preserved. 
-// CHECK_OPTS: @options_module_2 [#nvvm.target, #nvvm.target, #rocdl.target] { +// CHECK_OPTS: @options_module_2 [#nvvm.target, +// CHECK_OPTS-SAME: #nvvm.target, +// CHECK_OPTS-SAME: #rocdl.target, +// CHECK_OPTS-SAME: #xevm.target] { gpu.module @options_module_2 [#nvvm.target] { } } diff --git a/mlir/test/Dialect/LLVMIR/cse-nvvm.mlir b/mlir/test/Dialect/LLVMIR/cse-nvvm.mlir new file mode 100644 index 0000000000000..8d24c3846f178 --- /dev/null +++ b/mlir/test/Dialect/LLVMIR/cse-nvvm.mlir @@ -0,0 +1,37 @@ +// RUN: mlir-opt %s -cse -split-input-file -verify-diagnostics | FileCheck %s + +// CHECK-LABEL: @nvvm_special_regs_clock +llvm.func @nvvm_special_regs_clock() -> !llvm.struct<(i32, i32)> { + %0 = llvm.mlir.zero: !llvm.struct<(i32, i32)> + // CHECK: {{.*}} = nvvm.read.ptx.sreg.clock + %1 = nvvm.read.ptx.sreg.clock : i32 + // CHECK: {{.*}} = nvvm.read.ptx.sreg.clock + %2 = nvvm.read.ptx.sreg.clock : i32 + %4 = llvm.insertvalue %1, %0[0]: !llvm.struct<(i32, i32)> + %5 = llvm.insertvalue %2, %4[1]: !llvm.struct<(i32, i32)> + llvm.return %5: !llvm.struct<(i32, i32)> +} + +// CHECK-LABEL: @nvvm_special_regs_clock64 +llvm.func @nvvm_special_regs_clock64() -> !llvm.struct<(i64, i64)> { + %0 = llvm.mlir.zero: !llvm.struct<(i64, i64)> + // CHECK: {{.*}} = nvvm.read.ptx.sreg.clock64 + %1 = nvvm.read.ptx.sreg.clock64 : i64 + // CHECK: {{.*}} = nvvm.read.ptx.sreg.clock64 + %2 = nvvm.read.ptx.sreg.clock64 : i64 + %4 = llvm.insertvalue %1, %0[0]: !llvm.struct<(i64, i64)> + %5 = llvm.insertvalue %2, %4[1]: !llvm.struct<(i64, i64)> + llvm.return %5: !llvm.struct<(i64, i64)> +} + +// CHECK-LABEL: @nvvm_special_regs_globaltimer +llvm.func @nvvm_special_regs_globaltimer() -> !llvm.struct<(i64, i64)> { + %0 = llvm.mlir.zero: !llvm.struct<(i64, i64)> + // CHECK: {{.*}} = nvvm.read.ptx.sreg.globaltimer + %1 = nvvm.read.ptx.sreg.globaltimer : i64 + // CHECK: {{.*}} = nvvm.read.ptx.sreg.globaltimer + %2 = nvvm.read.ptx.sreg.globaltimer : i64 + %4 = llvm.insertvalue %1, %0[0]: 
!llvm.struct<(i64, i64)> + %5 = llvm.insertvalue %2, %4[1]: !llvm.struct<(i64, i64)> + llvm.return %5: !llvm.struct<(i64, i64)> +} diff --git a/mlir/test/Dialect/SPIRV/IR/arithmetic-ops.mlir b/mlir/test/Dialect/SPIRV/IR/arithmetic-ops.mlir index d58c27598f2b8..3adafc15c79f6 100644 --- a/mlir/test/Dialect/SPIRV/IR/arithmetic-ops.mlir +++ b/mlir/test/Dialect/SPIRV/IR/arithmetic-ops.mlir @@ -321,6 +321,15 @@ func.func @dot(%arg0: vector<4xf32>, %arg1: vector<4xf32>) -> f32 { // ----- +// CHECK-LABEL: @dot_bf16 +func.func @dot_bf16(%arg0: vector<4xbf16>, %arg1: vector<4xbf16>) -> bf16 { + // CHECK: spirv.Dot %{{.+}}, %{{.+}} : vector<4xbf16> -> bf16 + %0 = spirv.Dot %arg0, %arg1 : vector<4xbf16> -> bf16 + return %0 : bf16 +} + +// ----- + // expected-note @+1 {{prior use here}} func.func @dot(%arg0: vector<4xf32>, %arg1: vector<3xf32>) -> f32 { // expected-error @+1 {{use of value '%arg1' expects different type than prior uses}} @@ -339,7 +348,7 @@ func.func @dot(%arg0: vector<4xf32>, %arg1: vector<4xf32>) -> f16 { // ----- func.func @dot(%arg0: vector<4xi32>, %arg1: vector<4xi32>) -> i32 { - // expected-error @+1 {{'spirv.Dot' op operand #0 must be vector of 16/32/64-bit float values of length 2/3/4/8/16}} + // expected-error @+1 {{'spirv.Dot' op operand #0 must be vector of 16/32/64-bit float or BFloat16 values of length 2/3/4/8/16}} %0 = spirv.Dot %arg0, %arg1 : vector<4xi32> -> i32 return %0 : i32 } diff --git a/mlir/test/Dialect/SPIRV/IR/availability.mlir b/mlir/test/Dialect/SPIRV/IR/availability.mlir index 64ba8e3fc249e..9c8665b1e4bbe 100644 --- a/mlir/test/Dialect/SPIRV/IR/availability.mlir +++ b/mlir/test/Dialect/SPIRV/IR/availability.mlir @@ -234,6 +234,20 @@ func.func @udot_acc_sat_vector_4xi16_i64(%a: vector<4xi16>, %acc: i64) -> i64 { return %r: i64 } +//===----------------------------------------------------------------------===// +// Dot Product op with bfloat16 +//===----------------------------------------------------------------------===// + +// 
CHECK-LABEL: dot_vector_4xbf16_bf16 +func.func @dot_vector_4xbf16_bf16(%a: vector<4xbf16>, %b: vector<4xbf16>) -> bf16 { + // CHECK: min version: v1.0 + // CHECK: max version: v1.6 + // CHECK: extensions: [ [SPV_KHR_bfloat16] ] + // CHECK: capabilities: [ [BFloat16DotProductKHR] ] + %r = spirv.Dot %a, %a: vector<4xbf16> -> bf16 + return %r: bf16 +} + //===----------------------------------------------------------------------===// // Primitive ops //===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/Vector/vector-sink.mlir b/mlir/test/Dialect/Vector/vector-sink.mlir index 900ad99bb4a4c..b826cdca134e6 100644 --- a/mlir/test/Dialect/Vector/vector-sink.mlir +++ b/mlir/test/Dialect/Vector/vector-sink.mlir @@ -514,6 +514,18 @@ func.func @negative_extract_vec_fma(%arg0: vector<4xf32>, %arg1: vector<4xf32>, return %1 : f32 } +// CHECK-LABEL: @negative_extract_dynamic_pos +func.func @negative_extract_dynamic_pos(%arg0: vector<4xf32>, %arg1 : vector<4xf32>, %idx : vector<4xindex>) -> f32 { + // CHECK-NOT: vector.extract + // CHECK: arith.addf %{{.*}}, %{{.*}} : vector<4xf32> + // CHECK: vector.extract + // CHECK: vector.extract + %0 = arith.addf %arg0, %arg1 : vector<4xf32> + %1 = vector.extract %idx[0] : index from vector<4xindex> + %2 = vector.extract %0[%1] : f32 from vector<4xf32> + return %2 : f32 +} + //----------------------------------------------------------------------------- // [Pattern: ExtractOpFromLoad] //----------------------------------------------------------------------------- diff --git a/mlir/test/Dialect/X86Vector/legalize-for-llvm.mlir b/mlir/test/Dialect/X86Vector/legalize-for-llvm.mlir index 63f06624ef897..72dc899f4f0a6 100644 --- a/mlir/test/Dialect/X86Vector/legalize-for-llvm.mlir +++ b/mlir/test/Dialect/X86Vector/legalize-for-llvm.mlir @@ -219,3 +219,19 @@ func.func @avx_dot(%a: vector<8xf32>, %b: vector<8xf32>) -> (vector<8xf32>) %0 = x86vector.avx.intr.dot %a, %b : vector<8xf32> return 
%0 : vector<8xf32> } + +// CHECK-LABEL: func @avx_dot_i8_128 +func.func @avx_dot_i8_128(%w: vector<4xi32>, %a: vector<16xi8>, + %b: vector<16xi8>) -> vector<4xi32> { + // CHECK: llvm.call_intrinsic "llvm.x86.avx2.vpdpbssd.128" + %0 = x86vector.avx.dot.i8 %w, %a, %b : vector<16xi8> -> vector<4xi32> + return %0 : vector<4xi32> +} + +// CHECK-LABEL: func @avx_dot_i8_256 +func.func @avx_dot_i8_256(%w: vector<8xi32>, %a: vector<32xi8>, + %b: vector<32xi8>) -> vector<8xi32> { + // CHECK: llvm.call_intrinsic "llvm.x86.avx2.vpdpbssd.256" + %0 = x86vector.avx.dot.i8 %w, %a, %b : vector<32xi8> -> vector<8xi32> + return %0 : vector<8xi32> +} diff --git a/mlir/test/Dialect/X86Vector/roundtrip.mlir b/mlir/test/Dialect/X86Vector/roundtrip.mlir index 7dcab3eb4dcb8..959177b27c7ea 100644 --- a/mlir/test/Dialect/X86Vector/roundtrip.mlir +++ b/mlir/test/Dialect/X86Vector/roundtrip.mlir @@ -229,3 +229,19 @@ func.func @avx_dot(%a: vector<8xf32>, %b: vector<8xf32>) -> (vector<8xf32>) %0 = x86vector.avx.intr.dot %a, %b : vector<8xf32> return %0 : vector<8xf32> } + +// CHECK-LABEL: func @avx_dot_i8_128 +func.func @avx_dot_i8_128(%w: vector<4xi32>, %a: vector<16xi8>, + %b: vector<16xi8>) -> vector<4xi32> { + // CHECK: x86vector.avx.dot.i8 {{.*}} : vector<16xi8> -> vector<4xi32> + %0 = x86vector.avx.dot.i8 %w, %a, %b : vector<16xi8> -> vector<4xi32> + return %0 : vector<4xi32> +} + +// CHECK-LABEL: func @avx_dot_i8_256 +func.func @avx_dot_i8_256(%w: vector<8xi32>, %a: vector<32xi8>, + %b: vector<32xi8>) -> vector<8xi32> { + // CHECK: x86vector.avx.dot.i8 {{.*}} : vector<32xi8> -> vector<8xi32> + %0 = x86vector.avx.dot.i8 %w, %a, %b : vector<32xi8> -> vector<8xi32> + return %0 : vector<8xi32> +} diff --git a/mlir/test/Target/LLVMIR/omptarget-debug-reduc-fn-loc.mlir b/mlir/test/Target/LLVMIR/omptarget-debug-reduc-fn-loc.mlir new file mode 100644 index 0000000000000..d889ef4f5700c --- /dev/null +++ b/mlir/test/Target/LLVMIR/omptarget-debug-reduc-fn-loc.mlir @@ -0,0 +1,121 @@ +// RUN: 
mlir-translate -mlir-to-llvmir %s | FileCheck %s + +module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true} { + omp.private {type = private} @_QFEi_private_i32 : i32 loc(#loc1) + omp.declare_reduction @add_reduction_i32 : i32 init { + ^bb0(%arg0: i32 loc("test.f90":8:7)): + %0 = llvm.mlir.constant(0 : i32) : i32 loc(#loc2) + omp.yield(%0 : i32) loc(#loc2) + } combiner { + ^bb0(%arg0: i32 loc("test.f90":8:7), %arg1: i32 loc("test.f90":8:7)): + %0 = llvm.add %arg0, %arg1 : i32 loc(#loc2) + omp.yield(%0 : i32) loc(#loc2) + } loc(#loc2) + llvm.func @_QQmain() { + %0 = llvm.mlir.constant(1 : i64) : i64 loc(#loc4) + %1 = llvm.alloca %0 x i32 {bindc_name = "x"} : (i64) -> !llvm.ptr<5> loc(#loc4) + %2 = llvm.addrspacecast %1 : !llvm.ptr<5> to !llvm.ptr loc(#loc4) + %3 = llvm.mlir.constant(1 : i64) : i64 loc(#loc1) + %4 = llvm.alloca %3 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr<5> loc(#loc1) + %5 = llvm.addrspacecast %4 : !llvm.ptr<5> to !llvm.ptr loc(#loc1) + %6 = llvm.mlir.constant(8191 : index) : i64 loc(#loc5) + %7 = llvm.mlir.constant(0 : index) : i64 loc(#loc5) + %8 = llvm.mlir.constant(1 : index) : i64 loc(#loc5) + %9 = llvm.mlir.constant(0 : i32) : i32 loc(#loc5) + %10 = llvm.mlir.constant(8192 : index) : i64 loc(#loc5) + %11 = llvm.mlir.addressof @_QFEarr : !llvm.ptr<1> loc(#loc6) + %12 = llvm.addrspacecast %11 : !llvm.ptr<1> to !llvm.ptr loc(#loc6) + llvm.store %9, %2 : i32, !llvm.ptr loc(#loc7) + %15 = omp.map.info var_ptr(%2 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "x"} loc(#loc4) + %16 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "i"} loc(#loc7) + %17 = omp.map.bounds lower_bound(%7 : i64) upper_bound(%6 : i64) extent(%10 : i64) stride(%8 : i64) start_idx(%8 : i64) loc(#loc7) + %18 = omp.map.info 
var_ptr(%12 : !llvm.ptr, !llvm.array<8192 x i32>) map_clauses(implicit, tofrom) capture(ByRef) bounds(%17) -> !llvm.ptr {name = "arr"} loc(#loc7) + omp.target map_entries(%15 -> %arg0, %16 -> %arg1, %18 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { + %19 = llvm.mlir.constant(8192 : i32) : i32 loc(#loc5) + %20 = llvm.mlir.constant(1 : i32) : i32 loc(#loc5) + %21 = llvm.mlir.constant(8192 : index) : i64 loc(#loc6) + omp.teams reduction(@add_reduction_i32 %arg0 -> %arg3 : !llvm.ptr) { + omp.parallel private(@_QFEi_private_i32 %arg1 -> %arg4 : !llvm.ptr) { + omp.distribute { + omp.wsloop reduction(@add_reduction_i32 %arg3 -> %arg5 : !llvm.ptr) { + omp.loop_nest (%arg6) : i32 = (%20) to (%19) inclusive step (%20) { + llvm.store %arg6, %arg4 : i32, !llvm.ptr loc(#loc2) + %22 = llvm.load %arg5 : !llvm.ptr -> i32 loc(#loc8) + %23 = llvm.load %arg4 : !llvm.ptr -> i32 loc(#loc8) + %34 = llvm.add %22, %23 : i32 loc(#loc8) + llvm.store %34, %arg5 : i32, !llvm.ptr loc(#loc8) + omp.yield loc(#loc2) + } loc(#loc2) + } {omp.composite} loc(#loc2) + } {omp.composite} loc(#loc2) + omp.terminator loc(#loc2) + } {omp.composite} loc(#loc2) + omp.terminator loc(#loc2) + } loc(#loc2) + omp.terminator loc(#loc2) + } loc(#loc13) + llvm.return loc(#loc9) + } loc(#loc12) + llvm.mlir.global internal @_QFEarr() {addr_space = 1 : i32} : !llvm.array<8192 x i32> { + %0 = llvm.mlir.zero : !llvm.array<8192 x i32> loc(#loc6) + llvm.return %0 : !llvm.array<8192 x i32> loc(#loc6) + } loc(#loc6) +} loc(#loc) + +#loc = loc("test.f90":4:18) +#loc1 = loc("test.f90":4:18) +#loc2 = loc("test.f90":8:7) +#loc3 = loc("test.f90":1:7) +#loc4 = loc("test.f90":3:18) +#loc5 = loc(unknown) +#loc6 = loc("test.f90":5:18) +#loc7 = loc("test.f90":6:7) +#loc8 = loc("test.f90":10:7) +#loc9 = loc("test.f90":16:7) + +#di_file = #llvm.di_file<"target7.f90" in ""> +#di_null_type = #llvm.di_null_type +#di_compile_unit = #llvm.di_compile_unit, + sourceLanguage = DW_LANG_Fortran95, file = #di_file, producer = "flang", + 
isOptimized = false, emissionKind = LineTablesOnly> +#di_subroutine_type = #llvm.di_subroutine_type< + callingConvention = DW_CC_program, types = #di_null_type> +#di_subprogram = #llvm.di_subprogram, + compileUnit = #di_compile_unit, scope = #di_file, name = "main", + file = #di_file, subprogramFlags = "Definition|MainSubprogram", + type = #di_subroutine_type> +#di_subprogram1 = #llvm.di_subprogram + + +#loc12 = loc(fused<#di_subprogram>[#loc3]) +#loc13 = loc(fused<#di_subprogram1>[#loc2]) + +// CHECK-DAG: define internal void @_omp_reduction_shuffle_and_reduce_func +// CHECK-NOT: !dbg +// CHECK: } +// CHECK-DAG: define internal void @_omp_reduction_inter_warp_copy_func +// CHECK-NOT: !dbg +// CHECK: } +// CHECK-DAG: define internal void @"__omp_offloading_{{.*}}__QQmain_l8_omp$reduction$reduction_func.1" +// CHECK-NOT: !dbg +// CHECK: } +// CHECK-DAG: define internal void @_omp_reduction_shuffle_and_reduce_func.2 +// CHECK-NOT: !dbg +// CHECK: } +// CHECK-DAG: define internal void @_omp_reduction_inter_warp_copy_func.3 +// CHECK-NOT: !dbg +// CHECK: } +// CHECK-DAG: define internal void @_omp_reduction_list_to_global_copy_func +// CHECK-NOT: !dbg +// CHECK: } +// CHECK-DAG: define internal void @_omp_reduction_list_to_global_reduce_func +// CHECK-NOT: !dbg +// CHECK: } +// CHECK-DAG: define internal void @_omp_reduction_global_to_list_copy_func +// CHECK-NOT: !dbg +// CHECK: } +// CHECK-DAG: define internal void @_omp_reduction_global_to_list_reduce_func +// CHECK-NOT: !dbg +// CHECK: } diff --git a/mlir/test/Target/LLVMIR/openmp-cli-canonical_loop.mlir b/mlir/test/Target/LLVMIR/openmp-cli-canonical_loop.mlir new file mode 100644 index 0000000000000..9abef003d6183 --- /dev/null +++ b/mlir/test/Target/LLVMIR/openmp-cli-canonical_loop.mlir @@ -0,0 +1,175 @@ +// Test lowering of standalone omp.canonical_loop +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s + +// CHECK-LABEL: define void @anon_loop( +// CHECK-SAME: ptr %[[ptr:.+]], +// CHECK-SAME: i32 
%[[tc:.+]]) { +// CHECK-NEXT: br label %omp_omp.loop.preheader +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.preheader: +// CHECK-NEXT: br label %omp_omp.loop.header +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.header: +// CHECK-NEXT: %omp_omp.loop.iv = phi i32 [ 0, %omp_omp.loop.preheader ], [ %omp_omp.loop.next, %omp_omp.loop.inc ] +// CHECK-NEXT: br label %omp_omp.loop.cond +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.cond: +// CHECK-NEXT: %omp_omp.loop.cmp = icmp ult i32 %omp_omp.loop.iv, %[[tc]] +// CHECK-NEXT: br i1 %omp_omp.loop.cmp, label %omp_omp.loop.body, label %omp_omp.loop.exit +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.body: +// CHECK-NEXT: br label %omp.loop.region +// CHECK-EMPTY: +// CHECK-NEXT: omp.loop.region: +// CHECK-NEXT: store float 4.200000e+01, ptr %[[ptr]], align 4 +// CHECK-NEXT: br label %omp.region.cont +// CHECK-EMPTY: +// CHECK-NEXT: omp.region.cont: +// CHECK-NEXT: br label %omp_omp.loop.inc +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.inc: +// CHECK-NEXT: %omp_omp.loop.next = add nuw i32 %omp_omp.loop.iv, 1 +// CHECK-NEXT: br label %omp_omp.loop.header +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.exit: +// CHECK-NEXT: br label %omp_omp.loop.after +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.after: +// CHECK-NEXT: ret void +// CHECK-NEXT: } +llvm.func @anon_loop(%ptr: !llvm.ptr, %tc : i32) -> () { + omp.canonical_loop %iv : i32 in range(%tc) { + %val = llvm.mlir.constant(42.0 : f32) : f32 + llvm.store %val, %ptr : f32, !llvm.ptr + omp.terminator + } + llvm.return +} + + + +// CHECK-LABEL: define void @trivial_loop( +// CHECK-SAME: ptr %[[ptr:.+]], +// CHECK-SAME: i32 %[[tc:.+]]) { +// CHECK-NEXT: br label %omp_omp.loop.preheader +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.preheader: +// CHECK-NEXT: br label %omp_omp.loop.header +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.header: +// CHECK-NEXT: %omp_omp.loop.iv = phi i32 [ 0, %omp_omp.loop.preheader ], [ %omp_omp.loop.next, %omp_omp.loop.inc ] +// CHECK-NEXT: br label 
%omp_omp.loop.cond +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.cond: +// CHECK-NEXT: %omp_omp.loop.cmp = icmp ult i32 %omp_omp.loop.iv, %[[tc]] +// CHECK-NEXT: br i1 %omp_omp.loop.cmp, label %omp_omp.loop.body, label %omp_omp.loop.exit +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.body: +// CHECK-NEXT: br label %omp.loop.region +// CHECK-EMPTY: +// CHECK-NEXT: omp.loop.region: +// CHECK-NEXT: store float 4.200000e+01, ptr %[[ptr]], align 4 +// CHECK-NEXT: br label %omp.region.cont +// CHECK-EMPTY: +// CHECK-NEXT: omp.region.cont: +// CHECK-NEXT: br label %omp_omp.loop.inc +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.inc: +// CHECK-NEXT: %omp_omp.loop.next = add nuw i32 %omp_omp.loop.iv, 1 +// CHECK-NEXT: br label %omp_omp.loop.header +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.exit: +// CHECK-NEXT: br label %omp_omp.loop.after +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.after: +// CHECK-NEXT: ret void +// CHECK-NEXT: } +llvm.func @trivial_loop(%ptr: !llvm.ptr, %tc : i32) -> () { + %cli = omp.new_cli + omp.canonical_loop(%cli) %iv : i32 in range(%tc) { + %val = llvm.mlir.constant(42.0 : f32) : f32 + llvm.store %val, %ptr : f32, !llvm.ptr + omp.terminator + } + llvm.return +} + + +// CHECK-LABEL: define void @nested_loop( +// CHECK-SAME: ptr %[[ptr:.+]], i32 %[[outer_tc:.+]], i32 %[[inner_tc:.+]]) { +// CHECK-NEXT: br label %omp_omp.loop.preheader +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.preheader: +// CHECK-NEXT: br label %omp_omp.loop.header +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.header: +// CHECK-NEXT: %omp_omp.loop.iv = phi i32 [ 0, %omp_omp.loop.preheader ], [ %omp_omp.loop.next, %omp_omp.loop.inc ] +// CHECK-NEXT: br label %omp_omp.loop.cond +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.cond: +// CHECK-NEXT: %omp_omp.loop.cmp = icmp ult i32 %omp_omp.loop.iv, %[[outer_tc]] +// CHECK-NEXT: br i1 %omp_omp.loop.cmp, label %omp_omp.loop.body, label %omp_omp.loop.exit +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.body: +// CHECK-NEXT: br label 
%omp.loop.region +// CHECK-EMPTY: +// CHECK-NEXT: omp.loop.region: +// CHECK-NEXT: br label %omp_omp.loop.preheader1 +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.preheader1: +// CHECK-NEXT: br label %omp_omp.loop.header2 +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.header2: +// CHECK-NEXT: %omp_omp.loop.iv8 = phi i32 [ 0, %omp_omp.loop.preheader1 ], [ %omp_omp.loop.next10, %omp_omp.loop.inc5 ] +// CHECK-NEXT: br label %omp_omp.loop.cond3 +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.cond3: +// CHECK-NEXT: %omp_omp.loop.cmp9 = icmp ult i32 %omp_omp.loop.iv8, %[[inner_tc]] +// CHECK-NEXT: br i1 %omp_omp.loop.cmp9, label %omp_omp.loop.body4, label %omp_omp.loop.exit6 +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.body4: +// CHECK-NEXT: br label %omp.loop.region12 +// CHECK-EMPTY: +// CHECK-NEXT: omp.loop.region12: +// CHECK-NEXT: store float 4.200000e+01, ptr %[[ptr]], align 4 +// CHECK-NEXT: br label %omp.region.cont11 +// CHECK-EMPTY: +// CHECK-NEXT: omp.region.cont11: +// CHECK-NEXT: br label %omp_omp.loop.inc5 +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.inc5: +// CHECK-NEXT: %omp_omp.loop.next10 = add nuw i32 %omp_omp.loop.iv8, 1 +// CHECK-NEXT: br label %omp_omp.loop.header2 +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.exit6: +// CHECK-NEXT: br label %omp_omp.loop.after7 +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.after7: +// CHECK-NEXT: br label %omp.region.cont +// CHECK-EMPTY: +// CHECK-NEXT: omp.region.cont: +// CHECK-NEXT: br label %omp_omp.loop.inc +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.inc: +// CHECK-NEXT: %omp_omp.loop.next = add nuw i32 %omp_omp.loop.iv, 1 +// CHECK-NEXT: br label %omp_omp.loop.header +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.exit: +// CHECK-NEXT: br label %omp_omp.loop.after +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.after: +// CHECK-NEXT: ret void +// CHECK-NEXT: } +llvm.func @nested_loop(%ptr: !llvm.ptr, %outer_tc : i32, %inner_tc : i32) -> () { + %outer_cli = omp.new_cli + %inner_cli = omp.new_cli + 
omp.canonical_loop(%outer_cli) %outer_iv : i32 in range(%outer_tc) { + omp.canonical_loop(%inner_cli) %inner_iv : i32 in range(%inner_tc) { + %val = llvm.mlir.constant(42.0 : f32) : f32 + llvm.store %val, %ptr : f32, !llvm.ptr + omp.terminator + } + omp.terminator + } + llvm.return +} diff --git a/mlir/test/Target/LLVMIR/openmp-cli-unroll-heuristic01.mlir b/mlir/test/Target/LLVMIR/openmp-cli-unroll-heuristic01.mlir new file mode 100644 index 0000000000000..0f0448e15f983 --- /dev/null +++ b/mlir/test/Target/LLVMIR/openmp-cli-unroll-heuristic01.mlir @@ -0,0 +1,56 @@ +// Test lowering of the omp.unroll_heuristic +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s + + +// CHECK-LABEL: define void @unroll_heuristic_trivial_loop( +// CHECK-SAME: ptr %[[ptr:.+]], i32 %[[tc:.+]]) { +// CHECK-NEXT: br label %omp_omp.loop.preheader +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.preheader: +// CHECK-NEXT: br label %omp_omp.loop.header +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.header: +// CHECK-NEXT: %omp_omp.loop.iv = phi i32 [ 0, %omp_omp.loop.preheader ], [ %omp_omp.loop.next, %omp_omp.loop.inc ] +// CHECK-NEXT: br label %omp_omp.loop.cond +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.cond: +// CHECK-NEXT: %omp_omp.loop.cmp = icmp ult i32 %omp_omp.loop.iv, %[[tc]] +// CHECK-NEXT: br i1 %omp_omp.loop.cmp, label %omp_omp.loop.body, label %omp_omp.loop.exit +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.body: +// CHECK-NEXT: br label %omp.loop.region +// CHECK-EMPTY: +// CHECK-NEXT: omp.loop.region: +// CHECK-NEXT: store float 4.200000e+01, ptr %[[ptr]], align 4 +// CHECK-NEXT: br label %omp.region.cont +// CHECK-EMPTY: +// CHECK-NEXT: omp.region.cont: +// CHECK-NEXT: br label %omp_omp.loop.inc +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.inc: +// CHECK-NEXT: %omp_omp.loop.next = add nuw i32 %omp_omp.loop.iv, 1 +// CHECK-NEXT: br label %omp_omp.loop.header, !llvm.loop ![[$MD1:[0-9]+]] +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.exit: +// CHECK-NEXT: br label 
%omp_omp.loop.after +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.after: +// CHECK-NEXT: ret void +// CHECK-NEXT: } +llvm.func @unroll_heuristic_trivial_loop(%ptr: !llvm.ptr, %tc: i32) -> () { + %literal_cli = omp.new_cli + omp.canonical_loop(%literal_cli) %iv : i32 in range(%tc) { + %val = llvm.mlir.constant(42.0 : f32) : f32 + llvm.store %val, %ptr : f32, !llvm.ptr + omp.terminator + } + omp.unroll_heuristic(%literal_cli) + llvm.return +} + + +// Start of metadata +// CHECK-LABEL: !llvm.module.flags + +// CHECK: ![[$MD1]] = distinct !{![[$MD1]], ![[$MD2:[0-9]+]]} +// CHECK: ![[$MD2]] = !{!"llvm.loop.unroll.enable"} diff --git a/mlir/test/Target/LLVMIR/openmp-cli-unroll-heuristic02.mlir b/mlir/test/Target/LLVMIR/openmp-cli-unroll-heuristic02.mlir new file mode 100644 index 0000000000000..f82b4990e378e --- /dev/null +++ b/mlir/test/Target/LLVMIR/openmp-cli-unroll-heuristic02.mlir @@ -0,0 +1,93 @@ +// Test lowering of the omp.unroll_heuristic +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s + + +// CHECK-LABEL: define void @unroll_heuristic_nested_loop( +// CHECK-SAME: ptr %[[ptr:.+]], i32 %[[outer_tc:.+]], i32 %[[inner_tc:.+]]) { +// CHECK-NEXT: br label %omp_omp.loop.preheader +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.preheader: +// CHECK-NEXT: br label %omp_omp.loop.header +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.header: +// CHECK-NEXT: %omp_omp.loop.iv = phi i32 [ 0, %omp_omp.loop.preheader ], [ %omp_omp.loop.next, %omp_omp.loop.inc ] +// CHECK-NEXT: br label %omp_omp.loop.cond +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.cond: +// CHECK-NEXT: %omp_omp.loop.cmp = icmp ult i32 %omp_omp.loop.iv, %[[outer_tc]] +// CHECK-NEXT: br i1 %omp_omp.loop.cmp, label %omp_omp.loop.body, label %omp_omp.loop.exit +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.body: +// CHECK-NEXT: br label %omp.loop.region +// CHECK-EMPTY: +// CHECK-NEXT: omp.loop.region: +// CHECK-NEXT: br label %omp_omp.loop.preheader1 +// CHECK-EMPTY: +// CHECK-NEXT: 
omp_omp.loop.preheader1: +// CHECK-NEXT: br label %omp_omp.loop.header2 +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.header2: +// CHECK-NEXT: %omp_omp.loop.iv8 = phi i32 [ 0, %omp_omp.loop.preheader1 ], [ %omp_omp.loop.next10, %omp_omp.loop.inc5 ] +// CHECK-NEXT: br label %omp_omp.loop.cond3 +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.cond3: +// CHECK-NEXT: %omp_omp.loop.cmp9 = icmp ult i32 %omp_omp.loop.iv8, %[[inner_tc]] +// CHECK-NEXT: br i1 %omp_omp.loop.cmp9, label %omp_omp.loop.body4, label %omp_omp.loop.exit6 +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.body4: +// CHECK-NEXT: br label %omp.loop.region12 +// CHECK-EMPTY: +// CHECK-NEXT: omp.loop.region12: +// CHECK-NEXT: store float 4.200000e+01, ptr %[[ptr]], align 4 +// CHECK-NEXT: br label %omp.region.cont11 +// CHECK-EMPTY: +// CHECK-NEXT: omp.region.cont11: +// CHECK-NEXT: br label %omp_omp.loop.inc5 +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.inc5: +// CHECK-NEXT: %omp_omp.loop.next10 = add nuw i32 %omp_omp.loop.iv8, 1 +// CHECK-NEXT: br label %omp_omp.loop.header2, !llvm.loop ![[$MD1:[0-9]+]] +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.exit6: +// CHECK-NEXT: br label %omp_omp.loop.after7 +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.after7: +// CHECK-NEXT: br label %omp.region.cont +// CHECK-EMPTY: +// CHECK-NEXT: omp.region.cont: +// CHECK-NEXT: br label %omp_omp.loop.inc +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.inc: +// CHECK-NEXT: %omp_omp.loop.next = add nuw i32 %omp_omp.loop.iv, 1 +// CHECK-NEXT: br label %omp_omp.loop.header, !llvm.loop ![[$MD3:[0-9]+]] +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.exit: +// CHECK-NEXT: br label %omp_omp.loop.after +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.after: +// CHECK-NEXT: ret void +// CHECK-NEXT: } +llvm.func @unroll_heuristic_nested_loop(%ptr: !llvm.ptr, %outer_tc: i32, %inner_tc: i32) -> () { + %outer_cli = omp.new_cli + %inner_cli = omp.new_cli + omp.canonical_loop(%outer_cli) %outer_iv : i32 in range(%outer_tc) { + 
omp.canonical_loop(%inner_cli) %inner_iv : i32 in range(%inner_tc) { + %val = llvm.mlir.constant(42.0 : f32) : f32 + llvm.store %val, %ptr : f32, !llvm.ptr + omp.terminator + } + omp.terminator + } + omp.unroll_heuristic(%outer_cli) + omp.unroll_heuristic(%inner_cli) + llvm.return +} + + +// Start of metadata +// CHECK-LABEL: !llvm.module.flags + +// CHECK: ![[$MD1]] = distinct !{![[$MD1]], ![[$MD2:[0-9]+]]} +// CHECK: ![[$MD2]] = !{!"llvm.loop.unroll.enable"} +// CHECK: ![[$MD3]] = distinct !{![[$MD3]], ![[$MD2]]} + diff --git a/mlir/test/Target/LLVMIR/x86vector.mlir b/mlir/test/Target/LLVMIR/x86vector.mlir index d11dc89bdc7c9..74ae2424964b1 100644 --- a/mlir/test/Target/LLVMIR/x86vector.mlir +++ b/mlir/test/Target/LLVMIR/x86vector.mlir @@ -234,3 +234,19 @@ func.func @LLVM_x86_avx_dp_ps_256( %0 = x86vector.avx.intr.dot %a, %b : vector<8xf32> return %0 : vector<8xf32> } + +// CHECK-LABEL: define <4 x i32> @LLVM_x86_avx2_vpdpbssd_128 +func.func @LLVM_x86_avx2_vpdpbssd_128(%w: vector<4xi32>, %a: vector<16xi8>, + %b: vector<16xi8>) -> vector<4xi32> { + // CHECK: call <4 x i32> @llvm.x86.avx2.vpdpbssd.128( + %0 = x86vector.avx.dot.i8 %w, %a, %b : vector<16xi8> -> vector<4xi32> + return %0 : vector<4xi32> +} + +// CHECK-LABEL: define <8 x i32> @LLVM_x86_avx2_vpdpbssd_256 +func.func @LLVM_x86_avx2_vpdpbssd_256(%w: vector<8xi32>, %a: vector<32xi8>, + %b: vector<32xi8>) -> vector<8xi32> { + // CHECK: call <8 x i32> @llvm.x86.avx2.vpdpbssd.256( + %0 = x86vector.avx.dot.i8 %w, %a, %b : vector<32xi8> -> vector<8xi32> + return %0 : vector<8xi32> +} diff --git a/mlir/test/Target/SPIRV/arithmetic-ops.mlir b/mlir/test/Target/SPIRV/arithmetic-ops.mlir index b1ea13c6854fd..b80e17f979daa 100644 --- a/mlir/test/Target/SPIRV/arithmetic-ops.mlir +++ b/mlir/test/Target/SPIRV/arithmetic-ops.mlir @@ -86,4 +86,9 @@ spirv.module Logical GLSL450 requires #spirv.vce { %0 = spirv.VectorTimesScalar %arg0, %arg1 : (vector<4xf32>, f32) -> vector<4xf32> spirv.Return } + spirv.func @dot_bf16(%arg0: 
vector<4xbf16>, %arg1: vector<4xbf16>) "None" { + // CHECK: spirv.Dot %{{.+}}, %{{.+}} : vector<4xbf16> -> bf16 + %0 = spirv.Dot %arg0, %arg1 : vector<4xbf16> -> bf16 + spirv.Return + } } diff --git a/mlir/test/lib/Dialect/GPU/CMakeLists.txt b/mlir/test/lib/Dialect/GPU/CMakeLists.txt index 4ca5974ed5a49..418c884dc03b3 100644 --- a/mlir/test/lib/Dialect/GPU/CMakeLists.txt +++ b/mlir/test/lib/Dialect/GPU/CMakeLists.txt @@ -29,6 +29,7 @@ set(LIBS MLIRTranslateLib MLIRVectorDialect MLIRVectorToLLVMPass + MLIRXeVMDialect ) add_mlir_library(MLIRGPUTestPasses diff --git a/mlir/test/lib/Dialect/Test/TestAttrDefs.td b/mlir/test/lib/Dialect/Test/TestAttrDefs.td index 4d825e2f0a8cc..382da592d0079 100644 --- a/mlir/test/lib/Dialect/Test/TestAttrDefs.td +++ b/mlir/test/lib/Dialect/Test/TestAttrDefs.td @@ -431,4 +431,11 @@ def SlashAttr: Test_Attr<"Slash">{ let hasCustomAssemblyFormat = 1; } +def TestCustomStorageCtorAttr : Test_Attr<"TestCustomStorageCtorAttr"> { + let mnemonic = "custom_storage_ctor_attr"; + let parameters = (ins "int":$value); + let assemblyFormat = "`<` $value `>`"; + let hasStorageCustomConstructor = 1; +} + #endif // TEST_ATTRDEFS diff --git a/mlir/test/lib/Dialect/Test/TestAttributes.cpp b/mlir/test/lib/Dialect/Test/TestAttributes.cpp index 4f6655d0b2978..b31e90fc9ca91 100644 --- a/mlir/test/lib/Dialect/Test/TestAttributes.cpp +++ b/mlir/test/lib/Dialect/Test/TestAttributes.cpp @@ -515,6 +515,18 @@ void SlashAttr::print(AsmPrinter &printer) const { printer << "<" << getLhs() << " / " << getRhs() << ">"; } +//===----------------------------------------------------------------------===// +// TestCustomStorageCtorAttr +//===----------------------------------------------------------------------===// + +test::detail::TestCustomStorageCtorAttrAttrStorage * +test::detail::TestCustomStorageCtorAttrAttrStorage::construct( + mlir::StorageUniquer::StorageAllocator &, std::tuple &&) { + // Note: this tests linker error ("undefined symbol"), the actual + // 
implementation is not important. + return nullptr; +} + //===----------------------------------------------------------------------===// // TestDialect //===----------------------------------------------------------------------===// diff --git a/mlir/test/lib/Dialect/Test/TestTypeDefs.td b/mlir/test/lib/Dialect/Test/TestTypeDefs.td index 03261f37c815d..ea20597231d58 100644 --- a/mlir/test/lib/Dialect/Test/TestTypeDefs.td +++ b/mlir/test/lib/Dialect/Test/TestTypeDefs.td @@ -352,6 +352,13 @@ def TestTypeCustomString : Test_Type<"TestTypeCustomString"> { custom(ref($foo)) `>` }]; } +def TestCustomStorageCtor : Test_Type<"TestCustomStorageCtor"> { + let mnemonic = "custom_storage_ctor_type"; + let parameters = (ins "int":$value); + let assemblyFormat = "`<` $value `>`"; + let hasStorageCustomConstructor = 1; +} + def TestTypeOptionalString : Test_Type<"TestTypeOptionalString"> { let parameters = (ins StringRefParameter<"description", [{"default"}]>:$str); let mnemonic = "optional_type_string"; diff --git a/mlir/test/lib/Dialect/Test/TestTypes.cpp b/mlir/test/lib/Dialect/Test/TestTypes.cpp index 2fc2f90ef6bc0..bea043f56fe21 100644 --- a/mlir/test/lib/Dialect/Test/TestTypes.cpp +++ b/mlir/test/lib/Dialect/Test/TestTypes.cpp @@ -392,6 +392,14 @@ getCustomAssemblyFormatDynamicType(TestDialect *testDialect) { std::move(parser), std::move(printer)); } +test::detail::TestCustomStorageCtorTypeStorage * +test::detail::TestCustomStorageCtorTypeStorage::construct( + mlir::StorageUniquer::StorageAllocator &, std::tuple &&) { + // Note: this tests linker error ("undefined symbol"), the actual + // implementation is not important. 
+ return nullptr; +} + //===----------------------------------------------------------------------===// // TestDialect //===----------------------------------------------------------------------===// diff --git a/mlir/test/mlir-tblgen/attrdefs.td b/mlir/test/mlir-tblgen/attrdefs.td index adec90dc5a371..d47411d6e860a 100644 --- a/mlir/test/mlir-tblgen/attrdefs.td +++ b/mlir/test/mlir-tblgen/attrdefs.td @@ -186,3 +186,16 @@ def I_TestGenMnemonicAliasAttr : TestAttr<"TestGenMnemonicAlias"> { // DEF-NEXT: os << "test_gen_mnemonic_alias"; // DEF-NEXT: return ::mlir::OpAsmAliasResult::OverridableAlias; // DEF-NEXT: } + +def J_CustomStorageCtorAttr : AttrDef { + let attrName = "test_custom_storage_ctor_attr"; + let parameters = (ins "bool":$flag); + let hasStorageCustomConstructor = 1; +} + +// Note: ';' at the end of construct method declaration is important - otherwise +// one cannot provide custom definition + +// DEF-LABEL: struct CustomStorageCtorAttrAttrStorage : public ::mlir::AttributeStorage +// DEF: static CustomStorageCtorAttrAttrStorage *construct +// DEF-SAME: (::mlir::AttributeStorageAllocator &allocator, KeyTy &&tblgenKey); diff --git a/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp b/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp index d9aa901ee2b28..dbae2143b920a 100644 --- a/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp +++ b/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp @@ -668,10 +668,10 @@ void DefGen::emitHashKey() { } void DefGen::emitConstruct() { - Method *construct = storageCls->addMethod( + Method *construct = storageCls->addMethod( strfmt("{0} *", def.getStorageClassName()), "construct", def.hasStorageCustomConstructor() ? 
Method::StaticDeclaration - : Method::Static, + : Method::StaticInline, MethodParameter(strfmt("::mlir::{0}StorageAllocator &", valueType), "allocator"), MethodParameter("KeyTy &&", "tblgenKey")); diff --git a/offload/liboffload/API/Kernel.td b/offload/liboffload/API/Kernel.td index 7cb3016afd597..1e9537452820d 100644 --- a/offload/liboffload/API/Kernel.td +++ b/offload/liboffload/API/Kernel.td @@ -6,25 +6,10 @@ // //===----------------------------------------------------------------------===// // -// This file contains Offload API definitions related to loading and launching -// kernels +// This file contains Offload API definitions related to launching kernels // //===----------------------------------------------------------------------===// -def : Function { - let name = "olGetKernel"; - let desc = "Get a kernel from the function identified by `KernelName` in the given program."; - let details = [ - "Symbol handles are owned by the program and do not need to be manually destroyed." - ]; - let params = [ - Param<"ol_program_handle_t", "Program", "handle of the program", PARAM_IN>, - Param<"const char*", "KernelName", "name of the kernel entry point in the program", PARAM_IN>, - Param<"ol_symbol_handle_t*", "Kernel", "output pointer for the fetched kernel", PARAM_OUT> - ]; - let returns = []; -} - def : Struct { let name = "ol_kernel_launch_size_args_t"; let desc = "Size-related arguments for a kernel launch."; diff --git a/offload/liboffload/API/Symbol.td b/offload/liboffload/API/Symbol.td index cf4d45b09f035..2e94d703809e7 100644 --- a/offload/liboffload/API/Symbol.td +++ b/offload/liboffload/API/Symbol.td @@ -15,5 +15,77 @@ def : Enum { let desc = "The kind of a symbol"; let etors =[ Etor<"KERNEL", "a kernel object">, + Etor<"GLOBAL_VARIABLE", "a global variable">, + ]; +} + +def : Function { + let name = "olGetSymbol"; + let desc = "Get a symbol (kernel or global variable) identified by `Name` in the given program."; + let details = [ + "Symbol handles are 
owned by the program and do not need to be manually destroyed." + ]; + let params = [ + Param<"ol_program_handle_t", "Program", "handle of the program", PARAM_IN>, + Param<"const char*", "Name", "name of the symbol to look up", PARAM_IN>, + Param<"ol_symbol_kind_t", "Kind", "symbol kind to look up", PARAM_IN>, + Param<"ol_symbol_handle_t*", "Symbol", "output pointer for the symbol", PARAM_OUT>, + ]; + let returns = []; +} + +def : Enum { + let name = "ol_symbol_info_t"; + let desc = "Supported symbol info."; + let is_typed = 1; + let etors = [ + TaggedEtor<"KIND", "ol_symbol_kind_t", "The kind of this symbol.">, + TaggedEtor<"GLOBAL_VARIABLE_ADDRESS", "void *", "The address in memory for this global variable.">, + TaggedEtor<"GLOBAL_VARIABLE_SIZE", "size_t", "The size in bytes for this global variable.">, + ]; +} + +def : Function { + let name = "olGetSymbolInfo"; + let desc = "Queries the given property of the symbol."; + let details = [ + "`olGetSymbolInfoSize` can be used to query the storage size " + "required for the given query." + ]; + let params = [ + Param<"ol_symbol_handle_t", "Symbol", "handle of the symbol", PARAM_IN>, + Param<"ol_symbol_info_t", "PropName", "type of the info to retrieve", PARAM_IN>, + Param<"size_t", "PropSize", "the number of bytes pointed to by PropValue.", PARAM_IN>, + TypeTaggedParam<"void*", "PropValue", "array of bytes holding the info. " + "If PropSize is not equal to or greater to the real number of bytes needed to return the info " + "then the OL_ERRC_INVALID_SIZE error is returned and PropValue is not used.", PARAM_OUT, + TypeInfo<"PropName" , "PropSize">> + ]; + let returns = [ + Return<"OL_ERRC_INVALID_SIZE", [ + "`PropSize == 0`", + "If `PropSize` is less than the real number of bytes needed to return the info." + ]>, + Return<"OL_ERRC_SYMBOL_KIND", [ + "If the requested info isn't applicable to the type of symbol." 
+ ]>, + Return<"OL_ERRC_INVALID_SYMBOL"> + ]; +} + +def : Function { + let name = "olGetSymbolInfoSize"; + let desc = "Returns the storage size of the given symbol query."; + let details = []; + let params = [ + Param<"ol_symbol_handle_t", "Symbol", "handle of the symbol", PARAM_IN>, + Param<"ol_symbol_info_t", "PropName", "type of the info to query", PARAM_IN>, + Param<"size_t*", "PropSizeRet", "pointer to the number of bytes required to store the query", PARAM_OUT> + ]; + let returns = [ + Return<"OL_ERRC_INVALID_SYMBOL">, + Return<"OL_ERRC_SYMBOL_KIND", [ + "If the requested info isn't applicable to the type of symbol." + ]>, ]; } diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp index fa5d18c044048..17a2b00cb7140 100644 --- a/offload/liboffload/src/OffloadImpl.cpp +++ b/offload/liboffload/src/OffloadImpl.cpp @@ -91,7 +91,9 @@ struct ol_program_impl_t { struct ol_symbol_impl_t { ol_symbol_impl_t(GenericKernelTy *Kernel) : PluginImpl(Kernel), Kind(OL_SYMBOL_KIND_KERNEL) {} - std::variant PluginImpl; + ol_symbol_impl_t(GlobalTy &&Global) + : PluginImpl(Global), Kind(OL_SYMBOL_KIND_GLOBAL_VARIABLE) {} + std::variant PluginImpl; ol_symbol_kind_t Kind; }; @@ -660,24 +662,6 @@ Error olDestroyProgram_impl(ol_program_handle_t Program) { return olDestroy(Program); } -Error olGetKernel_impl(ol_program_handle_t Program, const char *KernelName, - ol_symbol_handle_t *Kernel) { - - auto &Device = Program->Image->getDevice(); - auto KernelImpl = Device.constructKernel(KernelName); - if (!KernelImpl) - return KernelImpl.takeError(); - - if (auto Err = KernelImpl->init(Device, *Program->Image)) - return Err; - - *Kernel = Program->Symbols - .emplace_back(std::make_unique(&*KernelImpl)) - .get(); - - return Error::success(); -} - Error olLaunchKernel_impl(ol_queue_handle_t Queue, ol_device_handle_t Device, ol_symbol_handle_t Kernel, const void *ArgumentsData, size_t ArgumentsSize, @@ -726,5 +710,90 @@ Error 
olLaunchKernel_impl(ol_queue_handle_t Queue, ol_device_handle_t Device, return Error::success(); } +Error olGetSymbol_impl(ol_program_handle_t Program, const char *Name, + ol_symbol_kind_t Kind, ol_symbol_handle_t *Symbol) { + auto &Device = Program->Image->getDevice(); + + switch (Kind) { + case OL_SYMBOL_KIND_KERNEL: { + auto KernelImpl = Device.constructKernel(Name); + if (!KernelImpl) + return KernelImpl.takeError(); + + if (auto Err = KernelImpl->init(Device, *Program->Image)) + return Err; + + *Symbol = + Program->Symbols + .emplace_back(std::make_unique(&*KernelImpl)) + .get(); + return Error::success(); + } + case OL_SYMBOL_KIND_GLOBAL_VARIABLE: { + GlobalTy GlobalObj{Name}; + if (auto Res = Device.Plugin.getGlobalHandler().getGlobalMetadataFromDevice( + Device, *Program->Image, GlobalObj)) + return Res; + + *Symbol = Program->Symbols + .emplace_back( + std::make_unique(std::move(GlobalObj))) + .get(); + + return Error::success(); + } + default: + return createOffloadError(ErrorCode::INVALID_ENUMERATION, + "getSymbol kind enum '%i' is invalid", Kind); + } +} + +Error olGetSymbolInfoImplDetail(ol_symbol_handle_t Symbol, + ol_symbol_info_t PropName, size_t PropSize, + void *PropValue, size_t *PropSizeRet) { + InfoWriter Info(PropSize, PropValue, PropSizeRet); + + auto CheckKind = [&](ol_symbol_kind_t Required) { + if (Symbol->Kind != Required) { + std::string ErrBuffer; + llvm::raw_string_ostream(ErrBuffer) + << PropName << ": Expected a symbol of Kind " << Required + << " but given a symbol of Kind " << Symbol->Kind; + return Plugin::error(ErrorCode::SYMBOL_KIND, ErrBuffer.c_str()); + } + return Plugin::success(); + }; + + switch (PropName) { + case OL_SYMBOL_INFO_KIND: + return Info.write(Symbol->Kind); + case OL_SYMBOL_INFO_GLOBAL_VARIABLE_ADDRESS: + if (auto Err = CheckKind(OL_SYMBOL_KIND_GLOBAL_VARIABLE)) + return Err; + return Info.write(std::get(Symbol->PluginImpl).getPtr()); + case OL_SYMBOL_INFO_GLOBAL_VARIABLE_SIZE: + if (auto Err = 
CheckKind(OL_SYMBOL_KIND_GLOBAL_VARIABLE)) + return Err; + return Info.write(std::get(Symbol->PluginImpl).getSize()); + default: + return createOffloadError(ErrorCode::INVALID_ENUMERATION, + "olGetSymbolInfo enum '%i' is invalid", PropName); + } + + return Error::success(); +} + +Error olGetSymbolInfo_impl(ol_symbol_handle_t Symbol, ol_symbol_info_t PropName, + size_t PropSize, void *PropValue) { + + return olGetSymbolInfoImplDetail(Symbol, PropName, PropSize, PropValue, + nullptr); +} + +Error olGetSymbolInfoSize_impl(ol_symbol_handle_t Symbol, + ol_symbol_info_t PropName, size_t *PropSizeRet) { + return olGetSymbolInfoImplDetail(Symbol, PropName, 0, nullptr, PropSizeRet); +} + } // namespace offload } // namespace llvm diff --git a/offload/tools/offload-tblgen/PrintGen.cpp b/offload/tools/offload-tblgen/PrintGen.cpp index d1189688a90a3..89d7c820426cf 100644 --- a/offload/tools/offload-tblgen/PrintGen.cpp +++ b/offload/tools/offload-tblgen/PrintGen.cpp @@ -74,8 +74,12 @@ inline void printTagged(llvm::raw_ostream &os, const void *ptr, {0} value, size_ if (Type == "char[]") { OS << formatv(TAB_2 "printPtr(os, (const char*) ptr);\n"); } else { - OS << formatv(TAB_2 "const {0} * const tptr = (const {0} * const)ptr;\n", - Type); + if (Type == "void *") + OS << formatv(TAB_2 "void * const * const tptr = (void * " + "const * const)ptr;\n"); + else + OS << formatv( + TAB_2 "const {0} * const tptr = (const {0} * const)ptr;\n", Type); // TODO: Handle other cases here OS << TAB_2 "os << (const void *)tptr << \" (\";\n"; if (Type.ends_with("*")) { diff --git a/offload/unittests/OffloadAPI/CMakeLists.txt b/offload/unittests/OffloadAPI/CMakeLists.txt index 93e5fd2f6cd26..d76338612210d 100644 --- a/offload/unittests/OffloadAPI/CMakeLists.txt +++ b/offload/unittests/OffloadAPI/CMakeLists.txt @@ -19,7 +19,6 @@ add_offload_unittest("init" target_compile_definitions("init.unittests" PRIVATE DISABLE_WRAPPER) add_offload_unittest("kernel" - kernel/olGetKernel.cpp 
kernel/olLaunchKernel.cpp) add_offload_unittest("memory" @@ -41,3 +40,8 @@ add_offload_unittest("queue" queue/olDestroyQueue.cpp queue/olGetQueueInfo.cpp queue/olGetQueueInfoSize.cpp) + +add_offload_unittest("symbol" + symbol/olGetSymbol.cpp + symbol/olGetSymbolInfo.cpp + symbol/olGetSymbolInfoSize.cpp) diff --git a/offload/unittests/OffloadAPI/common/Fixtures.hpp b/offload/unittests/OffloadAPI/common/Fixtures.hpp index e443d9761f30b..16ff3c4fe60a7 100644 --- a/offload/unittests/OffloadAPI/common/Fixtures.hpp +++ b/offload/unittests/OffloadAPI/common/Fixtures.hpp @@ -91,9 +91,12 @@ struct OffloadPlatformTest : OffloadDeviceTest { // Fixture for a generic program test. If you want a different program, use // offloadQueueTest and create your own program handle with the binary you want. struct OffloadProgramTest : OffloadDeviceTest { - void SetUp() override { + void SetUp() override { SetUpWith("foo"); } + + void SetUpWith(const char *ProgramName) { RETURN_ON_FATAL_FAILURE(OffloadDeviceTest::SetUp()); - ASSERT_TRUE(TestEnvironment::loadDeviceBinary("foo", Device, DeviceBin)); + ASSERT_TRUE( + TestEnvironment::loadDeviceBinary(ProgramName, Device, DeviceBin)); ASSERT_GE(DeviceBin->getBufferSize(), 0lu); ASSERT_SUCCESS(olCreateProgram(Device, DeviceBin->getBufferStart(), DeviceBin->getBufferSize(), &Program)); @@ -113,7 +116,7 @@ struct OffloadProgramTest : OffloadDeviceTest { struct OffloadKernelTest : OffloadProgramTest { void SetUp() override { RETURN_ON_FATAL_FAILURE(OffloadProgramTest::SetUp()); - ASSERT_SUCCESS(olGetKernel(Program, "foo", &Kernel)); + ASSERT_SUCCESS(olGetSymbol(Program, "foo", OL_SYMBOL_KIND_KERNEL, &Kernel)); } void TearDown() override { @@ -123,6 +126,20 @@ struct OffloadKernelTest : OffloadProgramTest { ol_symbol_handle_t Kernel = nullptr; }; +struct OffloadGlobalTest : OffloadProgramTest { + void SetUp() override { + RETURN_ON_FATAL_FAILURE(OffloadProgramTest::SetUpWith("global")); + ASSERT_SUCCESS(olGetSymbol(Program, "global", + 
OL_SYMBOL_KIND_GLOBAL_VARIABLE, &Global)); + } + + void TearDown() override { + RETURN_ON_FATAL_FAILURE(OffloadProgramTest::TearDown()); + } + + ol_symbol_handle_t Global = nullptr; +}; + struct OffloadQueueTest : OffloadDeviceTest { void SetUp() override { RETURN_ON_FATAL_FAILURE(OffloadDeviceTest::SetUp()); diff --git a/offload/unittests/OffloadAPI/device_code/global.c b/offload/unittests/OffloadAPI/device_code/global.c index b30e406fb98c7..9f27f9424324f 100644 --- a/offload/unittests/OffloadAPI/device_code/global.c +++ b/offload/unittests/OffloadAPI/device_code/global.c @@ -1,6 +1,7 @@ #include #include +[[gnu::visibility("default")]] uint32_t global[64]; __gpu_kernel void write() { diff --git a/offload/unittests/OffloadAPI/kernel/olGetKernel.cpp b/offload/unittests/OffloadAPI/kernel/olGetKernel.cpp deleted file mode 100644 index 34870f1fbf0a3..0000000000000 --- a/offload/unittests/OffloadAPI/kernel/olGetKernel.cpp +++ /dev/null @@ -1,38 +0,0 @@ -//===------- Offload API tests - olGetKernel ---------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "../common/Fixtures.hpp" -#include -#include - -using olGetKernelTest = OffloadProgramTest; -OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olGetKernelTest); - -TEST_P(olGetKernelTest, Success) { - ol_symbol_handle_t Kernel = nullptr; - ASSERT_SUCCESS(olGetKernel(Program, "foo", &Kernel)); - ASSERT_NE(Kernel, nullptr); -} - -TEST_P(olGetKernelTest, InvalidNullProgram) { - ol_symbol_handle_t Kernel = nullptr; - ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE, - olGetKernel(nullptr, "foo", &Kernel)); -} - -TEST_P(olGetKernelTest, InvalidNullKernelPointer) { - ASSERT_ERROR(OL_ERRC_INVALID_NULL_POINTER, - olGetKernel(Program, "foo", nullptr)); -} - -// Error code returning from plugin interface not yet supported -TEST_P(olGetKernelTest, InvalidKernelName) { - ol_symbol_handle_t Kernel = nullptr; - ASSERT_ERROR(OL_ERRC_NOT_FOUND, - olGetKernel(Program, "invalid_kernel_name", &Kernel)); -} diff --git a/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp b/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp index acda4795edec2..e7e608f2a64d4 100644 --- a/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp +++ b/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp @@ -40,7 +40,8 @@ struct LaunchKernelTestBase : OffloadQueueTest { struct LaunchSingleKernelTestBase : LaunchKernelTestBase { void SetUpKernel(const char *kernel) { RETURN_ON_FATAL_FAILURE(SetUpProgram(kernel)); - ASSERT_SUCCESS(olGetKernel(Program, kernel, &Kernel)); + ASSERT_SUCCESS( + olGetSymbol(Program, kernel, OL_SYMBOL_KIND_KERNEL, &Kernel)); } ol_symbol_handle_t Kernel = nullptr; @@ -67,7 +68,8 @@ struct LaunchMultipleKernelTestBase : LaunchKernelTestBase { Kernels.resize(kernels.size()); size_t I = 0; for (auto K : kernels) - ASSERT_SUCCESS(olGetKernel(Program, K, &Kernels[I++])); + ASSERT_SUCCESS( + olGetSymbol(Program, K, OL_SYMBOL_KIND_KERNEL, 
&Kernels[I++])); } std::vector Kernels; @@ -223,6 +225,15 @@ TEST_P(olLaunchKernelGlobalTest, Success) { ASSERT_SUCCESS(olMemFree(Mem)); } +TEST_P(olLaunchKernelGlobalTest, InvalidNotAKernel) { + ol_symbol_handle_t Global = nullptr; + ASSERT_SUCCESS( + olGetSymbol(Program, "global", OL_SYMBOL_KIND_GLOBAL_VARIABLE, &Global)); + ASSERT_ERROR( + OL_ERRC_SYMBOL_KIND, + olLaunchKernel(Queue, Device, Global, nullptr, 0, &LaunchArgs, nullptr)); +} + TEST_P(olLaunchKernelGlobalCtorTest, Success) { void *Mem; ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED, diff --git a/offload/unittests/OffloadAPI/memory/olMemcpy.cpp b/offload/unittests/OffloadAPI/memory/olMemcpy.cpp index c1762b451b81d..c1fb6df9bad0d 100644 --- a/offload/unittests/OffloadAPI/memory/olMemcpy.cpp +++ b/offload/unittests/OffloadAPI/memory/olMemcpy.cpp @@ -13,6 +13,32 @@ using olMemcpyTest = OffloadQueueTest; OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olMemcpyTest); +struct olMemcpyGlobalTest : OffloadGlobalTest { + void SetUp() override { + RETURN_ON_FATAL_FAILURE(OffloadGlobalTest::SetUp()); + ASSERT_SUCCESS( + olGetSymbol(Program, "read", OL_SYMBOL_KIND_KERNEL, &ReadKernel)); + ASSERT_SUCCESS( + olGetSymbol(Program, "write", OL_SYMBOL_KIND_KERNEL, &WriteKernel)); + ASSERT_SUCCESS(olCreateQueue(Device, &Queue)); + ASSERT_SUCCESS(olGetSymbolInfo( + Global, OL_SYMBOL_INFO_GLOBAL_VARIABLE_ADDRESS, sizeof(Addr), &Addr)); + + LaunchArgs.Dimensions = 1; + LaunchArgs.GroupSize = {64, 1, 1}; + LaunchArgs.NumGroups = {1, 1, 1}; + + LaunchArgs.DynSharedMemory = 0; + } + + ol_kernel_launch_size_args_t LaunchArgs{}; + void *Addr; + ol_symbol_handle_t ReadKernel; + ol_symbol_handle_t WriteKernel; + ol_queue_handle_t Queue; +}; +OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olMemcpyGlobalTest); + TEST_P(olMemcpyTest, SuccessHtoD) { constexpr size_t Size = 1024; void *Alloc; @@ -105,3 +131,82 @@ TEST_P(olMemcpyTest, SuccessSizeZero) { ASSERT_SUCCESS( olMemcpy(nullptr, Output.data(), Host, Input.data(), Host, 0, 
nullptr)); } + +TEST_P(olMemcpyGlobalTest, SuccessRoundTrip) { + void *SourceMem; + ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED, + 64 * sizeof(uint32_t), &SourceMem)); + uint32_t *SourceData = (uint32_t *)SourceMem; + for (auto I = 0; I < 64; I++) + SourceData[I] = I; + + void *DestMem; + ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED, + 64 * sizeof(uint32_t), &DestMem)); + + ASSERT_SUCCESS(olMemcpy(Queue, Addr, Device, SourceMem, Host, + 64 * sizeof(uint32_t), nullptr)); + ASSERT_SUCCESS(olWaitQueue(Queue)); + ASSERT_SUCCESS(olMemcpy(Queue, DestMem, Host, Addr, Device, + 64 * sizeof(uint32_t), nullptr)); + ASSERT_SUCCESS(olWaitQueue(Queue)); + + uint32_t *DestData = (uint32_t *)DestMem; + for (uint32_t I = 0; I < 64; I++) + ASSERT_EQ(DestData[I], I); + + ASSERT_SUCCESS(olMemFree(DestMem)); + ASSERT_SUCCESS(olMemFree(SourceMem)); +} + +TEST_P(olMemcpyGlobalTest, SuccessWrite) { + void *SourceMem; + ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED, + LaunchArgs.GroupSize.x * sizeof(uint32_t), + &SourceMem)); + uint32_t *SourceData = (uint32_t *)SourceMem; + for (auto I = 0; I < 64; I++) + SourceData[I] = I; + + void *DestMem; + ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED, + LaunchArgs.GroupSize.x * sizeof(uint32_t), + &DestMem)); + struct { + void *Mem; + } Args{DestMem}; + + ASSERT_SUCCESS(olMemcpy(Queue, Addr, Device, SourceMem, Host, + 64 * sizeof(uint32_t), nullptr)); + ASSERT_SUCCESS(olWaitQueue(Queue)); + ASSERT_SUCCESS(olLaunchKernel(Queue, Device, ReadKernel, &Args, sizeof(Args), + &LaunchArgs, nullptr)); + ASSERT_SUCCESS(olWaitQueue(Queue)); + + uint32_t *DestData = (uint32_t *)DestMem; + for (uint32_t I = 0; I < 64; I++) + ASSERT_EQ(DestData[I], I); + + ASSERT_SUCCESS(olMemFree(DestMem)); + ASSERT_SUCCESS(olMemFree(SourceMem)); +} + +TEST_P(olMemcpyGlobalTest, SuccessRead) { + void *DestMem; + ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED, + LaunchArgs.GroupSize.x * sizeof(uint32_t), + &DestMem)); + + 
ASSERT_SUCCESS(olLaunchKernel(Queue, Device, WriteKernel, nullptr, 0, + &LaunchArgs, nullptr)); + ASSERT_SUCCESS(olWaitQueue(Queue)); + ASSERT_SUCCESS(olMemcpy(Queue, DestMem, Host, Addr, Device, + 64 * sizeof(uint32_t), nullptr)); + ASSERT_SUCCESS(olWaitQueue(Queue)); + + uint32_t *DestData = (uint32_t *)DestMem; + for (uint32_t I = 0; I < 64; I++) + ASSERT_EQ(DestData[I], I * 2); + + ASSERT_SUCCESS(olMemFree(DestMem)); +} diff --git a/offload/unittests/OffloadAPI/symbol/olGetSymbol.cpp b/offload/unittests/OffloadAPI/symbol/olGetSymbol.cpp new file mode 100644 index 0000000000000..5e87ab5b29621 --- /dev/null +++ b/offload/unittests/OffloadAPI/symbol/olGetSymbol.cpp @@ -0,0 +1,93 @@ +//===------- Offload API tests - olGetSymbol ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../common/Fixtures.hpp" +#include +#include + +using olGetSymbolKernelTest = OffloadProgramTest; +OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olGetSymbolKernelTest); + +struct olGetSymbolGlobalTest : OffloadQueueTest { + void SetUp() override { + RETURN_ON_FATAL_FAILURE(OffloadQueueTest::SetUp()); + ASSERT_TRUE(TestEnvironment::loadDeviceBinary("global", Device, DeviceBin)); + ASSERT_GE(DeviceBin->getBufferSize(), 0lu); + ASSERT_SUCCESS(olCreateProgram(Device, DeviceBin->getBufferStart(), + DeviceBin->getBufferSize(), &Program)); + } + + void TearDown() override { + if (Program) { + olDestroyProgram(Program); + } + RETURN_ON_FATAL_FAILURE(OffloadQueueTest::TearDown()); + } + + std::unique_ptr DeviceBin; + ol_program_handle_t Program = nullptr; + ol_kernel_launch_size_args_t LaunchArgs{}; +}; +OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olGetSymbolGlobalTest); + +TEST_P(olGetSymbolKernelTest, Success) { + 
ol_symbol_handle_t Kernel = nullptr; + ASSERT_SUCCESS(olGetSymbol(Program, "foo", OL_SYMBOL_KIND_KERNEL, &Kernel)); + ASSERT_NE(Kernel, nullptr); +} + +TEST_P(olGetSymbolKernelTest, InvalidNullProgram) { + ol_symbol_handle_t Kernel = nullptr; + ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE, + olGetSymbol(nullptr, "foo", OL_SYMBOL_KIND_KERNEL, &Kernel)); +} + +TEST_P(olGetSymbolKernelTest, InvalidNullKernelPointer) { + ASSERT_ERROR(OL_ERRC_INVALID_NULL_POINTER, + olGetSymbol(Program, "foo", OL_SYMBOL_KIND_KERNEL, nullptr)); +} + +TEST_P(olGetSymbolKernelTest, InvalidKernelName) { + ol_symbol_handle_t Kernel = nullptr; + ASSERT_ERROR(OL_ERRC_NOT_FOUND, olGetSymbol(Program, "invalid_kernel_name", + OL_SYMBOL_KIND_KERNEL, &Kernel)); +} + +TEST_P(olGetSymbolKernelTest, InvalidKind) { + ol_symbol_handle_t Kernel = nullptr; + ASSERT_ERROR( + OL_ERRC_INVALID_ENUMERATION, + olGetSymbol(Program, "foo", OL_SYMBOL_KIND_FORCE_UINT32, &Kernel)); +} + +TEST_P(olGetSymbolGlobalTest, Success) { + ol_symbol_handle_t Global = nullptr; + ASSERT_SUCCESS( + olGetSymbol(Program, "global", OL_SYMBOL_KIND_GLOBAL_VARIABLE, &Global)); + ASSERT_NE(Global, nullptr); +} + +TEST_P(olGetSymbolGlobalTest, InvalidNullProgram) { + ol_symbol_handle_t Global = nullptr; + ASSERT_ERROR( + OL_ERRC_INVALID_NULL_HANDLE, + olGetSymbol(nullptr, "global", OL_SYMBOL_KIND_GLOBAL_VARIABLE, &Global)); +} + +TEST_P(olGetSymbolGlobalTest, InvalidNullGlobalPointer) { + ASSERT_ERROR( + OL_ERRC_INVALID_NULL_POINTER, + olGetSymbol(Program, "global", OL_SYMBOL_KIND_GLOBAL_VARIABLE, nullptr)); +} + +TEST_P(olGetSymbolGlobalTest, InvalidGlobalName) { + ol_symbol_handle_t Global = nullptr; + ASSERT_ERROR(OL_ERRC_NOT_FOUND, + olGetSymbol(Program, "invalid_global", + OL_SYMBOL_KIND_GLOBAL_VARIABLE, &Global)); +} diff --git a/offload/unittests/OffloadAPI/symbol/olGetSymbolInfo.cpp b/offload/unittests/OffloadAPI/symbol/olGetSymbolInfo.cpp new file mode 100644 index 0000000000000..ed8f4716974cd --- /dev/null +++ 
b/offload/unittests/OffloadAPI/symbol/olGetSymbolInfo.cpp @@ -0,0 +1,93 @@ +//===------- Offload API tests - olGetSymbolInfo --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +#include "../common/Fixtures.hpp" + +using olGetSymbolInfoKernelTest = OffloadKernelTest; +OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olGetSymbolInfoKernelTest); + +using olGetSymbolInfoGlobalTest = OffloadGlobalTest; +OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olGetSymbolInfoGlobalTest); + +TEST_P(olGetSymbolInfoKernelTest, SuccessKind) { + ol_symbol_kind_t RetrievedKind; + ASSERT_SUCCESS(olGetSymbolInfo(Kernel, OL_SYMBOL_INFO_KIND, + sizeof(RetrievedKind), &RetrievedKind)); + ASSERT_EQ(RetrievedKind, OL_SYMBOL_KIND_KERNEL); +} + +TEST_P(olGetSymbolInfoGlobalTest, SuccessKind) { + ol_symbol_kind_t RetrievedKind; + ASSERT_SUCCESS(olGetSymbolInfo(Global, OL_SYMBOL_INFO_KIND, + sizeof(RetrievedKind), &RetrievedKind)); + ASSERT_EQ(RetrievedKind, OL_SYMBOL_KIND_GLOBAL_VARIABLE); +} + +TEST_P(olGetSymbolInfoKernelTest, InvalidAddress) { + void *RetrievedAddr; + ASSERT_ERROR(OL_ERRC_SYMBOL_KIND, + olGetSymbolInfo(Kernel, OL_SYMBOL_INFO_GLOBAL_VARIABLE_ADDRESS, + sizeof(RetrievedAddr), &RetrievedAddr)); +} + +TEST_P(olGetSymbolInfoGlobalTest, SuccessAddress) { + void *RetrievedAddr = nullptr; + ASSERT_SUCCESS(olGetSymbolInfo(Global, OL_SYMBOL_INFO_GLOBAL_VARIABLE_ADDRESS, + sizeof(RetrievedAddr), &RetrievedAddr)); + ASSERT_NE(RetrievedAddr, nullptr); +} + +TEST_P(olGetSymbolInfoKernelTest, InvalidSize) { + size_t RetrievedSize; + ASSERT_ERROR(OL_ERRC_SYMBOL_KIND, + olGetSymbolInfo(Kernel, OL_SYMBOL_INFO_GLOBAL_VARIABLE_SIZE, + sizeof(RetrievedSize), &RetrievedSize)); +} + +TEST_P(olGetSymbolInfoGlobalTest, SuccessSize) 
{ + size_t RetrievedSize = 0; + ASSERT_SUCCESS(olGetSymbolInfo(Global, OL_SYMBOL_INFO_GLOBAL_VARIABLE_SIZE, + sizeof(RetrievedSize), &RetrievedSize)); + ASSERT_EQ(RetrievedSize, 64 * sizeof(uint32_t)); +} + +TEST_P(olGetSymbolInfoKernelTest, InvalidNullHandle) { + ol_symbol_kind_t RetrievedKind; + ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE, + olGetSymbolInfo(nullptr, OL_SYMBOL_INFO_KIND, + sizeof(RetrievedKind), &RetrievedKind)); +} + +TEST_P(olGetSymbolInfoKernelTest, InvalidSymbolInfoEnumeration) { + ol_symbol_kind_t RetrievedKind; + ASSERT_ERROR(OL_ERRC_INVALID_ENUMERATION, + olGetSymbolInfo(Kernel, OL_SYMBOL_INFO_FORCE_UINT32, + sizeof(RetrievedKind), &RetrievedKind)); +} + +TEST_P(olGetSymbolInfoKernelTest, InvalidSizeZero) { + ol_symbol_kind_t RetrievedKind; + ASSERT_ERROR(OL_ERRC_INVALID_SIZE, + olGetSymbolInfo(Kernel, OL_SYMBOL_INFO_KIND, 0, &RetrievedKind)); +} + +TEST_P(olGetSymbolInfoKernelTest, InvalidSizeSmall) { + ol_symbol_kind_t RetrievedKind; + ASSERT_ERROR(OL_ERRC_INVALID_SIZE, + olGetSymbolInfo(Kernel, OL_SYMBOL_INFO_KIND, + sizeof(RetrievedKind) - 1, &RetrievedKind)); +} + +TEST_P(olGetSymbolInfoKernelTest, InvalidNullPointerPropValue) { + ol_symbol_kind_t RetrievedKind; + ASSERT_ERROR(OL_ERRC_INVALID_NULL_POINTER, + olGetSymbolInfo(Kernel, OL_SYMBOL_INFO_KIND, + sizeof(RetrievedKind), nullptr)); +} diff --git a/offload/unittests/OffloadAPI/symbol/olGetSymbolInfoSize.cpp b/offload/unittests/OffloadAPI/symbol/olGetSymbolInfoSize.cpp new file mode 100644 index 0000000000000..ec011865cc6ad --- /dev/null +++ b/offload/unittests/OffloadAPI/symbol/olGetSymbolInfoSize.cpp @@ -0,0 +1,60 @@ +//===------- Offload API tests - olGetSymbolInfoSize ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +#include "../common/Fixtures.hpp" + +using olGetSymbolInfoSizeKernelTest = OffloadKernelTest; +OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olGetSymbolInfoSizeKernelTest); + +using olGetSymbolInfoSizeGlobalTest = OffloadGlobalTest; +OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olGetSymbolInfoSizeGlobalTest); + +TEST_P(olGetSymbolInfoSizeKernelTest, SuccessKind) { + size_t Size = 0; + ASSERT_SUCCESS(olGetSymbolInfoSize(Kernel, OL_SYMBOL_INFO_KIND, &Size)); + ASSERT_EQ(Size, sizeof(ol_symbol_kind_t)); +} + +TEST_P(olGetSymbolInfoSizeGlobalTest, SuccessKind) { + size_t Size = 0; + ASSERT_SUCCESS(olGetSymbolInfoSize(Global, OL_SYMBOL_INFO_KIND, &Size)); + ASSERT_EQ(Size, sizeof(ol_symbol_kind_t)); +} + +TEST_P(olGetSymbolInfoSizeGlobalTest, SuccessAddress) { + size_t Size = 0; + ASSERT_SUCCESS(olGetSymbolInfoSize( + Global, OL_SYMBOL_INFO_GLOBAL_VARIABLE_ADDRESS, &Size)); + ASSERT_EQ(Size, sizeof(void *)); +} + +TEST_P(olGetSymbolInfoSizeGlobalTest, SuccessSize) { + size_t Size = 0; + ASSERT_SUCCESS( + olGetSymbolInfoSize(Global, OL_SYMBOL_INFO_GLOBAL_VARIABLE_SIZE, &Size)); + ASSERT_EQ(Size, sizeof(size_t)); +} + +TEST_P(olGetSymbolInfoSizeKernelTest, InvalidNullHandle) { + size_t Size = 0; + ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE, + olGetSymbolInfoSize(nullptr, OL_SYMBOL_INFO_KIND, &Size)); +} + +TEST_P(olGetSymbolInfoSizeKernelTest, InvalidSymbolInfoEnumeration) { + size_t Size = 0; + ASSERT_ERROR(OL_ERRC_INVALID_ENUMERATION, + olGetSymbolInfoSize(Kernel, OL_SYMBOL_INFO_FORCE_UINT32, &Size)); +} + +TEST_P(olGetSymbolInfoSizeKernelTest, InvalidNullPointer) { + ASSERT_ERROR(OL_ERRC_INVALID_NULL_POINTER, + olGetSymbolInfoSize(Kernel, OL_SYMBOL_INFO_KIND, nullptr)); +} diff --git a/third-party/siphash/include/siphash/SipHash.h b/third-party/siphash/include/siphash/SipHash.h index 9653e9428b123..ca4fe45e4fddf 100644 
--- a/third-party/siphash/include/siphash/SipHash.h +++ b/third-party/siphash/include/siphash/SipHash.h @@ -104,25 +104,24 @@ void siphash(const unsigned char *in, uint64_t inlen, switch (left) { case 7: b |= ((uint64_t)ni[6]) << 48; - /* FALLTHRU */ + [[fallthrough]]; case 6: b |= ((uint64_t)ni[5]) << 40; - /* FALLTHRU */ + [[fallthrough]]; case 5: b |= ((uint64_t)ni[4]) << 32; - /* FALLTHRU */ + [[fallthrough]]; case 4: b |= ((uint64_t)ni[3]) << 24; - /* FALLTHRU */ + [[fallthrough]]; case 3: b |= ((uint64_t)ni[2]) << 16; - /* FALLTHRU */ + [[fallthrough]]; case 2: b |= ((uint64_t)ni[1]) << 8; - /* FALLTHRU */ + [[fallthrough]]; case 1: b |= ((uint64_t)ni[0]); - /* FALLTHRU */ break; case 0: break; diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index a80d8d430ec5c..d259f391069a4 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -2083,11 +2083,10 @@ libc_support_library( name = "expxf16", hdrs = ["src/math/generic/expxf16.h"], deps = [ - ":__support_cpp_array", ":__support_fputil_cast", ":__support_fputil_fp_bits", ":__support_fputil_nearest_integer", - ":__support_fputil_polyeval", + ":__support_math_expf16_utils", ], ) @@ -2117,6 +2116,39 @@ libc_support_library( ], ) +libc_support_library( + name = "__support_math_expf16_utils", + hdrs = ["src/__support/math/expf16_utils.h"], + deps = [ + ":__support_cpp_array", + ":__support_fputil_nearest_integer", + ":__support_fputil_polyeval", + ":__support_macros_attributes", + ":llvm_libc_macros_float16_macros" + ], +) + +libc_support_library( + name = "__support_math_expf16", + hdrs = ["src/__support/math/expf16.h"], + deps = [ + ":__support_common", + ":__support_libc_errno", + ":__support_cpp_array", + ":__support_fputil_cast", + ":__support_fputil_except_value_utils", + ":__support_fputil_fenv_impl", + ":__support_fputil_fp_bits", + ":__support_fputil_multiply_add", + 
":__support_fputil_nearest_integer", + ":__support_fputil_polyeval", + ":__support_fputil_rounding_mode", + ":__support_macros_optimization", + ":__support_math_expf16_utils", + ":llvm_libc_macros_float16_macros" + ], +) + ############################### complex targets ################################ libc_function( @@ -2722,6 +2754,7 @@ libc_math_function( libc_math_function( name = "expf16", additional_deps = [ + ":__support_math_expf16", ":expxf16", ], ) diff --git a/utils/bazel/llvm-project-overlay/llvm/config.bzl b/utils/bazel/llvm-project-overlay/llvm/config.bzl index ba9db05c651a7..2309175d04a9b 100644 --- a/utils/bazel/llvm-project-overlay/llvm/config.bzl +++ b/utils/bazel/llvm-project-overlay/llvm/config.bzl @@ -112,6 +112,8 @@ llvm_config_defines = os_defines + builtin_thread_pointer + select({ "LLVM_VERSION_MINOR={}".format(LLVM_VERSION_MINOR), "LLVM_VERSION_PATCH={}".format(LLVM_VERSION_PATCH), r'LLVM_VERSION_STRING=\"{}\"'.format(PACKAGE_VERSION), + # Set globally in HandleLLVMOptions.cmake + "EXPERIMENTAL_KEY_INSTRUCTIONS", # These shouldn't be needed by the C++11 standard, but are for some # platforms (e.g. glibc < 2.18. See # https://sourceware.org/bugzilla/show_bug.cgi?id=15366). 
These are also diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 046ff102cda59..37c865ca4c4ca 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -5435,6 +5435,7 @@ cc_library( ":Transforms", ":VCIXToLLVMIRTranslation", ":VectorDialect", + ":XeVMDialect", ":config", "//llvm:Core", "//llvm:MC", diff --git a/utils/bazel/llvm_configs/abi-breaking.h.cmake b/utils/bazel/llvm_configs/abi-breaking.h.cmake index 318bd015f80d2..2d27e02b1d545 100644 --- a/utils/bazel/llvm_configs/abi-breaking.h.cmake +++ b/utils/bazel/llvm_configs/abi-breaking.h.cmake @@ -12,9 +12,6 @@ #ifndef LLVM_ABI_BREAKING_CHECKS_H #define LLVM_ABI_BREAKING_CHECKS_H -// Compiler.h is required for LLVM_ABI definition. -#include "llvm/Support/Compiler.h" - /* Define to enable checks that alter the LLVM C++ ABI */ #cmakedefine01 LLVM_ENABLE_ABI_BREAKING_CHECKS @@ -46,12 +43,12 @@ #endif namespace llvm { #if LLVM_ENABLE_ABI_BREAKING_CHECKS -LLVM_ABI extern int EnableABIBreakingChecks; +extern int EnableABIBreakingChecks; LLVM_HIDDEN_VISIBILITY __attribute__((weak)) int *VerifyEnableABIBreakingChecks = &EnableABIBreakingChecks; #else -LLVM_ABI extern int DisableABIBreakingChecks; +extern int DisableABIBreakingChecks; LLVM_HIDDEN_VISIBILITY __attribute__((weak)) int *VerifyDisableABIBreakingChecks = &DisableABIBreakingChecks;